In [2]:
import os
import shutil
import random

def split_dataset(source_dir, train_dir, test_dir, test_ratio=0.2):
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        images = os.listdir(class_path)
        random.shuffle(images)

        split_idx = int(len(images) * (1 - test_ratio))
        train_images = images[:split_idx]
        test_images = images[split_idx:]

        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        for img in train_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(train_class_dir, img))
        for img in test_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

split_dataset("/Users/ravanryj/Desktop/team-00-project/Hezi_Jiang_411/archive", "dataset/train", "dataset/test", test_ratio=0.2)

In [3]:
import os
import numpy as np
from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

def load_images_from_folder(folder, image_size=(28, 28)):
    X, y = [], []
    for label in os.listdir(folder):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            continue
        for filename in os.listdir(label_path):
            img_path = os.path.join(label_path, filename)
            try:
                img = Image.open(img_path).convert('L')
                img = img.resize(image_size)
                img_array = np.array(img).flatten()
                X.append(img_array)
                y.append(label)
            except:
                continue
    return np.array(X), np.array(y)

X_train, y_train = load_images_from_folder("dataset/train")
X_test, y_test = load_images_from_folder("dataset/test")

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.5755654804783172
              precision    recall  f1-score   support

           +       0.22      0.94      0.35        16
           0       0.77      0.95      0.85       804
           2       0.39      0.65      0.49       371
           3       0.62      0.72      0.67       341
           4       0.29      0.26      0.27       230
           5       0.45      0.41      0.43       146
           6       0.86      0.84      0.85       386
           7       0.72      0.55      0.62       389
           8       0.94      0.59      0.73       343
           9       0.79      0.63      0.70       379
           B       0.49      0.64      0.56       220
       Delta       0.86      0.71      0.77        17
           E       0.57      0.43      0.49        69
           R       0.53      0.47      0.50       228
           a       0.84      0.79      0.82      1068
       alpha       0.65      0.56      0.60       224
        beta       0.48      0.60      0.53        9

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
X_train = X_train / 255.0
X_test = X_test / 255.0

svm = SVC(kernel='rbf', C=1.0, gamma='scale')
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.8232243192623542
              precision    recall  f1-score   support

           +       0.00      0.00      0.00        16
           0       0.94      0.99      0.96       804
           2       0.77      0.84      0.80       371
           3       0.87      0.89      0.88       341
           4       0.73      0.47      0.58       230
           5       0.76      0.72      0.74       146
           6       0.98      0.99      0.98       386
           7       0.95      0.66      0.78       389
           8       0.97      0.98      0.98       343
           9       0.91      0.95      0.93       379
           B       0.90      0.76      0.83       220
       Delta       1.00      0.88      0.94        17
           E       0.88      0.62      0.73        69
           R       0.92      0.79      0.85       228
           a       0.91      0.97      0.94      1068
       alpha       0.84      0.74      0.79       224
        beta       1.00      0.97      0.98        9

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.8799164385535225


In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

transform = transforms.Compose([
    transforms.Grayscale(), 
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

train_data = datasets.ImageFolder('dataset/train', transform=transform)
test_data = datasets.ImageFolder('dataset/test', transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, 1)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        self.fc1 = nn.Linear(32 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN(num_classes=len(train_data.classes)).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for epoch in range(5):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} complete.")


In [None]:
from torchvision import models

model = models.resnet18(pretrained=True)
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_data.classes))
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

for epoch in range(5):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
    print(f"ResNet Epoch {epoch+1} complete.")

