In [1]:
import os
import shutil
import random

def split_dataset(source_dir, train_dir, test_dir, test_ratio=0.2, seed=None):
    """Copy a class-per-folder dataset into separate train and test trees.

    Every sub-directory of ``source_dir`` is treated as one class. Its regular
    files are shuffled, the first ``int(n * (1 - test_ratio))`` are copied to
    ``train_dir/<class>`` and the remainder to ``test_dir/<class>``. Source
    files are copied, never moved or deleted.

    Args:
        source_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training split (created if missing).
        test_dir: Destination root for the test split (created if missing).
        test_ratio: Fraction of each class assigned to the test split, in [0, 1].
        seed: Optional seed for a private random generator. With a seed the
            split is reproducible, so re-running writes the same files to the
            same split. With ``None`` the module-level ``random`` state is used.

    Raises:
        ValueError: If ``test_ratio`` is outside [0, 1].

    Note:
        Existing files in the destination folders are not removed. Re-running
        with a different (or no) seed can therefore leave the same image in
        both splits; start from empty output folders when changing the seed.
    """
    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio!r}")

    # A private generator keeps a seeded split independent of global state.
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # os.listdir order is arbitrary; sorting makes a seeded shuffle repeatable.
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Only regular files can be copied with shutil.copy; skip nested folders.
        images = [
            name
            for name in sorted(os.listdir(class_path))
            if os.path.isfile(os.path.join(class_path, name))
        ]
        shuffle(images)

        split_idx = int(len(images) * (1 - test_ratio))
        train_images = images[:split_idx]
        test_images = images[split_idx:]

        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        for img in train_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(train_class_dir, img))
        for img in test_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(test_class_dir, img))

# Build the on-disk 80/20 train/test split consumed by the cells below.
split_dataset(
    source_dir="/home/u/CS 411/archive",
    train_dir="dataset/train",
    test_dir="dataset/test",
    test_ratio=0.2,
)

In [2]:
import os
import numpy as np
from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

def load_images_from_folder(folder, image_size=(28, 28)):
    """Load a class-per-folder image tree as flattened grayscale vectors.

    Each sub-directory name of ``folder`` is used as the label for the files
    inside it. Every file is opened with PIL, converted to mode ``'L'``,
    resized to ``image_size`` and flattened to a 1-D array.

    Args:
        folder: Root directory containing one sub-directory per label.
        image_size: Target size passed to ``Image.resize``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks one flattened image per
        row and ``y`` holds the matching label strings. Both are empty arrays
        when no image could be loaded.

    Files that cannot be read as images are skipped with a warning rather than
    aborting the whole load.
    """
    import warnings  # local import so the function does not depend on other cells

    X, y = [], []
    # Sorted listings give a stable sample order across runs and machines.
    for label in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            continue
        for filename in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, filename)
            try:
                # The context manager closes the file handle once pixels are read.
                with Image.open(img_path) as img:
                    img_array = np.array(img.convert('L').resize(image_size)).flatten()
            except Exception as exc:
                # Best-effort load, but KeyboardInterrupt/SystemExit now propagate
                # and every skipped file is reported instead of vanishing silently.
                warnings.warn(f"Skipping unreadable image {img_path!r}: {exc}")
                continue
            X.append(img_array)
            y.append(label)
    return np.array(X), np.array(y)

# Load the flattened grayscale pixels produced by the train/test split on disk.
X_train, y_train = load_images_from_folder("dataset/train")
X_test, y_test = load_images_from_folder("dataset/test")

# Baseline: 3-nearest-neighbours on raw pixel values.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Precision is undefined for classes that never get predicted; zero_division=0
# reports 0.0 for them without flooding the output with metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))


Accuracy: 0.6044815007816571
              precision    recall  f1-score   support

           +       0.19      0.79      0.31        19
           0       0.75      0.95      0.84       928
           2       0.51      0.76      0.61       671
           3       0.73      0.84      0.78       603
           4       0.52      0.49      0.51       416
           5       0.65      0.67      0.66       260
           6       0.86      0.83      0.85       432
           7       0.72      0.54      0.62       448
           8       0.95      0.56      0.70       396
           9       0.81      0.62      0.70       447
           B       0.62      0.73      0.67       404
       Delta       0.78      0.74      0.76        19
           E       0.70      0.56      0.62       112
           R       0.56      0.48      0.52       262
           a       0.85      0.79      0.82      1220
       alpha       0.81      0.79      0.80       409
        beta       0.84      0.57      0.68       11

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
# Scale pixel intensities to [0, 1] for the RBF SVM.
# The dtype guard makes this cell idempotent: after the first run the arrays are
# floating point, so re-running the cell does not divide by 255 a second time.
# Assumes the freshly loaded arrays are integer-typed (uint8 for mode 'L' images)
# - TODO confirm if the loader changes.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255.0

svm = SVC(kernel='rbf', C=1.0, gamma='scale')
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Precision is undefined for classes that never get predicted; zero_division=0
# reports 0.0 for them without flooding the output with metric warnings.
print(classification_report(y_test, y_pred, zero_division=0))


Accuracy: 0.861490359562272
              precision    recall  f1-score   support

           +       0.00      0.00      0.00        19
           0       0.95      0.99      0.97       928
           2       0.85      0.88      0.86       671
           3       0.93      0.93      0.93       603
           4       0.81      0.57      0.67       416
           5       0.85      0.80      0.82       260
           6       0.99      0.98      0.98       432
           7       0.97      0.66      0.78       448
           8       0.98      0.97      0.98       396
           9       0.94      0.95      0.94       447
           B       0.93      0.84      0.88       404
       Delta       1.00      0.89      0.94        19
           E       0.91      0.82      0.86       112
           R       0.93      0.78      0.85       262
           a       0.96      0.97      0.96      1220
       alpha       0.94      0.93      0.93       409
        beta       1.00      0.97      0.99       114

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest on the same flattened pixel features.
# A fixed random_state makes the reported accuracy reproducible between runs;
# without it every execution trains a different forest.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9328295987493486


In [8]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

# Preprocessing applied to every image: single channel, 28x28, float tensor.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize(size=(28, 28)),
        transforms.ToTensor(),
    ]
)

# ImageFolder derives class labels from the sub-directory names.
train_data = datasets.ImageFolder(root='dataset/train', transform=transform)
test_data = datasets.ImageFolder(root='dataset/test', transform=transform)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64)

# Prefer the GPU when one is visible to PyTorch.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

class SimpleCNN(nn.Module):
    """Two-convolution classifier for single-channel 28x28 images.

    Layout: conv(1->16, 3x3) -> ReLU -> 2x2 max-pool -> conv(16->32, 3x3)
    -> ReLU -> 2x2 max-pool -> flatten -> linear(800->128) -> ReLU
    -> linear(128->num_classes). The forward pass returns raw logits.
    """

    def __init__(self, num_classes):
        """Create the layers; ``num_classes`` sets the width of the output layer."""
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        # 28 -> 26 (conv) -> 13 (pool) -> 11 (conv) -> 5 (pool): 32 maps of 5x5.
        self.fc1 = nn.Linear(in_features=32 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Map a batch of shape (N, 1, 28, 28) to logits of shape (N, num_classes)."""
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        flat = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# One output logit per class folder discovered by ImageFolder.
model = SimpleCNN(num_classes=len(train_data.classes))
model = model.to(device)

# Cross-entropy on raw logits, optimised with Adam at a 1e-3 learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Train the CNN, reporting mean loss and training accuracy after every epoch.
for epoch in range(500):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        # One forward pass per batch: the same logits feed both the loss and the
        # accuracy count (previously the model was evaluated twice per batch).
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # loss is a batch mean, so weight it by batch size before averaging.
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    avg_loss = running_loss / len(train_loader.dataset)
    accuracy = 100. * correct / total
    print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}, Accuracy = {accuracy:.2f}%")

Epoch 1: Loss = 2.3000, Accuracy = 45.07%
Epoch 2: Loss = 1.5035, Accuracy = 62.97%
Epoch 3: Loss = 1.2900, Accuracy = 68.06%
Epoch 4: Loss = 1.1526, Accuracy = 71.65%
Epoch 5: Loss = 1.0480, Accuracy = 74.03%
Epoch 6: Loss = 0.9727, Accuracy = 76.00%
Epoch 7: Loss = 0.9105, Accuracy = 77.54%
Epoch 8: Loss = 0.8669, Accuracy = 78.64%
Epoch 9: Loss = 0.8283, Accuracy = 79.44%
Epoch 10: Loss = 0.8001, Accuracy = 80.19%
Epoch 11: Loss = 0.7685, Accuracy = 81.07%
Epoch 12: Loss = 0.7498, Accuracy = 81.60%
Epoch 13: Loss = 0.7253, Accuracy = 82.08%
Epoch 14: Loss = 0.7090, Accuracy = 82.46%
Epoch 15: Loss = 0.6911, Accuracy = 82.94%
Epoch 16: Loss = 0.6760, Accuracy = 83.29%
Epoch 17: Loss = 0.6638, Accuracy = 83.66%
Epoch 18: Loss = 0.6481, Accuracy = 83.86%
Epoch 19: Loss = 0.6370, Accuracy = 84.24%
Epoch 20: Loss = 0.6273, Accuracy = 84.37%
Epoch 21: Loss = 0.6158, Accuracy = 84.59%
Epoch 22: Loss = 0.6100, Accuracy = 84.94%
Epoch 23: Loss = 0.5969, Accuracy = 85.09%
Epoch 24: Loss = 0.5

KeyboardInterrupt: 

In [9]:
from torchvision import models

# Start from a pretrained ResNet-18 and adapt both ends to this dataset.
model = models.resnet18(pretrained=True)

# Replace the stem convolution so the network accepts 1-channel input; this new
# layer is freshly initialised.
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

# Swap the classification head for one sized to the number of class folders.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=len(train_data.classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)

# Fine-tune the ResNet, reporting mean loss and training accuracy per epoch.
for epoch in range(200):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss is a batch mean, so weight it by batch size before averaging.
        running_loss += images.size(0) * loss.item()
        total += labels.size(0)
        # Index of the largest logit per row is the predicted class.
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader.dataset)
    accuracy = 100. * correct / total
    print(f"ResNet Epoch {epoch+1}: Loss = {avg_loss:.4f}, Accuracy = {accuracy:.2f}%")



ResNet Epoch 1: Loss = 1.3846, Accuracy = 66.66%
ResNet Epoch 2: Loss = 0.7807, Accuracy = 80.14%
ResNet Epoch 3: Loss = 0.6406, Accuracy = 83.54%
ResNet Epoch 4: Loss = 0.5634, Accuracy = 85.56%
ResNet Epoch 5: Loss = 0.5169, Accuracy = 86.76%
ResNet Epoch 6: Loss = 0.4822, Accuracy = 87.82%
ResNet Epoch 7: Loss = 0.4543, Accuracy = 88.68%
ResNet Epoch 8: Loss = 0.4383, Accuracy = 88.89%
ResNet Epoch 9: Loss = 0.4209, Accuracy = 89.47%
ResNet Epoch 10: Loss = 0.4042, Accuracy = 89.96%
ResNet Epoch 11: Loss = 0.3957, Accuracy = 90.18%
ResNet Epoch 12: Loss = 0.3796, Accuracy = 90.71%
ResNet Epoch 13: Loss = 0.3732, Accuracy = 90.79%
ResNet Epoch 14: Loss = 0.3700, Accuracy = 91.03%
ResNet Epoch 15: Loss = 0.3650, Accuracy = 91.21%
ResNet Epoch 16: Loss = 0.3503, Accuracy = 91.60%
ResNet Epoch 17: Loss = 0.3565, Accuracy = 91.33%
ResNet Epoch 18: Loss = 0.3478, Accuracy = 91.75%
ResNet Epoch 19: Loss = 0.3384, Accuracy = 92.06%
ResNet Epoch 20: Loss = 0.3350, Accuracy = 92.13%
ResNet Ep

KeyboardInterrupt: 