In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset

In [None]:
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

# ---------------------------
# Dataset + DataLoaders
# ---------------------------
data_dir = "processed_cropped"
full_dataset = datasets.ImageFolder(root=data_dir, transform=train_transforms)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

val_dataset.dataset.transform = val_transforms

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Test dataset
test_dataset = datasets.ImageFolder(root="processed_cropped_test", transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# ---------------------------
# CNN
# ---------------------------
class EnhancedCNN(nn.Module):
    def __init__(self, num_classes):
        super(EnhancedCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256*8*8, 512), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

num_classes = len(os.listdir(data_dir))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EnhancedCNN(num_classes=num_classes).to(device)

# ---------------------------
# Loss + Optimizer + Scheduler
# ---------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)

# ---------------------------
# Training Loop
# ---------------------------
num_epochs = 50
best_val_acc = 0.0

for epoch in range(num_epochs):
    # Training
    model.train()
    running_loss, running_corrects = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

    epoch_loss = running_loss / train_size
    epoch_acc = running_corrects.double() / train_size

    # Validation
    model.eval()
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

    val_acc = val_corrects.double() / val_size
    scheduler.step(val_acc)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_hand_cnn_model.pth")

    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc:.4f} | Val Acc: {val_acc:.4f}")

print("-"*30)
print(f"Best Validation Accuracy achieved: {best_val_acc:.4f}")
print("Model saved at: best_hand_cnn_model.pth")

# ---------------------------
# Test Accuracy
# ---------------------------
model.load_state_dict(torch.load("best_hand_cnn_model.pth"))
model.eval()
test_corrects = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        test_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

test_acc = test_corrects.double() / len(test_dataset)
print(f"Test Accuracy: {test_acc:.4f}")




Epoch 1/50 | Loss: 3.9015 | Train Acc: 0.0760 | Val Acc: 0.1762
Epoch 2/50 | Loss: 2.6930 | Train Acc: 0.1793 | Val Acc: 0.3502
Epoch 3/50 | Loss: 2.1266 | Train Acc: 0.3124 | Val Acc: 0.6564
Epoch 4/50 | Loss: 1.7150 | Train Acc: 0.4146 | Val Acc: 0.7423
Epoch 5/50 | Loss: 1.4637 | Train Acc: 0.4967 | Val Acc: 0.7489
Epoch 6/50 | Loss: 1.2963 | Train Acc: 0.5399 | Val Acc: 0.8084
Epoch 7/50 | Loss: 1.1863 | Train Acc: 0.5848 | Val Acc: 0.8623
Epoch 8/50 | Loss: 1.0718 | Train Acc: 0.6193 | Val Acc: 0.8800
Epoch 9/50 | Loss: 0.9709 | Train Acc: 0.6499 | Val Acc: 0.8590
Epoch 10/50 | Loss: 0.9280 | Train Acc: 0.6672 | Val Acc: 0.8877
Epoch 11/50 | Loss: 0.9070 | Train Acc: 0.6658 | Val Acc: 0.9020
Epoch 12/50 | Loss: 0.8305 | Train Acc: 0.6871 | Val Acc: 0.9020
Epoch 13/50 | Loss: 0.7822 | Train Acc: 0.7050 | Val Acc: 0.9196
Epoch 14/50 | Loss: 0.7689 | Train Acc: 0.7240 | Val Acc: 0.9009
Epoch 15/50 | Loss: 0.7358 | Train Acc: 0.7157 | Val Acc: 0.9306
Epoch 16/50 | Loss: 0.6984 | Train

  model.load_state_dict(torch.load("best_hand_cnn_model.pth"))


Test Accuracy: 0.9934


In [None]:
data_dir = "../data/processed/processed_cropped_mp"

models_dir = os.path.join("..", "models")
os.makedirs(models_dir, exist_ok=True)

# ---------------------------
# Transforms
# ---------------------------
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# ---------------------------
# Dataset
# ---------------------------
full_dataset = datasets.ImageFolder(root=data_dir, transform=train_transforms)
targets = full_dataset.targets
num_classes = len(full_dataset.classes)

all_indices = list(range(len(full_dataset)))

# Train / Test split (80 / 20)
train_indices, test_indices = train_test_split(
    all_indices,
    test_size=0.2,
    random_state=42,
    stratify=targets
)

# Train / Val split (90 / 10 of train)
train_indices, val_indices = train_test_split(
    train_indices,
    test_size=0.1,
    random_state=42,
    stratify=[targets[i] for i in train_indices]
)

train_dataset = Subset(full_dataset, train_indices)
val_dataset   = Subset(full_dataset, val_indices)
test_dataset  = Subset(full_dataset, test_indices)

# Change transforms for val & test
val_dataset.dataset.transform  = val_transforms
test_dataset.dataset.transform = test_transforms

# ---------------------------
# DataLoaders
# ---------------------------
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=32, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False, num_workers=2)

# ---------------------------
# CNN Model
# ---------------------------
class EnhancedCNN(nn.Module):
    def __init__(self, num_classes):
        super(EnhancedCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# ---------------------------
# Device
# ---------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EnhancedCNN(num_classes).to(device)

# ---------------------------
# Loss / Optimizer / Scheduler
# ---------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5, verbose=True
)

# ---------------------------
# Training Loop
# ---------------------------
num_epochs = 50
best_val_acc = 0.0
model_path = os.path.join(models_dir, "best_hand_cnn_model.pth")

for epoch in range(num_epochs):
    # ---- Train ----
    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc  = running_corrects.double() / len(train_dataset)

    # ---- Validation ----
    model.eval()
    val_corrects = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

    val_acc = val_corrects.double() / len(val_dataset)
    scheduler.step(val_acc)

    # ---- Save best model ----
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), model_path)

    print(f"Epoch {epoch+1:02}/{num_epochs} | "
        f"Loss: {epoch_loss:.4f} | "
        f"Train Acc: {epoch_acc:.4f} | "
        f"Val Acc: {val_acc:.4f}")

print("-" * 40)
print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Model saved at: {model_path}")

# ---------------------------
# Test Accuracy
# ---------------------------
model.load_state_dict(torch.load(model_path))
model.eval()

test_corrects = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        test_corrects += torch.sum(torch.argmax(outputs, 1) == labels)

test_acc = test_corrects.double() / len(test_dataset)
print(f"Test Accuracy: {test_acc:.4f}")




Epoch 01/50 | Loss: 3.7520 | Train Acc: 0.0820 | Val Acc: 0.1731
Epoch 02/50 | Loss: 2.7251 | Train Acc: 0.1805 | Val Acc: 0.3626
Epoch 03/50 | Loss: 2.2638 | Train Acc: 0.2729 | Val Acc: 0.5659
Epoch 04/50 | Loss: 1.8666 | Train Acc: 0.3848 | Val Acc: 0.6648
Epoch 05/50 | Loss: 1.6070 | Train Acc: 0.4561 | Val Acc: 0.7500
Epoch 06/50 | Loss: 1.4245 | Train Acc: 0.4995 | Val Acc: 0.8077
Epoch 07/50 | Loss: 1.3195 | Train Acc: 0.5197 | Val Acc: 0.8132
Epoch 08/50 | Loss: 1.2414 | Train Acc: 0.5607 | Val Acc: 0.8434
Epoch 09/50 | Loss: 1.1611 | Train Acc: 0.5788 | Val Acc: 0.8709
Epoch 10/50 | Loss: 1.1682 | Train Acc: 0.5794 | Val Acc: 0.8379
Epoch 11/50 | Loss: 1.0688 | Train Acc: 0.6139 | Val Acc: 0.8984
Epoch 12/50 | Loss: 0.9853 | Train Acc: 0.6338 | Val Acc: 0.9011
Epoch 13/50 | Loss: 0.9259 | Train Acc: 0.6571 | Val Acc: 0.9203
Epoch 14/50 | Loss: 0.9391 | Train Acc: 0.6519 | Val Acc: 0.9341
Epoch 15/50 | Loss: 0.8645 | Train Acc: 0.6718 | Val Acc: 0.9176
Epoch 16/50 | Loss: 0.875

  model.load_state_dict(torch.load(model_path))


âœ… Test Accuracy: 0.9582
