In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [3]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. Later cells move the model and batches onto `device`.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [4]:
# Per-channel mean / std handed to transforms.Normalize in both pipelines.
# NOTE(review): these look like the commonly quoted MNIST statistics —
# confirm they are appropriate for the EMNIST "letters" split.
NORM_MEAN = (0.1307,)
NORM_STD = (0.3081,)

# Training pipeline: random rotation of up to 10 degrees and random shifts of
# up to 10% of the image size, followed by tensor conversion + normalisation.
train_transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion + normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Arguments shared by the train and test datasets (downloaded into ./data
# when not already present).
emnist_kwargs = dict(root='./data', split='letters', download=True)

train_dataset = datasets.EMNIST(train=True, transform=train_transform, **emnist_kwargs)
test_dataset = datasets.EMNIST(train=False, transform=test_transform, **emnist_kwargs)

# Loader settings common to both splits; only the training split is shuffled.
loader_kwargs = dict(batch_size=64, num_workers=2)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


100%|██████████| 562M/562M [00:09<00:00, 61.7MB/s]


In [5]:
class OptimizedLetterCNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture (spatial sizes quoted for 28x28 inputs):
      * two conv-BN-ReLU layers with 32 channels, 2x2 max-pool  (28 -> 14)
      * two conv-BN-ReLU layers with 64 channels, 2x2 max-pool  (14 -> 7)
      * one conv-BN-ReLU layer with 128 channels, adaptive average pool to 3x3
      * three fully connected layers: 1152 -> 512 -> 128 -> num_classes

    The same ``Dropout(p=0.5)`` module is applied after each pooling stage
    and after the first two fully connected layers.

    Args:
        num_classes: Width of the final linear layer (default 26).
    """

    def __init__(self, num_classes=26):
        super().__init__()

        # Stage 1 convolutions (1 -> 32 -> 32 channels)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        # Stage 2 convolutions (32 -> 64 -> 64 channels)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)

        # Stage 3 convolution (64 -> 128 channels)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(128)

        # Down-sampling: halving max-pool for stages 1-2, and an adaptive
        # average pool that always yields a 3x3 map for stage 3.
        self.pool = nn.MaxPool2d(2, 2)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((3, 3))

        # One dropout module reused at every regularisation point
        self.dropout = nn.Dropout(0.5)

        # Classifier head; the input width matches the 128 x 3 x 3 feature map
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, num_classes)``."""
        # Stage 1: conv-BN-ReLU twice, then pool and drop
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = F.relu(bn(conv(x)))
        x = self.dropout(self.pool(x))

        # Stage 2: same pattern with the 64-channel layers
        for conv, bn in ((self.conv3, self.bn3), (self.conv4, self.bn4)):
            x = F.relu(bn(conv(x)))
        x = self.dropout(self.pool(x))

        # Stage 3: single conv layer, adaptive pooling to 3x3, then drop
        x = F.relu(self.bn5(self.conv5(x)))
        x = self.dropout(self.adaptive_pool(x))

        # Flatten to (batch, 128*3*3) and run the hidden dense layers
        x = x.view(-1, self.fc1.in_features)
        for dense in (self.fc1, self.fc2):
            x = self.dropout(F.relu(dense(x)))

        # Final layer has no activation: callers receive raw logits
        return self.fc3(x)


In [6]:
# Build the network and move its parameters to the selected device
model = OptimizedLetterCNN()
model = model.to(device)

# Classification loss applied to the raw logits returned by the model
criterion = nn.CrossEntropyLoss()

# AdamW with decoupled weight decay
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-2,
)

# One-cycle learning-rate schedule, stepped once per batch by train_epoch.
# Its length is fixed here at 20 epochs * len(train_loader) batches, so the
# training loop that uses it must not run for more epochs than that.
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer=optimizer,
    max_lr=1e-2,
    epochs=20,
    steps_per_epoch=len(train_loader),
)

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, scheduler, device):
    """Run one optimisation pass over ``train_loader``.

    For every batch the labels are shifted down by one (so 1-based labels
    become 0-based class indices, e.g. 0-25), a forward/backward pass is
    performed, and both the optimizer and the scheduler are stepped.

    Args:
        model: Network to train; switched to training mode.
        train_loader: Sized iterable yielding ``(data, target)`` batches.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Optimizer updating the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch loss values and ``accuracy`` is the percentage of
        correctly classified samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device) - 1  # shift labels to start at 0

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()  # per-batch scheduling

        # Bookkeeping for the epoch-level metrics
        loss_sum += batch_loss.item()
        n_seen += labels.size(0)
        n_correct += (logits.argmax(dim=1) == labels).sum().item()

    accuracy = 100 * n_correct / n_seen
    avg_loss = loss_sum / len(train_loader)
    return avg_loss, accuracy

def validate_epoch(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Labels are shifted down by one before use, mirroring the training pass.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        test_loader: Sized iterable yielding ``(data, target)`` batches.
        criterion: Loss callable taking ``(output, target)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch loss values and ``accuracy`` is the percentage of
        correctly classified samples.
    """
    model.eval()
    loss_total = 0
    hits = 0
    seen = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device) - 1  # shift labels to start at 0

            logits = model(inputs)
            loss_total += criterion(logits, labels).item()

            seen += labels.size(0)
            hits += (logits.argmax(dim=1) == labels).sum().item()

    accuracy = 100 * hits / seen
    avg_loss = loss_total / len(test_loader)
    return avg_loss, accuracy


In [None]:
# 20-epoch training run: train, evaluate, record metrics, and checkpoint the
# weights whenever the test accuracy improves.
num_epochs = 20
best_accuracy = 0
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

print("Starting training...")
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, scheduler, device)
    test_loss, test_acc = validate_epoch(model, test_loader, criterion, device)

    # Append this epoch's numbers to the matching history list
    for history, value in (
        (train_losses, train_loss),
        (train_accuracies, train_acc),
        (test_losses, test_loss),
        (test_accuracies, test_acc),
    ):
        history.append(value)

    # Save the weights of the best-scoring epoch seen so far
    if test_acc > best_accuracy:
        best_accuracy = test_acc
        torch.save(model.state_dict(), 'best_emnist_model.pth')

    print(f'Epoch {epoch+1:2d}: Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}% | Loss: {test_loss:.4f}')

print(f'\nBest Test Accuracy: {best_accuracy:.2f}%')

# Two side-by-side panels: loss curves on the left, accuracy curves on the right
plt.figure(figsize=(12, 4))
panels = (
    (1, train_losses, 'Train Loss', test_losses, 'Test Loss', 'Training Loss', 'Loss'),
    (2, train_accuracies, 'Train Accuracy', test_accuracies, 'Test Accuracy', 'Training Accuracy', 'Accuracy (%)'),
)
for position, train_curve, train_label, test_curve, test_label, title, y_label in panels:
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label=train_label)
    plt.plot(test_curve, label=test_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
plt.show()

# Restore the best checkpoint and report its accuracy on the test loader
best_state = torch.load('best_emnist_model.pth')
model.load_state_dict(best_state)
final_test_loss, final_test_acc = validate_epoch(model, test_loader, criterion, device)
print(f'Final Test Accuracy: {final_test_acc:.2f}%')

Starting training...
Epoch  1: Train Acc: 57.66% | Test Acc: 88.45% | Loss: 0.3565
Epoch  2: Train Acc: 80.27% | Test Acc: 91.46% | Loss: 0.2671
Epoch  3: Train Acc: 82.85% | Test Acc: 91.75% | Loss: 0.2488
Epoch  4: Train Acc: 82.51% | Test Acc: 92.14% | Loss: 0.2352
Epoch  5: Train Acc: 80.95% | Test Acc: 91.57% | Loss: 0.2678
Epoch  6: Train Acc: 80.11% | Test Acc: 91.28% | Loss: 0.2859
Epoch  7: Train Acc: 80.87% | Test Acc: 90.94% | Loss: 0.2676
Epoch  8: Train Acc: 81.47% | Test Acc: 91.83% | Loss: 0.2731
Epoch  9: Train Acc: 82.46% | Test Acc: 91.38% | Loss: 0.2639
Epoch 10: Train Acc: 83.52% | Test Acc: 92.24% | Loss: 0.2547
Epoch 11: Train Acc: 84.77% | Test Acc: 93.07% | Loss: 0.2261
Epoch 12: Train Acc: 85.71% | Test Acc: 92.31% | Loss: 0.2467
Epoch 13: Train Acc: 87.07% | Test Acc: 93.23% | Loss: 0.2160
Epoch 14: Train Acc: 88.23% | Test Acc: 93.15% | Loss: 0.2163
Epoch 15: Train Acc: 89.20% | Test Acc: 93.43% | Loss: 0.1988
Epoch 16: Train Acc: 90.00% | Test Acc: 93.97% | 

In [None]:
# Longer training run (50 epochs).
#
# The OneCycleLR scheduler created in the setup cell is sized for
# 20 epochs * len(train_loader) steps and raises ValueError once it is stepped
# beyond that. Reusing it here would therefore fail (immediately if the
# 20-epoch run already consumed it, otherwise at epoch 21). This cell already
# resets the metric histories and best_accuracy, i.e. it is a new experiment,
# so it also starts from freshly initialised weights with an optimizer and a
# schedule sized from this cell's num_epochs.
num_epochs = 50
best_accuracy = 0
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

model = OptimizedLetterCNN().to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.01,
    epochs=num_epochs,  # keep the schedule length in sync with the loop below
    steps_per_epoch=len(train_loader)
)

print("Starting training...")
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, scheduler, device)
    test_loss, test_acc = validate_epoch(model, test_loader, criterion, device)

    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    # Checkpoint whenever the test accuracy improves.
    # NOTE(review): the checkpoint is selected on the test split, so the
    # reported "best" accuracy is optimistic; a held-out validation split
    # would give an unbiased estimate.
    if test_acc > best_accuracy:
        best_accuracy = test_acc
        torch.save(model.state_dict(), 'best_emnist_model.pth')

    print(f'Epoch {epoch+1:2d}: Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}% | Loss: {test_loss:.4f}')

print(f'\nBest Test Accuracy: {best_accuracy:.2f}%')

# Plot results: loss curves on the left, accuracy curves on the right
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(test_losses, label='Test Loss')
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(test_accuracies, label='Test Accuracy')
plt.title('Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.show()

# Load best model for inference; map_location keeps the load working when the
# checkpoint was written on a different device than the current one.
model.load_state_dict(torch.load('best_emnist_model.pth', map_location=device))
final_test_loss, final_test_acc = validate_epoch(model, test_loader, criterion, device)
print(f'Final Test Accuracy: {final_test_acc:.2f}%')