# Step 1: Notebook Setup

# EMNIST Letters + Digits Classification with CNN, Dropout, and Batch Normalization
In this notebook:
- We'll train a CNN on the EMNIST dataset (uppercase, lowercase, and digits).
- We'll use **Dropout** to reduce overfitting and **Batch Normalization** to speed up convergence.
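- The dataset will be downloaded to `./emnist_data` (relative to the notebook's working directory). If the notebook lives in a OneDrive-synced folder, point `data_path` at a non-synced location such as `C:/emnist_data` to avoid sync issues.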

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device


device(type='cpu')

# Step 2: Data Loading

### Data Loading
We'll use `datasets.EMNIST` with the `byclass` split to get:
- Uppercase letters
- Lowercase letters
- Digits  
This gives **62 classes** in total.


In [2]:
# Preprocessing pipeline: convert to a tensor, then normalize pixels to [-1, 1]
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

# Directory the EMNIST files are downloaded to and read from
data_path = "./emnist_data"

# Training portion of the 'byclass' split
train_dataset = datasets.EMNIST(
    root=data_path,
    split='byclass',
    train=True,
    transform=transform,
    download=True,
)

# Held-out test portion of the same split
test_dataset = datasets.EMNIST(
    root=data_path,
    split='byclass',
    train=False,
    transform=transform,
    download=True,
)

# Batch iterators; num_workers=0 keeps loading in the main process (set for Windows).
# Only the training batches are shuffled.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=128, num_workers=0, shuffle=True
)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=128, num_workers=0, shuffle=False
)

len(train_dataset), len(test_dataset)


100.0%


(697932, 116323)

# Step 3: CNN Model with Dropout + BatchNorm

### CNN Model
We'll use:
- **Conv2d + BatchNorm + ReLU + MaxPool**
- Dropout between layers
- Fully connected layers


In [3]:
class CNNModel(nn.Module):
    """Two-stage convolutional classifier for single-channel character images.

    Each convolutional stage is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d ->
    Dropout. The classifier head flattens the feature map and applies
    Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear.

    The first fully connected layer expects ``64 * 7 * 7`` features, i.e. a
    28x28 input halved twice by the two 2x2 max-pools.

    Args:
        num_classes: Number of output logits. Defaults to 62
            (10 digits + 26 uppercase + 26 lowercase letters).
    """

    def __init__(self, num_classes=62):
        super().__init__()

        # Feature extractor: 1 -> 32 -> 64 channels; padding=1 keeps the spatial
        # size through each 3x3 convolution, so only the pools shrink it.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25)
        )

        # Classifier head. Layer order is unchanged so state_dict keys
        # (e.g. "fc_layer.5.weight") stay compatible with saved checkpoints.
        self.fc_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.
        """
        x = self.conv_layer(x)
        x = self.fc_layer(x)
        return x

# Build the network, then place its parameters on the selected device
model = CNNModel()
model = model.to(device)
model


CNNModel(
  (conv_layer): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout(p=0.25, inplace=False)
  )
  (fc_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=256, bias=True)
    (2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=256, out_features=62, bias=True)
  )
)

# Step 4: Loss & Optimizer

In [4]:
# Cross-entropy loss on the class logits, optimized with Adam (learning rate 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


# Step 5: Training Loop

### Training Loop
We'll train for a few epochs and report the average loss and training accuracy after each one.


In [5]:
def train_model(num_epochs=5):
    """Train the notebook-level ``model`` and report metrics after every epoch.

    Uses the globals ``model``, ``train_loader``, ``criterion``, ``optimizer``
    and ``device`` defined in earlier cells.

    Args:
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        A list with one dict per epoch containing ``"epoch"`` (1-based),
        ``"loss"`` (mean of the per-batch losses) and ``"accuracy"`` (percent
        of training samples predicted correctly during that epoch).
    """
    history = []

    for epoch in range(num_epochs):
        # Training mode; the accuracy below is measured in this mode, while the
        # weights are still being updated within the epoch.
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest logit; taken from the
            # tensor directly instead of the legacy `.data` attribute.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = 100 * correct / total
        history.append(
            {"epoch": epoch + 1, "loss": epoch_loss, "accuracy": epoch_accuracy}
        )

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_accuracy:.2f}%")

    return history

# Keep the per-epoch metrics so they can be inspected or plotted later
history = train_model(5)


Epoch [1/5], Loss: 0.5888, Accuracy: 80.85%
Epoch [2/5], Loss: 0.4655, Accuracy: 83.76%
Epoch [3/5], Loss: 0.4406, Accuracy: 84.47%
Epoch [4/5], Loss: 0.4258, Accuracy: 84.85%
Epoch [5/5], Loss: 0.4172, Accuracy: 85.10%


# Step 6: Testing

In [6]:
# Evaluation mode for the held-out test pass
model.eval()
total = 0
correct = 0

# Gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class is the index of the largest logit in each row
        predicted = outputs.max(dim=1).indices
        correct += int((predicted == labels).sum())
        total += labels.shape[0]

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 86.66%


# Step 7: Saving the Model

In [10]:
# Save only the learned parameters (state_dict). A relative path, like
# `data_path`, keeps this cell runnable on any machine instead of depending on
# a user-specific absolute Windows directory.
model_path = "./emnist_cnn.pth"
torch.save(model.state_dict(), model_path)

