In [9]:
import pickle
import random
import string

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm


**Getting the dataset**

In [10]:
def load_data(per_train, per_val, per_test, path="src/model/NIST.pkl", shuffle=False, seed=None):
    """Load the pickled sample list and split it into train/val/test parts.

    The split is positional: the first ``per_train`` percent of the samples
    become the training part, the next ``per_val`` percent the validation
    part, and everything after that the test part.

    Args:
        per_train: Percentage of samples assigned to the training split.
        per_val: Percentage of samples assigned to the validation split.
        per_test: Percentage of samples assigned to the test split.
        path: Location of the pickle file. Defaults to the path that was
            previously hard-coded.
        shuffle: If True, the samples are copied into a list and shuffled
            before slicing, so a pickle stored in a fixed order (for example
            grouped by label) does not yield splits with different class
            distributions. Defaults to False, which keeps the stored order.
        seed: Seed for the shuffle; only used when ``shuffle`` is True.

    Returns:
        A ``(train_data, val_data, test_data)`` tuple.

    Raises:
        AssertionError: If a percentage is negative or the three percentages
            do not sum to 100.
    """
    assert min(per_train, per_val, per_test) >= 0, "split percentages must be non-negative"
    assert per_train + per_val + per_test == 100, "split percentages must sum to 100"

    with open(path, "rb") as f:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only point `path` at pickles you created or trust.
        NIST_dataset = pickle.load(f)

    if shuffle:
        # Work on a copy so the shuffle is reproducible from `seed` alone and
        # independent of the global random state.
        NIST_dataset = list(NIST_dataset)
        random.Random(seed).shuffle(NIST_dataset)

    total = len(NIST_dataset)
    train_end = int(per_train * total / 100)
    val_end = int((per_train + per_val) * total / 100)

    train_data = NIST_dataset[:train_end]
    val_data = NIST_dataset[train_end:val_end]
    test_data = NIST_dataset[val_end:]  # remainder, so no sample is dropped by rounding

    return train_data, val_data, test_data

In [11]:
class NISTDataset(Dataset):
    """Map-style dataset over ``(label_char, image_path)`` pairs.

    Each item is loaded from disk on access, converted to single-channel
    grayscale, optionally passed through ``transform``, and returned together
    with the integer class index of its label character.

    Args:
        data: Indexable collection of ``(label_char, image_path)`` pairs.
        transform: Optional callable applied to the grayscale PIL image.
        label_map: Optional mapping from label character to class index.
            When omitted, digits, uppercase letters and lowercase letters are
            numbered 0-61 in that order.
    """

    def __init__(self, data, transform=None, label_map=None):
        self.data = data
        self.transform = transform

        # Honour a caller-supplied mapping; previously the argument was
        # accepted but always overwritten by the default.
        if label_map is None:
            all_chars = string.digits + string.ascii_uppercase + string.ascii_lowercase
            label_map = {char: idx for idx, char in enumerate(all_chars)}
        self.label_map = label_map

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for the sample at ``idx``.

        Raises:
            KeyError: If the sample's label character is not in ``label_map``.
        """
        label_char, img_path = self.data[idx]

        # convert() returns a new, fully loaded image, so the file can be
        # closed as soon as the conversion is done instead of lingering until
        # garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("L")

        if self.transform:
            image = self.transform(image)

        label = self.label_map[label_char]
        return image, label

In [12]:
class InvertIfMajority:
    """Turn a PIL image into a grayscale tensor whose mean intensity is <= 0.5.

    The image is converted to mode "L" and then to a tensor with
    ``transforms.ToTensor``. If the mean of that tensor is above 0.5 (the
    image is mostly bright, e.g. dark strokes on a light background) the
    tensor is replaced by ``1 - tensor``, so the strokes end up bright on a
    dark background. Mostly-dark images are returned unchanged.

    Note that the decision uses the mean intensity, not a count of pixels,
    and the tensor holds grayscale values rather than a binary mask.
    """

    def __init__(self):
        # Built once and reused; previously a new ToTensor was created on
        # every call.
        self._to_tensor = transforms.ToTensor()

    def __call__(self, image):
        """Return the (possibly inverted) grayscale tensor for ``image``."""
        # Force a single grayscale channel in case the image is RGB.
        grayscale = image.convert("L")

        tensor_image = self._to_tensor(grayscale)

        # A mean above 0.5 means the image is predominantly bright, so flip
        # it to get a dark background.
        if tensor_image.mean() > 0.5:
            tensor_image = 1 - tensor_image

        return tensor_image

# Preprocessing applied to every sample, in order: resize to 128x128, force a
# single grayscale channel, then convert to a tensor via InvertIfMajority.
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.Grayscale(num_output_channels=1),
    InvertIfMajority(),
]
transform = transforms.Compose(preprocessing_steps)

In [13]:
# Batch size shared by all three loaders; previously the literal 512 was
# repeated in every DataLoader call.
BATCH_SIZE = 512

# 70% train / 20% validation / 10% test.
train_data, val_data, test_data = load_data(70, 20, 10)

train_dataset = NISTDataset(train_data, transform=transform)
test_dataset = NISTDataset(test_data, transform=transform)
val_dataset = NISTDataset(val_data, transform=transform)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

**Defining the Convolutional Neural Network**

Loss: CrossEntropyLoss

Optimizer: Adam

In [14]:
class CNN(nn.Module):
    """Two conv+pool stages followed by four fully connected layers (62 outputs).

    Both convolutions use 3x3 kernels with stride 1 and padding 1, and each is
    followed by a 2x2 max-pool with stride 2. ``fc1`` expects
    ``64 * 32 * 32`` input features, i.e. 64 channels at 32x32, which is what
    a 1x128x128 input becomes after the two pooling steps.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.fc1 = nn.Linear(64 * 32 * 32, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 62)
        # Registered as a submodule; forward() itself calls F.relu.
        self.relu = nn.ReLU()

    def forward(self, x, drop_prob=0.3):
        """Return the raw class scores (logits) for a batch ``x``.

        ``drop_prob`` is the dropout probability applied after each of the
        first three fully connected layers; dropout is only active while the
        module is in training mode.
        """
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Keep the batch dimension, flatten everything else for the FC layers.
        x = torch.flatten(x, 1)

        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.dropout(F.relu(hidden(x)), p=drop_prob, training=self.training)

        return self.fc4(x)

# Network, loss function and optimiser.
model = CNN()
criterion = nn.CrossEntropyLoss()
# weight_decay adds L2 regularization on the parameters.
optimizer = optim.Adam(params=model.parameters(), lr=5e-4, weight_decay=5e-4)

**Training the model**

In [15]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Move the model's parameters onto the selected device.
model = model.to(device)


Using device: cpu


In [None]:
num_epochs = 1
loss_hist = []  # one loss value per optimisation step, across all epochs

for epoch in range(num_epochs):
    model.train()  # forward() only applies dropout while in training mode
    # Iterate the loader directly: the batch index was never used, and tqdm
    # takes the total from len(train_loader) on its own.
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
        for images, labels in pbar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both the progress bar and
            # the history, instead of calling .item() twice per step.
            batch_loss = loss.item()
            pbar.set_postfix(loss=batch_loss)
            loss_hist.append(batch_loss)

Epoch 1/3:   0%|▏                                                                       | 7/2227 [01:01<5:25:41,  8.80s/it, loss=3.92]


KeyboardInterrupt: 

In [None]:
# Training loss per optimisation step.
fig, ax = plt.subplots(figsize=(9, 3))

# With a single sequence, plot() uses 0..N-1 as the x values.
ax.plot(loss_hist, label="Loss")

ax.set_title("Model Loss", fontsize=14)
ax.set_xlabel("Iterations", fontsize=12)
ax.set_ylabel("Loss", fontsize=12)

fig.tight_layout()
plt.show()

**Testing the model**

In [None]:
correct = 0
total = 0

# Switch to evaluation mode: CNN.forward passes training=self.training to
# F.dropout, so without this the test predictions are made with dropout
# still active after training.
model.eval()

with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        # The model lives on `device`; the batches must be moved there too,
        # otherwise the forward pass fails when running on CUDA.
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the highest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Guard against an empty test split, which would otherwise divide by zero.
if total:
    print(f"Test Accuracy: {100 * correct / total:.2f}%")
else:
    print("Test Accuracy: n/a (test set is empty)")

**Saving Weights**

In [None]:
# Persist only the model's state dict (not the whole module object).
torch.save(obj=model.state_dict(), f="model_weights.pth")