In [15]:
import kagglehub
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

In [16]:
# Fetch the Sign Language MNIST dataset and remember the directory that
# holds the downloaded files; later cells read the CSV splits from `path`.
# (kagglehub is imported with the other dependencies in the first cell.)
path = kagglehub.dataset_download("datamunge/sign-language-mnist")

Using Colab cache for faster access to the 'sign-language-mnist' dataset.


In [17]:
# Load both CSV splits from the download directory. The file the dataset
# calls "test" serves as this notebook's validation set.
training_df = pd.read_csv(filepath_or_buffer=f"{path}/sign_mnist_train.csv")
validation_df = pd.read_csv(filepath_or_buffer=f"{path}/sign_mnist_test.csv")

In [18]:
# Sanity-check the raw table: per-column summary statistics for the label and
# every pixel column, rendered as this cell's output.
training_df.describe()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
count,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,...,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0
mean,12.318813,145.419377,148.500273,151.247714,153.546531,156.210891,158.411255,160.472154,162.339683,163.954799,...,141.104863,147.495611,153.325806,159.125332,161.969259,162.736696,162.906137,161.966454,161.137898,159.824731
std,7.287552,41.358555,39.942152,39.056286,38.595247,37.111165,36.125579,35.016392,33.661998,32.651607,...,63.751194,65.512894,64.427412,63.708507,63.738316,63.444008,63.50921,63.298721,63.610415,64.396846
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,121.0,126.0,130.0,133.0,137.0,140.0,142.0,144.0,146.0,...,92.0,96.0,103.0,112.0,120.0,125.0,128.0,128.0,128.0,125.5
50%,13.0,150.0,153.0,156.0,158.0,160.0,162.0,164.0,165.0,166.0,...,144.0,162.0,172.0,180.0,183.0,184.0,184.0,182.0,182.0,182.0
75%,19.0,174.0,176.0,178.0,179.0,181.0,182.0,183.0,184.0,185.0,...,196.0,202.0,205.0,207.0,208.0,207.0,207.0,206.0,204.0,204.0
max,24.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,...,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


In [19]:
# Image geometry used to reshape each flat row of pixel columns into a tensor.
IMG_HEIGHT = IMG_WIDTH = 28  # square images: 28 * 28 = 784 pixel columns per row
IMG_CHS = 1                  # one channel per image

In [20]:
class SignDataset(Dataset):
  """In-memory dataset of sign-language images and their integer labels.

  Every sample is converted to a tensor once, up front, and kept on `device`,
  so `__getitem__` is a plain tensor index with no per-item transfer.

  Args:
    base_df: DataFrame with a 'label' column plus
      IMG_CHS * IMG_HEIGHT * IMG_WIDTH pixel columns holding values in
      [0, 255]. The frame is not modified.
    device: Where the tensors are stored. Defaults to "cuda" when a GPU is
      available and to "cpu" otherwise.
  """

  def __init__(self, base_df, device=None):
    if device is None:
      device = "cuda" if torch.cuda.is_available() else "cpu"

    labels = base_df['label'].to_numpy()
    pixels = base_df.drop(columns='label').to_numpy() / 255  # scale to [0, 1]
    # PyTorch image layers expect (N, C, H, W): height comes before width.
    pixels = pixels.reshape(-1, IMG_CHS, IMG_HEIGHT, IMG_WIDTH)

    self.xs = torch.tensor(pixels).float().to(device)
    # Class indices must be int64 for nn.CrossEntropyLoss.
    self.ys = torch.tensor(labels, dtype=torch.long).to(device)

  def __getitem__(self, idx):
    """Return the (image, label) pair stored at position `idx`."""
    return self.xs[idx], self.ys[idx]

  def __len__(self):
    """Return the number of samples."""
    return len(self.xs)

In [21]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# SignDataset already holds device tensors, so both loaders keep the default
# num_workers=0 (no worker processes).
training_data = SignDataset(training_df)
# Reshuffle every epoch so batches are not tied to the CSV row order.
training_loader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
training_N = len(training_loader.dataset)

validation_data = SignDataset(validation_df)
# Validation metrics are summed over the whole set, so order does not matter.
validation_loader = DataLoader(validation_data, batch_size=BATCH_SIZE)
validation_N = len(validation_loader.dataset)

In [22]:
# Labels are 0-based class indices, so the class count is the largest label
# seen in either split plus one. pandas' .max() returns a NumPy integer; cast
# it to a built-in int before it is used as a layer size.
N_CLASSES = int(max(training_df['label'].max(), validation_df['label'].max())) + 1

In [23]:
# N_CLASSES comes from the previous cell; the duplicate assignment that used
# to sit here has been removed.

KERNEL_SIZE = 3
KERNAL_SIZE = KERNEL_SIZE  # original misspelled name, kept so existing references still work

# Every conv below uses padding=1 with a 3x3 kernel, so only the three 2x2
# max-pools change the spatial size; each one halves it, rounding down
# (28 -> 14 -> 7 -> 3 for the default image size).
N_POOLS = 3
FLATTENED_IMG_SIZE = 75 * (IMG_HEIGHT // 2**N_POOLS) * (IMG_WIDTH // 2**N_POOLS)

model = nn.Sequential(
    # First convolutional block (input 28px x 28px)
    nn.Conv2d(IMG_CHS, 25, KERNEL_SIZE, stride=1, padding=1),  # 25 feature maps
    nn.BatchNorm2d(25),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # Second convolutional block (input 14px x 14px)
    nn.Conv2d(25, 50, KERNEL_SIZE, stride=1, padding=1),  # 50 feature maps
    nn.BatchNorm2d(50),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.MaxPool2d(2, stride=2),
    # Third convolutional block (input 7px x 7px)
    nn.Conv2d(50, 75, KERNEL_SIZE, stride=1, padding=1),  # 75 feature maps
    nn.BatchNorm2d(75),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # Flatten to dense (75 maps of 3px x 3px -> 1D)
    nn.Flatten(),
    nn.Linear(FLATTENED_IMG_SIZE, 512),
    nn.Dropout(0.3),
    nn.ReLU(),
    # One output score per class.
    nn.Linear(512, N_CLASSES)
)

In [24]:
# Use the GPU when one is present instead of assuming "cuda" always exists.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Move the network to the device first, then compile it.
model = torch.compile(model.to(DEVICE))
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters())  # Adam with its default hyperparameters

In [25]:
def get_batch_accuracy(output, y, N):
  """Return this batch's count of correct predictions divided by `N`.

  Args:
    output: Per-class scores with the classes along dim 1.
    y: Target class indices, one per row of `output`.
    N: Denominator of the ratio. The callers in this notebook pass the size
      of the whole dataset, so summing the returned values over every batch
      yields the overall accuracy.

  Returns:
    The number of rows whose highest-scoring class equals the target,
    divided by `N`.
  """
  predicted = output.argmax(dim=1)
  n_correct = (predicted == y.view_as(predicted)).sum().item()
  return n_correct / N

In [26]:
def train():
  """Run one optimisation pass over `training_loader` and print its metrics.

  The model is updated after every batch. The printed loss is the sum of the
  per-batch loss values (not an average over batches); the printed accuracy
  is the sum of each batch's correct count divided by `training_N`.
  """
  model.train()  # training mode: dropout active, batch-norm uses batch statistics

  epoch_loss, epoch_accuracy = 0, 0
  for inputs, targets in training_loader:
    logits = model(inputs)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    step_loss = loss_function(logits, targets)
    step_loss.backward()
    optimizer.step()

    epoch_loss += step_loss.item()
    epoch_accuracy += get_batch_accuracy(logits, targets, training_N)

  print("Training Loss", epoch_loss)
  print("Training Accuracy", epoch_accuracy)

In [27]:
def validate():
  """Evaluate the model on `validation_loader` and print its metrics.

  No parameters are updated. The printed loss is the sum of the per-batch
  loss values (not an average over batches); the printed accuracy is the sum
  of each batch's correct count divided by `validation_N`.
  """
  model.eval()  # evaluation mode: dropout off, batch-norm uses running statistics

  total_loss, total_accuracy = 0, 0
  with torch.no_grad():  # gradients are not needed for evaluation
    for inputs, targets in validation_loader:
      logits = model(inputs)

      total_loss += loss_function(logits, targets).item()
      total_accuracy += get_batch_accuracy(logits, targets, validation_N)

  print("Validation Loss", total_loss)
  print("Validation Accuracy", total_accuracy)

In [28]:
%%time

# Number of full passes over the training set.
EPOCHS = 20

# Each epoch is one training pass followed by one validation pass. Both
# functions print their own metrics, so nothing is collected here.
for epoch in range(EPOCHS):
  print('Epoch', epoch)
  train()
  validate()

Epoch 0


W0905 16:01:42.399000 10900 torch/_inductor/utils.py:1436] [0/1] Not enough SMs to use max_autotune_gemm mode


Training Loss 285.64839202258736
Training Accuracy 0.9007102531414989
Validation Loss 28.717287968844175
Validation Accuracy 0.9485499163413236
Epoch 1
Training Loss 15.885625970375258
Training Accuracy 0.9954835184847841
Validation Loss 28.94351511914283
Validation Accuracy 0.95677635248187
Epoch 2
Training Loss 13.961776145530166
Training Accuracy 0.9955199417228113
Validation Loss 18.684054311476302
Validation Accuracy 0.9716954824316747
Epoch 3
Training Loss 15.598263202249655
Training Accuracy 0.9946822072482172
Validation Loss 22.109013599565515
Validation Accuracy 0.9736475181260417
Epoch 4
Training Loss 0.6283512911727485
Training Accuracy 0.9998907302859141
Validation Loss 17.525599596972825
Validation Accuracy 0.9764361405465659
Epoch 5
Training Loss 13.233521883889807
Training Accuracy 0.9952285558186043
Validation Loss 13.317313486790226
Validation Accuracy 0.981734523145562
Epoch 6
Training Loss 2.0221205510537175
Training Accuracy 0.9993079584775003
Validation Loss 67.603