We are working with the Sign Language MNIST dataset.
First we need to load the dataset for training and testing from the CSV files.

In [30]:
import pandas as pd

training_df = pd.read_csv("C:\Machine Learning\CSCI4050U_FinalProject\dataset\sign_mnist_train\sign_mnist_train.csv") # Need to find a way to provide a relative path
testing_df = pd.read_csv("C:\Machine Learning\CSCI4050U_FinalProject\dataset\sign_mnist_test\sign_mnist_test.csv")

# Extract labels and features
y_train = training_df['label']
training_df.drop(['label'], axis=1, inplace=True)

y_test = testing_df['label']
testing_df.drop(['label'], axis=1, inplace=True)

# Adjust parameters as needed
size = 28
channels = 1
batch = 128
epochs = 100

X_train = training_df.values.reshape(training_df.shape[0], size, size, channels)
X_test = testing_df.values.reshape(testing_df.shape[0], size, size, channels)

Create the Dataloaders

In [31]:
import torch
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader

X_train_tensor = torch.tensor(X_train.reshape(-1, 1, size, size)).float()
y_train_tensor = torch.tensor(y_train.values).long()

X_test_tensor = torch.tensor(X_test.reshape(-1, 1, size, size)).float()
y_test_tensor = torch.tensor(y_test.values).long()

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

training_dataloader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
val_dataloader = DataLoader(test_dataset, batch_size=batch, shuffle=False)

Helper Functions for showing model architecture and plotting metrics

In [38]:
import torchinfo
def describe(model, **kwargs):
    return torchinfo.summary(model,
                             input_size=(batch, 1, 28, 28),
                             col_names=['input_size', 'output_size', 'kernel_size', 'num_params'],
                             row_settings=['ascii_only'])

In [33]:
import shutil
from lightning.pytorch.loggers import CSVLogger
from lightning.pytorch import Trainer, seed_everything

def train(model):
    name = model.__class__.__name__
    shutil.rmtree(f'./lightning_logs/{name}', ignore_errors=True)
    seed_everything(0, workers=True)
    logger = CSVLogger('./lightning_logs', name=name)
    trainer = Trainer(max_epochs=3, logger=logger, deterministic=True)
    trainer.fit(model,
                train_dataloaders=training_dataloader,
                val_dataloaders=val_dataloader)

In [34]:
def show_metrics(name):
    df = pd.read_csv(f'./lightning_logs/{name}/version_0/metrics.csv')
    df.set_index('step', inplace=True)
    ax = df[['train_step_acc']].dropna().plot()
    df[['val_step_acc']].dropna().plot(ax=ax)
    return df[['val_step_acc']].dropna().round(2)

Base Model

In [35]:
from typing import Tuple
from lightning.pytorch import LightningModule
from torch import nn
import torch
from torch import Tensor
import torchmetrics

class BaseModel(LightningModule):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        self.accuracy = torchmetrics.classification.Accuracy(
            task="multiclass",
            num_classes=num_classes)
        self.model = self.build_model()
        
    def build_model(self):
        raise Exception("Not yet implemented")

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def forward(self, x):
        return self.model(x)
    
    def loss(self, logits, target):
        return nn.functional.cross_entropy(logits, target)
    
    def shared_step(self, mode:str, batch:Tuple[Tensor, Tensor], batch_index:int):
        x, target = batch
        output = self.forward(x)
        loss = self.loss(output, target)
        self.accuracy(output, target)
        self.log(f"{mode}_step_acc", self.accuracy, prog_bar=True)
        self.log(f"{mode}_step_loss", loss, prog_bar=False)
        return loss
    
    def training_step(self, batch, batch_index):
        return self.shared_step('train', batch, batch_index)
    
    def validation_step(self, batch, batch_index):
        return self.shared_step('val', batch, batch_index)
    
    def test_step(self, batch, batch_index):
        return self.shared_step('test', batch, batch_index)

Convolution Model

In [40]:
class ConvNet(BaseModel):
    def __init__(self, num_classes, num_kernels, kernel_size, pool_size):
        self.num_kernels = num_kernels
        self.kernel_size = kernel_size
        self.pool_size = pool_size
        super().__init__(num_classes)
        
    def build_model(self):
        return nn.Sequential(
            nn.Conv2d(1, self.num_kernels, self.kernel_size, padding='same'), 
            nn.MaxPool2d(self.pool_size, stride = 2), 
            nn.ReLU(), 
            nn.Flatten(), 
            nn.Linear(588, self.num_classes))

In [41]:
convo_model = ConvNet(26, 3, 3, 2)
describe(convo_model)
train(convo_model)

Seed set to 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: ./lightning_logs\ConvNet
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type               | Params
------------------------------------------------
0 | accuracy | MulticlassAccuracy | 0     
1 | model    | Sequential         | 15.3 K
------------------------------------------------
15.3 K    Trainable params
0         Non-trainable params
15.3 K    Total params
0.061     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\Ting\anaconda3\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


RuntimeError: Deterministic behavior was enabled with either `torch.use_deterministic_algorithms(True)` or `at::Context::setDeterministicAlgorithms(true)`, but this operation is not deterministic because it uses CuBLAS and you have CUDA >= 10.2. To enable deterministic behavior in this case, you must set an environment variable before running your PyTorch application: CUBLAS_WORKSPACE_CONFIG=:4096:8 or CUBLAS_WORKSPACE_CONFIG=:16:8. For more information, go to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility