### Importing Data

In [2]:
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.models as models
import torch
import os
from torchvision.transforms import transforms
from torchvision import datasets

We can't use the csv since VGG requires data of at least 224x224. So created a new data loader and the data should be downloaded from https://www.kaggle.com/datasets/grassknoted/asl-alphabet?datasetId=23079&sortBy=voteCount&select=asl_alphabet_train


In [9]:
batch_size = 64

transform = transforms.Compose([transforms.Resize(224),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor()])

data_path = "./"
dataset_images = datasets.ImageFolder(os.path.join(data_path, 'asl_alphabet_train', 'asl_alphabet_train'), transform=transform)

training_size = int(len(dataset_images) * 0.8)
testing_size = len(dataset_images) - training_size

image_data_train, image_data_val = torch.utils.data.random_split(dataset_images, lengths=[training_size, testing_size])

data_loader_train = torch.utils.data.DataLoader(image_data_train, batch_size=batch_size, shuffle=True)
data_loader_val = torch.utils.data.DataLoader(image_data_val, batch_size=batch_size, shuffle=False)


### VGG16

In [13]:
import torchvision.models as models
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_vgg = models.vgg16(pretrained=True)

In [14]:
number_features = model_vgg.classifier[6].in_features
model_vgg.classifier[6] = nn.Linear(number_features, 29)

loss_function = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model_vgg.parameters(), lr=0.001, momentum=0.9)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_vgg.to(device)
model_vgg.train()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [15]:
# create our main training loop
def train(model, epoch, train_dl, test_dl, dev, loss_arr, acc_arr):
    model.train()
    for data, target in train_dl:
        print(data.shape)
        data = data.to(dev)
        # target = target.long()
        target = target.to(dev)

        opt.zero_grad() # zero out gradients (pytorch accumulates by default)
        output = model(data)
        loss = -1*output[range(target.shape[0]), target].mean()
        loss.backward()
        opt.step()

    model.eval()

    correct = 0
    with torch.no_grad():
        for data, target in test_dl:
            data = data.to(dev)
            # target = target.long()
            target = target.to(dev)

            output = model(data)
            loss += -1*output[range(target.shape[0]), target].mean()
            pred = output.data.max(1, keepdim=True)[1] # what does the model predict?
            correct += pred.eq(target.data.view_as(pred)).sum() # was the model correct?

            loss /= len(test_dl.dataset)
        acc_arr.append(100*correct / len(test_dl.dataset))
        loss_arr.append(loss)
        print(f'Results for Epoch {epoch+1} --> Loss: {loss} | Accuracy: {100*correct / len(test_dl.dataset)}')

In [16]:
# do the training
for epoch in range(1):
    loss_arr, acc_arr = [], []
    train(model_vgg, epoch, data_loader_train, data_loader_val, device, loss_arr, acc_arr)

KeyboardInterrupt: 

### RestNet50

In [10]:
resnet = models.resnet50(pretrained=True)
number_features = resnet.fc.in_features
resnet.fc = nn.Linear(number_features, 29)

loss_function = nn.CrossEntropyLoss()
opt = torch.optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet.to(device)
resnet.train()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [12]:
# do the training
for epoch in range(1):
    loss_arr, acc_arr = [], []
    train(resnet, epoch, data_loader_train, data_loader_val, device, loss_arr, acc_arr)

torch.Size([200, 3, 224, 224])
torch.Size([200, 3, 224, 224])


KeyboardInterrupt: 