### Importing Data

In [1]:
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.models as models
import torch
import os
from torchvision.transforms import transforms
from torchvision import datasets

We can't use the csv since VGG requires data of at least 224x224. So created a new data loader and the data should be downloaded from https://www.kaggle.com/datasets/grassknoted/asl-alphabet?datasetId=23079&sortBy=voteCount&select=asl_alphabet_train


In [2]:
batch_size = 20

transform = transforms.Compose([transforms.Resize(224),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor()])

data_path = "./"
dataset_images = datasets.ImageFolder(os.path.join(data_path, 'asl_alphabet_train', 'asl_alphabet_train'), transform=transform)
dataset_subset = torch.utils.data.Subset(dataset_images, np.random.choice(len(dataset_images), 10000, replace=False))

training_size = int(len(dataset_subset) * 0.8)
print(training_size)
testing_size = len(dataset_subset) - training_size
print(testing_size)

image_data_train, image_data_val = torch.utils.data.random_split(dataset_subset, lengths=[training_size, testing_size])

data_loader_train = torch.utils.data.DataLoader(image_data_train, batch_size=batch_size, shuffle=True)
data_loader_val = torch.utils.data.DataLoader(image_data_val, batch_size=batch_size, shuffle=False)


8000
2000


In [18]:
for data, target in data_loader_train:
    print(data.dtype)
    break

torch.float32


### VGG16

In [3]:
import torchvision.models as models
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_vgg = models.vgg16(pretrained=True)

In [4]:
torch.cuda.empty_cache()

In [5]:
number_features = model_vgg.classifier[6].in_features
model_vgg.classifier[6] = nn.Linear(number_features, 29)

loss_function = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model_vgg.parameters(), lr=0.0001, momentum=0.9)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_vgg.to(device)
model_vgg.train()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
for data, target in data_loader_train:
    print(data.shape)
    break

torch.Size([20, 3, 224, 224])


In [7]:
# create our main training loop
def train(model, loss_func, epoch, train_dl, test_dl, dev, train_loss, train_acc, valid_loss, valid_acc):
    model.train()

    running_loss = 0
    correct = 0
    total = 0

    for data, target in train_dl:
        data = data.to(dev)
        # target = target.long()
        target = target.to(dev)

        opt.zero_grad() # zero out gradients (pytorch accumulates by default)
        output = model(data)
        loss = loss_func(output, target)
        loss.backward()
        opt.step()

        running_loss += loss.item()
        # _, predicted = output.max(1)
        pred = output.data.max(1, keepdim=True)[1] # what does the model predict?
        correct += pred.eq(target.data.view_as(pred)).sum().item() # was the model correct?

    loss = running_loss / len(train_dl.dataset)
    acc = 100.*correct / len(train_dl.dataset)
    train_loss.append(loss)
    train_acc.append(acc)

    print(f'Train results for Epoch {epoch+1} --> Loss: {loss} | Accuracy: {acc}')

    model.eval()

    correct = 0
    with torch.no_grad():
        for data, target in test_dl:
            data = data.to(dev)
            # target = target.long()
            target = target.to(dev)

            output = model(data)
            loss += -1*output[range(target.shape[0]), target].mean()
            pred = output.data.max(1, keepdim=True)[1] # what does the model predict?
            correct += pred.eq(target.data.view_as(pred)).sum() # was the model correct?

            loss /= len(test_dl.dataset)
        acc = 100*correct / len(test_dl.dataset)
        valid_acc.append(acc)
        valid_loss.append(loss)
        print(f'Results for Epoch {epoch+1} --> Loss: {loss} | Accuracy: {acc}')

In [8]:
# do the training
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []

loss_arr, acc_arr = [], []
for epoch in range(12):
    train(model_vgg, loss_function, epoch, data_loader_train, data_loader_val, device, train_loss, train_acc, valid_loss, valid_acc)

Results for Epoch 1 --> Loss: -0.0034078021999448538 | Accuracy: 88.6500015258789
Results for Epoch 2 --> Loss: -0.005797719582915306 | Accuracy: 95.95000457763672
Results for Epoch 3 --> Loss: -0.0069970362819731236 | Accuracy: 98.1500015258789
Results for Epoch 4 --> Loss: -0.007792968302965164 | Accuracy: 98.1500015258789
Results for Epoch 5 --> Loss: -0.00860049482434988 | Accuracy: 98.80000305175781
Results for Epoch 6 --> Loss: -0.008748388849198818 | Accuracy: 99.1500015258789
Results for Epoch 7 --> Loss: -0.00894574262201786 | Accuracy: 99.20000457763672
Results for Epoch 8 --> Loss: -0.009509174153208733 | Accuracy: 99.20000457763672
Results for Epoch 9 --> Loss: -0.009466303512454033 | Accuracy: 99.35000610351562
Results for Epoch 10 --> Loss: -0.010220109485089779 | Accuracy: 99.35000610351562
Results for Epoch 11 --> Loss: -0.009787488728761673 | Accuracy: 99.50000762939453
Results for Epoch 12 --> Loss: -0.01135743223130703 | Accuracy: 99.45000457763672


In [9]:
torch.save(model_vgg.state_dict(), 'model_vgg.pth')

In [None]:
# save loss and acc just in case
import pandas as pd
training_dict = {'loss': loss_arr, 'acc': acc_arr}
df = pd.DataFrame(training_dict)
df.to_csv('training_data_vgg.csv')

### RestNet50

In [14]:
resnet = models.resnet50(pretrained=True)
number_features = resnet.fc.in_features
resnet.fc = nn.Linear(number_features, 29)

loss_function = nn.CrossEntropyLoss()
opt = torch.optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet.to(device)
resnet.train()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\pierc/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:02<00:00, 47.5MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [15]:
# do the training
for epoch in range(1):
    loss_arr, acc_arr = [], []
    train(resnet, epoch, data_loader_train, data_loader_val, device, loss_arr, acc_arr)

TypeError: train() missing 1 required positional argument: 'acc_arr'