In [1]:
import torch, torchvision
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import functional
from torch import nn
from copy import deepcopy
import time
from torch.utils.data import random_split
import numpy as np

First, import dataset

In [2]:
img_path = './ASLDataset'
device = torch.device('cuda')
torch.manual_seed(77)
batch_size = 64

Use torch.utils.data.Subset to split the train and test dataset

In [3]:
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], 
                         std = [0.229, 0.224, 0.225])
])

dataset = ImageFolder(root = 'ASLDataset/asl_alphabet_train', 
                      transform = transform)

train_size = int(len(dataset) * .8)
train_set = torch.utils.data.Subset(dataset, range(train_size))
test_set = torch.utils.data.Subset(dataset, range(train_size, 
                                                  len(dataset)))

train_loader = DataLoader(train_set, batch_size = batch_size, 
                          shuffle = True)
test_loader = DataLoader(test_set, batch_size = batch_size, 
                         shuffle = True)

class_names = dataset.classes
num_classes = len(dataset.classes)

Transfer learning, and unfreeze only the last three datasets

In [4]:
intermediate_feature_num = 512
model = torchvision.models.vgg16(pretrained = True)

for n, param in enumerate(model.features.parameters()):
    param.requires_grad = False

in_features = model.classifier[3].in_features
model.classifier[3] = nn.Linear(in_features, intermediate_feature_num)
model.classifier[6] = nn.Linear(intermediate_feature_num, num_classes)

print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

check the layers are freezed besides the last few

In [5]:
# what parameters are trainable:
for name, param in model.named_parameters():
    print(f'{name}: requires_grad={param.requires_grad}')

features.0.weight: requires_grad=False
features.0.bias: requires_grad=False
features.2.weight: requires_grad=False
features.2.bias: requires_grad=False
features.5.weight: requires_grad=False
features.5.bias: requires_grad=False
features.7.weight: requires_grad=False
features.7.bias: requires_grad=False
features.10.weight: requires_grad=False
features.10.bias: requires_grad=False
features.12.weight: requires_grad=False
features.12.bias: requires_grad=False
features.14.weight: requires_grad=False
features.14.bias: requires_grad=False
features.17.weight: requires_grad=False
features.17.bias: requires_grad=False
features.19.weight: requires_grad=False
features.19.bias: requires_grad=False
features.21.weight: requires_grad=False
features.21.bias: requires_grad=False
features.24.weight: requires_grad=False
features.24.bias: requires_grad=False
features.26.weight: requires_grad=False
features.26.bias: requires_grad=False
features.28.weight: requires_grad=False
features.28.bias: requires_grad=

In [6]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 
                                                   step_size = 50)

Train model

In [7]:
def train_model(model, criterion, optimizer, scheduler, num_epochs):
    milestone = np.linspace(0, len(train_loader) - 1, 21).astype(np.int32)[1:-1]
    t0 = time.time()
    model.train()
    length = len(train_loader)
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)
        running_loss = 0.0
        running_corrects = 0
        
        for n, (inputs, labels) in enumerate(train_loader):
            if n in milestone:
                print(f'{(np.where(milestone==n)[0][0] + 1) * 5}%', end = " ")
            elif n == len(train_loader) - 1:
                print("DONE")
                
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            model.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels.data)
            
        scheduler.step()
        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects / train_size
        
        print(f'Training loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    
    return model

In [46]:
model = train_model(model, criterion, optimizer, exp_lr_scheduler, 10)

Epoch 1/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.7714 Acc: 0.8040
Epoch 2/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.6961 Acc: 0.8148
Epoch 3/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.6797 Acc: 0.8216
Epoch 4/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.7016 Acc: 0.8201
Epoch 5/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.6780 Acc: 0.8262
Epoch 6/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.6601 Acc: 0.8313
Epoch 7/10
----------
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% DONE
Training loss: 0.6550 Acc: 0.8334
Epoch 8/10
----------
5% 10% 15% 20% 25% 30% 35%

save parameters in case of system crush

In [8]:
# torch.save(model.state_dict(), 'CAIS_2023_WinterProj_model_param.pth')
model.load_state_dict(torch.load('CAIS_2023_WinterProj_model_param.pth'))
model = model.to(device)

Evaluate: severely overfitting (WTF)

In [9]:
# evaluates 

running_corrects = 0
length = len(test_set)
model.eval()

y_true = []
y_pred = []

for n, (inputs, labels) in enumerate(test_loader):
    inputs = inputs.to(device)
    labels = labels.to(device)
    
    y_true.extend(labels.cpu().numpy())

    outputs = model(inputs)
    _, preds = torch.max(outputs, 1)
    
    y_pred.extend(preds.cpu().numpy())
    
    running_corrects += torch.sum(preds == labels.data)
    

epoch_acc = running_corrects / length

print(f'Testing Acc: {epoch_acc:.4f}')

Testing Acc: 0.0136


In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.013620689655172415
Precision: 0.052303448275862065
Recall: 0.013620689655172415
F1 Score: 0.021612995155314903


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
