In [1]:
import os
import time
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torchsummary import summary

from ASLDataset import ASLDataset

warnings.filterwarnings("ignore")

# Pick the compute device once; models and batches are moved to it with `.to(device)`
# throughout the notebook. Falls back to the CPU when CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


The baseline model from scratch in 3.1.

In [2]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size forwarded to ``nn.Conv2d``.
        **kwargs: Any further ``nn.Conv2d`` keyword arguments
            (e.g. ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            **kwargs,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.relu(normalised)

class BaselineModel(nn.Module):
    """Plain convolutional baseline classifier.

    Architecture: a 7x7 stride-2 stem (conv -> max-pool -> batch norm -> ReLU),
    four stride-2 3x3 ``ConvBlock`` stages (64 -> 128 -> 128 -> 256 -> 256
    channels), global average pooling and a single linear classifier.

    Args:
        input_feature: Number of input image channels (3 for colour,
            1 for grayscale in this notebook).
        num_classes: Size of the output layer. Defaults to 29, the value
            previously hard-coded and used by every experiment in this notebook.
    """

    def __init__(self, input_feature, num_classes=29):
        super().__init__()

        # Stem: reduces spatial resolution by 4x (stride-2 conv, stride-2 pool).
        self.layer0 = nn.Sequential(
            nn.Conv2d(input_feature, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # Four unpadded stride-2 conv blocks; each roughly halves H and W.
        self.layer1 = nn.Sequential(
            ConvBlock(64, 128, kernel_size=3, stride=2),
            ConvBlock(128, 128, kernel_size=3, stride=2),
            ConvBlock(128, 256, kernel_size=3, stride=2),
            ConvBlock(256, 256, kernel_size=3, stride=2),
        )
        # Global average pooling makes the head independent of the input size.
        self.gpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = torch.nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.gpool(x)
        x = torch.flatten(x, 1)  # (batch, 256, 1, 1) -> (batch, 256)
        x = self.fc(x)

        return x


The Resnet style network from scratch.

In [3]:
class ResBlock(nn.Module):
    """Down-sampling residual block with a projection shortcut.

    The main path is a stride-2 3x3 convolution followed by a stride-1 3x3
    convolution, each with batch norm and ReLU. The shortcut is a stride-2
    1x1 convolution with batch norm, so both paths have matching shapes.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both the main path and the shortcut.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Main path, stage 1: halves the spatial resolution.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=2, padding=1
        )
        # Shortcut: 1x1 projection with the same stride as the main path.
        self.skip = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2),
            nn.BatchNorm2d(out_channels),
        )

        # Main path, stage 2: keeps the spatial resolution.
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``relu(main(x) + shortcut(x))``."""
        shortcut = self.skip(x)

        main = self.conv1(x)
        main = F.relu(self.bn1(main))
        main = self.conv2(main)
        # NOTE(review): the main path is rectified *before* the addition, whereas
        # the reference ResNet basic block adds first and applies ReLU once.
        # Kept as-is so the reported results stay reproducible.
        main = F.relu(self.bn2(main))

        return F.relu(main + shortcut)
    
class ResNetStyle(nn.Module):
    """Small ResNet-style classifier built from two ``ResBlock`` stages.

    Architecture: a 7x7 stride-2 stem (conv -> max-pool -> batch norm -> ReLU),
    two down-sampling residual blocks (64 -> 128 -> 256 channels), global
    average pooling and a linear classifier.

    Args:
        input_feature: Number of input image channels.
        num_classes: Size of the output layer. Defaults to 29, the value
            previously hard-coded and used by every experiment in this notebook.
    """

    def __init__(self, input_feature, num_classes=29):
        super().__init__()
        # Stem: reduces spatial resolution by 4x.
        self.layer0 = nn.Sequential(
            nn.Conv2d(input_feature, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # Each ResBlock halves the resolution again and widens the channels.
        self.layer1 = nn.Sequential(
            ResBlock(64, 128),
            ResBlock(128, 256),
        )

        self.gpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = torch.nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.gpool(x)
        x = torch.flatten(x, 1)  # (batch, 256, 1, 1) -> (batch, 256)
        x = self.fc(x)

        return x
    

The Inception style network from scratch.

In [4]:
class InceptionBlock(nn.Module):
    """Four-branch Inception module whose outputs are concatenated on the channel axis.

    Branches (all preserve the spatial size):
      1. 1x1 conv
      2. 1x1 conv -> 3x3 conv (padding 1)
      3. 1x1 conv -> 5x5 conv (padding 2)
      4. 3x3 max-pool (stride 1, padding 1) -> 1x1 conv

    Args:
        in_channels: Channels of the incoming feature map.
        ch_1x1: Output channels of branch 1.
        ch_3x3_1: Channels of the 1x1 reduction in branch 2.
        ch_3x3_2: Output channels of branch 2.
        ch_5x5_1: Channels of the 1x1 reduction in branch 3.
        ch_5x5_2: Output channels of branch 3.
        ch_pool: Output channels of branch 4.

    The block outputs ``ch_1x1 + ch_3x3_2 + ch_5x5_2 + ch_pool`` channels.
    """

    def __init__(self, in_channels, ch_1x1, ch_3x3_1, ch_3x3_2, ch_5x5_1, ch_5x5_2, ch_pool):
        super().__init__()

        # Branch 1: pointwise convolution.
        self.p_1x1 = ConvBlock(in_channels, ch_1x1, kernel_size=1)

        # Branch 2: channel reduction, then 3x3 convolution.
        self.p_3x3_1 = ConvBlock(in_channels, ch_3x3_1, kernel_size=1)
        self.p_3x3_2 = ConvBlock(ch_3x3_1, ch_3x3_2, kernel_size=3, padding=1)

        # Branch 3: channel reduction, then 5x5 convolution.
        self.p_5x5_1 = ConvBlock(in_channels, ch_5x5_1, kernel_size=1)
        self.p_5x5_2 = ConvBlock(ch_5x5_1, ch_5x5_2, kernel_size=5, padding=2)

        # Branch 4: size-preserving max-pool, then pointwise projection.
        self.p_pool_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p_pool_2 = ConvBlock(in_channels, ch_pool, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them along dim 1."""
        branch_1x1 = self.p_1x1(x)
        branch_3x3 = self.p_3x3_2(self.p_3x3_1(x))
        branch_5x5 = self.p_5x5_2(self.p_5x5_1(x))
        branch_pool = self.p_pool_2(self.p_pool_1(x))

        return torch.cat([branch_1x1, branch_3x3, branch_5x5, branch_pool], 1)


class InceptionStyle(nn.Module):
    """Small Inception-style classifier with two ``InceptionBlock`` stages.

    Architecture: a stem of two ``ConvBlock``s interleaved with two stride-2
    max-pools (output: 192 channels), two Inception blocks, global average
    pooling and a linear classifier.

    Args:
        in_channels: Number of input image channels.
        num_classes: Size of the output layer. Defaults to 29, the value
            previously hard-coded and used by every experiment in this notebook.
    """

    def __init__(self, in_channels, num_classes=29):
        super().__init__()

        self.layer0 = nn.Sequential(
            ConvBlock(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            ConvBlock(64, 192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        self.layer1 = nn.Sequential(
            # Output channels: 64 + 128 + 32 + 32 = 256.
            InceptionBlock(192, 64, 96, 128, 16, 32, 32),
            # Output channels: 128 + 192 + 96 + 64 = 480.
            InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        )

        self.gpool = torch.nn.AdaptiveAvgPool2d(1)
        # 480 must match the channel count of the last Inception block above.
        self.fc = nn.Linear(480, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.gpool(x)
        x = torch.flatten(x, 1)  # (batch, 480, 1, 1) -> (batch, 480)
        x = self.fc(x)
        return x


In [5]:
# Instantiate the ResNet-style network for 3-channel input and inspect its layers.
resNetStyle = ResNetStyle(3).to(device)
criterion_r = nn.CrossEntropyLoss()
# Adam with weight decay; the learning rate is scaled by 0.1 at epochs 4, 10 and 15.
optimizer_r = optim.Adam(resNetStyle.parameters(), lr=0.01, weight_decay=0.001)
scheduler_r = torch.optim.lr_scheduler.MultiStepLR(optimizer_r, milestones=[4,10,15], gamma=0.1)

# Print the per-layer output shapes and parameter counts for a 200x200 input.
summary(resNetStyle,(3, 200, 200))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           9,472
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 25, 25]           8,320
       BatchNorm2d-6          [-1, 128, 25, 25]             256
            Conv2d-7          [-1, 128, 25, 25]          73,856
       BatchNorm2d-8          [-1, 128, 25, 25]             256
            Conv2d-9          [-1, 128, 25, 25]         147,584
      BatchNorm2d-10          [-1, 128, 25, 25]             256
         ResBlock-11          [-1, 128, 25, 25]               0
           Conv2d-12          [-1, 256, 13, 13]          33,024
      BatchNorm2d-13          [-1, 256, 13, 13]             512
           Conv2d-14          [-1, 256,

In [6]:
# define the train and test methods
def train(model, data_loader, criterion, optimizer, scheduler):
    """Train ``model`` for one epoch and return the mean per-batch loss.

    Args:
        model: Network to optimise; switched to training mode here.
        data_loader: Iterable of batches where item 0 is the inputs and
            item 1 the labels.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per batch.
        scheduler: Learning-rate scheduler stepped once at the end of the epoch.

    Returns:
        The sum of the batch losses divided by ``len(data_loader)``.
    """
    model.train()
    epoch_loss = 0.0

    for batch in data_loader:
        # Move the batch to the notebook-wide compute device.
        inputs, labels = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()

    # One scheduler step per epoch, after all optimiser steps.
    scheduler.step()

    return epoch_loss / len(data_loader)


def test(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        dataloader: Iterable of batches where item 0 is the images and
            item 1 the labels.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in dataloader:
            images, labels = batch[0].to(device), batch[1].to(device)
            logits = model(images)
            # The predicted class is the index of the largest logit.
            predicted = torch.max(logits, 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    return n_correct / n_seen

In [7]:
# Baseline preprocessing: tensor conversion plus per-channel normalisation with
# mean 0.5 / std 0.5 (maps values in [0, 1] to [-1, 1]).
no_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
# Augmentation: random rotation by an angle in [-20, 20] degrees before normalising.
rotation = transforms.Compose(
    [transforms.ToTensor(),
     transforms.RandomRotation((-20, 20)),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
# Grayscale variant: single-channel image, hence scalar mean/std.
grayscale = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Grayscale(),
     transforms.Normalize(0.5,0.5)]
)

# Default loaders (no augmentation). Later cells rebind `trainset`/`trainloader`/
# `testset`/`testloader` for each experiment, so the cells must be run in order.
trainset = ASLDataset('train',transform=no_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=2)

testset = ASLDataset('test',transform=no_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

Train the baseline model without preprocessing.

In [8]:
# Experiment: baseline CNN on 3-channel images, using whatever `trainloader` /
# `testloader` currently hold (the un-augmented loaders when run in order).
baselineModel = BaselineModel(3).to(device)
criterion_b = nn.CrossEntropyLoss()
# Adam with weight decay; the learning rate is scaled by 0.1 at epochs 4, 10 and 15.
optimizer_b = optim.Adam(baselineModel.parameters(), lr=0.01, weight_decay=0.001)
scheduler_b = torch.optim.lr_scheduler.MultiStepLR(optimizer_b, milestones=[4,10,15], gamma=0.1)


summary(baselineModel,(3, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    # `train` also advances the LR scheduler once per call.
    train_loss = train(baselineModel, trainloader, criterion_b, optimizer_b, scheduler_b)
    test_acc = test(baselineModel, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           9,472
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 24, 24]          73,856
       BatchNorm2d-6          [-1, 128, 24, 24]             256
              ReLU-7          [-1, 128, 24, 24]               0
         ConvBlock-8          [-1, 128, 24, 24]               0
            Conv2d-9          [-1, 128, 11, 11]         147,584
      BatchNorm2d-10          [-1, 128, 11, 11]             256
             ReLU-11          [-1, 128, 11, 11]               0
        ConvBlock-12          [-1, 128, 11, 11]               0
           Conv2d-13            [-1, 256, 5, 5]         295,168
      BatchNorm2d-14            [-1, 25

In [9]:
# Persist the trained baseline weights. The target directory is created first so
# that a missing folder cannot discard a finished training run.
os.makedirs('./trained_models', exist_ok=True)
torch.save(baselineModel.state_dict(), './trained_models/baselineModel_model.pth')

The baseline model using grayscale preprocessing.

In [10]:
# Experiment: baseline CNN on grayscale images. Both splits use the grayscale
# transform, so the model takes a single input channel.
trainset = ASLDataset('train',transform=grayscale)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=2)

testset = ASLDataset('test',transform=grayscale)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

# Fresh model and optimiser state; this rebinds `baselineModel` from the previous run.
baselineModel = BaselineModel(1).to(device)
criterion_b = nn.CrossEntropyLoss()
optimizer_b = optim.Adam(baselineModel.parameters(), lr=0.01, weight_decay=0.001)
scheduler_b = torch.optim.lr_scheduler.MultiStepLR(optimizer_b, milestones=[4,10,15], gamma=0.1)


summary(baselineModel,(1, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(baselineModel, trainloader, criterion_b, optimizer_b, scheduler_b)
    test_acc = test(baselineModel, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           3,200
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 24, 24]          73,856
       BatchNorm2d-6          [-1, 128, 24, 24]             256
              ReLU-7          [-1, 128, 24, 24]               0
         ConvBlock-8          [-1, 128, 24, 24]               0
            Conv2d-9          [-1, 128, 11, 11]         147,584
      BatchNorm2d-10          [-1, 128, 11, 11]             256
             ReLU-11          [-1, 128, 11, 11]               0
        ConvBlock-12          [-1, 128, 11, 11]               0
           Conv2d-13            [-1, 256, 5, 5]         295,168
      BatchNorm2d-14            [-1, 25

The baseline model using rotation preprocessing.

In [11]:
# Experiment: baseline CNN with random-rotation augmentation. Only the training
# split is augmented; the test split keeps the plain transform.
trainset = ASLDataset('train',transform=rotation)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=2)

testset = ASLDataset('test',transform=no_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

# Fresh model and optimiser state; this rebinds `baselineModel` from the previous run.
baselineModel = BaselineModel(3).to(device)
criterion_b = nn.CrossEntropyLoss()
optimizer_b = optim.Adam(baselineModel.parameters(), lr=0.01, weight_decay=0.001)
scheduler_b = torch.optim.lr_scheduler.MultiStepLR(optimizer_b, milestones=[4,10,15], gamma=0.1)


summary(baselineModel,(3, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(baselineModel, trainloader, criterion_b, optimizer_b, scheduler_b)
    test_acc = test(baselineModel, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           9,472
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 24, 24]          73,856
       BatchNorm2d-6          [-1, 128, 24, 24]             256
              ReLU-7          [-1, 128, 24, 24]               0
         ConvBlock-8          [-1, 128, 24, 24]               0
            Conv2d-9          [-1, 128, 11, 11]         147,584
      BatchNorm2d-10          [-1, 128, 11, 11]             256
             ReLU-11          [-1, 128, 11, 11]               0
        ConvBlock-12          [-1, 128, 11, 11]               0
           Conv2d-13            [-1, 256, 5, 5]         295,168
      BatchNorm2d-14            [-1, 25

Resnet style CNN without preprocessing.

In [12]:
# Experiment: ResNet-style CNN on un-augmented 3-channel images.
# NOTE(review): this cell uses num_workers=1 while the baseline cells use 2 —
# confirm whether the difference is intentional.
trainset = ASLDataset('train',transform=no_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=1)

testset = ASLDataset('test',transform=no_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=1)

# Fresh model and optimiser state; this rebinds the `resNetStyle` built for the summary cell.
resNetStyle = ResNetStyle(3).to(device)
criterion_r = nn.CrossEntropyLoss()
optimizer_r = optim.Adam(resNetStyle.parameters(), lr=0.01, weight_decay=0.001)
scheduler_r = torch.optim.lr_scheduler.MultiStepLR(optimizer_r, milestones=[4,10,15], gamma=0.1)


summary(resNetStyle,(3, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(resNetStyle, trainloader, criterion_r, optimizer_r, scheduler_r)
    test_acc = test(resNetStyle, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           9,472
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 25, 25]           8,320
       BatchNorm2d-6          [-1, 128, 25, 25]             256
            Conv2d-7          [-1, 128, 25, 25]          73,856
       BatchNorm2d-8          [-1, 128, 25, 25]             256
            Conv2d-9          [-1, 128, 25, 25]         147,584
      BatchNorm2d-10          [-1, 128, 25, 25]             256
         ResBlock-11          [-1, 128, 25, 25]               0
           Conv2d-12          [-1, 256, 13, 13]          33,024
      BatchNorm2d-13          [-1, 256, 13, 13]             512
           Conv2d-14          [-1, 256,

Resnet style CNN with grayscale images.

In [13]:

# Experiment: ResNet-style CNN on grayscale images. Both splits use the
# grayscale transform, so the model takes a single input channel.
trainset = ASLDataset('train',transform=grayscale)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=1)

testset = ASLDataset('test',transform=grayscale)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=1)

# Fresh model and optimiser state; this rebinds `resNetStyle` from the previous run.
resNetStyle = ResNetStyle(1).to(device)
criterion_r = nn.CrossEntropyLoss()
optimizer_r = optim.Adam(resNetStyle.parameters(), lr=0.01, weight_decay=0.001)
scheduler_r = torch.optim.lr_scheduler.MultiStepLR(optimizer_r, milestones=[4,10,15], gamma=0.1)


summary(resNetStyle,(1, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(resNetStyle, trainloader, criterion_r, optimizer_r, scheduler_r)
    test_acc = test(resNetStyle, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           3,200
         MaxPool2d-2           [-1, 64, 50, 50]               0
       BatchNorm2d-3           [-1, 64, 50, 50]             128
              ReLU-4           [-1, 64, 50, 50]               0
            Conv2d-5          [-1, 128, 25, 25]           8,320
       BatchNorm2d-6          [-1, 128, 25, 25]             256
            Conv2d-7          [-1, 128, 25, 25]          73,856
       BatchNorm2d-8          [-1, 128, 25, 25]             256
            Conv2d-9          [-1, 128, 25, 25]         147,584
      BatchNorm2d-10          [-1, 128, 25, 25]             256
         ResBlock-11          [-1, 128, 25, 25]               0
           Conv2d-12          [-1, 256, 13, 13]          33,024
      BatchNorm2d-13          [-1, 256, 13, 13]             512
           Conv2d-14          [-1, 256,

Inception style CNN without preprocessing.

In [14]:
# Experiment: Inception-style CNN on un-augmented 3-channel images.
trainset = ASLDataset('train',transform=no_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=2)

testset = ASLDataset('test',transform=no_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)
inceptionStyle = InceptionStyle(3).to(device)
criterion_i = nn.CrossEntropyLoss()
# Adam with weight decay; the learning rate is scaled by 0.1 at epochs 4, 10 and 15.
optimizer_i = optim.Adam(inceptionStyle.parameters(), lr=0.01, weight_decay=0.001)
scheduler_i = torch.optim.lr_scheduler.MultiStepLR(optimizer_i, milestones=[4,10,15], gamma=0.1)


summary(inceptionStyle,(3, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(inceptionStyle, trainloader, criterion_i, optimizer_i, scheduler_i)
    test_acc = test(inceptionStyle, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           9,472
       BatchNorm2d-2         [-1, 64, 100, 100]             128
              ReLU-3         [-1, 64, 100, 100]               0
         ConvBlock-4         [-1, 64, 100, 100]               0
         MaxPool2d-5           [-1, 64, 50, 50]               0
            Conv2d-6          [-1, 192, 50, 50]         110,784
       BatchNorm2d-7          [-1, 192, 50, 50]             384
              ReLU-8          [-1, 192, 50, 50]               0
         ConvBlock-9          [-1, 192, 50, 50]               0
        MaxPool2d-10          [-1, 192, 25, 25]               0
           Conv2d-11           [-1, 64, 25, 25]          12,352
      BatchNorm2d-12           [-1, 64, 25, 25]             128
             ReLU-13           [-1, 64, 25, 25]               0
        ConvBlock-14           [-1, 64,

Inception style CNN with grayscale images.

In [15]:
# Experiment: Inception-style CNN on grayscale images. Both splits use the
# grayscale transform, so the model takes a single input channel.
trainset = ASLDataset('train',transform=grayscale)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=2)

testset = ASLDataset('test',transform=grayscale)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)


# Fresh model and optimiser state; this rebinds `inceptionStyle` from the previous run.
inceptionStyle = InceptionStyle(1).to(device)
criterion_i = nn.CrossEntropyLoss()
optimizer_i = optim.Adam(inceptionStyle.parameters(), lr=0.01, weight_decay=0.001)
scheduler_i = torch.optim.lr_scheduler.MultiStepLR(optimizer_i, milestones=[4,10,15], gamma=0.1)


summary(inceptionStyle,(1, 200, 200))

for epoch in range(20):  # loop over the dataset multiple times

    start_time = time.time()
    train_loss = train(inceptionStyle, trainloader, criterion_i, optimizer_i, scheduler_i)
    test_acc = test(inceptionStyle, testloader)
        
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 100, 100]           3,200
       BatchNorm2d-2         [-1, 64, 100, 100]             128
              ReLU-3         [-1, 64, 100, 100]               0
         ConvBlock-4         [-1, 64, 100, 100]               0
         MaxPool2d-5           [-1, 64, 50, 50]               0
            Conv2d-6          [-1, 192, 50, 50]         110,784
       BatchNorm2d-7          [-1, 192, 50, 50]             384
              ReLU-8          [-1, 192, 50, 50]               0
         ConvBlock-9          [-1, 192, 50, 50]               0
        MaxPool2d-10          [-1, 192, 25, 25]               0
           Conv2d-11           [-1, 64, 25, 25]          12,352
      BatchNorm2d-12           [-1, 64, 25, 25]             128
             ReLU-13           [-1, 64, 25, 25]               0
        ConvBlock-14           [-1, 64,

Transfer learning using the Resnet18.

In [18]:
# Preprocessing for the pretrained ResNet18: resize to 224x224 and normalise
# with the per-channel mean / std given below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Both splits share the same deterministic transform.
trainset = ASLDataset('train', transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=256, shuffle=True, num_workers=2
)

testset = ASLDataset('test', transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2
)

In [19]:
# Transfer learning: start from a pretrained ResNet18.
resnet = models.resnet18(pretrained=True)
# Freeze every pretrained parameter ...
for param in resnet.parameters():
    param.requires_grad = False
# ... then unfreeze only the last residual stage so it can be fine-tuned.
for param in resnet.layer4.parameters():
    param.requires_grad = True
    

# Replace the classifier head with a new 29-way linear layer. It is created
# after the freeze loop, so its parameters keep the default requires_grad=True.
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 29)
resnet.to(device)
print(resnet)
summary(resnet, (3,224,224))

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [20]:
# Fine-tune the pretrained ResNet18. Only parameters with requires_grad=True
# (layer4 and the new fc head) are handed to the optimiser.
criterion_t_r = nn.CrossEntropyLoss()
optimizer_t_r = optim.Adam(filter(lambda p: p.requires_grad, resnet.parameters()), lr=0.01, weight_decay=0.001)
# The learning rate is scaled by 0.1 at epochs 4, 10 and 15.
scheduler_t_r = torch.optim.lr_scheduler.MultiStepLR(optimizer_t_r, milestones=[4,10,15], gamma=0.1)

for epoch in range(20):  # loop over the dataset multiple times
    
    start_time = time.time()
    train_loss = train(resnet, trainloader, criterion_t_r, optimizer_t_r, scheduler_t_r)
    test_acc = test(resnet, testloader)
    
    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

Epoch: 1, time used: 51.11s, training loss: 0.230679, test accuracy:0.674011
Epoch: 2, time used: 51.14s, training loss: 0.059996, test accuracy:0.331277
Epoch: 3, time used: 51.34s, training loss: 0.051952, test accuracy:0.697477
Epoch: 4, time used: 51.47s, training loss: 0.045768, test accuracy:0.691969
Epoch: 5, time used: 51.51s, training loss: 0.011092, test accuracy:0.883331
Epoch: 6, time used: 51.49s, training loss: 0.008980, test accuracy:0.884764
Epoch: 7, time used: 51.49s, training loss: 0.010693, test accuracy:0.906467
Epoch: 8, time used: 51.69s, training loss: 0.010538, test accuracy:0.888840
Epoch: 9, time used: 51.74s, training loss: 0.012667, test accuracy:0.868018
Epoch: 10, time used: 51.65s, training loss: 0.012595, test accuracy:0.898535
Epoch: 11, time used: 51.54s, training loss: 0.006690, test accuracy:0.911535
Epoch: 12, time used: 51.49s, training loss: 0.005978, test accuracy:0.913187
Epoch: 13, time used: 51.49s, training loss: 0.006452, test accuracy:0.91

In [21]:
# Persist the fine-tuned ResNet18 weights. The target directory is created first
# so that a missing folder cannot discard a finished training run.
os.makedirs('./trained_models', exist_ok=True)
torch.save(resnet.state_dict(), './trained_models/resnet_raw.pth')

Transfer learning using the InceptionV3.

In [22]:
# Transfer learning: start from a pretrained InceptionV3.
inception = models.inception_v3(pretrained=True)

# Freeze every pretrained parameter ...
for param in inception.parameters():
    param.requires_grad = False
# ... then unfreeze only the final Mixed_7c block so it can be fine-tuned.
for param in inception.Mixed_7c.parameters():
    param.requires_grad = True
    
# Replace the main classifier head with a new 29-way linear layer. It is created
# after the freeze loop, so its parameters keep the default requires_grad=True.
# NOTE(review): the auxiliary classifier is left untouched (frozen, original
# output size); the training loop below ignores its output.
num_ftrs = inception.fc.in_features
inception.fc = nn.Linear(num_ftrs, 29)
inception.to(device)
print(inception)
summary(inception, (3,299,299))

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /home/sam/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:01<00:00, 97.8MB/s] 


Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [23]:
def train_inc(model, data_loader, criterion, optimizer, scheduler):
    """Train an Inception-style model for one epoch and return the mean batch loss.

    Unlike ``train``, this accepts models whose forward pass returns a tuple
    ``(logits, aux_logits)``, as torchvision's Inception3 does in training mode
    with auxiliary logits enabled. Only the main logits contribute to the loss;
    the auxiliary output is ignored. Models returning a plain tensor are also
    accepted.

    Args:
        model: Network to optimise; switched to training mode here.
        data_loader: Iterable of batches where item 0 is the inputs and
            item 1 the labels.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per batch.
        scheduler: Learning-rate scheduler stepped once at the end of the epoch.

    Returns:
        The sum of the batch losses divided by ``len(data_loader)``.
    """
    model.train()
    running_loss = 0.0

    for data in data_loader:
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        # Keep only the main logits when the model also returns auxiliary ones.
        # Blind two-way unpacking would fail (or split along the batch axis)
        # whenever a plain tensor is returned.
        if isinstance(outputs, tuple):
            outputs = outputs[0]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # One scheduler step per epoch, after all optimiser steps.
    scheduler.step()

    return running_loss / len(data_loader)


def test_inc(model, dataloader):
    correct = 0
    total = 0
    model.eval()
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in dataloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs, aux_output = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    return correct/total

In [24]:
# Input pipeline for Inception-v3: tensor conversion, resize to 299x299,
# then per-channel normalisation with the mean/std values below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((299, 299)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = ASLDataset('train', transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=256, shuffle=True, num_workers=2
)

testset = ASLDataset('test', transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2
)

criterion_t_i = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer.
optimizer_t_i = optim.Adam(
    (p for p in inception.parameters() if p.requires_grad),
    lr=0.01,
    weight_decay=0.001,
)
# Learning rate is multiplied by 0.1 at each of the listed epoch milestones.
scheduler_t_i = optim.lr_scheduler.MultiStepLR(
    optimizer_t_i, milestones=[4, 10, 15], gamma=0.1
)

# Loop over the dataset multiple times.
for epoch in range(20):
    start_time = time.time()
    train_loss = train_inc(inception, trainloader, criterion_t_i, optimizer_t_i, scheduler_t_i)
    # Evaluation uses the generic `test` helper, not `test_inc`.
    # NOTE(review): presumably because the model returns a single tensor in eval mode; confirm.
    test_acc = test(inception, testloader)

    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

Epoch: 1, time used: 160.01s, training loss: 0.284869, test accuracy:0.723697
Epoch: 2, time used: 159.88s, training loss: 0.118920, test accuracy:0.767434
Epoch: 3, time used: 159.91s, training loss: 0.105916, test accuracy:0.777349
Epoch: 4, time used: 159.89s, training loss: 0.105231, test accuracy:0.752231
Epoch: 5, time used: 159.90s, training loss: 0.038416, test accuracy:0.807976
Epoch: 6, time used: 159.90s, training loss: 0.030549, test accuracy:0.809519
Epoch: 7, time used: 159.94s, training loss: 0.030390, test accuracy:0.797400
Epoch: 8, time used: 159.90s, training loss: 0.028855, test accuracy:0.797841
Epoch: 9, time used: 159.89s, training loss: 0.030302, test accuracy:0.799934
Epoch: 10, time used: 159.95s, training loss: 0.028971, test accuracy:0.794536
Epoch: 11, time used: 159.96s, training loss: 0.021142, test accuracy:0.809739
Epoch: 12, time used: 159.94s, training loss: 0.018986, test accuracy:0.808417
Epoch: 13, time used: 159.93s, training loss: 0.019355, test 

KeyboardInterrupt: 

In [None]:
# Persist only the weights (state_dict) of the fine-tuned Inception model.
torch.save(inception.state_dict(), './trained_models/inception_raw.pth')

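Transfer learning using EfficientNet-B0: the pretrained network is frozen except for `features[8]`, and the classifier's final layer is replaced with a 29-class linear layer.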

In [None]:
efficientnet = models.efficientnet_b0(pretrained=True)

# Freeze all pretrained weights, then re-enable gradients for features[8]
# only, so that block is fine-tuned along with the new classifier layer.
efficientnet.requires_grad_(False)
efficientnet.features[8].requires_grad_(True)

# Swap the second classifier entry for a new 29-output linear layer. It is
# created after the freeze, so its parameters keep the default requires_grad=True.
num_ftrs = efficientnet.classifier[1].in_features
efficientnet.classifier[1] = nn.Linear(in_features=num_ftrs, out_features=29)
efficientnet.to(device)

print(efficientnet)
summary(efficientnet, (3, 224, 224))

EfficientNet(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): ConvNormActivation(
 

In [None]:
# Input pipeline for EfficientNet: tensor conversion, resize to 224x224,
# then per-channel normalisation with the mean/std values below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = ASLDataset('train', transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=256, shuffle=True, num_workers=2
)

testset = ASLDataset('test', transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2
)

criterion_t_e = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer.
optimizer_t_e = optim.Adam(
    (p for p in efficientnet.parameters() if p.requires_grad),
    lr=0.01,
    weight_decay=0.001,
)
# Learning rate is multiplied by 0.1 at each of the listed epoch milestones.
scheduler_t_e = optim.lr_scheduler.MultiStepLR(
    optimizer_t_e, milestones=[4, 10, 15], gamma=0.1
)

# Loop over the dataset multiple times.
for epoch in range(20):
    start_time = time.time()
    train_loss = train(efficientnet, trainloader, criterion_t_e, optimizer_t_e, scheduler_t_e)
    test_acc = test(efficientnet, testloader)

    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

Epoch: 1, time used: 99.16s, training loss: 0.281941, test accuracy:0.730178
Epoch: 2, time used: 100.14s, training loss: 0.319053, test accuracy:0.618855
Epoch: 3, time used: 100.98s, training loss: 0.324888, test accuracy:0.726814
Epoch: 4, time used: 100.56s, training loss: 0.301594, test accuracy:0.686903
Epoch: 5, time used: 101.39s, training loss: 0.186348, test accuracy:0.792262
Epoch: 6, time used: 99.51s, training loss: 0.172263, test accuracy:0.801285
Epoch: 7, time used: 96.54s, training loss: 0.176274, test accuracy:0.789357
Epoch: 8, time used: 102.72s, training loss: 0.174101, test accuracy:0.800061
Epoch: 9, time used: 101.41s, training loss: 0.175362, test accuracy:0.760226
Epoch: 10, time used: 108.29s, training loss: 0.175885, test accuracy:0.778806
Epoch: 11, time used: 112.52s, training loss: 0.154258, test accuracy:0.811989
Epoch: 12, time used: 112.65s, training loss: 0.147368, test accuracy:0.810230
Epoch: 13, time used: 99.96s, training loss: 0.145420, test accu

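Transfer learning using MobileNetV3-Small: the pretrained weights are loaded from a local checkpoint, the feature extractor is frozen except for `features[12]`, and the classifier's final layer is replaced with a 29-class linear layer.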

In [None]:
# Build the architecture without downloading weights, then load the
# pretrained checkpoint from the local trained_models directory.
mobilenet_v3_small = models.mobilenet_v3_small(pretrained=False)
mobilenet_v3_small.load_state_dict(
    torch.load('./trained_models/mobilenet_v3_small-047dcff4.pth')
)

# Freeze the feature extractor only (the classifier is left trainable), then
# re-enable gradients for features[12] so that block is fine-tuned as well.
mobilenet_v3_small.features.requires_grad_(False)
mobilenet_v3_small.features[12].requires_grad_(True)

# Swap the classifier entry at index 3 for a new 29-output linear layer.
num_ftrs = mobilenet_v3_small.classifier[3].in_features
mobilenet_v3_small.classifier[3] = nn.Linear(in_features=num_ftrs, out_features=29)
mobilenet_v3_small.to(device)

print(mobilenet_v3_small)
summary(mobilenet_v3_small, (3, 224, 224))


MobileNetV3(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride

In [None]:
# Input pipeline for MobileNetV3: tensor conversion, resize to 224x224,
# then per-channel normalisation with the mean/std values below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = ASLDataset('train', transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=256, shuffle=True, num_workers=2
)

testset = ASLDataset('test', transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2
)

criterion_t_m = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer.
optimizer_t_m = optim.Adam(
    (p for p in mobilenet_v3_small.parameters() if p.requires_grad),
    lr=0.01,
    weight_decay=0.001,
)
# Learning rate is multiplied by 0.1 at each of the listed epoch milestones.
scheduler_t_m = optim.lr_scheduler.MultiStepLR(
    optimizer_t_m, milestones=[4, 10, 15], gamma=0.1
)

# Loop over the dataset multiple times.
for epoch in range(20):
    start_time = time.time()
    train_loss = train(mobilenet_v3_small, trainloader, criterion_t_m, optimizer_t_m, scheduler_t_m)
    test_acc = test(mobilenet_v3_small, testloader)

    print(f'Epoch: {epoch+1}, time used: {time.time()-start_time:.2f}s, training loss: {train_loss:.6f}, test accuracy:{test_acc:.6f}')

Epoch: 1, time used: 67.69s, training loss: 0.319554, test accuracy:0.547978
Epoch: 2, time used: 67.01s, training loss: 0.234912, test accuracy:0.506614
Epoch: 3, time used: 67.16s, training loss: 0.248657, test accuracy:0.506461
Epoch: 4, time used: 65.89s, training loss: 0.230863, test accuracy:0.554782
Epoch: 5, time used: 67.30s, training loss: 0.146054, test accuracy:0.627265
Epoch: 6, time used: 67.23s, training loss: 0.138161, test accuracy:0.631470
Epoch: 7, time used: 67.33s, training loss: 0.135942, test accuracy:0.606315
Epoch: 8, time used: 66.02s, training loss: 0.135535, test accuracy:0.598593
Epoch: 9, time used: 67.25s, training loss: 0.136576, test accuracy:0.630247
Epoch: 10, time used: 66.86s, training loss: 0.134169, test accuracy:0.619008
Epoch: 11, time used: 67.12s, training loss: 0.120969, test accuracy:0.641486
Epoch: 12, time used: 66.80s, training loss: 0.117740, test accuracy:0.636058
Epoch: 13, time used: 66.02s, training loss: 0.118150, test accuracy:0.63