In [1]:
import torch, torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

import time
import matplotlib.pyplot as plt
import os
import glob
import random

from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from torchvision.transforms.transforms import RandomPerspective
from torchsummary import summary

In [2]:
# Image transformations
#
# One preprocessing pipeline per dataset split. Both pipelines resize to 256,
# centre-crop to 224 and normalise with the same per-channel mean / std; the
# 'train' pipeline additionally applies blur, rotation and horizontal flip
# as augmentation before the crop.
image_transforms = dict(
    train=transforms.Compose([
        transforms.Resize(256),
        transforms.GaussianBlur(kernel_size=(3, 13), sigma=(0.1, 0.2)),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    test=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

In [3]:
# set dataset path from local folder
dataset = 'C:/KPT_Project/dataset'

# get images from folders
train_dir = os.path.join(dataset, 'train')
test_dir = os.path.join(dataset, 'validation_2')

# set batch size
batch_size = 64

# transforms dataset: one ImageFolder per split, each with its own pipeline
data = {
    'train' : datasets.ImageFolder(root=train_dir, transform=image_transforms['train']),
    'test' : datasets.ImageFolder(root=test_dir, transform=image_transforms['test'])
}

# Label indices are assigned per ImageFolder from its own class list, so the
# two splits must expose identical classes for the labels to be comparable.
assert data['test'].classes == data['train'].classes, (
    "train and test folders must contain the same class sub-folders"
)

# set classes from the dataset itself rather than from os.listdir(), which
# would also count stray files / hidden entries that are not classes
num_class = len(data['train'].classes)
print(num_class)

26


In [4]:
# Batch loaders for both splits (single-process loading, num_workers=0).
trainloader = DataLoader(data['train'], batch_size=batch_size, shuffle=True, num_workers=0)

# not necessary to shuffle the test images
testloader = DataLoader(data['test'], batch_size=batch_size, shuffle=False, num_workers=0)

# Number of samples in each split (the loaders wrap these same dataset objects).
train_data_size = len(data['train'])
test_data_size = len(data['test'])

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Last expression of the cell: displays whether CUDA is available.
torch.cuda.is_available()

True

In [5]:
def _run_pass(model, loader, loss_criterion, run_device, optimizer=None):
    """Run one full pass over ``loader``; train when ``optimizer`` is given.

    Returns a tuple ``(loss_sum, n_correct, n_seen)`` where ``loss_sum`` is the
    sum over batches of (batch loss * batch size), ``n_correct`` is the number
    of samples whose arg-max prediction equals the label, and ``n_seen`` is the
    number of samples processed.
    """
    training = optimizer is not None
    model.train(training)  # train mode when optimising, eval mode otherwise

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = loss_criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            batch_len = inputs.size(0)
            # Weight the batch loss by batch size so the epoch average stays
            # correct when the last batch is smaller than the others.
            loss_sum += loss.item() * batch_len

            predictions = outputs.argmax(dim=1)
            n_correct += predictions.eq(labels.view_as(predictions)).sum().item()
            n_seen += batch_len

    return loss_sum, n_correct, n_seen


def train_and_validate(model, loss_criterion, optimizer, epochs=25,
                       train_loader=None, valid_loader=None, run_device=None):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: network to optimise; it is updated in place.
        loss_criterion: callable ``(outputs, labels) -> loss``.
        optimizer: optimiser already bound to ``model``'s parameters.
        epochs: number of epochs to run.
        train_loader: iterable of ``(inputs, labels)`` training batches.
            Defaults to the notebook-level ``trainloader``.
        valid_loader: iterable of ``(inputs, labels)`` validation batches.
            Defaults to the notebook-level ``testloader``.
        run_device: device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        ``(model, history)`` where ``history`` holds one
        ``[train_loss, valid_loss, train_acc, valid_acc]`` row per epoch
        (accuracies as fractions in ``[0, 1]``).

    Side effects:
        Prints progress for every epoch and pickles the whole model to
        ``ASL_model_<epoch>.pt`` (zero-based epoch index) in the current
        working directory after each epoch. The model is left in eval mode.
    """
    # Fall back to the notebook globals so existing four-argument calls
    # behave exactly as before.
    if train_loader is None:
        train_loader = trainloader
    if valid_loader is None:
        valid_loader = testloader
    if run_device is None:
        run_device = device

    history = []

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))

        train_loss, train_correct, train_seen = _run_pass(
            model, train_loader, loss_criterion, run_device, optimizer=optimizer)
        valid_loss, valid_correct, valid_seen = _run_pass(
            model, valid_loader, loss_criterion, run_device)

        # Average over the samples actually processed; max(..., 1) guards
        # against an empty loader.
        avg_train_loss = train_loss / max(train_seen, 1)
        avg_train_acc = train_correct / max(train_seen, 1)
        avg_test_loss = valid_loss / max(valid_seen, 1)
        avg_test_acc = valid_correct / max(valid_seen, 1)

        history.append([avg_train_loss, avg_test_loss, avg_train_acc, avg_test_acc])

        epoch_end = time.time()

        # The epoch number printed here is zero-based, matching the
        # checkpoint file name written below.
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch, avg_train_loss, avg_train_acc*100, avg_test_loss, avg_test_acc*100, epoch_end-epoch_start))

        torch.save(model, 'ASL_model_'+str(epoch)+'.pt')

    return model, history

In [6]:
# Training model
model_ft = models.googlenet(pretrained=True)
num_ftrs = model_ft.fc.in_features

for module,param in zip(model_ft.modules(), model_ft.parameters()):
  if isinstance(module, nn.BatchNorm2d):
    param.requires_grad = False

model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(64, num_class))
 
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)



In [7]:
# Fine-tune for a fixed number of epochs.
# NOTE(review): if this cell is interrupted before it returns, neither
# `trained_model` nor `history` is bound, and later cells that read
# `history` will fail on a fresh kernel.
num_epochs = 40
trained_model, history = train_and_validate(
    model=model_ft,
    loss_criterion=criterion,
    optimizer=optimizer_ft,
    epochs=num_epochs,
)

Epoch: 1/40
Epoch : 000, Training: Loss: 1.8837, Accuracy: 41.9813%, 
		Validation : Loss : 2.1830, Accuracy: 55.0107%, Time: 361.6408s
Epoch: 2/40
Epoch : 001, Training: Loss: 0.4803, Accuracy: 84.4634%, 
		Validation : Loss : 0.9144, Accuracy: 78.1983%, Time: 327.6666s
Epoch: 3/40
Epoch : 002, Training: Loss: 0.2051, Accuracy: 94.3159%, 
		Validation : Loss : 1.4545, Accuracy: 74.6802%, Time: 324.6822s
Epoch: 4/40
Epoch : 003, Training: Loss: 0.1454, Accuracy: 96.1564%, 
		Validation : Loss : 1.4747, Accuracy: 76.7591%, Time: 323.4720s
Epoch: 5/40
Epoch : 004, Training: Loss: 0.1200, Accuracy: 96.6166%, 
		Validation : Loss : 1.1938, Accuracy: 77.0789%, Time: 325.0078s
Epoch: 6/40
Epoch : 005, Training: Loss: 0.0976, Accuracy: 97.4015%, 
		Validation : Loss : 0.9239, Accuracy: 81.3966%, Time: 319.5835s
Epoch: 7/40
Epoch : 006, Training: Loss: 0.0484, Accuracy: 98.7820%, 
		Validation : Loss : 1.3131, Accuracy: 80.2772%, Time: 254.1135s
Epoch: 8/40


KeyboardInterrupt: 

In [None]:
# Convert the per-epoch rows into an array so columns can be sliced.
history = np.array(history)

# Columns 0 and 1 are the training and validation loss of each epoch.
ax = plt.gca()
ax.plot(history[:, :2])
ax.legend(['Tr Loss', 'Val Loss'])
ax.set_xlabel('Epoch Number')
ax.set_ylabel('Loss')
ax.set_title('Loss vs Epoch')
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Per-class accuracy on the evaluation loader.
# Class names come from the dataset behind the loader: its `classes` list is
# what the integer labels index into. os.listdir() returns entries in
# arbitrary order and would also hit the filesystem once per sample.
class_names = testloader.dataset.classes
correct_pred = {classname: 0 for classname in class_names}
total_pred = {classname: 0 for classname in class_names}

# Make sure dropout / batch-norm are in inference mode even if training was
# interrupted while the model was still in train mode.
model_ft.eval()

# again no gradients needed
with torch.no_grad():
    # Unpack the batch directly so the notebook-level `data` dict is not
    # overwritten by the loop variable.
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_ft(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            classname = class_names[label]
            if label == prediction:
                correct_pred[classname] += 1
            total_pred[classname] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    class_total = total_pred[classname]
    if class_total == 0:
        # Avoid dividing by zero for a class with no evaluation samples.
        print(f'Accuracy for class: {classname:5s} is n/a (no samples)')
        continue
    accuracy = 100 * float(correct_count) / class_total
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

In [None]:
# Confusion Matrix

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

y_pred = []
y_true = []

# Inference is done on the CPU here; the loader yields CPU tensors, so the
# inputs need no transfer. The model stays on the CPU afterwards.
model_ft.to('cpu')
# Inference mode for dropout / batch-norm, regardless of how training ended.
model_ft.eval()

# iterate over test data (no gradients needed for evaluation)
with torch.no_grad():
    for inputs, labels in testloader:
        output = model_ft(inputs) # Feed Network

        # arg-max of the raw logits is the predicted class index
        y_pred.extend(output.argmax(dim=1).tolist()) # Save Prediction
        y_true.extend(labels.tolist()) # Save Truth

# class names in label-index order, taken from the dataset itself
classes = tuple(testloader.dataset.classes)

# Build confusion matrix. Passing `labels` pins the matrix to one row/column
# per class even when a class never occurs in y_true / y_pred.
cf_matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))

# Normalise each row by the number of true samples of that class, so every
# cell is the fraction of that class predicted as the column class.
# np.maximum(..., 1) keeps empty rows at zero instead of dividing by zero.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
cf_normalized = cf_matrix / np.maximum(row_totals, 1)

df_cm = pd.DataFrame(cf_normalized, index=list(classes), columns=list(classes))
plt.figure(figsize = (20,10))
sn.heatmap(df_cm, annot=True, fmt='.2f')

In [None]:
# Pickle the whole model object to both checkpoint file names.
for checkpoint_path in ('C:/KPT_Project/best_model.pth.tar',
                        'C:/KPT_Project/best_model.pt'):
    torch.save(model_ft, checkpoint_path)