In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import string
import os
import cv2
from glob import glob

import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from torch.autograd import Variable
import torchvision.transforms as transforms

In [2]:
train_data_raw = np.array(glob('asl-alphabet/asl_alphabet_train/asl_alphabet_train/*/*'))
test_data_raw = np.array(glob('asl-alphabet/asl_alphabet_test/asl_alphabet_test/*'))

In [3]:
print('There are %d total train images.' % len(train_data_raw))
print('There are %d total test images.' % len(test_data_raw)) #No test image for delete

There are 87000 total train images.
There are 28 total test images.


In [4]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler

In [5]:
train_transform = transforms.Compose([ transforms.Grayscale(num_output_channels=1),
                                transforms.Resize(size=(50,50)),
                                transforms.ToTensor(),
                                transforms.Normalize([0.5], [0.5])])
# valid_transform = transforms.Compose([transforms.Resize(256),
#                                     transforms.CenterCrop(224),
#                                     transforms.ToTensor(),
#                                     transforms.Normalize(mean=[0.485, 0.456, 0.406], 
#                                                         std=[0.229, 0.224, 0.225])])
test_transform = transforms.Compose([ transforms.Grayscale(num_output_channels=1),
                                    transforms.Resize(size=(50,50)),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.5], [0.5])])

train_data = datasets.ImageFolder(root = 'asl-alphabet/asl_alphabet_train/asl_alphabet_train', transform=train_transform)
# valid_data = datasets.ImageFolder(root = 'dogImages/valid', transform=valid_transform)
test_data = datasets.ImageFolder(root='asl-alphabet/asl_alphabet_test', transform=test_transform)

# print number of images in each dataset
print('There are %d total train images.' % len(train_data))
# print('There are %d total dog validation images.' % len(valid_data))
print('There are %d total test images.' % len(test_data))


trainloader = torch.utils.data.DataLoader(train_data, batch_size=20,shuffle=True)
# validloader = torch.utils.data.DataLoader(valid_data, batch_size=20,shuffle=False)
testloader = torch.utils.data.DataLoader(test_data,batch_size=20, shuffle=False)


loaders = dict(train=trainloader,
#                        valid = validloader,
                       test=testloader)

There are 87000 total train images.
There are 28 total test images.


In [6]:
dim=50

In [7]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(1,10,3)
        self.conv2 = nn.Conv2d(10,20,3)
        self.conv3 = nn.Conv2d(20,30,3)
        
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout2d(0.2)
        
        self.fc1 = nn.Linear(2430, 270)
        self.fc2 = nn.Linear(270,29)
        
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = self.softmax(F.relu(self.fc2(x)))
        return(x)
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [8]:
use_cuda = torch.cuda.is_available()

# move model to GPU if CUDA is available
if use_cuda:
    model = Network().cuda()
else:
    model = Network()

In [9]:
summary(model, (1,dim,dim)) #takes the model and the input tensor shape, displays the output shape

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 48, 48]             100
         MaxPool2d-2           [-1, 10, 24, 24]               0
            Conv2d-3           [-1, 20, 22, 22]           1,820
         MaxPool2d-4           [-1, 20, 11, 11]               0
            Conv2d-5             [-1, 30, 9, 9]           5,430
         Dropout2d-6             [-1, 30, 9, 9]               0
            Linear-7                  [-1, 270]         656,370
            Linear-8                   [-1, 29]           7,859
        LogSoftmax-9                   [-1, 29]               0
Total params: 671,579
Trainable params: 671,579
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.35
Params size (MB): 2.56
Estimated Total Size (MB): 2.92
-------------------------------------------

In [10]:
epochs = 50
learning_rate = 0.001

In [11]:
optimizer = optim.SGD(model.parameters(), learning_rate, momentum=0.007)
criterion = nn.CrossEntropyLoss()

In [12]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output,target)
            loss.backward()
            optimizer.step()
            
            train_loss = train_loss + ((1/(batch_idx +1))*(loss.data - train_loss))
            
            if batch_idx % 1000 == 0:
                print('Epoch %d, Batch %d loss: %.6f' %(epoch, batch_idx + 1, train_loss))
            
#             print('Epoch: {} \tTraining Loss: {:.6f} '.format(
#             epoch, 
#             train_loss,
#             ))
                
    # return trained model
    return model
            
        ######################    
        # validate the model #
        ######################
#         model.eval()
#         for batch_idx, (data, target) in enumerate(loaders['valid']):
#             # move to GPU
#             if use_cuda:
#                 data, target = data.cuda(), target.cuda()
#             ## update the average validation loss
#             output = model(data)
#             loss = criterion(output,target)
#             valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))


        # print training/validation statistics 
#         print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
#             epoch, 
#             train_loss,
#             valid_loss
#             ))
        
        ## TODO: save the model if validation loss has decreased
#         if valid_loss <= valid_loss_min:
#             print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
#             valid_loss_min,
#             valid_loss))
#             torch.save(model.state_dict(), 'model_scratch.pt')
#             valid_loss_min = valid_loss
            
    

In [None]:
# train the model
model_scratch = train(epochs, loaders, model, optimizer, criterion, use_cuda)

Epoch 1, Batch 1 loss: 3.361695
Epoch 1, Batch 1001 loss: 3.367375


In [None]:
plt.figure(figsize=(10,8))
plt.plot(loss_log[2:])
plt.plot(acc_log)
plt.plot(np.ones(len(acc_log)), linestyle='dashed')
plt.show()

In [None]:
predictions = model(Variable(test_x))
accuracy, correct, total = model.test(torch.max(predictions.data, 1)[1], test_y)

In [None]:
print("Accuracy = " + str(accuracy) + " ("+str(correct)+"/"+str(total)+")")

In [None]:
def predict(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (dim,dim))
    plt.imshow(img, cmap="gray")
    img = img.reshape(1,1, dim, dim)
    np_img = np.array(img)
    input_img = torch.FloatTensor(np_img)
    
    if use_cuda:
        input_img = input_img.cuda()
        
    pred = model(Variable(input_img))
    pred = torch.max(pred.data, 1)[1].cpu().numpy()[0]
    return pred

In [None]:
prediction = predict('./c.jpg')
lab = 'c'

In [None]:
print("Prediction: {}".format(alph[prediction]))
print("Actual Label: {}".format(lab))

In [None]:
# pixels = cv2.imread('./c.jpg').reshape(28, 28)
# plt.subplot(223)
# sns.heatmap(data=pixels)
# lab = 'c'
# test_sample = torch.FloatTensor([pixels.reshape(1, 28, 28).tolist()])
# pred = model(Variable(input_img))
# print("Prediction: {}".format(alph[torch.max(net_out_sample.data, 1)[1].numpy()[0]]))
# print("Actual Label: {}".format(lab))
