# Final ASL Classifier Pipeline

This file will be the code implementation for the pipeline we are using to classify ASL letters.

We will first need to import the necessary libraries and load the training data. Then, we will define the Convolutional Neural Network class from which our model is built. Next, we define the following helper functions, which are called by the two main functions (TrainFunction and TestFunction):
- PreProcess: Takes in a dataset and its labels (either the training set or the test set). Outputs the data converted to grayscale and the labels encoded as integers, both in tensor form.
- TrainModel: Takes in a model to be trained along with values for the training batch size, the number of epochs we are training for, the initial learning rate, and finally the pre-processed training data and labels. The function then trains the input model in place using those parameters and a cross-entropy loss.
- EvaluateModel: Takes in a pre-trained model along with the pre-processed test data and labels, then evaluates the model on the test set. Returns accuracy on the test set as well as the predicted class labels encoded as numbers.
- PostProcess: Takes in the original (letter) labels and the predicted class labels encoded as integers, then returns the predicted labels as letters.

Next we define the two main functions, TrainFunction and TestFunction. These are pretty self-explanatory; the first trains a model and the second evaluates a pre-trained model. TrainFunction takes only data and labels, while TestFunction additionally takes the model to evaluate; each of them calls the helper functions above internally.

Finally we will call these two functions, passing in the appropriate datasets.

In [1]:
# Import required libraries

import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sb
from tqdm import tqdm
import time
plt.style.use('seaborn')

from skimage.color import rgb2gray
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Using PyTorch methods on training data
import torch
import torchvision as tv
from torchvision import transforms as tv_tf

# Import neural net
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the provided datasets from .npy files in the working directory

# Data and labels used for training (passed to TrainFunction)
train_data, train_labels = (
    np.load('provided_train_data.npy'),
    np.load('provided_train_labels.npy'),
)

# Data and labels used for testing (passed to TestFunction)
test_data, test_labels = (
    np.load('provided_test_data.npy'),
    np.load('provided_test_labels.npy'),
)

In [2]:
# Define convolutional neural net class for grayscale images

class ConvNetGray(nn.Module):
    """Three-block convolutional classifier for single-channel (grayscale) images.

    Architecture: three ``Conv2d(kernel_size=3) -> ReLU -> 2x2 max-pool``
    stages (32, 64 and 128 channels) followed by two fully connected layers.

    Args:
        num_classes: Number of output classes (size of the final layer).
            Defaults to 9, the value previously hard-coded.
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. Defaults to
            100, the value previously hard-coded.

    ``forward`` returns raw, unnormalised class scores (logits) of shape
    ``(batch, num_classes)``. ``nn.CrossEntropyLoss`` applies log-softmax
    itself, so applying softmax inside the model would normalise the scores
    twice and flatten the gradients. ``argmax`` over the logits gives the
    same predictions as ``argmax`` over the softmax probabilities; call
    ``F.softmax(output, dim=1)`` on the result when probabilities are needed.
    """

    def __init__(self, num_classes=9, input_size=100):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

        # Push one dummy image through the conv stack to learn how many
        # features reach the first linear layer. torch.randn is kept (rather
        # than zeros) so the random-number stream, and therefore seeded
        # weight initialisation, is unchanged; no_grad avoids building a
        # throwaway autograd graph.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(height, width).view(-1, 1, height, width))

        # Fully connected layers
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def convs(self, x):
        """Apply the three conv/ReLU/max-pool stages to a (N, 1, H, W) batch."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        # First call only (from __init__): record the flattened feature count
        if self._to_linear is None:
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for a (N, 1, H, W) batch."""
        x = self.convs(x)
        # Flatten everything except the batch dimension; unlike
        # view(-1, n) this cannot silently change the batch size when the
        # input image has an unexpected size (fc1 raises instead).
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [3]:
# Preprocessing function:
# Takes in data and labels, returns transformed data and labels
# Transformations: Data will be converted to grayscale, labels will be encoded to be integers

def PreProcess(data, labels):
    """Convert raw images and labels into tensors.

    The images are passed through ``rgb2gray`` and the labels are flattened
    and integer-encoded with a ``LabelEncoder`` fitted on these labels only.

    Returns:
        A pair ``(X, y)``: the grayscale images as a default-float tensor
        and the encoded labels as a long tensor.
    """
    # Grayscale conversion of the whole dataset
    gray_images = rgb2gray(data)

    # Integer encoding of the class labels
    encoded_labels = LabelEncoder().fit_transform(labels.ravel())

    # Tensor conversion
    images_tensor = torch.Tensor(gray_images)
    labels_tensor = torch.Tensor(encoded_labels).long()
    return images_tensor, labels_tensor

In [4]:
# Function to train a model
# Parameters passed are an un-trained CNN model, Batch Size, Epochs, and Initial Learning Rate (the loss function is always cross-entropy)
# We will also pass in the data with which we want to train the model - used for comparing the training sets

def TrainModel(CNN, BatchSize, Epochs, Init_LR, X, y):
    """Train ``CNN`` in place with Adam and cross-entropy loss.

    Args:
        CNN: The ``nn.Module`` to train; its parameters are updated in place.
        BatchSize: Number of samples per mini-batch (must be >= 1).
        Epochs: Number of full passes over ``X``.
        Init_LR: Learning rate handed to the Adam optimizer.
        X: Tensor of pre-processed images. When it has at least three
            dimensions the last two are used as the image height and width;
            otherwise each sample is reshaped to 100x100 as before.
        y: Long tensor of integer class labels aligned with ``X``.

    Returns:
        None. Samples are visited in their stored order (no shuffling).

    Raises:
        ValueError: If ``BatchSize`` is smaller than 1. A negative batch
            size previously made the loop silently do nothing.

    NOTE: ``nn.CrossEntropyLoss`` applies log-softmax itself, so a model
    whose ``forward`` already applies softmax is normalised twice.
    """
    if BatchSize < 1:
        raise ValueError("BatchSize must be a positive integer")

    # Take the image size from the data instead of assuming 100x100
    height, width = X.shape[-2:] if X.dim() >= 3 else (100, 100)

    opt = optim.Adam(CNN.parameters(), lr=Init_LR)
    loss_function = nn.CrossEntropyLoss()

    # Make sure train-time behaviour is active even if the model was
    # handed over in eval mode.
    CNN.train()

    for _ in range(Epochs):
        for start in range(0, len(X), BatchSize):
            stop = start + BatchSize
            # Add the single grayscale channel dimension: (n, 1, H, W)
            batch_X = X[start:stop].reshape(-1, 1, height, width)
            batch_y = y[start:stop]

            opt.zero_grad()
            loss = loss_function(CNN(batch_X), batch_y)
            loss.backward()
            opt.step()


In [5]:
# Model evaluation function: Takes a pre-trained model and evaluates it using pre-processed test data w/ labels, outputs accuracy and predicted labels (encoded as numbers)

def EvaluateModel(model, X, y, batch_size=256):
    """Evaluate ``model`` on pre-processed data and report its accuracy.

    Args:
        model: The ``nn.Module`` to evaluate. It is switched to eval mode
            for the duration of the call and restored afterwards.
        X: Tensor of pre-processed images. When it has at least three
            dimensions the last two are used as the image height and width;
            otherwise each sample is reshaped to 100x100 as before.
        y: Integer class labels aligned with ``X``.
        batch_size: Number of samples sent through the model at once.

    Returns:
        A pair ``(accuracy, y_pred_nums)``: the fraction of correct
        predictions rounded to three decimals, and a float NumPy array with
        the predicted class index for every sample. Empty input yields
        ``(0.0, empty array)`` instead of a ZeroDivisionError.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths.
    """
    if len(X) != len(y):
        raise ValueError("X and y must contain the same number of samples")

    total = len(X)
    y_pred_nums = np.zeros(total)
    if total == 0:
        return 0.0, y_pred_nums

    # Take the image size from the data instead of assuming 100x100
    height, width = X.shape[-2:] if X.dim() >= 3 else (100, 100)
    y_true = torch.as_tensor(y).reshape(-1)

    correct = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for start in range(0, total, batch_size):
                stop = start + batch_size
                # Add the single grayscale channel dimension: (n, 1, H, W)
                output = model(X[start:stop].reshape(-1, 1, height, width))
                preds = output.argmax(dim=1).cpu()
                correct += int((preds == y_true[start:stop]).sum())
                y_pred_nums[start:stop] = preds.numpy()
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    return round(correct / total, 3), y_pred_nums

In [6]:
# Post-processing function to convert the predicted labels from numbers to letters

def PostProcess(labels, nums):
    """Map predicted class numbers back to their original labels.

    A fresh ``LabelEncoder`` is fitted on the flattened ``labels`` to
    rebuild the label-to-number mapping, then ``nums`` (cast to int) is
    decoded with its ``inverse_transform``.

    NOTE(review): the mapping is rebuilt from the labels passed in here, so
    it presumably only matches the model's class indices when these labels
    contain the same set of classes as the training labels - confirm.
    """
    # Rebuild the label <-> number mapping from the supplied labels
    encoder = LabelEncoder()
    encoder.fit(labels.ravel())

    # Predictions arrive as floats; the encoder needs integer indices
    predicted_indices = nums.astype(int)
    return encoder.inverse_transform(predicted_indices)

Here are the two main functions, TrainFunction and TestFunction:

In [7]:
# TrainFunction will take in the training data, pre-process it, generate an un-trained model, train that model, and return it
def TrainFunction(X, y):
    """Pre-process the data, build a fresh ConvNetGray, train it and return it.

    Training uses batches of 128 samples, 1000 epochs and an initial
    learning rate of 0.001.
    """
    # Grayscale tensors and integer-encoded labels
    features, targets = PreProcess(X, y)

    # Fresh, un-trained network; TrainModel updates it in place
    model = ConvNetGray()
    hyperparameters = dict(BatchSize=128, Epochs=1000, Init_LR=0.001)
    TrainModel(CNN=model, X=features, y=targets, **hyperparameters)

    return model

In [8]:
# TestFunction will take in a pre-trained model and a dataset, pre-process the dataset, then evaluate the model on it
# Returns accuracy and predicted labels
def TestFunction(model, X, y):
    """Evaluate ``model`` on raw data ``X`` with raw labels ``y``.

    Returns:
        A pair ``(accuracy, predicted_labels)`` where the predicted labels
        have been decoded from class numbers by PostProcess.
    """
    # Grayscale tensors and integer-encoded labels
    features, targets = PreProcess(X, y)

    # Accuracy plus the predictions as class numbers
    accuracy, predicted_nums = EvaluateModel(model, features, targets)

    # Decode the class numbers using the original labels
    return accuracy, PostProcess(labels=y, nums=predicted_nums)

Run this cell as specified in the readme to execute the code:

In [9]:
# To train a new model instead of loading the saved one, uncomment:
# trained_model = TrainFunction(X=train_data, y=train_labels)

# Load our pre-trained model onto the CPU.
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source.
pretrained_model = torch.load(
    'CNN_23.pt',
    map_location=torch.device('cpu'),
)

# Evaluate the loaded model on the test set
accuracy, predicted_labels = TestFunction(pretrained_model, test_data, test_labels)