# Sign Language Dataset

The Sign Language Dataset consists of 9680 grayscale images of hand signs for the digits 0-9 and the letters a-z. Thus, this is a multiclass classification problem with 36 classes. Your task is to build a machine learning model that can accurately classify images from this dataset.

## Loading the dataset

You **do not** need to upload any data. Both the visible training dataset and the hidden test dataset are already available on the Jupyter hub.

In [1]:
import os
import csv
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Locate the training dataset (it was already provided to you).
# The notebook counts as "local" when the JUPYTERHUB_USER environment variable is not set.
running_local = os.getenv('JUPYTERHUB_USER') is None

if not running_local:
    # On the Jupyter hub the data folder is already available.
    # You DO NOT need to upload the data!
    DATASET_PATH = "/data/mlproject21/sign_lang_train"
else:
    # On a local machine, put the path of the sign_lang_train folder here.
    # If that folder does not exist, fall back to the current directory.
    local_path = "sign_lang_train"
    DATASET_PATH = local_path if os.path.exists(local_path) else "."

In [3]:
# Utility function

def read_csv(csv_file):
    """Read a CSV file and return all of its rows.

    Args:
        csv_file: Path of the CSV file to open.

    Returns:
        A list with one entry per row; each row is a list of strings.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, newline='') as handle:
        return list(csv.reader(handle))

## Data Loading using PyTorch

For creating and training your model, you can work with any machine learning library of your choice. 

If you choose to work with [PyTorch](https://pytorch.org/), you will need to create your own [Dataset](https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset) class for loading the data. This is provided below. See [here](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html) for a nice example of how to create a custom data loading pipeline in PyTorch. 

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils, io
from torchvision.utils import make_grid

from string import ascii_lowercase

class SignLangDataset(Dataset):
    """Sign language dataset backed by a CSV annotation file.

    Every CSV row is used as ``[class_name, image_file, ...]``: column 0 holds
    the class name (one of '0'-'9', 'a'-'z') and column 1 the image path
    relative to ``root_dir``.
    """

    def __init__(self, csv_file, root_dir, class_index_map=None, transform=None):
        """
        Args:
            csv_file (string): Name of the csv file with annotations, relative to root_dir.
            root_dir (string): Directory with all the images.
            class_index_map (optional): Stored on the instance unchanged; this class does not use it.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.data = read_csv(os.path.join(root_dir, csv_file))
        self.root_dir = root_dir
        self.class_index_map = class_index_map
        self.transform = transform
        # List of class names in order: '0'..'9' followed by 'a'..'z'
        self.class_names = list(map(str, list(range(10)))) + list(ascii_lowercase)

    def __len__(self):
        """
        Returns the number of annotated samples (rows of the csv file).
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Returns one sample (dict consisting of an image and its label).

        Raises:
            OSError: If the image file cannot be read.
            ValueError: If the class name of the row is not in ``class_names``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Read the image and labels
        image_path = os.path.join(self.root_dir, self.data[idx][1])
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread does not raise on a missing/unreadable file, it returns None.
            # Fail here with the offending path instead of producing a broken sample.
            raise OSError(f"Could not read image file: {image_path!r}")
        # Add a leading channel axis: the 2-D grayscale image becomes (1, H, W)
        image = np.expand_dims(image, 0)
        # The label is the index of the class name in the list ['0','1',...,'9','a','b',...'z']
        # because we should have integer labels in the range 0-35 (for 36 classes)
        label = self.class_names.index(self.data[idx][0])

        sample = {'image': image, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample

## Prepare Dataset and Dataloaders for training and testing the network

In [5]:
# Full dataset. No transform is passed, so samples are returned exactly as SignLangDataset builds them.
sign_lang_dataset = SignLangDataset(root_dir=DATASET_PATH, csv_file="labels.csv")

# Number of samples in the entire dataset
data_len = len(sign_lang_dataset)

# Fraction of the dataset used for training;
# everything else ends up in the validation set
train_ratio = 0.8

# Sizes of the two splits (together they always add up to data_len)
train_size = int(data_len * train_ratio)
val_size = data_len - train_size

# Randomly partition the dataset into a training and a validation part
train_dataset, val_dataset = random_split(sign_lang_dataset, lengths=[train_size, val_size])

# One DataLoader per split, plus one over the complete dataset
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0)

val_dataloader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=True, num_workers=0)

leaderboard_dataloader = DataLoader(dataset=sign_lang_dataset, batch_size=64, shuffle=True, num_workers=0)

## Definition of our ANN

In the following cell we define our artificial neural network.

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from tqdm import tqdm


class Net(nn.Module):
    """Fully connected network with a single hidden layer.

    Layout: dropout -> fc1 -> ReLU -> dropout -> fc3. The output of fc3 is
    returned without any activation.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Args:
            input_size: Number of features per sample after flattening.
            hidden_size: Width of the hidden layer.
            output_size: Number of output values per sample.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # fc1 is registered before fc3, which fixes the parameter order.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        # The same dropout module (p=0.25) is used on the input and on the hidden activations.
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_size`` values and return the fc3 outputs."""
        flat = x.view(-1, self.input_size)
        hidden = F.relu(self.fc1(self.dropout(flat)))
        # No activation on the last layer: the raw outputs (logits) are returned.
        return self.fc3(self.dropout(hidden))

## Definition of hyperparameters (TODO: GRID SEARCH FOR BETTER PARAMETERS)

In [7]:
# Optimiser settings
LEARNING_RATE = 0.0065
MOMENT = 0.94  # momentum
NUM_EPOCHS = 25

# Network dimensions
INPUT_SIZE = 128 * 128  # size of one image (128 x 128 = 16384 values)
OUTPUT_SIZE = 36  # number of different labels
HIDDEN_SIZE = 400  # bigger than 400 seems useless with the current params

## Function to create and initialize a fresh neural network

In [8]:
def create_and_init_ann():
    """Build a fresh network together with its loss function and optimizer.

    The network is sized by INPUT_SIZE, HIDDEN_SIZE and OUTPUT_SIZE; the SGD
    optimizer is configured with LEARNING_RATE and MOMENT.

    Returns:
        Tuple ``(network, loss function, optimizer)``.
    """
    network = Net(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(network.parameters(), lr=LEARNING_RATE, momentum=MOMENT)
    return network, loss_fn, sgd

## Function to train the network

In [9]:
def train_neural_network_pytorch_minibatch(net, train_loader, optimizer, criterion, num_epochs):
    """Train ``net`` in place with one optimizer step per minibatch.

    Args:
        net: Network to train; it is switched to training mode.
        train_loader: Iterable of batches, each a dict with 'image' and 'label' entries.
        optimizer: Optimizer that updates the parameters of ``net``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
    """
    net.train()
    for _epoch in range(num_epochs):
        # tqdm shows one progress bar per epoch
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            # Images are divided by 255 before they are fed to the network
            scaled_images = batch['image'] / 255
            logits = net(scaled_images)
            batch_loss = criterion(logits, batch['label'])
            batch_loss.backward()
            optimizer.step()

## Function to save the trained network to disk

In [10]:
def save_net(net, path="fresh_model.pt"):
    """Write the ``state_dict`` of a network to disk.

    Args:
        net: Module whose ``state_dict()`` is saved.
        path: Destination file. Defaults to "fresh_model.pt", the location
            that was previously hard-coded. An existing file is overwritten.
    """
    torch.save(net.state_dict(), path)

## Train a fresh network
If you want to train and save a fresh network, uncomment the lines below.
Attention: your old saved network gets overwritten during this process.
Attention_2: currently only a fraction of the data is used for training. 
TODO: Train with the whole dataset before handing in.

In [11]:
# Build a fresh network, train it on the loader that covers the complete dataset,
# then store its weights on disk.
ann, crit, opti = create_and_init_ann()
train_neural_network_pytorch_minibatch(
    net=ann,
    train_loader=leaderboard_dataloader,
    optimizer=opti,
    criterion=crit,
    num_epochs=NUM_EPOCHS,
)
save_net(net=ann)

  Variable._execution_engine.run_backward(
  8%|▊         | 12/152 [00:05<00:56,  2.49it/s]

KeyboardInterrupt: 

In [12]:
def calc_accuracy_minibatch(net, data_loader):
    """
    Calculates the overall accuracy by using minibatches.

    Args:
        net: Model called as ``net(images)``. It is switched to eval mode and left in it.
        data_loader: Iterable of batches, each a dict with 'image' and 'label' entries.

    Returns:
        Correctly classified samples divided by the number of samples that were
        actually iterated, or 0.0 when there were none.
    """
    net.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for data in data_loader:
            labels = data['label']
            output = net(data['image'] / 255)
            pred = output.argmax(dim=1, keepdim=True)  # index of the largest output per sample
            correct += pred.eq(labels.view_as(pred)).sum().item()
            # Count what was really evaluated: len(data_loader.dataset) is too large
            # when the loader drops the last incomplete batch.
            seen += labels.numel()

    if seen == 0:
        return 0.0
    return correct / seen

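If you trained your network using the train_dataloader, you can check the accuracies with the following cell. Note that we used the **complete dataset (leaderboard_dataloader)** for our final model to improve its accuracy; in that case the validation images were also seen during training, so the second number is not a held-out estimate.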

In [13]:
# Accuracy of the network `ann` on the training split, then on the validation split.
print(calc_accuracy_minibatch(ann, train_dataloader))
print(calc_accuracy_minibatch(ann, val_dataloader))

0.13300619834710745
0.13068181818181818


## Grid search

We used the following code to search for optimal parameters. Note that we edited this several times to find the best hyperparameters (so the current search ranges do not fit the actual parameters we found). This section also contains the leader board predict function from the other notebook (just for completeness) 

In [14]:
def leader_board_predict_fn(input_batch, hidden_size):
    """Predict class indices for a batch of images with the model stored in "saved_model.pt".

    Args:
        input_batch (np.ndarray): 4-D array unpacked as (batch, channels, height, width).
            Values are divided by 255 before they are fed to the network.
        hidden_size (int): Hidden layer width used to rebuild ``Net`` before the
            saved weights are loaded into it.

    Returns:
        np.ndarray: Array of shape (batch,) with the predicted class index per image.
    """
    # The unpacking also rejects inputs that are not 4-dimensional.
    batch_size, _channels, _height, _width = input_batch.shape

    scaled = (input_batch / 255).astype(np.float32)

    # The network is rebuilt and the weights are reloaded on every call.
    net = Net(INPUT_SIZE, hidden_size, OUTPUT_SIZE).float()
    net.load_state_dict(torch.load("saved_model.pt"))
    net.eval()

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        logits = net(torch.from_numpy(scaled))

    prediction = logits.argmax(dim=1, keepdim=True).numpy().reshape((batch_size,))
    assert isinstance(prediction, np.ndarray), "Prediction must be a numpy array"

    return prediction

def accuracy(dataset_path, hidden_size, max_batches=30):
    """Average per-batch accuracy of ``leader_board_predict_fn`` on a dataset folder.

    Args:
        dataset_path: Directory that contains "labels.csv" and the images.
        hidden_size: Passed on to ``leader_board_predict_fn``.
        max_batches: Number of shuffled batches (64 samples each) to evaluate.

    Returns:
        Mean of the per-batch accuracies over the first ``max_batches`` batches.

    Raises:
        AssertionError: If fewer than ``max_batches`` batches were evaluated.
    """
    # Create a Dataset object
    sign_lang_dataset = SignLangDataset(csv_file="labels.csv", root_dir=dataset_path)

    # Create a Dataloader (drop_last=True: every batch has exactly 64 samples)
    sign_lang_dataloader = DataLoader(sign_lang_dataset,
                                      batch_size=64,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=0)

    # Calculate accuracy for each batch
    accuracies = list()
    for batch_idx, sample in enumerate(sign_lang_dataloader):
        x = sample["image"].numpy()
        y = sample["label"].numpy()
        prediction = leader_board_predict_fn(x, hidden_size)
        # Fraction of exactly matching labels. Computed with numpy because
        # accuracy_score is not imported anywhere in this notebook.
        accuracies.append(np.mean(prediction == y))

        # We will consider only the first max_batches batches
        if batch_idx == (max_batches - 1):
            break

    assert len(accuracies) == max_batches, "Not enough batches to evaluate max_batches batches"

    # Return the average accuracy
    mean_accuracy = np.mean(accuracies)
    return mean_accuracy

def calc_accuracy_minibatch(net, data_loader):
    """
    Calculates the overall accuracy by using minibatches.

    Note: a function with this name is also defined earlier in the notebook;
    once this cell has run, this definition is the one in effect.

    Args:
        net: Model called as ``net(images)``. It is switched to eval mode and left in it.
        data_loader: Iterable of batches, each a dict with 'image' and 'label' entries.

    Returns:
        Correctly classified samples divided by the number of samples that were
        actually iterated, or 0.0 when there were none.
    """
    net.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for data in data_loader:
            labels = data['label']
            output = net(data['image'] / 255)
            pred = output.argmax(dim=1, keepdim=True)  # index of the largest output per sample
            correct += pred.eq(labels.view_as(pred)).sum().item()
            # Count what was really evaluated: len(data_loader.dataset) is too large
            # when the loader drops the last incomplete batch.
            seen += labels.numel()

    if seen == 0:
        return 0.0
    return correct / seen




def grid_search():
    """Train one network per hyperparameter combination and record its accuracies.

    For every combination a fresh ``Net`` is trained on ``train_dataloader``,
    saved with ``save_net`` (each run overwrites the previous file) and
    evaluated on ``train_dataloader`` and ``val_dataloader``. One line per run
    is appended to "params.txt" right away, so finished runs stay on disk if
    the search is interrupted.

    Returns:
        List of tuples ``(lr, hs, mm, ne, train_accuracy, test_accuracy)``.
    """
    from itertools import product

    # The float grids are built from integers. With a float step, np.arange makes
    # the number of values (and whether the end point shows up) depend on rounding.
    learning_rates = [i / 1000 for i in range(6, 10)]  # 0.006 ... 0.009
    hidden_sizes = range(350, 500, 50)                 # 350, 400, 450
    momenta = [i / 100 for i in range(92, 97)]         # 0.92 ... 0.96
    epoch_counts = range(8, 11)                        # 8, 9, 10

    params = list()
    # product() iterates in the same order as nested loops: lr outermost, ne innermost.
    for lr, hs, mm, ne in product(learning_rates, hidden_sizes, momenta, epoch_counts):
        ann = Net(INPUT_SIZE, hs, OUTPUT_SIZE)
        crit = nn.CrossEntropyLoss()
        opti = optim.SGD(ann.parameters(), lr=lr, momentum=mm)
        train_neural_network_pytorch_minibatch(ann, train_dataloader, opti, crit, ne)
        save_net(ann)
        train_accuracy = calc_accuracy_minibatch(ann, train_dataloader)
        test_accuracy = calc_accuracy_minibatch(ann, val_dataloader)
        params.append((lr, hs, mm, ne, train_accuracy, test_accuracy))
        output = f'lr = {lr}, hs = {hs}, mm = {mm}, ne = {ne}, acc_train = {train_accuracy}, acc_test = {test_accuracy}'
        with open("params.txt", "a") as file_object:
            file_object.write(output + '\n')
    return params

In [15]:
#params = grid_search()
#import pickle
#with open("param_list.pyobj", "wb") as dest:
    #pickle.dump(params, dest, pickle.HIGHEST_PROTOCOL)

#max_acc = 1e9
#best_params = None
#for param_set in params:
#    if param_set[4] - param_set[5] <= max_acc:
#        max_acc = param_set[5]
#        best_params = param_set
#print(max_acc)
#print(best_params)
#output = f'least_delta: {max_acc}, params: {best_params}'
#with open("params.txt", "a") as file_object:
                        #file_object.write(output)
                        #file_object.write('\n')