# Training Notebook

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm.notebook as tq
import numpy as np

from network import Network
from parameters import INPUT_SIZE, \
    HIDDEN_SIZE, \
    OUTPUT_SIZE, \
    LEARNING_RATE, \
    MOMENTUM, \
    MAX_ITERATIONS


## Dataloader

In [7]:
import os
from network import SignLangDataset
from torch.utils.data import DataLoader, random_split

# We are on a local machine exactly when no JupyterHub user is set in the environment.
running_local = os.getenv('JUPYTERHUB_USER') is None

# Pick the dataset directory for the current environment.
if not running_local:
    # On the Jupyter hub the data folder is already available;
    # you DO NOT need to upload the data!
    DATASET_PATH = "/data/mlproject21/sign_lang_train"
else:
    # On a local machine, point this at the sign_lang_train folder.
    # If that folder is missing, fall back to the current directory.
    local_path = "sign_lang_train"
    DATASET_PATH = local_path if os.path.exists(local_path) else "."

In [8]:
# Load the labelled dataset from the directory resolved above.
dataset = SignLangDataset(csv_file="labels.csv", root_dir=DATASET_PATH)

# Batch size shared by every DataLoader below (named once instead of repeated).
BATCH_SIZE = 64
# Fixed seed so the train/test partition is the same on every Restart & Run All.
SPLIT_SEED = 0

# 80/20 split; the test length is the remainder so both parts always sum to len(dataset).
TRAINING_DATA_LENGTH = int(len(dataset) * 0.8)
TESTING_DATA_LENGTH = len(dataset) - TRAINING_DATA_LENGTH

# A dedicated, seeded generator keeps the split reproducible without touching
# the global torch RNG state.
training_data, testing_data = random_split(
    dataset,
    [TRAINING_DATA_LENGTH, TESTING_DATA_LENGTH],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Loader over every sample (train and test portions together).
full_dataloader = DataLoader(dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=0)
# Loader over the 80% training portion only.
train_dataloader = DataLoader(training_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=0)

# Loader over the 20% held-out portion.
test_dataloader = DataLoader(testing_data,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=0)

## Training Function

In [4]:
def train_network(net, iterations=MAX_ITERATIONS, learning_rate=LEARNING_RATE, momentum=MOMENTUM,
                  dataloader=None):
    """
    Train `net` with SGD on a cross-entropy loss and save its weights to 'network.pt'.

    Args:
        net: Model to train in place. It is called on a batch of scaled images and
            its output is passed to nn.CrossEntropyLoss together with the labels.
        iterations (int): Number of full passes over the dataloader.
        learning_rate (float): Learning rate handed to optim.SGD.
        momentum (float): Momentum handed to optim.SGD.
        dataloader: Iterable of batches that can be indexed with 'image' and 'label'.
            Defaults to the module-level `full_dataloader`, which is built from the
            whole dataset and therefore also contains the samples served by
            `test_dataloader`. Pass `train_dataloader` when a held-out score is needed.

    Side effects:
        Overwrites 'network.pt' in the working directory with `net.state_dict()`.
    """
    if dataloader is None:
        # Resolved at call time so the default follows the current module-level loader.
        dataloader = full_dataloader

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)

    net.train()
    for _ in tq.trange(iterations):
        # leave=False removes each finished per-pass bar so the cell output
        # does not accumulate one stale progress bar per iteration.
        for data in tq.tqdm(dataloader, leave=False):
            optimizer.zero_grad()

            # Scale the images by 1/255 (assumes 0-255 pixel values - TODO confirm).
            # Inference must apply the same scaling.
            inputs = data['image'] / 255
            labels = data['label']

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()
    torch.save(net.state_dict(), 'network.pt')

# Build the model from the sizes imported from the parameters module,
# then train it with the default hyperparameters.
network = Network(
    INPUT_SIZE,
    HIDDEN_SIZE,
    OUTPUT_SIZE,
)
train_network(net=network)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

In [9]:
# Optional in-memory model used for prediction; set by grid_search().
# When None, a network with the default sizes is built on each prediction call.
global_network = None


def leader_board_predict_fn(input_batch):
    """
    Function for making predictions using your trained model.

    The weights are loaded from 'network.pt' on every call, and the input is
    scaled by 1/255 exactly as in train_network, so the model sees inputs in
    the same range it was trained on.

    Args:
        input_batch (numpy array): Input images (4D array of shape
                                   [batch_size, 1, 128, 128])

    Returns:
        output (numpy array): Predictions of your trained model
                             (1D array of int (0-35) of shape [batch_size, ])

    Raises:
        ValueError: If `input_batch` is not a 4D array.
    """
    if input_batch.ndim != 4:
        raise ValueError(
            f"input_batch must be 4D [batch_size, channels, height, width], got shape {input_batch.shape}"
        )

    # Explicit None check: a module that defines __len__ (e.g. an empty container)
    # would be falsy, so `global_network or ...` is not a reliable test.
    if global_network is not None:
        network = global_network
    else:
        network = Network(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE).float()
    network.load_state_dict(torch.load("network.pt"))
    network.eval()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # Same 1/255 scaling that train_network applies to its inputs.
        scaled_batch = torch.from_numpy(input_batch).float() / 255
        logits = network(scaled_batch)
    prediction = torch.argmax(logits, dim=1).numpy()

    assert prediction is not None, "Prediction cannot be None"
    assert isinstance(prediction, np.ndarray), "Prediction must be a numpy array"

    return prediction

## Evaluation

Your final model will be evaluated on a hidden test set containing images similar to the dataset that you are provided with.

For evaluating the performance of your model, we will use the normalized [accuracy_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score) metric from sklearn. This is simply the percentage of correct predictions that your model makes for all the images of the hidden test set. Hence, if all the predictions are correct, the score is 1.0 and if all predictions are incorrect, the score is 0.0. We will use the sklearn metric so that the accuracy function is agnostic to the machine learning framework you use.

In [10]:
from sklearn.metrics import accuracy_score


def accuracy(dataset_path, max_batches=30):
    """
    Compute the mean per-batch prediction accuracy on a dataset directory.

    IMPORTANT
    =========
    PyTorch is used here only to load the data. `leader_board_predict_fn` receives
    numpy arrays and must return a numpy array, so any framework can be used for
    training and prediction as long as that function consumes and produces numpy
    arrays of the proper shapes.

    Args:
        dataset_path (str): Path of the dataset directory
        max_batches (int): Number of batches to score; scoring stops after this many

    Returns:
        accuracy (float): Average accuracy score over the scored batches
                          (float in the range 0.0-1.0)
    """

    # Shuffled loader over the dataset; incomplete final batches are dropped.
    loader = DataLoader(
        SignLangDataset(csv_file="labels.csv", root_dir=dataset_path),
        batch_size=64,
        shuffle=True,
        drop_last=True,
        num_workers=0,
    )

    # Score one batch at a time, counting batches from 1.
    batch_scores = []
    for batch_number, batch in enumerate(loader, start=1):
        images = batch["image"].numpy()
        targets = batch["label"].numpy()
        predicted = leader_board_predict_fn(images)
        batch_scores.append(accuracy_score(targets, predicted, normalize=True))

        # Only the first `max_batches` batches are considered.
        if batch_number == max_batches:
            break

    assert len(batch_scores) == max_batches

    # Average of the per-batch accuracies.
    return np.mean(batch_scores)

We will now use your `leader_board_predict_fn` function for calculating the accuracy of your model. As a check, we provide the code for testing your loaded model on the visible training data. There will be a hidden test which will evaluate your model's performance on the hidden test dataset (this is not visible to you when you validate this notebook).

In [11]:
def test_accuracy(max_batches=30):
    """
    Compute the mean per-batch prediction accuracy on `test_dataloader`.

    Args:
        max_batches (int): Number of batches to score; scoring stops after this many

    Returns:
        accuracy (float): Average of the per-batch accuracy scores
    """
    batch_scores = []
    for batch_number, batch in enumerate(test_dataloader, start=1):
        images = batch["image"].numpy()
        targets = batch["label"].numpy()
        predicted = leader_board_predict_fn(images)
        batch_scores.append(accuracy_score(targets, predicted, normalize=True))

        # Only the first `max_batches` batches are considered.
        if batch_number == max_batches:
            break

    assert len(batch_scores) == max_batches

    # Average of the per-batch accuracies.
    return np.mean(batch_scores)

## Grid Search

Grid search for the best hyperparameters. \
It iterates over:
* hidden layer size
* learning rate

In [None]:
def grid_search():
    """
    Train one network per (hidden layer size, learning rate) pair and print the scores.

    Hidden layer sizes run over range(50, 601, 25). Learning rates run over
    range(1, 41, 3) and are divided by 1000 before training. Each combination is
    trained for 15 iterations and scored with both accuracy() and test_accuracy().

    Prints the entry with the highest test_accuracy() score, followed by the full
    list of (hidden_layer, lr, dataset_score, test_score) tuples, where `lr` is the
    undivided integer from the range.

    Side effects:
        Rebinds the module-level `global_network` to the most recently trained
        network. Every train_network() call also overwrites 'network.pt', so after
        the search that file holds the weights of the last combination tried.
    """
    global global_network

    accuracies = []
    for hidden_layer in tq.trange(50, 601, 25):
        for lr in range(1, 41, 3):
            # Start every combination from a freshly initialised network. Reusing one
            # network across learning rates would start each run from the previous
            # run's trained weights, making the scores incomparable.
            global_network = Network(INPUT_SIZE, hidden_layer, OUTPUT_SIZE)
            train_network(global_network, iterations=15, learning_rate=(lr / 1000.0))

            # Re-seed before scoring so each combination is evaluated from the
            # same RNG state.
            seed = 200
            torch.manual_seed(seed)
            np.random.seed(seed)
            dataset_score = accuracy(dataset_path=DATASET_PATH)
            accuracies.append((hidden_layer, lr, dataset_score, test_accuracy()))
    print(max(accuracies, key=lambda x: x[3]))
    print(accuracies)

# grid_search()
