# Neural Network In Pytorch
This assignment aims to familiarize you with building the whole deep learning pipeline in PyTorch to perform classification, and with testing it on the MNIST dataset. All the code will be implemented in this notebook.

In [None]:
# Mount Google Drive at /content/drive inside the Colab VM.
# NOTE(review): only works when the notebook runs on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


First, let's install modules not already installed by Google Colab.

## Task 1: Design the Neural Network and Data Preparation


In the beginning, please import all the packages you need. We provide some packages here, which might be helpful when you build your code.

In [None]:
# Have IPython display the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import cuda
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, sampler
import torch.nn.functional as F
import math
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from numpy import inf
import random


from sklearn.model_selection import train_test_split
import os
from glob import glob
from torchvision import datasets, transforms
from torchvision import models
from torch import optim, cuda, Tensor
import tqdm

# Data science tools
import numpy as np

import os

# Image manipulations
from PIL import Image
from timeit import default_timer as timer

# Visualizations
import matplotlib.pyplot as plt
#plt.rcParams['font.size'] = 14

import warnings

# Hide FutureWarning messages emitted by the imported libraries.
warnings.simplefilter('ignore', FutureWarning)

# Fix the NumPy and PyTorch global RNG seeds so runs are repeatable.
np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x7fb1714f5d30>

### Network Design
Then, we need to define a class for your network. The network should include two linear layers, one ReLU layer, and one dropout layer.

In [None]:
# define model
class bmodel(nn.Module):
    """Small fully connected classifier: Linear -> Dropout -> ReLU -> Linear.

    Args:
        input_d: number of input features per sample.
        hidden_d: width of the hidden layer.
        output_d: number of output scores per sample (one per class).
    """

    def __init__(self, input_d, hidden_d, output_d):
        super().__init__()

        # The two affine maps; l0 is created before l1, which fixes the
        # order in which their weights are drawn from the torch RNG.
        self.l0 = nn.Linear(input_d, hidden_d)
        self.l1 = nn.Linear(hidden_d, output_d)

        # Regularisation and non-linearity applied to the hidden activations.
        self.dropout = nn.Dropout(p=0.1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        hidden = self.relu(self.dropout(self.l0(x)))
        return self.l1(hidden)
        


Now, we can design our toy model to test your network.

In [None]:
# Create a small net and some toy data to check your implementations.
# Both helpers reseed the RNG they depend on, so re-running this cell
# always produces the same model weights and the same data.

input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5


def init_toy_model():
    """Return a freshly initialised toy ``bmodel`` with repeatable weights."""
    np.random.seed(0)
    # nn.Linear draws its initial weights from the torch RNG, not NumPy's,
    # so the torch seed has to be fixed here for the model to be repeatable.
    torch.manual_seed(0)
    return bmodel(input_size, hidden_size, num_classes)


def init_toy_data():
    """Return ``(X, y)``: a (num_inputs, input_size) float array and int labels."""
    np.random.seed(0)
    X = 10 * np.random.randn(num_inputs, input_size)
    y = np.array([0, 1, 2, 2, 1])
    return X, y


toy_model = init_toy_model()
train_X, train_Y = init_toy_data()
# The toy validation set is deliberately the same data as the training set.
validation_X, validation_Y = init_toy_data()

CUDA (Compute Unified Device Architecture) is NVIDIA's platform for parallel computing on GPUs. Running the parameter updates on a GPU rather than a CPU speeds up training.

In [None]:
# Detect whether a CUDA-capable GPU is visible to PyTorch.
train_on_gpu = cuda.is_available()
print(f'Train on gpu: {train_on_gpu}')

# multi_gpu is True only when more than one GPU is detected.
multi_gpu = False
if train_on_gpu:
    gpu_count = cuda.device_count()
    print(f'{gpu_count} gpus detected.')
    multi_gpu = gpu_count > 1
print(train_on_gpu, multi_gpu)

Train on gpu: True
1 gpus detected.
True False


### Data Preparation
Next, we need to organize the data before we load them into the network. In PyTorch, we use Tensors as the data structure for computation, and we use "dataloaders" to draw mini-batches from the whole dataset.

In [None]:
# Datasets organization
batch_size = 1

# Wrap the NumPy arrays as tensors. Features become float32; labels are cast
# to int64 because nn.CrossEntropyLoss requires class indices of that dtype
# (NumPy's default integer type is platform dependent).
data = {
    # training dataset
    'train': TensorDataset(torch.from_numpy(train_X).float(),
                           torch.from_numpy(train_Y).long()),
    # validation dataset
    'valid': TensorDataset(torch.from_numpy(validation_X).float(),
                           torch.from_numpy(validation_Y).long()),
}

# Dataloader iterators. Only the training loader shuffles. The toy set has
# five samples, so batches are loaded in the main process (num_workers=0);
# spawning worker processes would cost far more than it saves.
dataloaders = {
    # training data loader
    'train': DataLoader(data['train'], batch_size=batch_size, shuffle=True, num_workers=0),
    # validation data loader
    'valid': DataLoader(data['valid'], batch_size=batch_size, shuffle=False, num_workers=0),
}

# Iterate through the dataloader once to check the batch shapes
trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)
features.shape, labels.shape


### Setting Learning Parameters
In our training process, we need to set up the loss function and optimizer tool.

In [None]:
# Loss and optimiser for the toy model: cross-entropy on the raw class
# scores, and Adam with its default hyper-parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(toy_model.parameters())

# Print the shape of every trainable tensor handed to the optimiser.
for p in optimizer.param_groups[0]['params']:
    if not p.requires_grad:
        continue
    print(p.shape)

## Task 2: Build the Training Pipeline

### Training Process
We are now defining the train function. Please follow the instruction to finish each part.

If you are going to use a GPU

1. make sure you have GPU
2. Convert model to GPU with model = model.cuda()
3. convert training data to GPU with data = data.cuda(); target = target.cuda()

In [None]:
def _evaluate(model, criterion, loader, device):
    """Return (mean loss, mean accuracy) of ``model`` over ``loader`` with gradients disabled."""
    loss_sum = 0.0
    n_correct = 0

    model.eval()
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            # criterion returns the batch mean, so weight it by the batch size
            loss_sum += criterion(output, target).item() * data.size(0)

            pred = output.argmax(dim=1)
            n_correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, n_correct / n_samples


def train(model,
          criterion,
          optimizer,
          train_loader,
          valid_loader,
          save_file_name,
          max_epochs_stop=3,
          n_epochs=10,
          print_every=1,
          save_dir='/content/drive/MyDrive/school/2022_Fall/400m/HW3/'):
    """Train a PyTorch model with early stopping on the validation loss.

    The model is trained on the GPU when one is available and on the CPU
    otherwise.

    Params
    --------
        model (PyTorch model): network to train
        criterion (PyTorch loss): objective to minimize
        optimizer (PyTorch optimizer): optimizer that updates the model parameters
        train_loader (PyTorch dataloader): training dataloader to iterate through
        valid_loader (PyTorch dataloader): validation dataloader used for early stopping
        save_file_name (str ending in '.pt'): file name of the checkpoint; the whole
            model object is saved there each time the validation loss improves
        max_epochs_stop (int): number of consecutive epochs without improvement in
            validation loss after which training stops
        n_epochs (int): maximum number of training epochs
        print_every (int): frequency of epochs to print training stats
        save_dir (str): directory the checkpoint is written to; must already exist

    Returns
    --------
        model (PyTorch model): trained model carrying the weights of the epoch with
            the lowest validation loss, with ``model.optimizer`` and ``model.epochs`` set
        history (DataFrame): per-epoch train/validation loss and accuracy
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Module.to() moves the parameters in place, so an optimizer built from
    # model.parameters() before this call keeps pointing at the right tensors.
    model = model.to(device)
    checkpoint_path = os.path.join(save_dir, str(save_file_name))

    # Early stopping initialization
    epochs_no_improve = 0
    valid_loss_min = float('inf')
    valid_best_acc = 0.0
    best_epoch = None
    best_state = None

    history = []
    epochs_run = 0

    # Number of epochs already trained (if using loaded in model weights)
    if hasattr(model, 'epochs'):
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    else:
        model.epochs = 0
        print(f'Starting Training from Scratch.\n')

    n_train = len(train_loader.dataset)
    overall_start = timer()

    # Main loop
    for epoch in range(n_epochs):
        train_loss = 0.0
        train_correct = 0

        # _evaluate() leaves the model in eval mode, so switch back every epoch
        model.train()
        start = timer()

        # Training loop
        for ii, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size
            train_loss += loss.item() * data.size(0)

            pred = output.argmax(dim=1)
            train_correct += pred.eq(target.view_as(pred)).sum().item()

            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(train_loader):.2f}% complete. {timer() - start:.2f} seconds elapsed in epoch.',
                end='\r')

        model.epochs += 1
        epochs_run += 1

        # Per-sample averages for this epoch
        train_loss = train_loss / n_train
        train_acc = train_correct / n_train
        valid_loss, valid_acc = _evaluate(model, criterion, valid_loader, device)

        history.append([train_loss, valid_loss, train_acc, valid_acc])

        # Print training and validation results
        if (epoch + 1) % print_every == 0:
            print(
                f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
            )
            print(
                f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%'
            )

        if valid_loss < valid_loss_min:
            # New best epoch: checkpoint to disk and keep an in-memory copy of
            # the weights (cloned, because state_dict() aliases the live
            # parameters) so they can be restored before returning.
            torch.save(model, checkpoint_path)
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            epochs_no_improve = 0
            valid_loss_min = valid_loss
            valid_best_acc = valid_acc
            best_epoch = epoch
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= max_epochs_stop:
                print(
                    f'\nEarly Stopping! Total epochs: {epoch}. No improvement in validation loss for {epochs_no_improve} epochs.'
                )
                break

    # Return the weights of the best epoch rather than those of the last one
    if best_state is not None:
        model.load_state_dict(best_state)

    # Attach the optimizer
    model.optimizer = optimizer

    # Record overall time and print out stats
    total_time = timer() - overall_start
    if best_epoch is not None:
        print(
            f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_best_acc:.2f}%'
        )
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / max(epochs_run, 1):.2f} seconds per epoch.'
    )

    # Format history
    history = pd.DataFrame(
        history,
        columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])
    return model, history

### Train a Network With Toy Data


Well done! Once we finish our train process design, we can start to train our network with our toy dataset.

In [None]:
from timeit import default_timer as timer

# File name under which train() stores the best toy model.
save_file_name = 'toy_model_best_model.pt'
train_on_gpu = cuda.is_available()

# Fit the toy network on the toy data, reporting stats after every epoch.
model, history = train(
    model=toy_model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=dataloaders['train'],
    valid_loader=dataloaders['valid'],
    save_file_name=save_file_name,
    n_epochs=300,
    print_every=1,
)

Now, we can check the losses and accuracy during the training.

In [None]:
# Training vs. validation loss per epoch for the toy run.
plt.figure(figsize=(8, 6))
for column in ('train_loss', 'valid_loss'):
    plt.plot(history[column], label=column)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Negative Log Likelihood')
plt.title('Training and Validation Losses')
plt.show()

In [None]:
# Training vs. validation accuracy (in percent) per epoch for the toy run.
plt.figure(figsize=(8, 6))
for column in ('train_acc', 'valid_acc'):
    plt.plot(100 * history[column], label=column)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Accuracy')
plt.title('Training and Validation Accuracy')
plt.show()

## Task 3: Train and Improve The Performance of a Network With MNIST Data
Now we can use the MNIST dataset to train our model. First, we will load the MNIST dataset.

In [None]:
## download and transform the data
# Convert images to tensors and standardise them with the mean/std given here.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])
dataset1 = datasets.MNIST('../data', train=True, download=True,
                   transform=transform)

# Work on a 3000-image subset of the training set. The indices come from a
# dedicated, seeded generator: the module-level `random` functions are never
# seeded in this notebook, so they would pick a different subset on every run.
subset_indices = random.Random(0).sample(range(len(dataset1)), 3000)
dataset1 = torch.utils.data.Subset(dataset1, subset_indices)

## train and validation split (80% / 20%)
train_size = int(0.8 * len(dataset1))
val_size = len(dataset1) - train_size
# An explicit generator keeps the split independent of how much of the
# global torch RNG earlier cells have already consumed.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset1, [train_size, val_size],
    generator=torch.Generator().manual_seed(0))

test_dataset = datasets.MNIST('../data', train=False,
                   transform=transform)

In [None]:
# Mini-batch loaders (64 samples per batch). Only the training loader
# shuffles; validation and test data are read in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

Since MNIST is a larger dataset and is harder to fit, we need to increase the number of parameters in our network to solve this task.


In [None]:
# Size of one flattened image (28 x 28 pixels, 1 channel) and number of classes.
input_size = 1 * 28 * 28
num_classes = 10

# Pull a single batch from the training loader to inspect its shapes.
trainiter = iter(train_loader)
features, labels = next(trainiter)
features.shape, labels.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))


Here, we need to modify the given network with different pairs of (number of sequential hidden blocks, hidden layer dimension) — [1,128], [2,512] and [6,5000] — to see how the validation accuracy changes.
For this question, we don't expect to achieve perfect performance; the goal is just to see some results.

---



---



In [None]:
class Net(nn.Module):
    """Small CNN: one conv layer followed by a configurable stack of dense layers.

    Args:
        num_classes: number of output classes.
        hidden: width of every fully connected hidden layer.
        N: number of (Linear -> ReLU) blocks inserted between ``fc1`` and ``fc2``.

    The hard-coded ``fc1`` input size fixes the expected input to
    1 x 28 x 28 images (see the comment in ``__init__``).
    """

    def __init__(self, num_classes=10, hidden=128, N=1):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        # 5408 = 32 channels * 13 * 13: a 3x3 convolution without padding turns
        # 28x28 into 26x26, and the 2x2 max-pool in forward() halves that to 13x13.
        self.fc1 = nn.Linear(5408, hidden)

        # Build each hidden block as a separate module. Multiplying a list of
        # modules by N would repeat the *same* Linear object N times, so every
        # "layer" would share one weight matrix.
        blocks = []
        for _ in range(N):
            blocks.append(nn.Linear(hidden, hidden))
            blocks.append(nn.ReLU())
        self.fcL = nn.Sequential(*blocks)

        self.fc2 = nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fcL(x)
        x = self.fc2(x)
        # NOTE: nn.CrossEntropyLoss applies log_softmax internally as well.
        # Feeding it these log-probabilities gives the same loss, because
        # log_softmax leaves already-normalised log-probabilities unchanged.
        return F.log_softmax(x, dim=1)

In [None]:
# Configurations to compare, as [N, hidden]: [1,128], [2,512], [6,5000].
# N is passed explicitly; enable exactly one of the lines below per run.
model = Net(num_classes=10, hidden=128, N=1)
# model = Net(num_classes=10, hidden=512, N=2)
# model = Net(num_classes=10, hidden=5000, N=6)

Set up our criterion and optimizer for the new model.

In [None]:
# Loss and optimiser for the MNIST network: cross-entropy, and Adam with
# its default hyper-parameters over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

Train your new network and get the results. At this time, the training process may need more time.

In [None]:
from timeit import default_timer as timer

# File name under which train() stores the best MNIST model.
save_file_name = 'MINIST_model_best_model_3.pt'
train_on_gpu = cuda.is_available()

## the keyword arguments below can be adjusted to tune the training run
model, history = train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=val_loader,
    save_file_name=save_file_name,
    n_epochs=30,
    print_every=1,
)

It seems that the process will end very early since there is no progress during the training. Let's print the results.

In [None]:
# Training vs. validation loss per epoch for the MNIST run.
plt.figure(figsize=(8, 6))
for column in ('train_loss', 'valid_loss'):
    plt.plot(history[column], label=column)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Negative Log Likelihood')
plt.title('Training and Validation Losses')
plt.show()

In [None]:
# Training vs. validation accuracy (in percent) per epoch for the MNIST run.
plt.figure(figsize=(8, 6))
for column in ('train_acc', 'valid_acc'):
    plt.plot(100 * history[column], label=column)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Accuracy')
plt.title('Training and Validation Accuracy')
plt.show()

Load your best model and evaluate it on the test dataset using the accuracy metric. You could reuse the previous code.

In [None]:
# Score the trained model ONCE on the held-out test set, with no further
# training. Calling train() with the test loader as the validation loader
# would keep updating the weights and pick checkpoints by test loss, so the
# reported test numbers would no longer be an unbiased estimate.
# `model` is whatever the training cell above returned; to score a saved
# checkpoint instead, load it with torch.load() before running this cell.
eval_device = next(model.parameters()).device
model.eval()

test_loss_sum = 0.0
test_correct = 0
with torch.no_grad():
    for test_data, test_target in test_loader:
        test_data = test_data.to(eval_device)
        test_target = test_target.to(eval_device)
        test_output = model(test_data)

        # criterion returns the batch mean, so weight it by the batch size
        test_loss_sum += criterion(test_output, test_target).item() * test_data.size(0)
        test_correct += (test_output.argmax(dim=1) == test_target).sum().item()

n_test = len(test_loader.dataset)
test_loss = test_loss_sum / n_test
test_acc = test_correct / n_test
print(f'Test Loss: {test_loss:.4f}\tTest Accuracy: {100 * test_acc:.2f}%')

# plot accuracy: validation curve from training, test result as a reference line
plt.figure(figsize=(8, 6))
plt.plot(100 * history['valid_acc'], label='valid_acc')
plt.axhline(100 * test_acc, color='tab:red', linestyle='--', label='test_acc')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Accuracy')
plt.title('Test Accuracy')
plt.show()

# plot loss: validation curve from training, test result as a reference line
plt.figure(figsize=(8, 6))
plt.plot(history['valid_loss'], label='valid_loss')
plt.axhline(test_loss, color='tab:red', linestyle='--', label='test_loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Negative Log Likelihood')
plt.title('Test Loss')
plt.show()