# Libraries used



*   Deep Learning: torch, torchvision 
*   Data Manipulation: numpy, pandas, sklearn
*   Plotting: PIL, seaborn, matplotlib







In [None]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
import seaborn as sb
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# LeNet5 architecture for Transfer Learning

LeNet5 is a Convolutional Neural Network architecture created by Yann LeCun in 1998, designed specifically around the MNIST dataset of 28x28 handwritten-digit images, which consists of 60,000 labeled images for training and 10,000 images for testing the model.

For this Kaggle competition, the dataset provides 28x28 images, whereas the architecture used receives data of dimension 32x32. This is why an extra adaptive-average-pooling layer was added before the flattening step: it yields the dimensions required by the first fully-connected layer.

In [None]:
class LeNet5(torch.nn.Module):
    """LeNet-5 style convolutional classifier for single-channel images.

    Layer stack: conv(1->6, 5x5, pad 2) -> ReLU -> max-pool(2)
    -> conv(6->16, 5x5) -> ReLU -> max-pool(2) -> adaptive avg-pool to 5x5
    -> flatten -> FC(400->120) -> ReLU -> FC(120->84) -> ReLU -> FC(84->10).

    The adaptive average pooling always emits 16 maps of 5x5, so the first
    fully-connected layer sees 400 features for both 28x28 and 32x32 inputs.
    `forward` returns raw scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # --- Convolutional feature extractor ---
        # padding=2 keeps the spatial size unchanged through the first 5x5 conv.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2, bias=True)
        self.max_pool_1 = nn.MaxPool2d(kernel_size=2)
        # The second conv is unpadded, so each spatial side shrinks by 4.
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, bias=True)
        self.max_pool_2 = nn.MaxPool2d(kernel_size=2)
        # Forces a fixed 5x5 map per channel before flattening.
        self.avgpool = nn.AdaptiveAvgPool2d((5, 5))

        # --- Fully connected classifier head: 400 -> 120 -> 84 -> 10 ---
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on `x` and return one row of 10 scores per sample."""
        relu = nn.functional.relu

        # Two conv -> ReLU -> 2x2 max-pool stages.
        x = self.max_pool_1(relu(self.conv1(x)))
        x = self.max_pool_2(relu(self.conv2(x)))

        # Fix the spatial size at 5x5, then flatten to rows of 400 features.
        x = self.avgpool(x)
        x = x.view(-1, self.fc1.in_features)

        # Hidden FC layers use ReLU; the last layer's output is returned as-is.
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        return self.fc3(x)

# Data Preprocessing




In [None]:
# Load the competition CSVs, parsing every column as float32.
train = pd.read_csv('train.csv', dtype=np.float32)
test = pd.read_csv('test.csv', dtype=np.float32)

# Separate the "label" column (targets) from the remaining pixel columns.
y = train["label"].values
X = train.drop("label", axis=1).values

# Divide pixel values by 255 (presumably 8-bit intensities, giving values in [0, 1]).
X = X / 255.0
X_test = test.values / 255.0

# Hold out 20% of the labeled rows for validation; the split seed is fixed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# NumPy arrays -> torch tensors; labels are cast to int64 (LongTensor).
X_train, X_val, X_test = (
    torch.from_numpy(arr) for arr in (X_train, X_val, X_test)
)
y_train = torch.from_numpy(y_train).long()
y_val = torch.from_numpy(y_val).long()

# Insert the single gray channel axis, then lay each row out as a
# 1x28x28 image, which is the input layout LeNet5 expects.
X_train = X_train.unsqueeze(1).reshape(-1, 1, 28, 28)
X_val = X_val.unsqueeze(1).reshape(-1, 1, 28, 28)
X_test = X_test.unsqueeze(1).reshape(-1, 1, 28, 28)

# Wrap the tensors as datasets; the test set has no labels.
train_tensor = torch.utils.data.TensorDataset(X_train, y_train)
validation_tensor = torch.utils.data.TensorDataset(X_val, y_val)
test_tensor = torch.utils.data.TensorDataset(X_test)

# Batches of 64 everywhere; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(train_tensor, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_tensor, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_tensor, batch_size=64, shuffle=False)

# Model Training 

In [None]:
# Train LeNet5 with Adam and cross-entropy, printing validation metrics
# every VALIDATE_EVERY optimizer steps.

SEED = 42            # same value as the train/validation split seed
VALIDATE_EVERY = 50  # 50 steps x batch size 64 = 3,200 training images

# Seed torch's global RNG so weight initialisation and the DataLoader
# shuffle order are repeatable on a fresh kernel.
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to its device before building the optimizer over its parameters.
model = LeNet5()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
running_loss = 0  # training loss summed since the last validation report
steps = 0         # optimizer steps since the last validation report

print('Training Started!')

model.train()
for e in range(epochs):

    print('Epoch number: ', e+1)

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        # Call the module itself (not .forward) so registered hooks run.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        steps += 1

        # The counters are not reset at epoch boundaries, so a reporting
        # window may span two epochs.
        if steps == VALIDATE_EVERY:
            model.eval()
            valid_loss_sum = 0.0
            correct = 0
            seen = 0

            with torch.no_grad():
                # Separate names keep the training batch variables intact.
                for val_inputs, val_labels in validation_loader:
                    val_inputs = val_inputs.to(device)
                    val_labels = val_labels.to(device)
                    val_outputs = model(val_inputs)

                    # criterion returns the batch mean; weight it by the
                    # batch size so a smaller final batch is not over-weighted.
                    n_batch = val_labels.size(0)
                    valid_loss_sum += criterion(val_outputs, val_labels).item() * n_batch
                    correct += (val_outputs.argmax(dim=1) == val_labels).sum().item()
                    seen += n_batch

            # Guard against an empty validation loader.
            valid_loss = valid_loss_sum / max(seen, 1)
            accuracy = correct / max(seen, 1)

            print(
                f"Train loss: {running_loss/steps:.3f}.. "
                f"Valid loss: {valid_loss:.3f}.. "
                f"Valid accuracy: {accuracy:.3f}")

            running_loss = 0
            steps = 0
            model.train()


print('Training finished!')



Training Started!
Epoch number:  1
Train loss: 1.725.. Valid loss: 0.833.. Valid accuracy: 0.729
Train loss: 0.565.. Valid loss: 0.407.. Valid accuracy: 0.876
Train loss: 0.350.. Valid loss: 0.306.. Valid accuracy: 0.905
Train loss: 0.275.. Valid loss: 0.259.. Valid accuracy: 0.922
Train loss: 0.235.. Valid loss: 0.218.. Valid accuracy: 0.936
Train loss: 0.175.. Valid loss: 0.194.. Valid accuracy: 0.942
Train loss: 0.196.. Valid loss: 0.186.. Valid accuracy: 0.942
Train loss: 0.139.. Valid loss: 0.142.. Valid accuracy: 0.956
Train loss: 0.127.. Valid loss: 0.144.. Valid accuracy: 0.957
Train loss: 0.129.. Valid loss: 0.150.. Valid accuracy: 0.955
Epoch number:  2
Train loss: 0.105.. Valid loss: 0.128.. Valid accuracy: 0.960
Train loss: 0.101.. Valid loss: 0.128.. Valid accuracy: 0.959
Train loss: 0.097.. Valid loss: 0.110.. Valid accuracy: 0.967
Train loss: 0.100.. Valid loss: 0.100.. Valid accuracy: 0.969
Train loss: 0.098.. Valid loss: 0.111.. Valid accuracy: 0.967
Train loss: 0.102.

# Submission results

In [None]:
# Predict the most likely class for every test image.
# Result: `outputsTensor`, a CPU int64 tensor with one row per image and a
# single column holding the predicted class index.
model.eval()
predicted_batches = []
with torch.no_grad():
    for batch in test_loader:
        # The test dataset wraps a single tensor, so the images are at index 0.
        inputs = batch[0].to(device)
        outputs = model(inputs)
        _, top_class = outputs.topk(1, dim=1)
        # Move predictions to the CPU: concatenating a CPU tensor with a CUDA
        # tensor raises, so the original accumulation failed on a GPU.
        predicted_batches.append(top_class.cpu())

# Concatenate once at the end instead of re-allocating on every batch.
if predicted_batches:
    outputsTensor = torch.cat(predicted_batches, dim=0)
else:
    outputsTensor = torch.LongTensor()


# Submission format arrangement

In [None]:
# Build the submission table: one row per test image, with a 1-based image
# id and the predicted digit.

# Flatten the predictions to a plain list of ints. `outputsTensor` is left
# untouched (not rebound to a list) so this cell can be re-run safely;
# `as_tensor` also accepts a nested list left over from an earlier run.
predicted_labels = torch.as_tensor(outputsTensor).reshape(-1).tolist()

Labels = pd.Series(predicted_labels, dtype="int64")
# Derive the id range from the number of predictions rather than hard-coding it.
ImageID = pd.Series(np.arange(1, len(Labels) + 1))

# NOTE(review): the Kaggle Digit Recognizer sample submission uses the header
# `ImageId,Label` (singular), hence the "Label" column name. Confirm against
# the competition's sample_submission.csv.
submission = pd.DataFrame({"ImageId": ImageID,
                           "Label": Labels})

# CSV and Model saving

In [None]:
# Write the submission table to CSV, leaving out the DataFrame index column.
submission.to_csv(path_or_buf="submission.csv", index=False)

# Serialise the whole model object (not just its state_dict) to disk.
torch.save(obj=model, f='mnist.pth')