In [10]:
# Importing the relevant Python libraries and setting up the environment

%reset -f

import numpy as np
import pandas as pd

# Display configuration: lift pandas' column/row display limits (None = no limit)
# and raise NumPy's summarisation threshold to 1e6 elements.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', None,
)
np.set_printoptions(threshold=1e6)

import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Helper: total number of scalar parameters held by a model
def get_n_params(model):
    """Return the summed element count of every tensor in `model.parameters()`.

    A model with no parameters yields 0.
    """
    return sum(param.nelement() for param in model.parameters())

# Use the first CUDA device when PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [11]:
# Importing the data: read the raw training and testing tables from CSV

data_train = pd.read_csv('../Data/train.csv')
data_test = pd.read_csv('../Data/test.csv')

# Defining dataframes: build a DataFrame object from each loaded table

df_train, df_test = pd.DataFrame(data_train), pd.DataFrame(data_test)

# # Validation - Checking random images from the training and testing sets
# plt.imshow(df_test.loc[0].to_numpy().reshape(1, 28, 28).squeeze())
# plt.imshow(df_train.drop(columns='label').loc[0].to_numpy().reshape(1, 28, 28).squeeze())

In [12]:
# Defining the input matrices and output vectors

X=df_train.drop(columns='label').values # Inputs for the training and validation set
Y=df_train['label'].values  # Labels for the training and validation set
X_test=df_test.values # Inputs for the testing set

# # Validation - Checking some images
# plt.imshow(X_test[0,:].reshape(1, 28, 28).squeeze())

# Splitting into a training and validation set BEFORE normalising, so that the
# scaler statistics are computed from the training rows only and the validation
# set stays a genuinely held-out sample. The split is seeded so that re-running
# the notebook yields the same partition.
SPLIT_SEED = 42
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=SPLIT_SEED)

# Normalising the features
scaler = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(X_train) # Defining the scaler on the training rows only
X_train=scaler.transform(X_train) # Applying the scaler to the training set
X_val=scaler.transform(X_val) # Applying the scaler to the validation set
X_test=scaler.transform(X_test) # Applying the scaler to the testing set
X=scaler.transform(X) # Keeping X as the scaled training+validation matrix, as later cells read X

# # Validation - Checking some images
# plt.imshow(X_test[0,:].reshape(1, 28, 28).squeeze())

# Checking the shape of the resulting matrices/vectors
print('Shape of Y_train:', Y_train.shape)
print('Shape of X_train:', X_train.shape)
print('Shape of Y_val:', Y_val.shape)
print('Shape of X_val:', X_val.shape)
print('Shape of X_test:',X_test.shape)

Shape of Y_train: (33600,)
Shape of X_train: (33600, 784)
Shape of Y_val: (8400,)
Shape of X_val: (8400, 784)
Shape of X_test: (28000, 784)


In [13]:
# Defining the Dataset Class

class Digit_train(Dataset): # Class for the training and cross-validation set
    """Labelled image dataset backed by a 2-D feature matrix and a label vector.

    `X1` is indexed as `X1[idx, :]` and each row is reshaped to (1, 28, 28), so a
    row must hold 784 values. `Y1` is indexed as `Y1[idx]` for the matching label.
    """

    def __init__(self, X1, Y1):
        # Images (one flattened image per row) and their labels
        self.X, self.Y = X1, Y1

    def __len__(self):
        # The sample count is the length of the first feature column
        first_column = self.X[:, 0]
        return len(first_column)

    def __getitem__(self, idx):
        # Return the idx-th row as a single-channel 28x28 image together with its label
        flat_image = self.X[idx, :]
        return flat_image.reshape(1, 28, 28), self.Y[idx]
    
class Digit_test(Dataset): # Class for the testing set
    """Unlabelled image dataset backed by a 2-D feature matrix.

    `X1` is indexed as `X1[idx, :]` and each row is reshaped to (1, 28, 28), so a
    row must hold 784 values.
    """

    def __init__(self, X1):
        # Images, one flattened image per row
        self.X = X1

    def __len__(self):
        # The sample count is the length of the first feature column
        first_column = self.X[:, 0]
        return len(first_column)

    def __getitem__(self, idx):
        # Return the idx-th row as a single-channel 28x28 image
        flat_image = self.X[idx, :]
        return flat_image.reshape(1, 28, 28)
    
# Creating the datasets: the training and validation splits carry labels,
# the testing set holds images only

train_dataset, val_dataset = Digit_train(X_train, Y_train), Digit_train(X_val, Y_val)
test_dataset = Digit_test(X_test)

In [14]:
# Defining the dataloaders

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Validation is evaluation only: a fixed order keeps the per-batch averages
# deterministic and avoids drawing from the random number generator.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
# Testing images are served one per batch, in dataset order, so that predictions
# line up with the row order of the testing matrix.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [25]:
# Defining the class corresponding to the Convolutional Neural Net (CNN)

class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Architecture: two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling,
    then a 50-unit fully connected layer with dropout and a final linear layer.
    The forward pass returns log-probabilities (log-softmax over dim 1).

    Parameters
    ----------
    input_size : accepted for interface compatibility; not used to size any layer.
        The flattened size `n_feature*4*4` is hard-coded and corresponds to
        28x28 inputs (28 -> 24 -> 12 -> 8 -> 4 through the conv/pool stack).
    n_feature : number of feature maps produced by each convolution.
    output_size : number of classes, i.e. the width of the final linear layer.
    """

    def __init__(self, input_size, n_feature, output_size):
        super(CNN, self).__init__()
        self.n_feature = n_feature
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=n_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(n_feature, n_feature, kernel_size=5)
        self.fc1 = nn.Linear(n_feature*4*4, 50)
        # The output width follows `output_size` instead of a hard-coded 10
        self.fc2 = nn.Linear(50, output_size)
        # Element-wise dropout: it is applied to the flat (batch, 50) activation
        # of fc1, which is not a valid input for the channel-wise nn.Dropout2d
        self.dropout = nn.Dropout(0.1)

    def forward(self, x, verbose=False):
        """Map a (batch, 1, 28, 28) tensor to (batch, output_size) log-probabilities.

        `verbose` is accepted for interface compatibility and is not used.
        """
        # Convolutional feature extractor
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        # Flatten the feature maps for the fully connected head
        x = x.view(-1, self.n_feature*4*4)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [32]:
# Training the network

torch.manual_seed(42) # Seeding PyTorch's global generator so that re-runs of this cell are repeatable

input_size  = X.shape[1]   # number of pixels in the image
output_size = 10      # there are 10 classes
n_features = 100 # number of feature maps

model_cnn = CNN(input_size, n_features, output_size) # Instantiating the network
# The datasets serve the (float64) feature arrays unchanged, so the weights are
# cast to double to match. The cast and the device move happen before the
# optimiser is built.
model_cnn = model_cnn.double().to(device)

# The network already ends in log_softmax, so the matching criterion is the
# negative log-likelihood (CrossEntropyLoss would apply log_softmax a second time).
criterion = nn.NLLLoss()
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

n_epochs = 100 # Number of times the entire dataset is used to train the network

for epoch in range(n_epochs):
    # Training loop
    model_cnn.train() # Setting the model to train mode
    train_loss = 0
    correct_train = 0
    total_train = 0

    for data, target in train_loader:
        # move the batch to the device the model lives on
        data, target = data.to(device), target.to(device)
        # clear the old gradients from optimized variables
        optimizer.zero_grad()
        # forward pass: feed inputs to the model to get outputs
        output = model_cnn(data)
        # calculate the training batch loss
        loss = criterion(output, target)
        # backward: compute the gradients of the loss w.r.t. the model params
        loss.backward()
        # update the model parameters by performing a single optimization step
        optimizer.step()
        # accumulate the training loss
        train_loss += loss.item()
        # calculate the accuracy
        predicted_train = torch.argmax(output, 1)
        correct_train += (predicted_train == target).sum().item()
        total_train += target.size(0)

    # Validation loop
    model_cnn.eval() # Setting the model to eval mode
    val_loss = 0
    correct_val = 0
    total_val = 0
    # turn off gradients for validation
    with torch.no_grad():
        for data, target in val_loader:
            # move the batch to the device the model lives on
            data, target = data.to(device), target.to(device)
            # forward pass
            output = model_cnn(data)
            # validation batch loss
            loss = criterion(output, target)
            # accumulate the valid_loss
            val_loss += loss.item()
            # calculate the accuracy
            predicted_val = torch.argmax(output, 1)
            correct_val += (predicted_val == target).sum().item()
            total_val += target.size(0)

    # Printing epoch results (losses are averaged over the number of batches)
    train_loss /= len(train_loader)
    val_loss /= len(val_loader)
    print(f'Epoch: {epoch+1}/{n_epochs} -- Training loss: {train_loss:.3f} -- Training Accuracy: {100*correct_train/total_train:.3f}% -- Validation Accuracy: {100*correct_val/total_val:.3f}%')

Number of parameters: 333260
Epoch: 1/100 -- Training loss: 0.478 -- Training Accuracy: 86.637% -- Validation Accuracy: 95.583%
Epoch: 2/100 -- Training loss: 0.139 -- Training Accuracy: 95.875% -- Validation Accuracy: 97.131%
Epoch: 3/100 -- Training loss: 0.094 -- Training Accuracy: 97.250% -- Validation Accuracy: 97.571%
Epoch: 4/100 -- Training loss: 0.075 -- Training Accuracy: 97.661% -- Validation Accuracy: 98.048%
Epoch: 5/100 -- Training loss: 0.065 -- Training Accuracy: 97.964% -- Validation Accuracy: 98.214%
Epoch: 6/100 -- Training loss: 0.056 -- Training Accuracy: 98.292% -- Validation Accuracy: 98.298%
Epoch: 7/100 -- Training loss: 0.047 -- Training Accuracy: 98.560% -- Validation Accuracy: 98.333%
Epoch: 8/100 -- Training loss: 0.043 -- Training Accuracy: 98.619% -- Validation Accuracy: 98.500%
Epoch: 9/100 -- Training loss: 0.040 -- Training Accuracy: 98.768% -- Validation Accuracy: 98.524%
Epoch: 10/100 -- Training loss: 0.035 -- Training Accuracy: 98.949% -- Validatio

In [33]:
# Predicting the output and writing results to a file

result=[] # Used to store the predicted labels, in the order the loader yields them

model_cnn.eval() # setting the model to eval mode
# turn off gradients for inference
with torch.no_grad():
    # The loop variable is deliberately not called `data_test`, so the raw
    # testing table loaded earlier under that name is not overwritten.
    for test_batch in test_loader:
        output_test = model_cnn(test_batch.to(device))
        # save the results; extending with the whole batch works for any batch size
        predicted_test = torch.argmax(output_test, 1)
        result.extend(predicted_test.tolist())

ind=list(range(1, len(result)+1))  # Used to store the 1-based image indices

final_data=np.column_stack((ind, result)) # Stacking the two columns
np.savetxt("submission.csv", final_data, delimiter=",", header="ImageId,Label", fmt='%d,%d', comments='') # Saving to file