In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mnist-in-csv/mnist_test.csv
/kaggle/input/mnist-in-csv/mnist_train.csv


In [2]:
#core pytorch functionality
import torch
#building blocks for neural networks - layers , activation functions, loss functions,etc.
import torch.nn as nn
#functional versions of neural network components(activations)
import torch.nn.functional as F
##data loaders,samplers,datasets
import torch.utils.data
#provides automatic differentiation for backpropogation
from torch.autograd import Variable
#optimization algorithms - stochastic gradient descent , adam
from torch.optim import Adam
#functions for measuring time and measring code execution
import time
#splitting data into training and validation sets
from torch.utils.data import random_split
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST
#converts numpy array to pytorch tensor
from torchvision.transforms import ToTensor
#flattens tensor into 1D tensor
from torch import flatten
#splitting data into training and test sets
from sklearn.model_selection import train_test_split

In [3]:
mnist_train = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_train.csv')
mnist_test = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_test.csv')

In [4]:
y_train,x_train = mnist_train['label'].values, mnist_train.iloc[:,1:].values
y_test,x_test = mnist_test['label'].values,mnist_test.iloc[:,1:].values

In [5]:
INIT_LR = 1e-3
BATCH_SIZE = 32
EPOCHS = 10
TRAIN_SPLIT = 0.80
VAL_SPLIT = 0.2
device = torch.device('cuda'if torch.cuda.is_available() else 'cpu')

torch_X_train = torch.from_numpy(x_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long
print(torch_X_train.shape)
# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(x_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long

# Pytorch train and test sets (tensor objects)
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

numTrainSamples = int(len(train)*TRAIN_SPLIT)
numValSamples = int(len(train)*VAL_SPLIT)
(trainData,valData) = random_split(train,[numTrainSamples,numValSamples])

# data loader object
train_loader = torch.utils.data.DataLoader(trainData, batch_size = BATCH_SIZE, shuffle = False) #true?
val_loader = torch.utils.data.DataLoader(valData, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)
print(train_loader)
#calculkate steps per epcoh for training and validation set
trainSteps = len(train_loader.dataset)
valSteps = len(val_loader.dataset)
print('Training steps:',trainSteps)
print('Validation steps: ', valSteps)

torch.Size([60000, 784])
<torch.utils.data.dataloader.DataLoader object at 0x7f7618ee1b90>
Training steps: 48000
Validation steps:  12000


In [6]:
class LeNet(nn.Module):
    def __init__(self,numChannels,classes):
        #call the parent constructor
        super(LeNet,self).__init__()
        #initialise first set of convolutions
        self.conv1 = nn.Conv2d(in_channels = numChannels, out_channels = 20,kernel_size = (5,5))
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size = (2,2),stride = (2,2))
        
        #initialise second set of convolutions
        
        self.conv2 = nn.Conv2d(in_channels = 20,out_channels = 50,kernel_size = (5,5))
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size = (2,2),stride = (2,2))
        
        #initialise first set of FC 
        self.fc1 = nn.Linear(in_features = 800,out_features = 500)
        self.relu3 = nn.ReLU()
        
        #initialise softmax classifier
        self.fc2 = nn.Linear(in_features=500,out_features = classes)
        self.logSoftmax = nn.LogSoftmax(dim = 1)
    def forward(self,x):
        x = x.to(torch.float)
        x = x.view(32,1,28,28)
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
    
    # pass the output from the previous layer through the second
    # set of CONV => RELU => POOL layers
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
    
    # flatten the output from the previous layer and pass it
    # through our only set of FC => RELU layers
        x = flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)
    
    # pass the output to our softmax classifier to get our output
    # predictions
        x = self.fc2(x)
        output = self.logSoftmax(x)
    
    # return the output predictions
        return output
    

In [7]:
print('Initialising the lenet model')
model = LeNet(numChannels = 1,classes=10)
print(type(model))
#initialising optimizer and loss function
opt = Adam(model.parameters(),lr = INIT_LR)
print(model.parameters())
# cross entropy loss
lossFn = nn.NLLLoss()
#initialising a dictionary to store training history
H = {"train_loss":[],
    "train_acc":[],
    "val_loss":[],
    "val_acc":[]
    }
print('training the network')
# starting out timer
startTime = time.time()

Initialising the lenet model
<class '__main__.LeNet'>
<generator object Module.parameters at 0x7f7618f588d0>
training the network


In [8]:
for e in range(EPOCHS):
    model.train()
    totalTrainLoss = 0
    totalValLoss = 0
    trainCorrect = 0
    valCorrect = 0
    for(x,y) in train_loader:
        (x,y) = (x.to(device),y.to(device))
        # forward pass
        pred = model(x)
        loss = lossFn(pred,y)
        # zero out the gradients, perform backpropagation and update weights
        opt.zero_grad()
        loss.backward()
        opt.step()
        
        # add loss to total training losss
        # calculate the number of correct predictions
        totalTrainLoss = totalTrainLoss + loss
        trainCorrect = trainCorrect + (pred.argmax(1)==y).type(torch.float).sum().item()
        

In [9]:
with torch.no_grad(): # used to evaluate validation set, hence gradients not computed or updated in this dataset
    model.eval()     # model put into evaluation mode - diables dropout and batch normalization
    for (x,y) in val_loader:  #input data moved to device , loss calculated by applying loss function to predictions
        (x,y) = (x.to(device),y.to(device))
        pred = model(x)
        totalValLoss = totalValLoss + lossFn(pred,y)
        valCorrect += (pred.argmax(1)==y).type(torch.float).sum().item()   #valcorrect used to keep mtrack of correct predictions

In [10]:
avgTrainLoss = totalTrainLoss / trainSteps
avgValLoss = totalValLoss / valSteps
# calculate the training and validation accuracy
trainCorrect = trainCorrect / len(train_loader.dataset)
valCorrect = valCorrect / len(val_loader.dataset)
# update our training history by appending to 'H' dictionary
H["train_loss"].append(avgTrainLoss.cpu().detach().numpy())
H["train_acc"].append(trainCorrect)
H["val_loss"].append(avgValLoss.cpu().detach().numpy())
H["val_acc"].append(valCorrect)
# print the model training and validation information
print("[INFO] EPOCH: {}/{}".format(e + 1, EPOCHS))
print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(avgTrainLoss, trainCorrect))
print("Val loss: {:.6f}, Val accuracy: {:.4f}\n".format(avgValLoss, valCorrect))

[INFO] EPOCH: 10/10
Train loss: 0.001076, Train accuracy: 0.9919
Val loss: 0.004125, Val accuracy: 0.9828

