In [1]:
import numpy as np  # to handle matrix and data operation
import pandas as pd # to read csv and handle dataframe

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable # used to implement backprop

from sklearn.model_selection import train_test_split

In [2]:
# importing data
data = pd.read_csv('../input/train.csv')

print(data.shape)

(42000, 785)


In [3]:
# assigning target variable 'y' and features 'x'

y = data['label'].values
X = data.drop(['label'],1).values

# splitting data into test and train

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

In [4]:

print(y_test.shape)


(6300,)


In [5]:
#setting batch size as 2^n
BATCH_SIZE = 32

# creating Tensors
torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor)

torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor)

    
# constructing datasets of tensors
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# creating dataloaders for transformation, managing batches
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [6]:
def fit(model, train_loader):
    # implementing Adam with default parameters - lr = 0.001, beta1 = 0.9, beta2 = 0.999, eps = 1e-08, weight_decay (L2) = 0
    optimizer = torch.optim.Adam(model.parameters())
    # loss function - cross entropy
    error = nn.CrossEntropyLoss()
    EPOCHS = 5
    model.train()
    # implementing MiniBatch GD
    for epoch in range(EPOCHS):
        correct = 0
        for batch_index, (X_batch, y_batch) in enumerate(train_loader):    # enumerate keeps count of loop and is used for enumerating an object
            var_X_batch = Variable(X_batch).float()                        # packing the tensor - node
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()                                          # setting gradients to 0 after every iteration to avoid accumalation of old and new gradient
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            loss.backward()                                                # computing gradients
            optimizer.step()                                               # performs a single optimization step

            # computing total correct predictions
            predicted = torch.max(output.data, 1)[1]
            correct += (predicted == var_y_batch).sum()           # used to compute accuracy
            
            if batch_index % 50 == 0:
                print('Epoch : {} \tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, loss.data[0], float(correct*100) / float(BATCH_SIZE*(batch_index+1))))
                

In [7]:
def evaluate(model):
    correct = 0 
    for test_imgs, test_labels in test_loader:
        #print(test_imgs.shape)
        test_imgs = Variable(test_imgs).float()
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))

In [8]:
# resizing arrays to float 4D arrays
# -1 parameter in tensor.view adapts to the existing size of the array and respects the order of elements (.resize() does not).
    
torch_X_train = torch_X_train.view(-1, 1,28,28).float()
torch_X_test = torch_X_test.view(-1,1,28,28).float()
print(torch_X_train.shape)
print(torch_X_test.shape)

train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

torch.Size([35700, 1, 28, 28])
torch.Size([6300, 1, 28, 28])


In [9]:
class CNN(nn.Module):
    
    # initializing 3 conv layers and 2 FC layers
    
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32,64, kernel_size=5)
        self.fc1 = nn.Linear(3*3*64, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv3(x),2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = x.view(-1,3*3*64 )
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
 
# initialising the cnn object
cnn = CNN()
print(cnn)

it = iter(train_loader)
X_batch, y_batch = next(it)
# printing dimensions of forward feed layers
print(cnn.forward(X_batch).shape)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
torch.Size([32, 10])


fit(cnn,train_loader)

In [12]:
fit(cnn, train_loader)



Epoch : 0 	Loss: 0.303737	 Accuracy:90.625%
Epoch : 0 	Loss: 0.237594	 Accuracy:95.404%
Epoch : 0 	Loss: 0.209804	 Accuracy:95.606%
Epoch : 0 	Loss: 0.066408	 Accuracy:95.550%
Epoch : 0 	Loss: 0.019424	 Accuracy:95.460%
Epoch : 0 	Loss: 0.247089	 Accuracy:95.344%
Epoch : 0 	Loss: 0.037978	 Accuracy:95.380%
Epoch : 0 	Loss: 0.051325	 Accuracy:95.379%
Epoch : 0 	Loss: 0.154822	 Accuracy:95.449%
Epoch : 0 	Loss: 0.190033	 Accuracy:95.420%
Epoch : 0 	Loss: 0.080272	 Accuracy:95.415%
Epoch : 0 	Loss: 0.073463	 Accuracy:95.383%
Epoch : 0 	Loss: 0.166663	 Accuracy:95.461%
Epoch : 0 	Loss: 0.507654	 Accuracy:95.411%
Epoch : 0 	Loss: 0.051344	 Accuracy:95.417%
Epoch : 0 	Loss: 0.281531	 Accuracy:95.431%
Epoch : 0 	Loss: 0.129989	 Accuracy:95.471%
Epoch : 0 	Loss: 0.035250	 Accuracy:95.516%
Epoch : 0 	Loss: 0.430209	 Accuracy:95.557%
Epoch : 0 	Loss: 0.023211	 Accuracy:95.544%
Epoch : 0 	Loss: 0.134804	 Accuracy:95.589%
Epoch : 0 	Loss: 0.011384	 Accuracy:95.632%
Epoch : 0 	Loss: 0.012373	 Accur

In [14]:
evaluate(cnn)

Test accuracy:0.959% 
