IMPORT DATASET & LIBRARIES, HYPERPARAMETERS 

In [6]:
import pandas as pd
import torch as torch
from sklearn.model_selection import train_test_split
from torch import nn, optim

# Load the MNIST table. Later cells read its "label" column as the target and
# treat every other column as a pixel feature.
data = pd.read_csv("mnist.csv")


# Number of input features per image (the training loop flattens each image
# into a 784-long vector).
input_size = 784
# Candidate hidden-layer widths. Only hidden_sizes[0] (256) sizes a layer that
# the network's forward pass actually uses.
hidden_sizes = [256, 10, 10, 10, 10, 10]
# Width of the network's final layer (presumably one output per digit class).
output_size = 10
# Samples per mini-batch, used by both DataLoaders.
batch_size = 100
# Number of full passes over the training set.
epochs = 10
# Step size and momentum coefficient passed to optim.SGD.
lr = 0.1
momentum = 0.9


NORMALIZE AND SPLIT DATASET INTO 80% TRAIN AND 20% VALIDATION

In [7]:
# Every column except "label" is a feature; dividing by 255 rescales the values.
is_feature_column = data.columns != "label"
X = data.loc[:, is_feature_column].to_numpy() / 255
Y = data.label.to_numpy()

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
split = train_test_split(X, Y, test_size=0.2, random_state=42)
features_train, features_test, targets_train, targets_test = split

LOAD DATASET AS TENSORS AND ALSO SPLIT INTO BATCHES

In [8]:
# Wrap the NumPy splits as tensors. torch.from_numpy keeps the array's dtype,
# so the features stay in the floating-point precision NumPy produced
# (presumably float64 after the division by 255), which the model must match.
X_train = torch.from_numpy(features_train)
X_test = torch.from_numpy(features_test)

# Targets are class indices; nn.CrossEntropyLoss expects them as int64.
Y_train = torch.from_numpy(targets_train).long()
Y_test = torch.from_numpy(targets_test).long()

# Pair each feature row with its label.
train = torch.utils.data.TensorDataset(X_train, Y_train)
test = torch.utils.data.TensorDataset(X_test, Y_test)

# Shuffle only the training batches. Accuracy does not depend on the order of
# the evaluation batches, so the test loader iterates in a fixed order.
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)


NETWORK SETUP

In [9]:
class Feedforward(nn.Module):
    """Single-hidden-layer classifier.

    Pipeline: Linear -> Dropout(0.3) -> ReLU -> Linear -> LogSoftmax.

    Args:
        input_size: Number of features per sample.
        hidden_sizes: Sequence of hidden-layer widths. Only ``hidden_sizes[0]``
            is used; any further entries are accepted and ignored so existing
            callers that pass a longer list keep working.
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        # Xavier (Glorot) normal initialisation for the first layer's weights;
        # fc2 keeps nn.Linear's default initialisation.
        nn.init.xavier_normal_(self.fc1.weight)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_sizes[0], output_size)
        # dim=1 normalises across the class axis of a (batch, classes) tensor.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a ``(batch, input_size)`` tensor.

        The output has shape ``(batch, output_size)``; ``exp`` of each row sums
        to 1. Dropout is only active while the module is in training mode.
        """
        hidden = self.fc1(x)
        hidden = self.dropout(hidden)
        hidden = self.relu(hidden)
        return self.softmax(self.fc2(hidden))


LAST PARAMETERS AND MODEL TRAINING

In general, we first tried a learning rate of 0.001, but we saw that with very low values training was very slow,
and with high values such as 1 it never converged.

Next we adjusted the batch size and saw that at 10000 the accuracy was around 90%, but at 10 it was very bad. So the
optimal value was 100, because 1000 also did not give very good accuracy.

Regarding the network, we tried adding 2 more hidden layers, 3 in total excluding the input/output layers.

We noticed that even when we put 1024 units in each one the accuracy was 97%, while the same accuracy was reached with only one
hidden layer with 256 units as the optimal number.

We replaced the Gaussian weight initialization with the Xavier one and did not notice any difference.

Finally, we applied dropout in two layers at first, but with one hidden layer it was applied once, with p=0.5. (Note: the network cell above currently uses `nn.Dropout(0.3)`.)

In [10]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation, batch shuffling and dropout masks repeat on a fresh
# Restart & Run All (42 matches the random_state used for the data split).
torch.manual_seed(42)

model = Feedforward(input_size, hidden_sizes, output_size)

# Convert the parameters to float64 so they match the dtype of the feature
# tensors, which were created from a NumPy array with torch.from_numpy.
model.double()

# The model already returns log-probabilities. CrossEntropyLoss applies
# log-softmax again, which leaves log-probabilities unchanged, so this is
# numerically the same as the negative log-likelihood of the model's output.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

# Training mode keeps dropout active.
model.train()

for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        # Flatten each sample into a single feature vector.
        images = images.view(images.shape[0], -1)

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate, then apply the SGD-with-momentum update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses over this epoch.
    print("Epoch {} - Training loss: {}".format(e, running_loss / len(train_loader)))


Epoch 0 - Training loss: 0.3644384476045995
Epoch 1 - Training loss: 0.1634927003568831
Epoch 2 - Training loss: 0.12229644579367538
Epoch 3 - Training loss: 0.09980145037674487
Epoch 4 - Training loss: 0.08852390619689933
Epoch 5 - Training loss: 0.07646291952088921
Epoch 6 - Training loss: 0.06579911293779074
Epoch 7 - Training loss: 0.06163150298869221
Epoch 8 - Training loss: 0.052137173629731436
Epoch 9 - Training loss: 0.04847681830572696


MODEL VALIDATION 

In [11]:
# Evaluation mode disables dropout so predictions are deterministic.
model.eval()

correct_count, all_count = 0, 0
# No gradients are needed for scoring; one no_grad context covers every batch.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each sample the same way the training loop does.
        images = images.view(images.shape[0], -1)

        # One forward pass per batch instead of one per image.
        logps = model(images)

        # exp() is monotonic, so the arg-max of the log-probabilities is the
        # most probable class; ties resolve to the first maximal index.
        pred_labels = logps.argmax(dim=1)

        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count / all_count))


Number Of Images Tested = 8400

Model Accuracy = 0.9753571428571428
