### Optimisation of a Neural Network for Image Classification

In [3]:
# Libraries will be imported as needed at that particular point
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

#### Data Loading

In [4]:
# loading the data, using pandas\
X_train_df = pd.read_csv("./Datasets/fashion-mnist_train.csv")
X_test_df = pd.read_csv("./Datasets/fashion-mnist_test.csv")

In [5]:
X_train_df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Data pre-processing

In [6]:
y_train_df = X_train_df["label"]

In [7]:
y_train_df.head

<bound method NDFrame.head of 0        2
1        9
2        6
3        0
4        3
        ..
59995    9
59996    1
59997    8
59998    8
59999    7
Name: label, Length: 60000, dtype: int64>

In [8]:
# removing the labels
X_train_df = X_train_df.iloc[:, 1:]

In [9]:
X_train_df.head()

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,5,0,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,1,2,0,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
X_train_df.shape

(60000, 784)

#### Ready data variables to be used in training, validation and testing

In [11]:
X_train = X_train_df.to_numpy()
y_train = y_train_df.to_numpy()
X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train,
            test_size=0.2,
            random_state=30,
            stratify=y_train
)
X_test = X_test_df.to_numpy()

# Data normalisation
X_train = X_train.astype('float32')/ 255.0      # in-place division, kinda fast than normal division of having to create a new array in memery
X_test = X_test.astype('float32')/ 255.0
X_val = X_val.astype('float32')/ 255.0

In [12]:
# Coverting to tensors. If getting an error, run the above box again

X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
X_val = torch.from_numpy(X_val).float()

y_train = torch.from_numpy(y_train).long()
y_val = torch.from_numpy(y_val).long()

In [13]:
X_train.shape[0]

48000

### Building and training a Baseline model

In [14]:
class MultiLayerPerceptron(nn.Module):

    def __init__(self, input_dim, hidden_dim, hidden_dim2, output_dim):
        super(MultiLayerPerceptron, self).__init__()
        # defining the layers
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, hidden_dim)    # Hidden Layer 1
        self.fc2 = nn.Linear(hidden_dim, hidden_dim2)  # Hidden Layer 2
        self.fc3 = nn.Linear(hidden_dim2, output_dim)  # Output Layer
        self.relu = nn.ReLU()

        # extras for the model optimization
        self.loss_fn = nn.CrossEntropyLoss() # torch gives us this
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        
    def forward(self, X):
        # defining the architcture
        X = self.flatten(X) # flattening
        layer_one = self.relu(self.fc1(X))
        layer_two = self.relu(self.fc2(layer_one))
        output = self.fc3(layer_two)  # just the weights
        return output  # returning the compututed 
        
    def train_model(self, X_train, y_train, X_val = None, y_val = None, num_iters = 10, batch_size = 20):  # maybe patience as stoping condition  too, will see
        # Data is already in tensors
        self.train()  # setting the model to training mode
        n_samples = X_train.shape[0]  # number of samples we have in the data

        # Training loop
        for epoch in range(num_iters):
            epoch_loss = 0

            for i in range(0, n_samples, batch_size): # training in batches
                X_batch = X_train[i:i+batch_size]
                y_batch = y_train[i:i+batch_size]

                # forwad pass
                outputs = self.forward(X_batch)  # outputs given by the activation fucntion
                loss = self.loss_fn(outputs, y_batch) # the loss

                # backwards propagation
                self.optimizer.zero_grad()   # clearing old gradients
                loss.backward()              # computing new gradients
                self.optimizer.step()        # updating weights

                epoch_loss += loss.item()  
            # Print the loss for each epoch
            print(f"The loss at epoch {epoch+1} is {epoch_loss/len(X_train):.5f}")
    
            # validation at each epoch
            if X_val is not None and y_val is not None:
                self.eval() # setting to evaluation mode
                self.eval()  # set model to evaluation mode
                with torch.no_grad():  # disabling gradient computation for validation
                    
                    val_outputs = self.forward(X_val)
                    val_loss = self.loss_fn(val_outputs, y_val)
            
                    # computing the accuracy
                    _, preds = torch.max(val_outputs, 1)
                    correct = (preds == y_val).sum().item()
                    val_acc = correct / len(y_val)
            
                print(f"Validation Loss: {val_loss.item():.4f}, Validation Accuracy: {val_acc*100:.2f}%")
            
                self.train()  # switching back to training mode
            
            

#### Training of the model

In [15]:
input_dim = X_train.shape[1]
model = MultiLayerPerceptron(input_dim, 128, 64, 10)

#Betting our data is pre-processed correctly
model.train_model(X_train, y_train, X_val, y_val)

The loss at epoch 1 is 0.02659
Validation Loss: 0.4160, Validation Accuracy: 84.97%
The loss at epoch 2 is 0.01935
Validation Loss: 0.3783, Validation Accuracy: 86.34%
The loss at epoch 3 is 0.01735
Validation Loss: 0.3600, Validation Accuracy: 86.67%
The loss at epoch 4 is 0.01605
Validation Loss: 0.3589, Validation Accuracy: 86.65%
The loss at epoch 5 is 0.01507
Validation Loss: 0.3528, Validation Accuracy: 86.68%
The loss at epoch 6 is 0.01429
Validation Loss: 0.3486, Validation Accuracy: 87.08%
The loss at epoch 7 is 0.01361
Validation Loss: 0.3371, Validation Accuracy: 87.73%
The loss at epoch 8 is 0.01295
Validation Loss: 0.3298, Validation Accuracy: 88.04%
The loss at epoch 9 is 0.01243
Validation Loss: 0.3358, Validation Accuracy: 87.76%
The loss at epoch 10 is 0.01204
Validation Loss: 0.3437, Validation Accuracy: 87.39%


####  Hyperparameter Optimisation Experiment