# Fashion-MNIST Artificial Neural Network Optimized with Dropout, Batch Normalization, and L2 Regularization Using PyTorch

## Import Libraries

In [None]:
# library to see model summary
!pip install torchinfo

: 

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import kagglehub
import random
import matplotlib.pyplot as plt
from torchinfo import summary

In [2]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

## Loading the Dataset

In [3]:
# Fetch the Fashion-MNIST dataset through kagglehub; the call returns the
# directory that holds the dataset files.
dataset_path = kagglehub.dataset_download("zalando-research/fashionmnist")

# Read the training split from its CSV file into a DataFrame.
csv_file_path = f"{dataset_path}/fashion-mnist_train.csv"
df_train = pd.read_csv(csv_file_path)

In [4]:
# Preview the first rows of the training frame.
df_train.head()

In [5]:
# (number of rows, number of columns) of the training frame.
df_train.shape

In [6]:
# Read the test split from the same dataset directory into its own DataFrame.
csv_file_path = f"{dataset_path}/fashion-mnist_test.csv"
df_test = pd.read_csv(csv_file_path)

In [7]:
# Preview the first rows of the test frame.
df_test.head()

In [8]:
# (number of rows, number of columns) of the test frame.
df_test.shape

## Viewing a Random Image

In [10]:
# train image
# random.randrange excludes the upper bound, so the index is always a valid row.
# (random.randint(0, len(df_train)) includes len(df_train) itself, which would
# make the .iloc lookup below raise IndexError.)
random_index = random.randrange(len(df_train))

# Column 0 is skipped; the remaining 784 values are reshaped into a 28x28 image.
image = df_train.iloc[random_index, 1:].values.reshape(28, 28)
plt.imshow(image)
plt.show()

In [11]:
# test image
# random.randrange excludes the upper bound, so the index is always a valid row.
# (random.randint(0, len(df_test)) includes len(df_test) itself, which would
# make the .iloc lookup below raise IndexError.)
random_index = random.randrange(len(df_test))

# Column 0 is skipped; the remaining 784 values are reshaped into a 28x28 image.
image = df_test.iloc[random_index, 1:].values.reshape(28, 28)
plt.imshow(image)
plt.show()

## Separating Features and Labels for the Training and Test Sets

In [9]:
# Column 0 is taken as the label vector; every remaining column is a feature.
X_train, y_train = df_train.iloc[:, 1:].to_numpy(), df_train.iloc[:, 0].to_numpy()
X_test, y_test = df_test.iloc[:, 1:].to_numpy(), df_test.iloc[:, 0].to_numpy()

In [10]:
# Scale the features by 1/255 (assumes raw pixel values lie in 0-255 - TODO confirm).
# The scaled arrays are recomputed from the raw frames rather than from
# X_train / X_test themselves, so re-running this cell cannot divide the data
# by 255 a second time.
X_train = df_train.iloc[:, 1:].values / 255.0
X_test = df_test.iloc[:, 1:].values / 255.0

## Creating a Custom Dataset Class

In [11]:
class CustomDataset(Dataset):
    """In-memory dataset that pairs feature rows with integer class labels.

    Args:
        features: array-like of samples; converted to a ``torch.float32`` tensor.
        labels: array-like of class indices, one per sample; converted to a
            ``torch.long`` tensor.

    Raises:
        ValueError: if ``features`` and ``labels`` do not contain the same
            number of samples.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail early: a length mismatch would otherwise only surface later as an
        # IndexError in __getitem__, or silently ignore the surplus labels.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same number of samples, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [12]:
# Wrap the feature/label arrays of each split in a CustomDataset.
train_datataset = CustomDataset(X_train, y_train)
test_datataset = CustomDataset(X_test, y_test)

In [13]:
# Pinned (page-locked) host memory only helps when batches are copied to an
# accelerator. Requesting it on a CPU-only machine just triggers a warning,
# so it is enabled only when a non-CPU device was selected.
use_pinned_memory = device != "cpu"

# Training batches are reshuffled every epoch; test batches keep their order.
train_dataloader = DataLoader(train_datataset, batch_size=32, shuffle=True, pin_memory=use_pinned_memory)
test_dataloader = DataLoader(test_datataset, batch_size=32, shuffle=False, pin_memory=use_pinned_memory)

## Model Building, Training and Evaluation

In [38]:
# creating the model class
class Model(nn.Module):
    """Fully connected classifier with batch normalization and dropout.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. The
    final ``Linear`` layer returns one raw score (logit) per class; no softmax
    is applied, which is the input format ``nn.CrossEntropyLoss`` expects.

    Args:
        num_features: size of each (flattened) input sample.
        num_classes: number of output logits. Defaults to 10.
        hidden_sizes: widths of the hidden layers, in order.
            Defaults to ``(128, 64)``.
        dropout: drop probability applied after every hidden activation.
            Defaults to 0.2.
    """

    def __init__(self, num_features, num_classes=10, hidden_sizes=(128, 64), dropout=0.2):
        super().__init__()

        layers = []
        in_width = num_features
        for out_width in hidden_sizes:
            # one hidden stage
            layers += [
                nn.Linear(in_width, out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
                nn.Dropout(p=dropout),
            ]
            in_width = out_width

        # output layer
        layers.append(nn.Linear(in_width, num_classes))

        # A single nn.Sequential stored as ``model``: with the default arguments
        # the layer order and indices (and therefore the state_dict keys) are
        # the same as a hand-written 128 -> 64 -> 10 stack.
        self.model = nn.Sequential(*layers)

    def forward(self, features):
        """Return the class logits for a batch of input features."""
        return self.model(features)

In [39]:
# Training hyperparameters: SGD step size and number of passes over the training data.
learning_rate = 0.1
epochs = 100

In [42]:
# Seed PyTorch's global random number generator before the model is built so that
# weight initialisation, DataLoader shuffling and dropout masks are repeatable
# after "Restart Kernel -> Run All". (Bit-exact results can still differ
# between hardware backends.)
torch.manual_seed(42)

# create an instance of the Model class defined above and move it to the selected device
model = Model(num_features=X_train.shape[1]).to(device)

# loss function
loss_function = nn.CrossEntropyLoss()

# optimizer with weight decay for L2 regularization
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=1e-4)

In [41]:
# Build the torchinfo summary of the network, then print it.
model_stats = summary(model)
print(model_stats)

In [20]:
# training loop
# Put the model in training mode explicitly. If this cell is re-run after an
# evaluation cell has called model.eval(), dropout would otherwise stay
# disabled and batch-norm would keep using its frozen running statistics.
model.train()

for epoch in range(epochs):

    # running sum of the per-batch losses of this epoch
    total_epoch_loss = 0.0
    for batch_features, batch_labels in train_dataloader:

        # move data to the selected device
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        # forward pass
        y_pred = model(batch_features)

        # calculate the loss
        loss = loss_function(y_pred, batch_labels)

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # backward pass
        loss.backward()

        # update the model parameters
        optimizer.step()

        # accumulate the loss of each batch
        total_epoch_loss += loss.item()

    # average loss over the batches of this epoch
    avg_loss = total_epoch_loss / len(train_dataloader)

    print(f"Epoch: {epoch + 1}, Loss:{avg_loss}")

In [21]:
# Set the model to evaluation mode: dropout is disabled and batch
# normalization uses its running statistics instead of per-batch statistics.
model.eval()

In [22]:
# evaluation on the test data
# Switch to evaluation mode inside this cell so the result does not depend on
# which cell was run before it. In training mode dropout would stay active and
# batch-norm would use per-batch statistics, distorting the accuracy.
model.eval()

total = 0
correct = 0
# gradients are not needed for evaluation
with torch.no_grad():

    for batch_features, batch_labels in test_dataloader:

        # move data to the selected device
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        y_pred = model(batch_features)

        # predicted class = index of the largest logit in each row
        predicted = y_pred.argmax(dim=1)

        total += batch_labels.shape[0]

        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy on Test Data: {(correct/total)*100}%")

In [23]:
# evaluation on the training data
# Switch to evaluation mode inside this cell so the result does not depend on
# which cell was run before it. In training mode dropout would stay active and
# batch-norm would use per-batch statistics, distorting the accuracy.
model.eval()

total = 0
correct = 0
# gradients are not needed for evaluation
with torch.no_grad():

    for batch_features, batch_labels in train_dataloader:

        # move data to the selected device
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        y_pred = model(batch_features)

        # predicted class = index of the largest logit in each row
        predicted = y_pred.argmax(dim=1)

        total += batch_labels.shape[0]

        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy on Train Data: {(correct/total)*100}%")