<a href="https://colab.research.google.com/github/Aftabgazali/CNN_ON_Fashion_MNIST_Using_TinyVGGModel.ipynb/blob/main/CNN_ON_Fashion_MNIST_Using_TinyVGGModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing the Libraries

In [None]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt

# Importing the Dataset & Preparing the Transform

### The Fashion-MNIST images are already grayscale, so the only transform we need is `ToTensor()`, which converts each image into a tensor.
(Checking `image.shape` gives `1, 28, 28`: a single channel, i.e. a grayscale 28x28 image.)

In [None]:
# Transform pipeline applied to every image on load: only the tensor conversion.
transform = transforms.Compose([ToTensor()])

In [None]:
# Training split of Fashion-MNIST, stored under ./data (downloaded if missing); labels are left untouched.
train_data = datasets.FashionMNIST(root="data", train = True, transform = transform, target_transform= None, download = True)
# Test split, loaded with the same transform.
test_data = datasets.FashionMNIST(root="data", train= False, transform= transform, target_transform=None, download=True)

In [None]:
# Number of samples in the training and test splits.
len(train_data), len(test_data)

In [None]:
# Human-readable class labels; later cells index this list with an integer label.
class_names = train_data.classes
class_names

In [None]:
# Mapping from class name to its integer index.
class_ids = train_data.class_to_idx
class_ids

# Visualizing the Dataset

In [None]:
# Draw one random sample index in [0, len(train_data)) and convert it to a plain Python int.
random_index = torch.randint(0, len(train_data), size=[1]).item()
random_index

In [None]:
# Plot a 4x4 grid of randomly chosen training images, each titled with its class name.
figure = plt.figure(figsize=(10,7))

rows,cols=4,4

# Subplot positions are 1-based, hence the range starting at 1.
for i in range(1,rows*cols + 1):
  # Indices are drawn independently, so the same image may appear more than once.
  random_index = torch.randint(0, len(train_data), size=[1]).item()
  image, label = train_data[random_index]
  figure.add_subplot(rows,cols,i)
  # squeeze() drops the size-1 channel dimension so imshow receives a 2-D array.
  plt.imshow(image.squeeze())
  plt.title(class_names[label])
  plt.axis(False)


In [None]:
# Check the shape of the last plotted image: 1, 28, 28 means one channel, i.e. a grayscale 28x28 image.
image.shape

# Preparing the Data into Batches

In [None]:
# Number of samples per batch for both loaders.
BATCH_SIZE = 32
# Training batches are reshuffled on every pass over the data.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# Test batches keep the dataset order.
test_dataloader = DataLoader(dataset=test_data, batch_size= BATCH_SIZE, shuffle=False)

In [None]:
# Number of batches in each loader.
len(train_dataloader), len(test_dataloader)

# Visualize the Images from Batches

In [None]:
# Pull a single (images, labels) batch from the training loader.
train_features_per_batchs, train_labels_per_batches = next(iter(train_dataloader))

In [None]:
# Number of images in the batch that was just drawn.
len(train_features_per_batchs)

In [None]:
# Draw a random position inside the batch as a plain Python int.
random_index = torch.randint(0, len(train_features_per_batchs), size=[1]).item()

random_index

In [None]:
# Show the image tensor stored at the sampled position of the batch.
train_features_per_batchs[random_index]

In [None]:
# Plot a 4x4 grid of images sampled from the single training batch drawn above.
figure = plt.figure(figsize=(10,7))
rows,cols = 4,4
for i in range(1, rows*cols+1):
  # Sampling is with replacement, so an image can be shown more than once.
  random_index = torch.randint(0, len(train_features_per_batchs), size = [1]).item()
  image, label = train_features_per_batchs[random_index], train_labels_per_batches[random_index]
  figure.add_subplot(rows,cols,i)
  # squeeze() removes the size-1 channel dimension before plotting.
  plt.imshow(image.squeeze())
  plt.title(class_names[label])
  plt.axis(False)

In [None]:
# Same 4x4 grid as the previous cell, re-sampled from the same training batch.
figure = plt.figure(figsize=(10,7))
rows, cols= 4,4
for i in range(1, rows*cols+1):
  # Sampling is with replacement, so an image can be shown more than once.
  random_index = torch.randint(0, len(train_features_per_batchs), size=[1]).item()
  image, label = train_features_per_batchs[random_index], train_labels_per_batches[random_index]
  figure.add_subplot(rows,cols,i)
  # squeeze() removes the size-1 channel dimension before plotting.
  plt.imshow(image.squeeze())
  plt.title(class_names[label])
  plt.axis(False)

# Building our Baseline Model

In [None]:
# baseline model
class FashionMnistV0(nn.Module):
  """Baseline classifier: flatten the input, then apply two linear layers.

  There is no activation between the two linear layers, so the whole model
  is a linear map of the flattened input.

  Args:
    in_features: Number of values per sample after flattening.
    out_features: Number of output logits (one per class).
    hidden_units: Width of the intermediate linear layer.
  """
  def __init__(self, in_features:int,
               out_features:int,
               hidden_units: int):
    super().__init__()
    self.layer_stacked = nn.Sequential(
        nn.Flatten(),  # collapse every dimension after the batch dimension
        nn.Linear(in_features=in_features, out_features=hidden_units),
        nn.Linear(in_features=hidden_units, out_features=out_features)
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Return the raw logits for the batch `x`."""
    # The annotation is the Tensor type itself; the previous `torch.Tensor()`
    # built an empty tensor at definition time and used that as the annotation.
    return self.layer_stacked(x)


# Create the instance of the baseline model
# `device` is otherwise only assigned in the "Setting up Device Agnostic code" cell further
# down, so it is defined here as well; without it a top-to-bottom run fails with a NameError.
device = "cuda" if torch.cuda.is_available() else "cpu"
baseline_model = FashionMnistV0(in_features=28*28,out_features=len(class_names), hidden_units=10).to(device)
baseline_model

In [None]:
# Inspect the current parameter tensors of the baseline model.
baseline_model.state_dict()

## Defining Loss and Optimizer for our Baseline Model

In [None]:
# Loss for multi-class classification on raw logits.
model_loss = nn.CrossEntropyLoss()
# Adam optimizer bound to the baseline model's parameters.
model_optimizer = torch.optim.Adam(params = baseline_model.parameters(), lr = 0.02)

## Defining the accuracy function

In [None]:
def model_accuracy(y_true, y_predictions):
  """Return the percentage (0-100) of positions where the two tensors agree.

  Args:
    y_true: Tensor of ground-truth labels.
    y_predictions: Tensor of predicted labels, compared element-wise with `y_true`.
  """
  matches = torch.eq(y_true, y_predictions)
  correct = matches.sum().item()
  total = len(y_true)
  return (correct / total) * 100

## Training & Testing Loop

In [None]:
from timeit import default_timer as timer

def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
  """Print and return the elapsed time between two timer readings.

  Args:
    start: Timer value taken before training.
    end: Timer value taken after training.
    device: Label of the device, only used in the printed message.

  Returns:
    The difference `end - start`.
  """
  elapsed = end - start
  message = f"Train time on {device}: {elapsed:.3f} seconds"
  print(message)
  return elapsed

In [None]:
from tqdm.auto import tqdm

torch.manual_seed(42)

epochs=3

train_start_on_cpu = timer()
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n-----------------")
  # Training Mode (set once per epoch; the test phase below switches to eval mode)
  baseline_model.train()
  train_loss = 0.0

  for batch, (X,y) in enumerate(train_dataloader):
    X, y = X.to(device), y.to(device)
    # Forward Pass
    y_pred = baseline_model(X)

    # Training Loss
    loss = model_loss(y_pred,y)
    # .item() stores a plain float, so the running total does not keep
    # the autograd history of every batch alive.
    train_loss += loss.item()

    #Optimizer zero grad
    model_optimizer.zero_grad()

    # Backpropagation
    loss.backward()

    # Optimizer step
    model_optimizer.step()

    if batch % 400 == 0:
      print(f"Looked Through {(batch * len(X))}/{len(train_dataloader.dataset)} samples")
  # train_loss is the sum of the per-batch losses, so dividing by the number
  # of batches gives the average loss per batch for this epoch.
  train_loss /= len(train_dataloader)

  # Testing Mode
  test_loss, test_acc = 0.0, 0.0
  baseline_model.eval()
  with torch.inference_mode():
    for X_test, y_test in test_dataloader:
      # Evaluate on the test batch itself (the previous code reused the last
      # training batch X, y here, so the reported metrics were not test metrics).
      X_test, y_test = X_test.to(device), y_test.to(device)

      # Forward Pass
      test_pred = baseline_model(X_test)

      # Testing Loss
      test_loss += model_loss(test_pred, y_test).item()

      # Testing acc
      test_acc += model_accuracy(y_test, test_pred.argmax(dim=1))

  # Report the per-batch averages for this epoch
  print(f"Training Loss per Batch {train_loss:.2f} | Testing Loss per Batch {test_loss/len(test_dataloader):.2f} | Testing Accuracy per Batch {test_acc/len(test_dataloader):.2f}")

train_time_end_on_cpu = timer()

# The device label is read from the model's own parameters.
total_train_time_baseline_model = print_train_time(start=train_start_on_cpu,
                                                   end=train_time_end_on_cpu,
                                                   device=str(next(baseline_model.parameters()).device))

In [None]:
# Elapsed time returned by print_train_time for the baseline training run.
total_train_time_baseline_model

## Building Model Evaluation Function

In [None]:
torch.manual_seed(42)

def eval_model(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              model_loss: torch.nn.Module,
              model_accuracy,
              device: torch.device = None):
  """Evaluate `model` on `data_loader` and return a summary dictionary.

  Args:
    model: Model to evaluate; it is switched to eval mode.
    data_loader: Loader yielding (inputs, labels) batches.
    model_loss: Loss callable applied to (logits, labels).
    model_accuracy: Callable taking (labels, predicted_labels) and returning a number.
    device: Device the batches are moved to. When omitted, the device of the
      model's first parameter is used (CPU if the model has no parameters), so
      the function no longer depends on a global `device` existing when this
      cell is run.

  Returns:
    A dict with keys "model_name" (class name of the model), "model_loss"
    (average loss per batch, float) and "model_acc" (average accuracy per batch).
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  loss, acc = 0.0, 0.0
  model.eval()
  with torch.inference_mode():
    for X, y in tqdm(data_loader):
      X, y = X.to(device), y.to(device)
      test_pred = model(X)
      # Accumulate plain floats rather than tensors.
      loss += model_loss(test_pred, y).item()

      acc += model_accuracy(y, test_pred.argmax(dim=1))

  # Scale the sums to averages per batch
  loss /= len(data_loader)
  acc /= len(data_loader)

  return {"model_name": model.__class__.__name__,"model_loss": loss, "model_acc":acc}

### If this cell raises an error saying that one or more parameters are on the CPU, the baseline model is still on the CPU; use `.to(device)` to move it to the GPU.

In [None]:
# Evaluate the baseline model on the test loader and show the resulting dictionary.
baseline_model_results = eval_model(baseline_model, test_dataloader, model_loss, model_accuracy)
baseline_model_results

# Setting up Device Agnostic code

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): cells above this one already reference `device`
# (model creation and training), so it has to be defined before they run.
device = "cuda" if torch.cuda.is_available() else "cpu"

device

# Building Non Linear Model

In [None]:
class FashionMnistV1(nn.Module):
  """Non-linear classifier: flatten, then three linear layers with ReLU in between.

  Layer widths are in_features -> hidden_units -> hidden_units*2 -> out_features.

  Args:
    in_features: Number of values per sample after flattening.
    out_features: Number of output logits (one per class).
    hidden_units: Width of the first hidden layer; the second is twice as wide.
  """
  def __init__(self, in_features:int,
               out_features:int,
               hidden_units: int):
    super().__init__()
    self.layer_stacked = nn.Sequential(
        nn.Flatten(),  # collapse every dimension after the batch dimension
        nn.Linear(in_features=in_features, out_features=hidden_units),
        nn.ReLU(),
        nn.Linear(in_features=hidden_units, out_features=hidden_units*2),
        nn.ReLU(),
        nn.Linear(in_features=hidden_units*2, out_features=out_features)
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Return the raw logits for the batch `x`."""
    # The annotation is the Tensor type itself; the previous `torch.Tensor()`
    # built an empty tensor at definition time and used that as the annotation.
    return self.layer_stacked(x)


# Create an instance of the non-linear model (FashionMnistV1) and move it to `device`
model_v1 = FashionMnistV1(in_features=28*28,out_features=len(class_names), hidden_units=10).to(device)
model_v1

## Setting up Loss & Optimizer function

In [None]:
# These rebind the notebook-level `model_loss` / `model_optimizer`; the optimizer now tracks model_v1.
model_loss = nn.CrossEntropyLoss() # measures how wrong the model's predictions are
model_optimizer = torch.optim.Adam(params = model_v1.parameters(), lr = 0.02) # updates the model's parameters to reduce the loss

## Building Training & Testing Loop Function

In [None]:
from tqdm.auto import tqdm

def train_step(model: torch.nn.Module,
                    data_loader:torch.utils.data.DataLoader,
                    no_of_epochs: int,
                    model_loss: torch.nn.Module,
                    model_optimizer: torch.optim.Optimizer,
                    model_accuracy,
               device: torch.device = device):
  """Train `model` on `data_loader` for `no_of_epochs` epochs, printing progress.

  NOTE: a later cell in this notebook redefines `train_step` with a different
  parameter order.

  Args:
    model: Model to train; it is switched to train mode each epoch.
    data_loader: Loader yielding (inputs, labels) batches.
    no_of_epochs: Number of full passes over `data_loader`.
    model_loss: Loss callable applied to (logits, labels).
    model_optimizer: Optimizer that updates the model's parameters.
    model_accuracy: Callable taking (labels, predicted_labels) and returning a number.
    device: Device the batches are moved to.

  Returns:
    None. The per-epoch average loss and accuracy are printed.
  """
  torch.manual_seed(42)
  for epoch in tqdm(range(no_of_epochs)):
    print(f"Epoch: {epoch}\n-----------------")
    # Training Mode
    train_loss, train_acc = 0.0, 0.0
    model.train()
    for batch, (X,y) in enumerate(data_loader):
      # Put X & y on the target device
      X, y = X.to(device), y.to(device)
      # Forward Pass
      y_pred = model(X)

      # Training Loss
      loss = model_loss(y_pred,y)
      # .item() stores a plain float, so the running total does not keep
      # the autograd history of every batch alive.
      train_loss += loss.item()

      train_acc += model_accuracy(y, y_pred.argmax(dim=1))

      #Optimizer zero grad
      model_optimizer.zero_grad()

      # Backpropagation
      loss.backward()

      # Optimizer step
      model_optimizer.step()

      if batch % 400 == 0:
        print(f"Looked Through {(batch * len(X))}/{len(data_loader.dataset)} samples")
    # Turn the per-epoch sums into averages per batch
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train Loss {train_loss:.2f} | Training Accuracy {train_acc:.2f}")

In [None]:
from tqdm.auto import tqdm
def test_step(model: torch.nn.Module,
                    data_loader:torch.utils.data.DataLoader,
                    model_loss: torch.nn.Module,
                    model_accuracy,
              device: torch.device = device):
  """Run `model` over every batch of `data_loader` and print the averaged metrics.

  The model is put in eval mode and all forward passes happen under
  `torch.inference_mode()`. Nothing is returned; the average loss and
  accuracy per batch are printed.
  """
  model.eval()
  running_loss = 0
  running_acc = 0
  with torch.inference_mode():
    for features, targets in tqdm(data_loader):
      # Move the batch to the evaluation device
      features = features.to(device)
      targets = targets.to(device)

      logits = model(features)

      # Sum the loss and accuracy of each batch
      running_loss += model_loss(logits, targets)
      running_acc += model_accuracy(targets, logits.argmax(dim=1))

    # Report the averages over all batches
    print(f"Testing Loss per Batch {running_loss/len(data_loader):.2f} | Testing Accuracy per Batch {running_acc/len(data_loader):.2f}")

In [None]:
# Train model_v1 for 3 epochs on the training loader.
train_step(model_v1, train_dataloader, 3, model_loss, model_optimizer, model_accuracy, device)

In [None]:
# Print model_v1's average loss and accuracy over the test loader.
test_step(model_v1, test_dataloader,model_loss, model_accuracy,device)

In [None]:
# Collect model_v1's test metrics as a dictionary for the later comparison table.
model_v1_results = eval_model(model_v1, test_dataloader, model_loss, model_accuracy, device)
model_v1_results

# Model: V2 Building Convolutional Neural Networks

***Note:*** For more insights visit https://poloclub.github.io/cnn-explainer/

In [None]:
# Create a random test image (1 channel, 28x28) to pass through the model as a shape check

test_image = torch.randn(size=(1,28,28))

In [None]:
class FashionMnistV2(nn.Module):
  """
  Model architecture that replicates the TinyVGG, a model from the above polo link

  Two convolutional blocks (conv -> ReLU -> conv -> ReLU -> max-pool) are
  followed by a flatten + linear classifier.

  Args:
    input_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output logits (one per class).
    image_size: Height/width of the (square) input images. Defaults to 28,
      which reproduces the previously hard-coded `hidden_units*7*7` classifier input.

  Raises:
    ValueError: If `image_size` is too small to survive the two 2x2 poolings.
  """
  def __init__(self, input_shape: int, hidden_units:int, output_shape: int, image_size: int = 28):
    super().__init__()
    self.conv_block_1 = self._conv_block(input_shape, hidden_units)
    self.conv_block_2 = self._conv_block(hidden_units, hidden_units)

    # The 3x3 convolutions use stride 1 and padding 1, so they keep the spatial
    # size; each MaxPool2d(2) halves it (rounding down). Two poolings therefore
    # leave image_size // 4 pixels per side, e.g. 28 -> 14 -> 7.
    pooled_size = image_size // 4
    if pooled_size < 1:
      raise ValueError(f"image_size must be at least 4, got {image_size}")
    self.classifier_layer = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units*pooled_size*pooled_size, out_features=output_shape)
    )

  @staticmethod
  def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build one conv -> ReLU -> conv -> ReLU -> max-pool block."""
    # kernel_size, stride and padding are hyperparameters we choose ourselves.
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=out_channels,
                  out_channels=out_channels,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        # Max-pooling replaces every 2x2 window by its maximum value,
        # halving the height and width of the feature map.
        nn.MaxPool2d(kernel_size=2))

  # Forward Pass Method
  def forward(self, x:torch.Tensor) -> torch.Tensor:
    """Return the raw logits for the image batch `x`."""
    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    return self.classifier_layer(x)

# One input channel (grayscale), 10 feature maps per convolution, one logit per class.
model_v2 = FashionMnistV2(input_shape=1,hidden_units=10, output_shape=len(class_names)).to(device)

In [None]:
# Shape check: unsqueeze(0) adds the batch dimension before the forward pass.
model_v2(test_image.unsqueeze(0).to(device))

In [None]:
# These rebind the notebook-level `model_loss` / `model_optimizer`; the optimizer now tracks model_v2.
model_loss = nn.CrossEntropyLoss() # measures how wrong the model's predictions are
model_optimizer = torch.optim.Adam(params = model_v2.parameters(), lr = 0.005) # updates the model's parameters to reduce the loss

## Building Training & Testing Step

In [None]:
from tqdm.auto import tqdm
def train_step(model:torch.nn.Module,
               no_of_epochs: int,
               data_loader: torch.utils.data.DataLoader,
               model_loss: torch.nn.Module,
               model_acc,
               model_optimizer: torch.optim.Optimizer,
               device: torch.device = device):
  """Train `model` on `data_loader` for `no_of_epochs` epochs, printing progress.

  Args:
    model: Model to train; it is switched to train mode each epoch.
    no_of_epochs: Number of full passes over `data_loader`.
    data_loader: Loader yielding (inputs, labels) batches.
    model_loss: Loss callable applied to (logits, labels).
    model_acc: Callable taking (labels, predicted_labels) and returning a number.
    model_optimizer: Optimizer that updates the model's parameters.
    device: Device the batches are moved to.

  Returns:
    None. The per-epoch average loss and accuracy are printed.
  """
  for epoch in tqdm(range(no_of_epochs)):
    print(f"Epoch {epoch} ----------------------")
    # Training Mode
    model.train()
    train_loss, train_acc = 0.0, 0.0
    for batch, (X,y) in enumerate(data_loader):
      X, y = X.to(device), y.to(device)

      # Forward pass
      train_pred = model(X)

      # Calculate the training loss
      loss = model_loss(train_pred, y)
      # .item() stores a plain float: the total does not hold on to the autograd
      # history, and the summary below prints a number instead of a tensor repr.
      train_loss += loss.item()

      # Calculate the training acc
      train_acc += model_acc(y, train_pred.argmax(dim=1))

      # optimizer zero grad
      model_optimizer.zero_grad()

      # Loss Backward
      loss.backward()

      # Optimizer step
      model_optimizer.step()

      if batch % 400 == 0:
        print(f"Looked Through {batch * len(X)}/{len(data_loader.dataset)} samples")

    # Averages over all batches of this epoch
    print(f"Training Loss per batch {train_loss/len(data_loader):.2f} Train Accuracy per batch {train_acc/len(data_loader):.2f}")

In [None]:
# Train the CNN for 3 epochs; `device` is left at the function's default.
train_step(model_v2, 3, train_dataloader, model_loss, model_accuracy, model_optimizer)

In [None]:
from tqdm.auto import tqdm
def test_step(model:torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               model_loss: torch.nn.Module,
               model_acc,
               device: torch.device = device):
  """Evaluate `model` on `data_loader` and print the average loss and accuracy.

  Args:
    model: Model to evaluate; it is switched to eval mode.
    data_loader: Loader yielding (inputs, labels) batches.
    model_loss: Loss callable applied to (logits, labels).
    model_acc: Callable taking (labels, predicted_labels) and returning a number.
    device: Device the batches are moved to.

  Returns:
    None. The averages per batch are printed.
  """
  model.eval()
  test_loss, test_acc = 0.0, 0.0
  with torch.inference_mode():
    for (X,y) in tqdm(data_loader):
      X, y = X.to(device), y.to(device)

      # Forward pass
      test_pred = model(X)

      # Calculate the testing loss; .item() keeps a plain float so the
      # summary below prints a number instead of a tensor repr.
      test_loss += model_loss(test_pred, y).item()

      # Calculate the testing acc
      test_acc += model_acc(y, test_pred.argmax(dim=1))

  # Averages over all batches
  print(f"Testing Loss per batch {test_loss/len(data_loader):.2f} Test Accuracy per batch {test_acc/len(data_loader):.2f}")

In [None]:
# Print the CNN's average loss and accuracy over the test loader.
test_step(model_v2, test_dataloader, model_loss, model_accuracy)

In [None]:
# Evaluate all three models on the same test loader so their metrics can be compared.
baseline_model_results = eval_model(baseline_model, test_dataloader, model_loss, model_accuracy)
model_v1_results = eval_model(model_v1, test_dataloader, model_loss, model_accuracy)
model_v2_results = eval_model(model_v2, test_dataloader, model_loss, model_accuracy)

In [None]:
import pandas as pd
# One row per model; the columns come from the keys of the result dictionaries.
df = pd.DataFrame([baseline_model_results, model_v1_results,model_v2_results])
df

In [None]:
# Horizontal bar chart of the accuracy of each model, labelled by model name.
df.set_index("model_name")["model_acc"].plot(kind="barh")
plt.xlabel("Accuracy %")
plt.ylabel("Models ")

# Making Predictions

In [None]:
# Pull the first (images, labels) batch from the test loader.
test_images_per_batch, test_labels_per_batch = next(iter(test_dataloader))

In [None]:
# Draw a random position inside the test batch as a plain Python int.
random_index = torch.randint(0, len(test_images_per_batch), size=[1]).item()
random_index

In [None]:
# Shape of a single test image (no batch dimension yet).
test_images_per_batch[random_index].shape

In [None]:
# unsqueeze(0) adds the batch dimension the model expects; the output holds one logit per class.
test_prediction = model_v2(test_images_per_batch[random_index].unsqueeze(0).to(device))
test_prediction

In [None]:
# Show the selected test image; squeeze() drops the size-1 channel dimension.
plt.imshow(test_images_per_batch[random_index].squeeze())

In [None]:
# Compare the true label with the class whose logit is largest.
test_label = test_labels_per_batch[random_index]
print(f"Actual Image class name {class_names[test_label]} | Predicted Image class name is {class_names[test_prediction.argmax(dim=1)]}")

## Even Better Visualization

In [None]:
# Plot a 4x4 grid of test images from the batch, each titled with the class predicted by model_v2.
figure = plt.figure(figsize=(10,7))
rows,cols = 4,4
model_v2.eval()
# Predictions only: inference_mode avoids recording autograd history for every forward pass.
with torch.inference_mode():
  for i in range(1, rows*cols+1):
    random_index = torch.randint(0, len(test_images_per_batch), size = [1]).item()
    image, label = test_images_per_batch[random_index], test_labels_per_batch[random_index]

    # Make prediction, unsqueeze() to add the batch dimension which is required by the model
    y_logits = model_v2(image.unsqueeze(0).to(device))
    # Pick the index of the largest logit among the class scores;
    # .item() turns it into a plain int suitable for indexing the class_names list.
    test_prediction_label = y_logits.argmax(dim=1).item()
    figure.add_subplot(rows,cols,i)
    plt.imshow(image.squeeze())
    plt.title(class_names[test_prediction_label])
    plt.axis(False)