<a href="https://colab.research.google.com/github/HoneyBohra26/Fashion-Forward-MNIST-Model-Optimization-and-Deployment/blob/main/Fashion_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install hyperopt



In [None]:
pip install mlflow




In [None]:
import mlflow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F

import torch
import torch.nn as nn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
import os
import matplotlib.pyplot as plt
import seaborn as sns

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials



In [None]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


In [None]:
# Fashion-MNIST training split, downloaded into ./data on first use.
# ToTensor is the only preprocessing step applied to each image.
train_set = torchvision.datasets.FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Matching held-out split (train=False) with the same preprocessing.
test_set = torchvision.datasets.FashionMNIST(
    root="./data",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
batch_size = 100

# Reshuffle the training samples every epoch so consecutive mini-batches are
# not always composed of the same examples in the same order.
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=batch_size,
                                           shuffle=True)
# Evaluation order does not affect the metrics, so the test split stays unshuffled.
test_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=batch_size)

# Per-epoch histories, appended to by the training / validation helpers and
# read back when the loss and accuracy curves are plotted.
train_loss_list = []
train_accuracy_list = []

test_loss_list = []
test_accuracy_list = []

# Baseline that a test accuracy has to beat before a checkpoint is written.
# (A `global` statement at module level has no effect, so it is omitted.)
best_test_accuracy = 0

In [None]:
def output_label(label):
    """Return the human-readable Fashion-MNIST class name for a label.

    Args:
        label: Class index in ``0..9``, either a plain integer or a
            single-element ``torch.Tensor`` (including Tensor subclasses).

    Returns:
        The class name associated with the index.

    Raises:
        KeyError: If the index is not one of the ten known classes.
    """
    output_mapping = {
        0: "T-shirt/Top",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle Boot",
    }
    # isinstance (instead of an exact type comparison) also unwraps Tensor
    # subclasses such as nn.Parameter; the local name avoids shadowing `input`.
    class_index = label.item() if isinstance(label, torch.Tensor) else label
    return output_mapping[class_index]

In [None]:
class FashionCNN(nn.Module):
    """Convolutional classifier with two conv stages and a three-layer linear head.

    The first linear layer expects 64 * 6 * 6 features, which is what the two
    conv/pool stages produce for single-channel 28x28 inputs. The network
    returns one raw score (logit) per class for each of the 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels, spatial size kept by padding, then halved by pooling.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 2: 32 -> 64 channels, unpadded 3x3 convolution, then 2x2 pooling.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Classifier head; dropout is applied to the output of the first linear layer.
        self.fc1 = nn.Linear(64 * 6 * 6, 600)
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(600, 120)
        self.fc3 = nn.Linear(120, 10)

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of class scores."""
        feature_maps = self.layer2(self.layer1(x))
        # Collapse everything except the batch dimension before the linear head.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        hidden = self.drop(self.fc1(flattened))
        # NOTE(review): no activation is applied between the linear layers.
        return self.fc3(self.fc2(hidden))

In [None]:
def validate_model(model,test_loader,checkpoint_dir,epoch,best_test_accuracy):
    """Score ``model`` on ``test_loader``, log the result and checkpoint on improvement.

    The model is switched to evaluation mode for the duration of the pass and
    restored to its previous mode afterwards. Single-channel image batches are
    repeated to three channels before being fed to the model. The test loss and
    accuracy are logged to MLflow at step ``epoch`` and appended to the
    module-level ``test_loss_list`` / ``test_accuracy_list`` histories.

    Args:
        model: The ``nn.Module`` to evaluate.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        checkpoint_dir: Directory that receives ``model_epoch_<epoch + 1>.pth``
            when the accuracy improves.
        epoch: Zero-based epoch index, used for the MLflow step and file name.
        best_test_accuracy: Best accuracy observed before this call.

    Returns:
        The best accuracy after this call (the larger of the previous best and
        the accuracy just measured), so the caller can carry it forward.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    error = nn.CrossEntropyLoss()

    batch_labels = []
    batch_predictions = []
    total2 = 0
    running_loss2 = 0.0

    # BatchNorm and Dropout must not run in training mode while scoring.
    # Remember the current mode so the caller's training loop is unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.inference_mode():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)

                if images.shape[1] == 1:
                    # Repeat the grayscale channel 3 times
                    images = images.repeat(1, 3, 1, 1)

                # The batch is passed as-is: no fixed batch size is assumed,
                # so a smaller final batch is handled correctly.
                outputs = model(images)
                loss = error(outputs, labels)
                # Weight the mean batch loss by the batch size so the final
                # average is per sample.
                running_loss2 += loss.item() * images.size(0)

                batch_labels.append(labels)
                batch_predictions.append(outputs.argmax(dim=1))
                total2 += len(labels)
    finally:
        model.train(was_training)

    if total2 == 0:
        raise ValueError("test_loader yielded no samples")

    labels_cpu2 = torch.cat(batch_labels).cpu().numpy()
    predictions_cpu2 = torch.cat(batch_predictions).cpu().numpy()

    test_accuracy = accuracy_score(labels_cpu2, predictions_cpu2)
    test_loss = running_loss2 / total2
    print("Test_Loss: {},Test_Accuracy: {}%".format(test_loss, test_accuracy))
    mlflow.log_metrics(
        {"Test_Accuracy": float(test_accuracy), "Test_Loss": float(test_loss)},
        step=epoch,
    )

    # Save model checkpoint if test accuracy improves
    if test_accuracy > best_test_accuracy:
        best_test_accuracy = test_accuracy
        os.makedirs(checkpoint_dir, exist_ok=True)
        checkpoint_path = os.path.join(checkpoint_dir, f'model_epoch_{epoch + 1}.pth')
        torch.save(model.state_dict(), checkpoint_path)
        print(f'Model checkpoint saved at {checkpoint_path}')

    test_loss_list.append(test_loss)
    test_accuracy_list.append(test_accuracy)

    # Rebinding the parameter is invisible to the caller, so hand the value back.
    return best_test_accuracy












In [None]:
def log_model_n_params(model,labels_list,predictions_list,params):
    """Log the model, its hyperparameters and a confusion matrix to MLflow.

    Args:
        model: PyTorch model, logged and registered under the name ``model1``.
        labels_list: List of label tensors (one per batch); concatenated here.
        predictions_list: List of predicted-class tensors aligned with
            ``labels_list``.
        params: Mapping of hyperparameter names to values, logged as MLflow
            params.

    Side effects:
        Writes ``confusion_matrix.png`` to the working directory, logs it and
        ``confusion_matrix.json`` as run artifacts, displays the heatmap and
        prints the matrix.
    """
    labels_cpu = torch.cat(labels_list).cpu().numpy()
    predictions_cpu = torch.cat(predictions_list).cpu().numpy()

    mlflow.pytorch.log_model(
        pytorch_model=model,
        artifact_path="model1",
        signature=None,
        registered_model_name="model1",
    )

    cm = confusion_matrix(labels_cpu, predictions_cpu)
    cm_json = {'confusion_matrix': cm.tolist()}
    # A confusion matrix is a result, not a hyperparameter, and MLflow param
    # values are length-limited; store it as a JSON artifact instead.
    mlflow.log_dict(cm_json, 'confusion_matrix.json')

    # Log parameters
    mlflow.log_params(params)

    # Plot and log the confusion matrix; fmt='d' prints the integer counts
    # verbatim instead of the default shortened float format.
    figure = plt.figure(figsize=(12, 12))
    sns.heatmap(cm, annot=True, fmt='d', cmap='coolwarm', square=True)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    # Save before showing: once the figure has been shown it may already be
    # released, in which case savefig would write an empty image.
    plt.savefig('confusion_matrix.png')
    mlflow.log_artifact('confusion_matrix.png')
    plt.show()
    plt.close(figure)

    print(cm_json.keys())
    print(cm_json['confusion_matrix'])




In [None]:
def evaluation_metrics_n_Hyperparameters(labels_list,predictions_list,train_loss,epoch):
    """Compute training metrics for one epoch, log them and record the history.

    Accuracy plus macro-averaged precision and recall are computed from the
    concatenated label / prediction tensors and logged to MLflow together with
    the training loss. The loss and accuracy are also appended to the
    module-level ``train_loss_list`` / ``train_accuracy_list``.

    Args:
        labels_list: List of label tensors (one per batch).
        predictions_list: List of predicted-class tensors aligned with
            ``labels_list``.
        train_loss: Training loss to report for this epoch.
        epoch: Zero-based epoch index; used as the MLflow step so each epoch
            is recorded as its own point in the metric history.
    """
    labels_cpu = torch.cat(labels_list).cpu().numpy()
    predictions_cpu = torch.cat(predictions_list).cpu().numpy()

    accuracy = accuracy_score(labels_cpu, predictions_cpu)
    precision = precision_score(labels_cpu, predictions_cpu, average='macro')
    recall = recall_score(labels_cpu, predictions_cpu, average='macro')

    # One batched call, tagged with the epoch.
    mlflow.log_metrics(
        {
            'accuracy': float(accuracy),
            'precision': float(precision),
            'recall': float(recall),
            'loss': float(train_loss),
        },
        step=epoch,
    )

    print("epoch: {}, Loss: {}, Accuracy: {}%".format(epoch, train_loss, accuracy))

    train_loss_list.append(train_loss)
    train_accuracy_list.append(accuracy)


In [None]:
def plot_loss_accuracy(train_loss_list, train_accuracy_list, test_loss_list, test_accuracy_list,num_epochs):
    """Plot loss and accuracy curves for the last ``num_epochs`` epochs and log them.

    Only the most recent ``num_epochs`` entries of each history are drawn, and
    each curve gets an x-axis sized to its own length. This keeps the plot
    working when the histories hold more entries than ``num_epochs`` (for
    example lists that accumulated over several runs) instead of failing with
    a shape mismatch.

    Args:
        train_loss_list: Per-epoch training losses.
        train_accuracy_list: Per-epoch training accuracies.
        test_loss_list: Per-epoch test losses.
        test_accuracy_list: Per-epoch test accuracies.
        num_epochs: Number of most recent epochs to display.

    Side effects:
        Saves ``Training and Test Loss.png`` and
        ``Training and Test Accuracy.png`` in the working directory, logs both
        to MLflow and shows the figure.
    """

    def _recent(history):
        # Keep only the trailing window; a non-positive window means "nothing".
        history = list(history)
        return history[-num_epochs:] if num_epochs > 0 else []

    train_loss = _recent(train_loss_list)
    test_loss = _recent(test_loss_list)
    train_accuracy = _recent(train_accuracy_list)
    test_accuracy = _recent(test_accuracy_list)

    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(range(len(train_loss)), train_loss, 'b', label='Training Loss')
    plt.plot(range(len(test_loss)), test_loss, 'r', label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Test Loss')
    # Saved before the accuracy panel exists, so this image holds the loss panel only.
    plt.savefig('Training and Test Loss.png')
    mlflow.log_artifact('Training and Test Loss.png')

    plt.subplot(1, 2, 2)
    plt.plot(range(len(train_accuracy)), train_accuracy, 'b', label='Training Accuracy')
    plt.plot(range(len(test_accuracy)), test_accuracy, 'r', label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Training and Test Accuracy')
    # Saved with both panels drawn.
    plt.savefig('Training and Test Accuracy.png')
    mlflow.log_artifact('Training and Test Accuracy.png')

    plt.show()




In [None]:
def train_model(model,train_loader,params,scheduler=None):
    """Train ``model``, validating, logging and checkpointing after every epoch.

    Single-channel image batches are repeated to three channels before being
    fed to the model. After each epoch the model is validated on the
    module-level ``test_loader`` and the epoch's training metrics are logged.
    When training finishes, the model, the parameters, a confusion matrix of
    the final epoch's training predictions and the loss/accuracy curves are
    logged.

    Args:
        model: The ``nn.Module`` to train (already on ``device``).
        train_loader: Iterable yielding ``(images, labels)`` batches. The
            batch size is whatever this loader produces.
        params: Mapping with ``'learning_rate'``, ``'num_epochs'`` and
            ``'optimizer'``. ``'optimizer'`` is either the name ``'Adam'`` or
            ``'SGD'`` or an already constructed ``torch.optim.Optimizer``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
            It only has an effect if it is bound to the optimizer used here,
            i.e. when ``params['optimizer']`` is an optimizer instance. When
            omitted, ``StepLR(step_size=3, gamma=0.1)`` is created for the
            optimizer in use.

    Returns:
        The trained model (the same object that was passed in).

    Raises:
        ValueError: If the optimizer is not recognised (for example a raw
            ``hp.choice`` index instead of a name), if ``num_epochs`` is less
            than 1, or if ``train_loader`` yields no samples.
    """
    # hyperparameters
    learning_rate = params['learning_rate']
    num_epochs = int(params['num_epochs'])
    optimizer_spec = params['optimizer']
    error = nn.CrossEntropyLoss()

    if num_epochs < 1:
        raise ValueError(f"num_epochs must be at least 1, got {num_epochs}")

    optimizer_classes = {'Adam': torch.optim.Adam, 'SGD': torch.optim.SGD}
    if isinstance(optimizer_spec, torch.optim.Optimizer):
        optimizer = optimizer_spec
    elif isinstance(optimizer_spec, str) and optimizer_spec in optimizer_classes:
        optimizer = optimizer_classes[optimizer_spec](model.parameters(), lr=learning_rate)
    else:
        raise ValueError(
            f"Unsupported optimizer {optimizer_spec!r}; expected 'Adam', 'SGD' or a "
            "torch.optim.Optimizer instance. hyperopt's fmin returns hp.choice "
            "parameters as indices - resolve them with hyperopt.space_eval first."
        )

    if scheduler is None:
        scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

    # Directory to save model checkpoints
    checkpoint_dir = 'checkpoints'
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Carried across epochs so a checkpoint is written only on real improvement.
    best_accuracy = best_test_accuracy

    predictions_list = []
    labels_list = []

    for epoch in range(num_epochs):
        # Validation may leave the model in eval mode; always train in train mode.
        model.train()

        # Metrics are per epoch, so the accumulators start fresh each time.
        predictions_list = []
        labels_list = []
        running_loss = 0.0
        total = 0

        for images, labels in train_loader:
            # Transferring images and labels to GPU if available
            images, labels = images.to(device), labels.to(device)

            if images.shape[1] == 1:
                # Repeat the grayscale channel 3 times
                images = images.repeat(1, 3, 1, 1)

            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()

            # Forward pass on the batch as delivered (no fixed batch size assumed).
            outputs = model(images)
            loss = error(outputs, labels)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            # Weight the mean batch loss by the batch size for a per-sample average.
            running_loss += loss.item() * images.size(0)
            predictions_list.append(outputs.argmax(dim=1))
            labels_list.append(labels)
            total += len(labels)

        if total == 0:
            raise ValueError("train_loader yielded no samples")

        train_loss = running_loss / total

        scheduler.step()  # Update learning rate

        # Validation; keep the best accuracy when the helper reports one.
        updated_best = validate_model(model, test_loader, checkpoint_dir, epoch, best_accuracy)
        if updated_best is not None:
            best_accuracy = updated_best

        # Log and record this epoch's training metrics.
        evaluation_metrics_n_Hyperparameters(labels_list, predictions_list, train_loss, epoch)

    # Log the model together with the final epoch's training predictions.
    log_model_n_params(model, labels_list, predictions_list, params)

    # Plot only this run's epochs; the module-level histories may also hold
    # entries from earlier runs.
    plot_loss_accuracy(
        train_loss_list[-num_epochs:],
        train_accuracy_list[-num_epochs:],
        test_loss_list[-num_epochs:],
        test_accuracy_list[-num_epochs:],
        num_epochs,
    )

    return model














In [None]:
def objective(params):
    """Objective function for hyperparameter optimization.

    Trains a private copy of the module-level ``model`` with the sampled
    hyperparameters and reports the loss on the module-level ``test_loader``.
    Working on a copy keeps trials independent of each other and leaves the
    shared model's weights untouched for the final training run.

    Args:
        params: Sampled hyperparameters with the keys ``'learning_rate'``,
            ``'num_epochs'``, ``'optimizer'`` (``'Adam'`` or ``'SGD'``) and
            ``'batch_size'``.

    Returns:
        A hyperopt result dict: ``{'loss': <mean test loss>, 'status': STATUS_OK}``.

    Raises:
        ValueError: If the optimizer name is unknown or the evaluation loader
            yields no samples.
    """
    import copy

    learning_rate = params['learning_rate']
    num_epochs = int(params['num_epochs'])
    optimizer_name = params['optimizer']
    batch_size = int(params['batch_size'])

    optimizer_classes = {'Adam': torch.optim.Adam, 'SGD': torch.optim.SGD}
    if optimizer_name not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer {optimizer_name!r}; expected 'Adam' or 'SGD'")

    # Everything a trial needs is built here rather than read from globals.
    trial_model = copy.deepcopy(model).to(device)
    error = nn.CrossEntropyLoss()
    optimizer = optimizer_classes[optimizer_name](trial_model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

    # Apply the sampled batch size, which the shared loader would ignore.
    trial_train_loader = DataLoader(train_loader.dataset, batch_size=batch_size, shuffle=True)

    trial_model.train()
    for epoch in range(num_epochs):

        for images, labels in trial_train_loader:
            # Transferring images and labels to GPU if available
            images, labels = images.to(device), labels.to(device)

            if images.shape[1] == 1:
                # Repeat the grayscale channel 3 times
                images = images.repeat(1, 3, 1, 1)

            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()

            outputs = trial_model(images)
            loss = error(outputs, labels)

            loss.backward()
            optimizer.step()

        scheduler.step()  # Update learning rate

    # Validation: BatchNorm / Dropout must be in inference mode while scoring.
    trial_model.eval()
    total2 = 0
    running_loss2 = 0.0

    with torch.inference_mode():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            if images.shape[1] == 1:
                images = images.repeat(1, 3, 1, 1)

            outputs = trial_model(images)
            loss = error(outputs, labels)
            # Weight the mean batch loss by the batch size for a per-sample average.
            running_loss2 += loss.item() * images.size(0)
            total2 += len(labels)

    if total2 == 0:
        raise ValueError("test_loader yielded no samples")

    val_loss = running_loss2 / total2

    return {'loss': val_loss, 'status': STATUS_OK}


In [None]:
from hyperopt import space_eval

mlflow.set_experiment("MNIST_fashion model_resnet")

# The context manager ends the run on exit, so no explicit end_run() is needed.
with mlflow.start_run() as run:

    # ImageNet-pretrained ResNet-18 with its classifier replaced by a 10-class head.
    model = torchvision.models.resnet18(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 10)
    model = model.to(device)

    # Define the search space for hyperparameters
    search_space = {
        'learning_rate': hp.loguniform('learning_rate', -6, -1),
        'num_epochs': hp.quniform('num_epochs', 1, 3, 1),
        'optimizer': hp.choice('optimizer', ['Adam', 'SGD']),
        'batch_size': hp.choice('batch_size', [32, 64, 128]),
    }

    # Initialize Trials object to store results
    trials = Trials()

    # Run hyperparameter optimization
    best_choice = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=1,  # Number of evaluations to run
        trials=trials,
    )

    # fmin reports hp.choice parameters as positions in their option lists
    # (e.g. 'optimizer': 0). space_eval maps them back to the actual values,
    # so the training code receives 'Adam' / 'SGD' and a real batch size.
    best_params = space_eval(search_space, best_choice)

    print("Best hyperparameters:", best_params)

    # Train with the selected batch size rather than the default loader's.
    tuned_train_loader = DataLoader(
        train_set,
        batch_size=int(best_params['batch_size']),
        shuffle=True,
    )

    # No scheduler exists at this point: the optimizer it would have to be
    # bound to is created inside train_model.
    model = train_model(model, tuned_train_loader, best_params, None)



100%|██████████| 2/2 [01:01<00:00, 30.65s/trial, best loss: 0.29014128014445306]
Best hyperparameters: {'batch_size': 1, 'learning_rate': 0.01681145159632138, 'num_epochs': 2.0, 'optimizer': 0}


AttributeError: 'numpy.int64' object has no attribute 'zero_grad'

In [None]:
# with mlflow.start_run() as run:

#   model = FashionCNN()
#   model.to(device)

#   error = nn.CrossEntropyLoss()

#   learning_rate = 0.001
#   optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#   num_epochs = 10
#   count = 0
#   # Lists for visualization of loss and accuracy
#   loss_list = []
#   iteration_list = []
#   accuracy_list = []

#   # Lists for knowing classwise accuracy
#   predictions_list = []
#   labels_list = []

#   for epoch in range(num_epochs):
#     for images, labels in train_loader:
#         # Transfering images and labels to GPU if available
#         images, labels = images.to(device), labels.to(device)

#         train = Variable(images.view(100, 1, 224, 224))
#         labels = Variable(labels)

#         # Forward pass
#         outputs = model(train)
#         loss = error(outputs, labels)

#         # Initializing a gradient as 0 so there is no mixing of gradient among the batches
#         optimizer.zero_grad()

#         #Propagating the error backward
#         loss.backward()

#         # Optimizing the parameters
#         optimizer.step()

#         count += 1
#         # Testing the model
#         with torch.inference_mode():

#           if not (count % 50):    # It's same as "if count % 50 == 0"
#               total = 0
#               correct = 0

#               for images, labels in test_loader:
#                   images, labels = images.to(device), labels.to(device)
#                   labels_list.append(labels)

#                   test = Variable(images.view(100, 1,224, 224))

#                   outputs = model(test)

#                   predictions = torch.max(outputs, 1)[1].to(device)
#                   predictions_list.append(predictions)
#                   correct += (predictions == labels).sum()

#                   total += len(labels)



#               accuracy = correct * 100 / total
#               loss_list.append(loss.data)
#               iteration_list.append(count)
#               accuracy_list.append(accuracy)



#           if not (count % 500):
#               print("Iteration: {}, Loss: {}, Accuracy: {}%".format(count, loss.data, accuracy))
#               mlflow.log_metrics({"accuracy": float(accuracy), "loss": float(loss.data)})




#   mlflow.pytorch.log_model(
#     pytorch_model=model,
#     artifact_path="model1",
#     signature=None,
#     registered_model_name="model1",
# )