In [1]:
# MobileNetV3 model initiation

import torch
from torchvision.models import mobilenet_v3_small
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = 104

model = mobilenet_v3_small(pretrained=False)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)

model.load_state_dict(torch.load('final_image_classifier.pth', map_location=device))

model.to(device)
model.eval()




MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

In [2]:
# Training dataset - Encoding

import csv

def normalize_label(label):
    return label.strip().lower()

class_name_to_id = {}
with open(r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\category_id.txt", 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.strip().split(maxsplit=1)
        if len(parts) == 2:
            idx, name = parts
            class_name_to_id[normalize_label(name)] = int(idx)

num_classes = max(class_name_to_id.values()) + 1

train_labels_encoded = {}
with open(r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\Kaush Stuff\FoodSeg103_export\train\labels.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        filename = row['filename']
        labels = row['labels'].split(',')
        label_indices = []
        for label in labels:
            norm_label = normalize_label(label)
            if norm_label in class_name_to_id:
                idx = class_name_to_id[norm_label]
                if idx < num_classes:
                    label_indices.append(idx)
                else:
                    print(f"Warning: Index {idx} for class '{norm_label}' out of bounds for image {filename}")
            else:
                print(f"Warning: Class '{norm_label}' not found in mapping for image {filename}")
                print("Check for trailing spaces, capitalization, or typos!")
        label_vector = [0] * num_classes
        for idx in label_indices:
            label_vector[idx] = 1
        train_labels_encoded[filename] = label_vector

for k, v in list(train_labels_encoded.items())[:3]:
    print(f"{k}: {v}")


0.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
1.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [3]:
# Validation dataset - Encoding

import csv

def normalize_label(label):
    return label.strip().lower()

class_name_to_id = {}
with open(r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\category_id.txt", 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.strip().split(maxsplit=1)
        if len(parts) == 2:
            idx, name = parts
            class_name_to_id[normalize_label(name)] = int(idx)

num_classes = max(class_name_to_id.values()) + 1

val_labels_encoded = {}
with open(r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\Kaush Stuff\FoodSeg103_export\validation\labels.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        filename = row['filename']
        labels = row['labels'].split(',')
        label_indices = []
        for label in labels:
            norm_label = normalize_label(label)
            if norm_label in class_name_to_id:
                idx = class_name_to_id[norm_label]
                if idx < num_classes:
                    label_indices.append(idx)
                else:
                    print(f"Warning: Index {idx} for class '{norm_label}' out of bounds for image {filename}")
            else:
                print(f"Warning: Class '{norm_label}' not found in mapping for image {filename}")
                print("Check for trailing spaces, capitalization, or typos!")
        label_vector = [0] * num_classes
        for idx in label_indices:
            label_vector[idx] = 1
        val_labels_encoded[filename] = label_vector

for k, v in list(val_labels_encoded.items())[:3]:
    print(f"{k}: {v}")


0.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
1.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
2.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Dataloaders

import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch

class CustomImageDataset(Dataset):
    def __init__(self, image_folder, labels, transform=None):
        self.image_folder = image_folder
        self.labels = labels
        self.transform = transform
        self.image_filenames = list(labels.keys())

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):  
        img_name = self.image_filenames[idx]
        img_path = os.path.join(self.image_folder, img_name)
        image = Image.open(img_path).convert("RGB")
        label = self.labels[img_name]
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(label, dtype=torch.float)  
        return image, label

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_image_folder = r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\Kaush Stuff\FoodSeg103_export\train"
val_image_folder = r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\Kaush Stuff\FoodSeg103_export\validation"

train_dataset = CustomImageDataset(train_image_folder, train_labels_encoded, transform=train_transforms)
val_dataset = CustomImageDataset(val_image_folder, val_labels_encoded, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

In [None]:
# Model Evaluation Function

import torch
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, multilabel_confusion_matrix
from tqdm import tqdm

def load_class_names(txt_path):
    with open(txt_path, 'r', encoding='utf-8') as f:
        class_names = [line.strip() for line in f if line.strip()]
    return class_names

def evaluate_model(model, class_txt_path):
    class_names = load_class_names(class_txt_path)
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc="Evaluating model"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = torch.sigmoid(model(inputs))
            preds = (outputs > 0.5).float()

            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)

    overall_acc = accuracy_score(all_labels.flatten(), all_preds.flatten())
    precision, recall, f1, support = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

    print(f"Overall Accuracy: {overall_acc:.4f}")
    print(f"Weighted Precision: {precision:.4f}")
    print(f"Weighted Recall: {recall:.4f}")
    print(f"Weighted F1 Score: {f1:.4f}\n")

    print("Confusion Matrix (multilabel):")
    cm = multilabel_confusion_matrix(all_labels, all_preds)
    for idx, matrix in enumerate(cm):
        print(f"Class {class_names[idx]}:")
        print(matrix)
        print()


In [12]:
# Evaluating model
evaluate_model(model, r"C:\Users\kaush\Documents\ASE-ECE\Sem 5\AIML\FoodSeg103 Stuff\category_id.txt")

Evaluating model: 100%|██████████| 67/67 [00:29<00:00,  2.24it/s]

Overall Accuracy: 0.9738
Weighted Precision: 0.7228
Weighted Recall: 0.5483
Weighted F1 Score: 0.6059

Confusion Matrix (multilabel):
Class 0	background:
[[   0    0]
 [   0 2135]]

Class 1	candy:
[[2123    1]
 [  10    1]]

Class 2	egg tart:
[[2134    0]
 [   1    0]]

Class 3	french fries:
[[2049    8]
 [  49   29]]

Class 4	chocolate:
[[2115    2]
 [  17    1]]

Class 5	biscuit:
[[2038   20]
 [  57   20]]

Class 6	popcorn:
[[2130    1]
 [   3    1]]

Class 7	pudding:
[[2133    1]
 [   1    0]]

Class 8	ice cream:
[[1793   65]
 [ 144  133]]

Class 9	cheese butter:
[[1997   18]
 [ 110   10]]

Class 10	cake:
[[1980   29]
 [  80   46]]

Class 11	wine:
[[2086    8]
 [  36    5]]

Class 12	milkshake:
[[2097   11]
 [  11   16]]

Class 13	coffee:
[[2071    7]
 [  32   25]]

Class 14	juice:
[[2073    8]
 [  39   15]]

Class 15	milk:
[[2103    4]
 [  19    9]]

Class 16	tea:
[[2129    0]
 [   5    1]]

Class 17	almond:
[[2104    7]
 [  21    3]]

Class 18	red beans:
[[2121    0]
 [  13    1]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
