In [None]:
# MobileNetV3 model initialization

import torch
import torch.nn as nn
from torchvision.models import mobilenet_v3_small

class MobileNetV3Classifier(nn.Module):
    """MobileNetV3-Small wrapper whose last classifier layer is resized.

    The wrapped network is built with ``pretrained=True`` and the layer at
    ``classifier[3]`` is replaced by a new ``nn.Linear`` that keeps the
    original input width and produces ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the replacement layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = mobilenet_v3_small(pretrained=True)

        # Only the final layer is swapped; every other layer is kept as built.
        head_width = backbone.classifier[3].in_features
        backbone.classifier[3] = nn.Linear(head_width, num_classes)

        self.mobilenet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.mobilenet(x)

# Number of outputs requested from the classifier head.
# NOTE(review): hard-coded here, while a later cell recomputes `num_classes`
# from category_id.txt — confirm the two values agree.
num_classes = 104
# Instantiate the wrapper defined above with that head size.
model = MobileNetV3Classifier(num_classes)


In [None]:
# Training dataset - Encoding

import csv

def normalize_label(label):
    """Return ``label`` without surrounding whitespace and in lower case."""
    trimmed = label.strip()
    return trimmed.lower()

# Build the normalized-class-name -> integer-id lookup from the category file.
# A usable line has the form "<id> <name>"; lines with fewer than two
# whitespace-separated fields are skipped.
class_name_to_id = {}
with open(r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103 Stuff\category_id.txt", 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.strip().split(maxsplit=1)
        if len(parts) == 2:
            idx, name = parts
            class_name_to_id[normalize_label(name)] = int(idx)

# Fail with a clear message instead of the opaque "max() arg is an empty
# sequence" when the category file yielded nothing usable.
if not class_name_to_id:
    raise ValueError("category_id.txt yielded no '<id> <name>' entries; cannot size the label vectors")

# Label vectors are indexed by class id, so they need max id + 1 slots.
num_classes = max(class_name_to_id.values()) + 1

# filename -> multi-hot list of length num_classes (1 where the class is listed).
train_labels_encoded = {}
# Label names seen in the CSV that have no entry in the category file.
unknown_train_labels = set()
with open(r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103_export\train\labels.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        label_vector = [0] * num_classes
        # The 'labels' column holds comma-separated class names.
        for label in row['labels'].split(','):
            norm_label = normalize_label(label)
            if not norm_label:
                # Empty field (e.g. empty 'labels' cell or trailing comma).
                continue
            class_id = class_name_to_id.get(norm_label)
            if class_id is None:
                # Still ignored for encoding, but remembered so it can be reported.
                unknown_train_labels.add(norm_label)
            else:
                # Every id in the lookup is < num_classes by construction.
                label_vector[class_id] = 1
        train_labels_encoded[row['filename']] = label_vector

# Surface label names that were dropped instead of losing them silently.
if unknown_train_labels:
    print(f"Warning: {len(unknown_train_labels)} label name(s) in train/labels.csv are not in category_id.txt and were ignored: {sorted(unknown_train_labels)}")

# Preview the first three encoded rows.
for k, v in list(train_labels_encoded.items())[:3]:
    print(f"{k}: {v}")


0.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
1.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Validation dataset - Encoding

import csv

def normalize_label(label):
    """Return ``label`` without surrounding whitespace and in lower case."""
    trimmed = label.strip()
    return trimmed.lower()

# Build the normalized-class-name -> integer-id lookup from the category file.
# A usable line has the form "<id> <name>"; lines with fewer than two
# whitespace-separated fields are skipped.
class_name_to_id = {}
with open(r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103 Stuff\category_id.txt", 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.strip().split(maxsplit=1)
        if len(parts) == 2:
            idx, name = parts
            class_name_to_id[normalize_label(name)] = int(idx)

# Fail with a clear message instead of the opaque "max() arg is an empty
# sequence" when the category file yielded nothing usable.
if not class_name_to_id:
    raise ValueError("category_id.txt yielded no '<id> <name>' entries; cannot size the label vectors")

# Label vectors are indexed by class id, so they need max id + 1 slots.
num_classes = max(class_name_to_id.values()) + 1

# filename -> multi-hot list of length num_classes (1 where the class is listed).
val_labels_encoded = {}
# Label names seen in the CSV that have no entry in the category file.
unknown_val_labels = set()
with open(r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103_export\validation\labels.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        label_vector = [0] * num_classes
        # The 'labels' column holds comma-separated class names.
        for label in row['labels'].split(','):
            norm_label = normalize_label(label)
            if not norm_label:
                # Empty field (e.g. empty 'labels' cell or trailing comma).
                continue
            class_id = class_name_to_id.get(norm_label)
            if class_id is None:
                # Still ignored for encoding, but remembered so it can be reported.
                unknown_val_labels.add(norm_label)
            else:
                # Every id in the lookup is < num_classes by construction.
                label_vector[class_id] = 1
        val_labels_encoded[row['filename']] = label_vector

# Surface label names that were dropped instead of losing them silently.
if unknown_val_labels:
    print(f"Warning: {len(unknown_val_labels)} label name(s) in validation/labels.csv are not in category_id.txt and were ignored: {sorted(unknown_val_labels)}")

# Preview the first three encoded rows.
for k, v in list(val_labels_encoded.items())[:3]:
    print(f"{k}: {v}")


0.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
1.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
2.jpg: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Dataloaders

import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch

class CustomImageDataset(Dataset):
    """Dataset pairing image files in one folder with per-file label lists.

    Args:
        image_folder: Directory that is joined with each filename to locate
            the image.
        labels: Mapping from image filename to its label list; the keys
            define which samples exist and in what order.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_folder, labels, transform=None):
        self.image_folder = image_folder
        self.labels = labels
        self.transform = transform
        # Freeze the key order once so integer indices map to stable filenames.
        self.image_filenames = list(labels)

    def __len__(self):
        """Return the number of filenames taken from ``labels``."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is opened from disk and converted to RGB, then passed
        through ``transform`` when one is set. The label list is returned as
        a ``torch.float`` tensor.
        """
        filename = self.image_filenames[idx]
        image = Image.open(os.path.join(self.image_folder, filename)).convert("RGB")
        raw_label = self.labels[filename]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(raw_label, dtype=torch.float)

# Per-channel statistics passed to Normalize in both pipelines.
# NOTE(review): presumably the ImageNet statistics matching the pretrained
# weights — confirm.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Folders holding the exported images for each split.
train_image_folder = r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103_export\train"
val_image_folder = r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103_export\validation"

# Training pipeline: random resized crop and random horizontal flip, then
# tensor conversion and normalization.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Validation pipeline: deterministic resize + center crop, same normalization.
val_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

train_dataset = CustomImageDataset(
    image_folder=train_image_folder,
    labels=train_labels_encoded,
    transform=train_transforms,
)
val_dataset = CustomImageDataset(
    image_folder=val_image_folder,
    labels=val_labels_encoded,
    transform=val_transforms,
)

# Only the training loader shuffles; both load in the main process
# (num_workers=0) with pinned memory requested.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)


In [None]:
# Hyperparameter Tuning

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import mobilenet_v3_small
from torch.utils.data import DataLoader
from tqdm import tqdm
import optuna

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Output width of the classifier layer that `objective` installs.
# NOTE(review): hard-coded; earlier cells derive `num_classes` from
# category_id.txt — confirm this matches the label-vector length.
num_classes = 104

def _unpack_batch(batch):
    """Return ``(inputs, labels)`` from a DataLoader batch.

    Raises:
        ValueError: If ``batch`` is not a list/tuple with at least two items.
    """
    if isinstance(batch, (list, tuple)) and len(batch) >= 2:
        return batch[0], batch[1]
    raise ValueError('Expected batch of (inputs, labels)')


def objective(trial):
    """Optuna objective: briefly fine-tune MobileNetV3-Small and score it.

    Samples a learning rate (log scale, 1e-5 to 1e-2), an optimizer
    ('Adam' or 'SGD' with momentum 0.9) and a batch size (32 or 64), trains
    a freshly built model for 3 epochs on ``train_dataset`` with
    ``BCEWithLogitsLoss``, then evaluates on ``val_dataset``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        float: Fraction of individual (sample, class) entries whose
        prediction, thresholded at 0.5 after a sigmoid, equals the label.

    Raises:
        ValueError: If a loader yields a batch that is not a list/tuple of
            at least two items.

    NOTE(review): this is element-wise accuracy. With mostly-zero multi-hot
    labels it can be high even for a model that predicts almost nothing —
    consider whether a different metric is a better tuning target.
    """
    # suggest_float(..., log=True) is the supported replacement for the
    # deprecated suggest_loguniform and samples the same distribution.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])
    batch_size = trial.suggest_categorical('batch_size', [32, 64])

    # Loaders are rebuilt per trial because the batch size is being tuned.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    # Fresh model per trial with the final classifier layer resized.
    model = mobilenet_v3_small(pretrained=True)
    model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    epochs = 3
    for epoch in range(epochs):
        model.train()
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            inputs, labels = _unpack_batch(batch)
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, labels = _unpack_batch(batch)
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = torch.sigmoid(model(inputs))
            preds = (outputs > 0.5).float()
            # Count matching entries across every class of every sample.
            correct += (preds == labels).sum().item()
            total += torch.numel(labels)

    val_acc = correct / total
    return val_acc

# Search for the hyperparameters that maximize the value returned by
# `objective`, running 20 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Report the best trial's parameters and its objective value.
print("Best hyperparameters:", study.best_params)
print(f"Best validation accuracy: {study.best_value:.4f}")


In [None]:
# Model Training

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import mobilenet_v3_small
from tqdm import tqdm
from collections import OrderedDict

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_class_names(txt_path):
    """Read the non-empty lines of a UTF-8 text file.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        list[str]: Every line with surrounding whitespace stripped, in file
        order. Lines that are empty after stripping are omitted, so a stray
        blank line does not inflate a class count derived from the length
        of the result.
    """
    with open(txt_path, 'r', encoding='utf-8') as f:
        stripped_lines = (line.strip() for line in f)
        return [entry for entry in stripped_lines if entry]

class_names = load_class_names(r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103 Stuff\category_id.txt")

# One output per line returned from the category file.
num_classes = len(class_names)
model = mobilenet_v3_small(pretrained=False)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)

checkpoint_path = r"C:\Users\Hithesh\Downloads\Kaush Stuff\mobilenetv3_food_classifier.pth"
# NOTE(security): torch.load unpickles the file; only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(checkpoint_path, map_location=device)
# Drop a leading 'mobilenet.' from each key so the names line up with a bare
# mobilenet_v3_small model.
new_state_dict = OrderedDict()
for k, v in checkpoint.items():
    new_key = k[len('mobilenet.'):] if k.startswith('mobilenet.') else k
    new_state_dict[new_key] = v
# strict=False tolerates key mismatches; report them so a checkpoint that
# loaded only partially (or not at all) does not go unnoticed.
load_result = model.load_state_dict(new_state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(f"Warning: checkpoint did not match the model exactly. Missing keys: {list(load_result.missing_keys)}; unexpected keys: {list(load_result.unexpected_keys)}")
model.to(device)

criterion = nn.BCEWithLogitsLoss()
learning_rate = 1e-4
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

num_extra_epochs = 20

# Track the weights with the lowest validation loss so the saved model is the
# best epoch rather than simply the last one.
best_val_loss = float("inf")
best_state_dict = None

for epoch in range(num_extra_epochs):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_extra_epochs}", leave=False)
    # Count batches explicitly: tqdm's `.n` is refreshed lazily and only after
    # the loop body, so it is not a reliable divisor for the running mean.
    for step, (inputs, labels) in enumerate(progress_bar, start=1):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        progress_bar.set_postfix(loss=running_loss / step)

    print(f"Epoch {epoch + 1} training loss: {running_loss / len(train_loader):.4f}")

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    epoch_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch + 1} validation loss: {epoch_val_loss:.4f}")

    if epoch_val_loss < best_val_loss:
        best_val_loss = epoch_val_loss
        # Clone the tensors: state_dict() returns references that later
        # optimizer steps would overwrite.
        best_state_dict = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

# Restore the best-validation weights before persisting the model.
if best_state_dict is not None:
    model.load_state_dict(best_state_dict)

torch.save(model.state_dict(), "mobilenetv3_food_classifier_finetuned_more_epochs.pth")


                                                                          

Epoch 1 training loss: 0.0719
Epoch 1 validation loss: 0.0820


                                                                          

Epoch 2 training loss: 0.0687
Epoch 2 validation loss: 0.0817


                                                                          

Epoch 3 training loss: 0.0683
Epoch 3 validation loss: 0.0817


                                                                          

Epoch 4 training loss: 0.0668
Epoch 4 validation loss: 0.0817


                                                                          

Epoch 5 training loss: 0.0658
Epoch 5 validation loss: 0.0821


                                                                          

Epoch 6 training loss: 0.0651
Epoch 6 validation loss: 0.0821


                                                                          

Epoch 7 training loss: 0.0648
Epoch 7 validation loss: 0.0826


                                                                          

Epoch 8 training loss: 0.0640
Epoch 8 validation loss: 0.0830


                                                                          

Epoch 9 training loss: 0.0635
Epoch 9 validation loss: 0.0831


                                                                           

Epoch 10 training loss: 0.0622
Epoch 10 validation loss: 0.0832


                                                                           

Epoch 11 training loss: 0.0622
Epoch 11 validation loss: 0.0835


                                                                           

Epoch 12 training loss: 0.0619
Epoch 12 validation loss: 0.0836


                                                                           

Epoch 13 training loss: 0.0614
Epoch 13 validation loss: 0.0837


                                                                           

Epoch 14 training loss: 0.0609
Epoch 14 validation loss: 0.0839


                                                                           

Epoch 15 training loss: 0.0601
Epoch 15 validation loss: 0.0841


                                                                           

Epoch 16 training loss: 0.0594
Epoch 16 validation loss: 0.0846


                                                                           

Epoch 17 training loss: 0.0597
Epoch 17 validation loss: 0.0850


                                                                           

Epoch 18 training loss: 0.0589
Epoch 18 validation loss: 0.0852


                                                                           

Epoch 19 training loss: 0.0586
Epoch 19 validation loss: 0.0853


                                                                           

Epoch 20 training loss: 0.0577
Epoch 20 validation loss: 0.0852


In [None]:
# Saving Model

# Write the current weights of `model` to final_image_classifier.pth
# (a relative path).
torch.save(model.state_dict(), "final_image_classifier.pth")
