In [1]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [2]:
import pandas as pd
import os

df = pd.read_csv("training_metadata.csv")
df = df.dropna(subset=["filename", "category"])
df = df[df["filename"].apply(os.path.exists)]

In [3]:
counts = df["category"].value_counts()
print(df["category"].nunique())

valid_labels = counts[counts >= 2].index
df = df[df["category"].isin(valid_labels)].reset_index(drop=True)

print(df["category"].nunique())

10
10


In [4]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["category"])
num_classes = df["label"].nunique()


In [5]:
import json

label_map = {
    label: int(index)
    for label, index in zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))
}

with open("label_map_category.json", "w") as f:
    json.dump(label_map, f, indent=2)


In [6]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)


In [7]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
import torch

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class GroceryDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row["filename"]).convert("RGB")
        image = self.transform(image)
        label = row["label"]
        return image, label


In [8]:
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn

# Pretrained ResNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_ds = GroceryDataset(train_df, transform)
val_ds = GroceryDataset(val_df, transform)

train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    pin_memory=True,   # Speeds up transfer to GPU
    pin_memory_device="cuda" if torch.cuda.is_available() else "cpu"
)

val_loader = DataLoader(val_ds, batch_size=32, pin_memory=True,   # Speeds up transfer to GPU
    pin_memory_device="cuda" if torch.cuda.is_available() else "cpu")




In [9]:
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim

# num_epochs = 20
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-4)
def train_model(model_name="resnet18", optimizer_name="adam", num_epochs=10, lr=1e-4):
    model = getattr(models, model_name)(pretrained=True)
    if "resnet" in model_name or "resnext" in model_name or "wide_resnet" in model_name:
        model.fc = nn.Linear(model.fc.in_features, df["label"].max().item() + 1)
    elif "densenet" in model_name:
        model.classifier = nn.Linear(model.classifier.in_features, df["label"].max().item() + 1)
    elif "mobilenet" in model_name or "efficientnet" in model_name:
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, df["label"].max().item() + 1)
    else:
        raise ValueError(f"Model head replacement not implemented for: {model_name}")

    criterion = nn.CrossEntropyLoss()
    if optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(f"Unsupported optimizer: {optimizer_name}")

    model = model.to(device)

    mlflow.set_experiment("grocery_classifier")
    best_train_loss = float('inf')
    best_epoch = -1
    print(f"{model_name} initialized and ready for training.")
    with mlflow.start_run(run_name=f"{model_name}_{optimizer_name}") as run:
        mlflow.log_params({
            "learning_rate": lr,
            "optimizer": optimizer_name,
            "epochs": num_epochs,
            "model": model_name,
        })

        for epoch in range(num_epochs):
            model.train()
            train_loss = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            model.eval()
            val_loss = 0
            correct = 0
            total = 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    _, preds = torch.max(outputs, 1)
                    correct += (preds == labels).sum().item()
                    total += labels.size(0)

            val_acc = correct / total
            print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

            mlflow.log_metric("train_loss", train_loss, step=epoch)
            mlflow.log_metric("val_loss", val_loss, step=epoch)
            mlflow.log_metric("val_acc", val_acc, step=epoch)
            dir_path = f"./assets/models/{run.info.run_name}"
            os.makedirs(dir_path, exist_ok=True)
            model_path = f"{dir_path}/model_epoch_{epoch+1}.pth"
            torch.save(model.state_dict(), model_path)

            mlflow.log_artifact(model_path, artifact_path="checkpoints")

            if train_loss < best_train_loss:
                best_train_loss = train_loss
                best_epoch = epoch + 1
                mlflow.pytorch.log_model(model, artifact_path=f"best_model_{epoch+1}")

        mlflow.set_tag("best_epoch", best_epoch)
        mlflow.set_tag("best_train_loss", round(best_train_loss, 4))


In [10]:
from torchvision import models
print(dir(models))


['AlexNet', 'AlexNet_Weights', 'ConvNeXt', 'ConvNeXt_Base_Weights', 'ConvNeXt_Large_Weights', 'ConvNeXt_Small_Weights', 'ConvNeXt_Tiny_Weights', 'DenseNet', 'DenseNet121_Weights', 'DenseNet161_Weights', 'DenseNet169_Weights', 'DenseNet201_Weights', 'EfficientNet', 'EfficientNet_B0_Weights', 'EfficientNet_B1_Weights', 'EfficientNet_B2_Weights', 'EfficientNet_B3_Weights', 'EfficientNet_B4_Weights', 'EfficientNet_B5_Weights', 'EfficientNet_B6_Weights', 'EfficientNet_B7_Weights', 'EfficientNet_V2_L_Weights', 'EfficientNet_V2_M_Weights', 'EfficientNet_V2_S_Weights', 'GoogLeNet', 'GoogLeNetOutputs', 'GoogLeNet_Weights', 'Inception3', 'InceptionOutputs', 'Inception_V3_Weights', 'MNASNet', 'MNASNet0_5_Weights', 'MNASNet0_75_Weights', 'MNASNet1_0_Weights', 'MNASNet1_3_Weights', 'MaxVit', 'MaxVit_T_Weights', 'MobileNetV2', 'MobileNetV3', 'MobileNet_V2_Weights', 'MobileNet_V3_Large_Weights', 'MobileNet_V3_Small_Weights', 'RegNet', 'RegNet_X_16GF_Weights', 'RegNet_X_1_6GF_Weights', 'RegNet_X_32GF_

In [11]:
train_model("resnet50",num_epochs=20)



resnet50 initialized and ready for training.




Epoch 1 | Train Loss: 808.8885 | Val Loss: 156.3222 | Val Acc: 0.7326




Epoch 2 | Train Loss: 489.1387 | Val Loss: 140.0333 | Val Acc: 0.7645




Epoch 3 | Train Loss: 306.9470 | Val Loss: 152.5475 | Val Acc: 0.7611




Epoch 4 | Train Loss: 214.2102 | Val Loss: 156.7749 | Val Acc: 0.7696




Epoch 5 | Train Loss: 158.1197 | Val Loss: 159.0090 | Val Acc: 0.7847




Epoch 6 | Train Loss: 138.1483 | Val Loss: 177.1964 | Val Acc: 0.7726




Epoch 7 | Train Loss: 122.7730 | Val Loss: 180.0790 | Val Acc: 0.7786




Epoch 8 | Train Loss: 113.5063 | Val Loss: 180.7715 | Val Acc: 0.7831




Epoch 9 | Train Loss: 104.6810 | Val Loss: 178.5060 | Val Acc: 0.7827




Epoch 10 | Train Loss: 90.2872 | Val Loss: 194.8541 | Val Acc: 0.7777




Epoch 11 | Train Loss: 99.5829 | Val Loss: 193.8388 | Val Acc: 0.7768
Epoch 12 | Train Loss: 83.1779 | Val Loss: 230.0286 | Val Acc: 0.7497




Epoch 13 | Train Loss: 84.5225 | Val Loss: 209.8439 | Val Acc: 0.7775
Epoch 14 | Train Loss: 74.6587 | Val Loss: 201.7306 | Val Acc: 0.7880




Epoch 15 | Train Loss: 73.2431 | Val Loss: 210.7295 | Val Acc: 0.7860




Epoch 16 | Train Loss: 81.4401 | Val Loss: 212.1212 | Val Acc: 0.7824
Epoch 17 | Train Loss: 63.7487 | Val Loss: 218.7478 | Val Acc: 0.7808




Epoch 18 | Train Loss: 73.2874 | Val Loss: 208.0762 | Val Acc: 0.7864
Epoch 19 | Train Loss: 63.7374 | Val Loss: 232.1293 | Val Acc: 0.7687




Epoch 20 | Train Loss: 69.9755 | Val Loss: 217.8964 | Val Acc: 0.7863
