**This is the final model we worked on after trying several pretrained models. We found that EfficientNet-B3 gave the best results, and after further tuning, we reached an F1-score of 0.51049, slightly after the competition ended.**
**The code has been updated constantly throughout the work, so this is mainly the final version of the code and the steps we took to build this model.**

In [None]:
#1.Mounting Google Drive
# The project folder used later (BASE_DIR) lives under /content/drive, so the
# Drive must be mounted before any data or checkpoint is read or written.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#2. Importing Required Libraries and setting file path
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models.efficientnet import efficientnet_b3, EfficientNet_B3_Weights
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# 3. File paths and hyper-parameters

# Everything (images, label table, checkpoints, submissions) lives in one
# Google Drive project folder.
BASE_DIR = "/content/drive/MyDrive/food-recognition"
TRAIN_DIR, TEST_DIR, LABEL_FILE = (
    os.path.join(BASE_DIR, entry)
    for entry in ("images_train", "images_test", "train_onehot.csv")
)

# Training / evaluation settings
FOLDS = 3          # number of cross-validation splits
EPOCHS = 25        # epoch budget per fold
BATCH_SIZE = 32    # samples per mini-batch
THRESHOLD = 0.25   # sigmoid probability above which a class counts as predicted

# Use the GPU when the runtime exposes one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the one-hot label table; file names are stripped of stray whitespace
# so they join cleanly with the image directory.
df = pd.read_csv(LABEL_FILE).assign(
    Filename=lambda frame: frame['Filename'].str.strip()
)

# Model inputs: one absolute image path per CSV row, in CSV order.
image_paths = list(map(lambda fname: os.path.join(TRAIN_DIR, fname), df['Filename']))

# Model targets: every column except 'Filename', as a float32 matrix
# with one row per image and one column per class.
labels = df.drop(columns='Filename').values.astype(np.float32)
num_classes = labels.shape[1]

In [None]:
#Defining Dataset Class for Training and Validation

class FoodDataset(Dataset):
    """Labelled image dataset for training and validation.

    Each item is an ``(image, label)`` pair, where ``image`` is the RGB image at
    ``image_paths[idx]`` (after ``transform``, if given) and ``label`` is
    ``torch.tensor(labels[idx])``.

    Unreadable images are skipped: the next readable sample (wrapping around
    the dataset) is returned in place of the broken one, together with its own
    label. Returning ``None`` instead would make the default DataLoader collate
    function fail on the whole batch.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of per-image label vectors, aligned with ``image_paths``.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def _load_rgb(self, idx):
        """Open the image at position ``idx`` and return it as an RGB PIL image."""
        # The context manager closes the underlying file once the pixel data
        # has been copied by convert().
        with Image.open(self.image_paths[idx]) as img:
            return img.convert("RGB")

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, skipping over unreadable files.

        Raises:
            IndexError: If ``idx`` is outside the dataset.
            RuntimeError: If no image in the dataset can be loaded.
        """
        total = len(self.image_paths)
        # Keep normal sequence semantics (plain iteration over the dataset
        # relies on IndexError to stop) before applying wrap-around below.
        if not -total <= idx < total:
            raise IndexError(f"index {idx} is out of range for dataset of size {total}")

        for offset in range(total):
            current = (idx + offset) % total
            try:
                image = self._load_rgb(current)
            except Exception as e:
                # Best-effort: a few corrupt files have been encountered, so
                # report the file and try the next sample instead of failing.
                print(f"Skipping file: {self.image_paths[current]} due to error: {e}")
                continue

            if self.transform:
                image = self.transform(image)
            label = torch.tensor(self.labels[current])
            return image, label

        raise RuntimeError("No loadable image found in the dataset")

In [None]:
# Training-time preprocessing: fixed 300x300 resize, random horizontal flip as
# augmentation, tensor conversion, then per-channel normalization.

# Per-channel mean / std used for normalization (the usual ImageNet statistics).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

In [None]:
# Defining Training Function for Each Fold
def train_fold(train_loader, val_loader, fold, start_epoch=12):
    """Fine-tune EfficientNet-B3 on one cross-validation fold.

    The network starts from ImageNet weights with its classifier head replaced
    by a ``num_classes``-way linear layer. If a checkpoint for this fold
    already exists in BASE_DIR, its weights are loaded and its validation F1
    becomes the score to beat, so a resumed run never overwrites a better
    checkpoint with a worse model.

    Args:
        train_loader: DataLoader yielding ``(images, targets)`` training batches.
        val_loader: DataLoader passed to ``evaluate_model`` after every epoch.
        fold: Zero-based fold index; ``fold + 1`` is used in logs and in the
            checkpoint file name.
        start_epoch: Zero-based epoch to start from. The default of 12 resumes
            a run whose first 12 epochs were completed earlier; training then
            covers epochs ``start_epoch + 1`` .. ``EPOCHS`` (1-based, as logged).

    Returns:
        The best validation F1 observed (including the resumed checkpoint's).
    """
    model = efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    model = model.to(device)

    model_path = os.path.join(BASE_DIR, f"efficientnet_b3_fold{fold+1}_best.pth")
    best_f1 = 0.0

    # Resume from the previously saved weights when they exist.
    if os.path.exists(model_path):
        print(f"Resuming training from {model_path}")
        # map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
        model.load_state_dict(torch.load(model_path, map_location=device))
        # Score the loaded weights first so "best" really means best-so-far.
        best_f1 = evaluate_model(model, val_loader)
        print(f"Fold {fold+1} - Resumed checkpoint - Val F1: {best_f1:.4f}")

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # range(start_epoch, EPOCHS) is logged 1-based, so the last epoch shown is EPOCHS.
    for epoch in range(start_epoch, EPOCHS):
        # evaluate_model() switches to eval mode, so re-enable train mode each epoch.
        model.train()
        total_loss = 0.0

        for images, targets in tqdm(train_loader, desc=f"Fold {fold+1} - Epoch {epoch+1}"):
            images, targets = images.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch average is per-sample.
            total_loss += loss.item() * images.size(0)

        # Running validation after each epoch
        val_f1 = evaluate_model(model, val_loader)
        print(f"Fold {fold+1} - Epoch {epoch+1} - Loss: {total_loss / len(train_loader.dataset):.4f} - Val F1: {val_f1:.4f}")

        # Saving best model per fold
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), model_path)

    return best_f1


In [None]:
# Validation function: computes the sample-averaged F1 score on the validation set
def evaluate_model(model, loader):
    """Score ``model`` on ``loader`` with the sample-averaged F1 metric.

    The model is put in eval mode, sigmoid probabilities are collected for
    every batch without tracking gradients, and classes whose probability
    exceeds the global THRESHOLD count as predicted.

    Args:
        model: Network returning one logit per class.
        loader: Iterable of ``(images, targets)`` batches; targets stay on the CPU.

    Returns:
        ``f1_score(..., average="samples")`` over all batches.
    """
    model.eval()

    prob_batches = []
    target_batches = []
    with torch.no_grad():
        for images, targets in loader:
            logits = model(images.to(device))
            prob_batches.append(torch.sigmoid(logits).cpu().numpy())
            target_batches.append(targets.numpy())

    y_true = np.vstack(target_batches)
    y_prob = np.vstack(prob_batches)
    y_pred = (y_prob > THRESHOLD).astype(int)
    return f1_score(y_true, y_pred, average="samples")

In [None]:
# Run training and validation for a single, manually selected fold.
# Only the third fold (zero-based index 2) is trained in this cell.
TARGET_FOLD = 2

# Validation preprocessing mirrors the training transform but without the random
# flip, so the validation F1 is deterministic and comparable across epochs.
val_transform = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Fixed random_state keeps the split identical across sessions, which matters
# when a fold is resumed from a checkpoint.
skf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths)):
    if fold != TARGET_FOLD:
        continue

    # Everything below belongs inside the loop so it always uses the indices
    # of the selected fold rather than whatever the last iteration left behind.
    train_paths = np.array(image_paths)[train_idx]
    val_paths = np.array(image_paths)[val_idx]
    train_labels = labels[train_idx]
    val_labels = labels[val_idx]

    train_loader = DataLoader(FoodDataset(train_paths, train_labels, transform), batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(FoodDataset(val_paths, val_labels, val_transform), batch_size=BATCH_SIZE, shuffle=False)

    train_fold(train_loader, val_loader, fold)

Fold 3 - Epoch 13: 100%|██████████| 833/833 [1:49:53<00:00,  7.92s/it]


Fold 3 - Epoch 13 - Loss: 0.0491 - Val F1: 0.1237


Fold 3 - Epoch 14: 100%|██████████| 833/833 [12:43<00:00,  1.09it/s]


Fold 3 - Epoch 14 - Loss: 0.0172 - Val F1: 0.2738


Fold 3 - Epoch 15: 100%|██████████| 833/833 [12:40<00:00,  1.09it/s]


Fold 3 - Epoch 15 - Loss: 0.0150 - Val F1: 0.3615


Fold 3 - Epoch 16: 100%|██████████| 833/833 [12:33<00:00,  1.11it/s]


Fold 3 - Epoch 16 - Loss: 0.0134 - Val F1: 0.4063


Fold 3 - Epoch 17: 100%|██████████| 833/833 [12:33<00:00,  1.11it/s]


Fold 3 - Epoch 17 - Loss: 0.0122 - Val F1: 0.4463


Fold 3 - Epoch 18: 100%|██████████| 833/833 [12:34<00:00,  1.10it/s]


Fold 3 - Epoch 18 - Loss: 0.0111 - Val F1: 0.4742


Fold 3 - Epoch 19: 100%|██████████| 833/833 [12:34<00:00,  1.10it/s]


Fold 3 - Epoch 19 - Loss: 0.0103 - Val F1: 0.4872


Fold 3 - Epoch 20: 100%|██████████| 833/833 [12:33<00:00,  1.10it/s]


Fold 3 - Epoch 20 - Loss: 0.0095 - Val F1: 0.4986


Fold 3 - Epoch 21: 100%|██████████| 833/833 [12:34<00:00,  1.10it/s]


Fold 3 - Epoch 21 - Loss: 0.0087 - Val F1: 0.5051


Fold 3 - Epoch 22: 100%|██████████| 833/833 [12:32<00:00,  1.11it/s]


Fold 3 - Epoch 22 - Loss: 0.0080 - Val F1: 0.5172


Fold 3 - Epoch 23: 100%|██████████| 833/833 [13:05<00:00,  1.06it/s]


Fold 3 - Epoch 23 - Loss: 0.0074 - Val F1: 0.5151


Fold 3 - Epoch 24: 100%|██████████| 833/833 [13:09<00:00,  1.06it/s]


Fold 3 - Epoch 24 - Loss: 0.0068 - Val F1: 0.5202


Fold 3 - Epoch 25: 100%|██████████| 833/833 [13:12<00:00,  1.05it/s]


Fold 3 - Epoch 25 - Loss: 0.0062 - Val F1: 0.5196


Fold 3 - Epoch 26: 100%|██████████| 833/833 [13:11<00:00,  1.05it/s]


Fold 3 - Epoch 26 - Loss: 0.0057 - Val F1: 0.5180


In [None]:
#8-1 defining Dataset Class for Test Data
class TestDataset(Dataset):
    """Unlabelled image dataset used at inference time.

    Each item is ``(image, file_name)``: the RGB image at ``image_paths[idx]``
    (after ``transform``, if one was given) and the base name of its path.
    """

    def __init__(self, image_paths, transform=None):
        self.transform = transform
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        rgb = Image.open(path).convert("RGB")
        # Apply the optional preprocessing pipeline only when one is configured.
        sample = self.transform(rgb) if self.transform else rgb
        return sample, os.path.basename(path)

In [None]:
#8-2 Defining Inference Function for Final Predictions
def predict_test():
    """Predict labels for every image in TEST_DIR and write the submission CSV.

    Loads the best checkpoint of the last fold (``FOLDS``), runs the test images
    through the model without augmentation, thresholds the sigmoid outputs at
    THRESHOLD and saves the result to
    ``submission_efficientnet_b3_F3_25.csv`` inside BASE_DIR.

    Returns:
        Tuple ``(final_binary, filenames)``: an int array of 0/1 predictions
        with one row per image, and the matching list of file names.

    Raises:
        RuntimeError: If TEST_DIR contains no files.
    """
    # Loading model trained on the last fold. weights=None skips fetching the
    # ImageNet weights, which the checkpoint would overwrite anyway.
    model = efficientnet_b3(weights=None)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    checkpoint_path = os.path.join(BASE_DIR, f"efficientnet_b3_fold{FOLDS}_best.pth")
    # map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model = model.to(device)
    model.eval()

    # Defining transform for test images (same as training, minus the random flip)
    test_transform = transforms.Compose([
        transforms.Resize((300, 300)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    # Sorted for a reproducible row order; sub-directories are ignored because
    # they cannot be opened as images.
    test_image_names = [
        name for name in sorted(os.listdir(TEST_DIR))
        if os.path.isfile(os.path.join(TEST_DIR, name))
    ]
    if not test_image_names:
        raise RuntimeError(f"No test images found in {TEST_DIR}")
    test_image_paths = [os.path.join(TEST_DIR, name) for name in test_image_names]
    test_loader = DataLoader(TestDataset(test_image_paths, transform=test_transform), batch_size=BATCH_SIZE, shuffle=False)

    final_preds, filenames = [], []
    with torch.no_grad():
        for images, names in test_loader:
            images = images.to(device)
            probs = torch.sigmoid(model(images)).cpu().numpy()
            final_preds.append(probs)
            filenames.extend(names)

    # Converting probabilities to binary predictions using threshold
    final_binary = (np.vstack(final_preds) > THRESHOLD).astype(int)

    #9 Creating submission DataFrame. This must run before the return,
    # otherwise the CSV is never written.
    submission = pd.DataFrame(final_binary, columns=[str(i) for i in range(num_classes)])
    submission.insert(0, "Filename", filenames)
    submission.to_csv(os.path.join(BASE_DIR, "submission_efficientnet_b3_F3_25.csv"), index=False)

    return final_binary, filenames

#Running Inference on Test Set and Save it as CSV
# Assigning the result keeps the notebook from echoing the full prediction array.
test_predictions, test_filenames = predict_test()