[![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/phandai/food101-training)

# Preparation

In [None]:
import pandas as pd
import numpy as np
import os 
import torch
import random
import json
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.preprocessing import LabelEncoder
import copy
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import timm
from tqdm import tqdm
import torch.nn as nn
from sklearn.metrics import accuracy_score, f1_score,classification_report

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic
    algorithm selection.

    Args:
        seed (int): Seed value applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may choose different kernels from run to run, which
    # defeats the deterministic flag above, so it must stay disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

# Config 

In [None]:
# Filesystem locations used throughout the notebook.
IMAGE_BASE_DIR = "/kaggle/input/food41/images"  # prefix for every image 'filepath' built in json_to_dataframe
META_BASE_DIR = "/kaggle/input/food41/meta/meta"  # directory containing train.json and test.json
SAVE_WEIGHT_DIR = "/kaggle/working/weight"  # destination of the per-fold checkpoints written by train()
SAVE_OUTPUT_DIR = "/kaggle/working/output"  # not referenced in the visible part of this notebook

In [None]:
# Run-level hyperparameters.
num_epochs = 20  # epochs trained for each fold
num_workers, batch_size = 4, 64  # DataLoader worker count and samples per batch
resized_width = 224  # output image width used by get_transforms
resized_height = 224  # output image height used by get_transforms
verbose = 0  # 1 turns on the tqdm bar in train_one_epoch and extra per-epoch prints in train

In [None]:
# Backbone name handed to timm.create_model by load_model.
model_name = 'resnet50'
# Adam settings consumed by setup_crit_opt_sche.
optim_lr = 5e-4
optim_weight_decay = 1e-5

# Scheduler selector; create_scheduler lower-cases it before matching.
scheduler_name = 'ReduceLROnPlateau' 
# scheduler_name = 'CosineAnnealing'

# Keyword arguments forwarded to create_scheduler; keys it does not look up
# for the selected scheduler are simply ignored there.
scheduler_params = {
    'patience': 2, # for ReduceLROnPlateau
    'factor': 0.5,
    'min_lr': 1e-6,
}

# scheduler_params = {
#     'T_max': num_epochs,  # for CosineAnnealing
#     'min_lr': 1e-6,
# }


# Train and test df

In [None]:
# Load json for train and test
# JSON is UTF-8 by specification, so decode explicitly rather than relying on
# the platform's default text encoding.
with open(f"{META_BASE_DIR}/train.json", 'r', encoding="utf-8") as file:
    train_json = json.load(file)
with open(f"{META_BASE_DIR}/test.json", 'r', encoding="utf-8") as file:
    test_json = json.load(file)

In [None]:
# Convert the JSON label -> image-path mappings into DataFrames
def json_to_dataframe(data_json):
    """Flatten a ``{label: [image_path, ...]}`` mapping into a DataFrame.

    Args:
        data_json (dict): Maps each class label to a list of image paths
            (given without directory prefix or file extension).

    Returns:
        pd.DataFrame: One row per image with columns 'label', 'image_path'
        and 'filepath', where 'filepath' is
        ``IMAGE_BASE_DIR + '/' + image_path + '.jpg'``.
    """
    records = [
        [label, image_path]
        for label, image_paths in data_json.items()
        for image_path in image_paths
    ]
    frame = pd.DataFrame(records, columns=['label', 'image_path'])
    path_prefix = IMAGE_BASE_DIR + '/'
    frame['filepath'] = path_prefix + frame['image_path'] + '.jpg'
    return frame

In [None]:
df_train = json_to_dataframe(train_json)
df_test = json_to_dataframe(test_json)

In [None]:
print(df_train.columns)
print(df_test.columns)

In [None]:
num_classes = df_train['label'].nunique()

# Dataset

In [None]:
# Fit the encoder on the training labels only, then apply that same mapping to
# both splits so a given class name gets the same integer id everywhere.
label_encoder = LabelEncoder()
label_encoder.fit(df_train['label'])
df_train['num_label'] = label_encoder.transform(df_train['label'])
df_test['num_label'] = label_encoder.transform(df_test['label'])

In [None]:
class FoodDataset(Dataset):
    """Image-classification dataset backed by a DataFrame of file paths.

    Each item is an ``(image, label)`` pair: the image is loaded from disk as
    RGB, optionally passed through an albumentations pipeline, and paired with
    the integer class id stored in the 'num_label' column.
    """

    def __init__(self, df, transforms=None):
        """
        Args:
            df (pd.DataFrame): DataFrame with 'filepath' and 'num_label' columns.
            transforms (albumentations.Compose): Augmentation pipeline applied
                to each image; ``None`` returns the raw RGB array.
        """
        self.df = df
        self.transforms = transforms

        # Keep labels as a positional array: __getitem__ receives positions,
        # and indexing the Series directly would look them up by index label,
        # which fails (or returns the wrong row) unless the index is 0..n-1.
        self.numeric_labels = df['num_label'].to_numpy()

    def __len__(self):
        """Return the number of samples (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the transformed image
            (or an HxWx3 uint8 array when no transforms are set) and ``label``
            is the integer class id.

        Raises:
            FileNotFoundError: If the image file does not exist on disk.
        """
        img_path = self.df['filepath'].iloc[idx]

        # Fail with an explicit message instead of a PIL error
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file {img_path} does not exist.")

        # Load as RGB; the context manager releases the file handle promptly
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # Apply transforms if any
        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        label = self.numeric_labels[idx]
        return image, label

# Augmentation

Essential Augmentation Techniques
1. Horizontal and Vertical Flips
2. Random Rotations
3. Random Cropping and Resizing
4. Color Jitter (Brightness, Contrast, Saturation, Hue)
5. Scaling and Zooming
6. Random Shear and Affine Transformations
7. Random Grayscale Conversion
8. Normalization

In [None]:
def get_transforms(mode="train"):
    """Build the albumentations pipeline for a dataset split.

    Args:
        mode (str): "train" returns the augmenting pipeline; any other value
            returns the deterministic evaluation pipeline.

    Returns:
        albumentations.Compose: Pipeline that resizes to
        (resized_height, resized_width), normalizes with ImageNet statistics
        and converts the image to a CHW torch tensor.
    """
    if mode == "train":
        return A.Compose([
            A.Transpose(p=0.5),
            A.VerticalFlip(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
                A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10),
            ], p=0.7),
            A.OneOf([
                A.GaussianBlur(blur_limit=3),
                A.GaussNoise(var_limit=(5.0, 30.0)),
            ], p=0.5),
            A.OneOf([
                A.OpticalDistortion(distort_limit=0.5),
                A.GridDistortion(num_steps=5, distort_limit=0.5),
            ], p=0.3),
            A.Resize(resized_height, resized_width),
            # NOTE(review): A.Cutout is deprecated in favour of CoarseDropout in
            # recent albumentations releases - confirm against the installed version.
            A.Cutout(max_h_size=int(resized_height * 0.15), max_w_size=int(resized_width * 0.15), num_holes=1, p=0.7),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet stats
            # HWC numpy -> CHW tensor; without this the default collate yields
            # (B, H, W, C) batches that a conv backbone cannot consume.
            ToTensorV2(),
        ], p=1.0) # Overall probability of applying the transform
    else:
        return A.Compose([
            # Resize the whole frame exactly as in training; a fixed-size centre
            # crop of the native image would show the model a different scale
            # and cannot handle images smaller than the crop.
            A.Resize(resized_height, resized_width),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet stats
            ToTensorV2(),
        ])

## Visualize transformed images

In [None]:
train_dataset = FoodDataset(df_train, transforms=get_transforms(mode="train"))

# Function to display images
def visualize_augmentations(dataset, num_samples=10, cols=5):
    """
    Visualizes augmented images from the dataset.

    Randomly drawn samples are shown in a grid. Normalize and ToTensorV2 are
    stripped from a copy of the pipeline so the images stay displayable; the
    dataset passed in is left untouched.

    Args:
        dataset (Dataset): The dataset to visualize.
        num_samples (int): Number of samples to visualize.
        cols (int): Number of columns in the grid.

    Raises:
        ValueError: If ``num_samples`` or ``cols`` is smaller than 1.
    """
    if num_samples < 1 or cols < 1:
        raise ValueError("num_samples and cols must both be at least 1")

    # Work on a copy so the caller's pipeline is not modified
    dataset_copy = copy.deepcopy(dataset)
    if dataset_copy.transforms is not None:
        displayable = [
            t for t in dataset_copy.transforms.transforms
            if not isinstance(t, (A.Normalize, ToTensorV2))
        ]
        dataset_copy.transforms = A.Compose(displayable)

    rows = num_samples // cols + int(num_samples % cols > 0)
    # squeeze=False always yields a 2-D axes array, so flattening is valid for
    # every rows/cols combination (including a single sample in a wide grid).
    _, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(cols * 3, rows * 3), squeeze=False)
    ax = ax.flatten()

    for i in range(num_samples):
        idx = np.random.randint(0, len(dataset_copy))
        image, _ = dataset_copy[idx]  # Ignore the label
        if isinstance(image, torch.Tensor):
            image = image.permute(1, 2, 0).cpu().numpy()
            image = np.clip(image, 0, 1)
        else:
            image = image.astype(np.uint8)
        ax[i].imshow(image)
        ax[i].axis('off')
    # Hide the unused cells of the last row
    for j in range(num_samples, len(ax)):
        ax[j].axis('off')

    plt.tight_layout()
    plt.show()

# Visualize some augmented images
visualize_augmentations(train_dataset, num_samples=10, cols=5)

# KFold and train-val splits

In [None]:
n_splits = 5  # number of cross-validation folds; also bounds the training loop over folds
# Shuffled, label-stratified splitter with a fixed seed so fold membership is reproducible
skf = StratifiedKFold(n_splits, shuffle=True, random_state=42)
# Select fold 0
fold = 0
def get_train_val_from_fold(fold,val=False):
    """Return the train/validation DataFrames of one stratified fold.

    Args:
        fold (int): Zero-based index of the fold produced by ``skf.split``.
        val (bool): When True, also return the positions (within ``df_train``)
            of the validation rows.

    Returns:
        tuple: ``(train_df, val_df)``, or ``(train_df, val_df, val_idx)`` when
        ``val`` is True. Both frames have a fresh 0..n-1 index.

    Raises:
        ValueError: If ``fold`` does not match any fold of the splitter.
    """
    splits = skf.split(X=df_train, y=df_train['label'])
    for fold_idx, (train_idx, val_idx) in enumerate(splits):
        if fold_idx != fold:
            continue
        print(f"Selected Fold: {fold}")
        train_df = df_train.iloc[train_idx].reset_index(drop=True)
        val_df = df_train.iloc[val_idx].reset_index(drop=True)
        if val:
            return train_df, val_df, val_idx
        return train_df, val_df
    # Falling off the loop used to return None, which surfaced in callers as an
    # opaque tuple-unpacking TypeError.
    raise ValueError(f"Fold {fold!r} is not a valid fold index for the configured splitter")
            

# Dataloader

In [None]:
def get_loader_from_fold(fold, num_workers, batch_size,val=False):
    """Create the training and validation DataLoaders for one fold.

    Args:
        fold (int): Fold index passed to ``get_train_val_from_fold``.
        num_workers (int): Worker count for both loaders.
        batch_size (int): Batch size for both loaders.
        val (bool): When True, also return the validation row positions.

    Returns:
        tuple: ``(train_loader, val_loader)``, or
        ``(train_loader, val_loader, val_idx)`` when ``val`` is True.
    """
    val_idx = None
    if val:
        train_df, val_df, val_idx = get_train_val_from_fold(fold,val=True)
    else:
        train_df, val_df = get_train_val_from_fold(fold)

    # Training data is augmented, validation data is not
    train_dataset = FoodDataset(df=train_df, transforms=get_transforms(mode="train"))
    val_dataset = FoodDataset(df=val_df, transforms=get_transforms(mode="val"))

    # Settings shared by both loaders; only the shuffling differs
    shared = dict(batch_size=batch_size, num_workers=num_workers)
    train_loader = DataLoader(train_dataset, shuffle=True, **shared)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared)

    if not val:
        return train_loader,val_loader
    return train_loader,val_loader, val_idx

# Model | Criterion | Optimizer | Scheduler

In [None]:
# --- Set Device ---
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
def load_model(model_name,device=device):
    """Create a pretrained timm model with a ``num_classes``-way head on ``device``.

    Args:
        model_name (str): Architecture name understood by ``timm.create_model``.
        device: Target device; defaults to the module-level ``device``.

    Returns:
        The model after being moved to ``device``.
    """
    net = timm.create_model(model_name, pretrained=True, num_classes=num_classes)
    net = net.to(device)
    return net

In [None]:
def create_scheduler(scheduler_name, optimizer, **kwargs):
    """
    Create a learning rate scheduler based on name.

    The name is matched case-insensitively.

    Args:
        scheduler_name (str): Name of the scheduler ('ReduceLROnPlateau' or 'CosineAnnealing')
        optimizer: PyTorch optimizer
        **kwargs: Additional arguments for specific schedulers.
            ReduceLROnPlateau reads 'patience' (default 2), 'factor'
            (default 0.5) and 'min_lr' (default 1e-6); CosineAnnealing reads
            'T_max' (default 10) and 'min_lr' (default 0, used as eta_min).
            Unknown keys are ignored.

    Returns:
        scheduler: PyTorch scheduler
        is_metric_based (bool): Whether scheduler needs validation metrics

    Raises:
        ValueError: If the scheduler name is not supported.
    """
    scheduler_name = scheduler_name.lower()

    # 'cosineanealing' is the misspelling this function used to require; it is
    # still accepted so existing callers keep working.
    cosine_names = ('cosineannealing', 'cosineannealinglr', 'cosineanealing')

    if scheduler_name == 'reducelronplateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            patience=kwargs.get('patience', 2),
            factor=kwargs.get('factor', 0.5),
            min_lr=kwargs.get('min_lr', 1e-6),
        )
        return scheduler, True

    if scheduler_name in cosine_names:
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=kwargs.get('T_max', 10),
            eta_min=kwargs.get('min_lr', 0),
        )
        return scheduler, False

    raise ValueError(f"Unsupported scheduler: {scheduler_name}")

In [None]:
def setup_crit_opt_sche(model):
    """Build the loss, optimizer and LR scheduler for ``model``.

    Uses cross-entropy loss and Adam configured from the module-level
    ``optim_lr`` / ``optim_weight_decay``; the scheduler comes from
    ``create_scheduler`` with ``scheduler_name`` and ``scheduler_params``.

    Returns:
        tuple: ``(criterion, optimizer, scheduler, is_metric_based)``.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(
        model.parameters(),
        lr=optim_lr,
        weight_decay=optim_weight_decay,
    )
    lr_scheduler, metric_based = create_scheduler(scheduler_name, adam, **scheduler_params)
    return loss_fn, adam, lr_scheduler, metric_based

# Training

In [None]:
def train_one_epoch(model, train_loader, criterion, optimizer, device, verbose=0):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode.
        train_loader: Sized iterable yielding ``(data, target)`` batches.
        criterion: Loss taking ``(outputs, target)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        verbose (int): 1 shows a tqdm bar labelled with the batch loss.

    Returns:
        tuple: ``(epoch_loss, epoch_accuracy)`` - the mean of the per-batch
        losses and the fraction of correctly classified samples.
    """
    model.train()
    show_bar = verbose == 1

    batches = enumerate(train_loader)
    if show_bar:
        batches = tqdm(batches, total=len(train_loader))

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for _, (inputs, labels) in batches:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear grads, forward, backward, update
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Accumulate running statistics
        loss_value = batch_loss.item()
        loss_sum += loss_value
        guesses = torch.max(logits, 1).indices
        n_seen += labels.size(0)
        n_correct += (guesses == labels).sum().item()

        if show_bar:
            batches.set_description(f"Train Loss: {loss_value:.4f}")

    return loss_sum / len(train_loader), n_correct / n_seen

In [None]:
# --- Validation Loop ---
def validate(model, val_loader, criterion, device,verbose=0):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        val_loader: Sized iterable yielding ``(data, target)`` batches.
        criterion: Loss taking ``(outputs, target)``.
        device: Device the batches are moved to.
        verbose (int): 1 shows a tqdm bar labelled with the batch loss.

    Returns:
        tuple: ``(epoch_loss, epoch_accuracy)`` - the mean of the per-batch
        losses and the fraction of correctly classified samples.
    """
    model.eval()
    show_bar = verbose == 1

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        batches = enumerate(val_loader)
        if show_bar:
            batches = tqdm(batches, total=len(val_loader))

        for _, (inputs, labels) in batches:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss_value = criterion(logits, labels).item()

            loss_sum += loss_value
            guesses = torch.max(logits, 1).indices
            n_seen += labels.size(0)
            n_correct += (guesses == labels).sum().item()
            if show_bar:
                batches.set_description(f"Val Loss: {loss_value:.4f}")

    return loss_sum / len(val_loader), n_correct / n_seen

In [None]:
def train(model_name, fold_num, train_loader, val_loader, num_epochs, device, SAVE_WEIGHT_DIR, verbose=0):
    """
    Training function that tracks metrics history for visualization.

    A fresh model, loss, optimizer and scheduler are created, the model is
    trained for ``num_epochs`` epochs, and a checkpoint is written every time
    the validation loss improves (overwriting the previous one for this fold).

    Args:
        model_name (str): Architecture name passed to ``load_model``; also
            used in the checkpoint file name.
        fold_num (int): Fold identifier used in the checkpoint file name.
        train_loader: Loader of training batches.
        val_loader: Loader of validation batches.
        num_epochs (int): Number of epochs to run.
        device: Device used for the model and every batch.
        SAVE_WEIGHT_DIR (str): Directory receiving
            ``{model_name}_fold_{fold_num}.pth``.
        verbose (int): 1 enables the training progress bar and extra prints.

    Returns:
        history (dict): Dictionary containing lists of metrics for each epoch
    """
    # Build the model on the device the caller asked for; relying on
    # load_model's default could place it on a different device than the data.
    model = load_model(model_name, device)
    criterion, optimizer, scheduler, is_metric_based = setup_crit_opt_sche(model)
    # Initialize history dictionary to store metrics
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'learning_rates': []
    }

    # Resolve the output location once, and fail before training starts if the
    # directory cannot be created.
    os.makedirs(SAVE_WEIGHT_DIR, exist_ok=True)
    checkpoint_path = os.path.join(SAVE_WEIGHT_DIR, f"{model_name}_fold_{fold_num}.pth")

    best_val_loss = float('inf')
    best_epoch = -1
    for epoch in range(num_epochs):
        # Training phase
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device,verbose)

        # Validation phase (always run without a progress bar)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        # Learning rate that was in effect during this epoch
        current_lr = optimizer.param_groups[0]['lr']

        # Store metrics in history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['learning_rates'].append(current_lr)

        # Print epoch results
        print(f"Epoch: {epoch+1}/{num_epochs}| Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}| Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        if verbose == 1:
            print(f"Learning Rate: {current_lr:.6f}")

        # Update learning rate using the scheduler; metric-based schedulers
        # need the validation loss, the others step unconditionally.
        if scheduler:
            if is_metric_based:
                scheduler.step(val_loss)
            else:
                scheduler.step()

        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch + 1
            # Save both model weights and training history
            checkpoint = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'val_loss': val_loss,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'history': history
            }

            torch.save(checkpoint, checkpoint_path)
            if verbose == 1:
                print(f"Model saved at epoch {epoch+1}")
        if verbose == 1:
            print("-" * 50)
    print('Best epoch:',best_epoch)
    return history

In [None]:
# # --- Test training ---
# num_epochs = 1
# SAVE_WEIGHT_DIR = "/kaggle/working" 
# train(model_name, fold, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, device, SAVE_WEIGHT_DIR)

In [None]:
# --- Training ---
print("MODEL NAME:", model_name)
print()
# Train one independent model per fold; train() builds a fresh model each time
# and writes that fold's best checkpoint under SAVE_WEIGHT_DIR.
for fold_num in range(n_splits):
    train_loader,val_loader = get_loader_from_fold(fold_num, num_workers, batch_size)
    train(model_name, fold_num, train_loader, val_loader, num_epochs, device, SAVE_WEIGHT_DIR, verbose)