In [2]:
import os
import sys
import json
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

import pandas as pd
from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from torchvision import models
import random
from typing import Tuple, Optional

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seed for reproducibility
torch.manual_seed(789)
np.random.seed(789)
random.seed(789)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(789)


Using device: cuda


### Data Loader

In [3]:
# Dataset Implementation
class Nutrition5KDataset(Dataset):
    """
    Dataset class for Nutrition5K with multi-modal inputs (RGB + Depth)
    for calorie prediction only (no segmentation)
    """
    
    def __init__(
        self,
        csv_path: str,
        data_root: str,
        split: str = 'train',
        augment: bool = True,
        img_size: int = 224,
    ):
        """
        Args:
            csv_path: Path to the CSV file with dish IDs and calorie values
            data_root: Root directory containing color/, depth_raw/ subdirectories
            split: 'train' or 'val'
            augment: Whether to apply data augmentation
            img_size: Target image size for resizing
            use_segmentation: Not used (kept for compatibility)
        """
        self.data_root = data_root
        self.split = split
        self.augment = augment
        self.img_size = img_size
        
        # Load CSV
        self.df = pd.read_csv(csv_path)
        # Rename 'Value' column to 'calories' if it exists
        if 'Value' in self.df.columns and 'calories' not in self.df.columns:
            self.df = self.df.rename(columns={'Value': 'calories'})
        # Make sure calories column exists
        if 'calories' not in self.df.columns:
            raise ValueError("CSV file must contain a 'calories' column or a 'Value' column that can be renamed")
        # Filter out high-calorie samples
        self.df = self.df[self.df['calories'] < 3000].reset_index(drop=True)
                
        # Build paths
        self.color_dir = os.path.join(data_root, 'color')
        self.depth_raw_dir = os.path.join(data_root, 'depth_raw')
        
        # Validate dataset
        self.valid_indices = self._validate_dataset()
        print(f"Loaded {len(self.valid_indices)} valid samples out of {len(self.df)}")
        
        # Color normalization (ImageNet stats as baseline)
        self.color_normalize = T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
        
    def _validate_dataset(self):
        """Pre-validate all samples and return valid indices"""
        valid_indices = []
        
        for idx in range(len(self.df)):
            dish_id = self.df.iloc[idx]['ID']
            
            rgb_path = os.path.join(self.color_dir, dish_id, 'rgb.png')
            depth_path = os.path.join(self.depth_raw_dir, dish_id, 'depth_raw.png')
            
            # Check if files exist
            if not os.path.exists(rgb_path):
                continue
            if not os.path.exists(depth_path):
                continue
            
            # Try to load images to check for corruption
            try:
                with Image.open(rgb_path) as img:
                    img.verify()
                with Image.open(depth_path) as img:
                    img.verify()
                valid_indices.append(idx)
            except Exception as e:
                continue
                
        return valid_indices
    
    def __len__(self):
        return len(self.valid_indices)
    
    def _load_image_safe(self, path: str, mode: str = 'RGB') -> Optional[Image.Image]:
        """Safely load an image with error handling"""
        try:
            with Image.open(path) as img:
                return img.convert(mode).copy()
        except Exception as e:
            return None
    
    def _apply_augmentation(self, rgb_img, depth_img):
        """Apply geometric augmentation only (no color changes)"""
        if not self.augment:
            return rgb_img, depth_img
        
        # Convert to tensors first
        rgb_tensor = TF.to_tensor(rgb_img)
        depth_tensor = TF.to_tensor(depth_img)
        
        # Random horizontal flip
        if random.random() > 0.5:
            rgb_tensor = TF.hflip(rgb_tensor)
            depth_tensor = TF.hflip(depth_tensor)
        
        # Random rotation (±15 degrees)
        if random.random() > 0.5:
            angle = random.uniform(-15, 15)
            rgb_tensor = TF.rotate(rgb_tensor, angle)
            depth_tensor = TF.rotate(depth_tensor, angle)
        
        # Random resized crop
        if random.random() > 0.4:  # 60% probability
            i, j, h, w = T.RandomResizedCrop.get_params(
                rgb_tensor, scale=(0.75, 1.0), ratio=(0.9, 1.1)
            )
            rgb_tensor = TF.resized_crop(rgb_tensor, i, j, h, w, (self.img_size, self.img_size))
            depth_tensor = TF.resized_crop(depth_tensor, i, j, h, w, (self.img_size, self.img_size))
        
        # Convert back to PIL
        rgb_img = TF.to_pil_image(rgb_tensor)
        depth_img = TF.to_pil_image(depth_tensor)
        
        return rgb_img, depth_img
    
    def _resize_and_center_crop(self, img, target_size: int = 256):
        """
        Resize and center crop image to target_size x target_size
        Matches the preprocessing in the Nutrition5k paper
        
        Args:
            img: PIL Image
            target_size: Target size (default 256x256 as per paper)
        
        Returns:
            Cropped PIL Image
        """
        # Get original dimensions
        width, height = img.size
        
        # Resize so the shorter side is target_size
        if width < height:
            new_width = target_size
            new_height = int(target_size * height / width)
        else:
            new_height = target_size
            new_width = int(target_size * width / height)
        
        img = img.resize((new_width, new_height), Image.LANCZOS)
        
        # Center crop to target_size x target_size
        left = (new_width - target_size) // 2
        top = (new_height - target_size) // 2
        right = left + target_size
        bottom = top + target_size
        
        img = img.crop((left, top, right, bottom))
        
        return img
    
    def __getitem__(self, idx):
        """Get a single sample"""
        actual_idx = self.valid_indices[idx]
        row = self.df.iloc[actual_idx]
        
        dish_id = row['ID']
        calorie = float(row['calories'])
        
        # Load images
        rgb_path = os.path.join(self.color_dir, dish_id, 'rgb.png')
        depth_path = os.path.join(self.depth_raw_dir, dish_id, 'depth_raw.png')
        
        rgb_img = self._load_image_safe(rgb_path, 'RGB')
        depth_img = self._load_image_safe(depth_path, 'L')  # Grayscale for depth
        
        if rgb_img is None or depth_img is None:
            # Fallback: return a black image
            rgb_img = Image.new('RGB', (self.img_size, self.img_size), (0, 0, 0))
            depth_img = Image.new('L', (self.img_size, self.img_size), 0)
        
        # Apply augmentation
        rgb_img, depth_img = self._apply_augmentation(rgb_img, depth_img)
        
        # Resize and center crop to match paper preprocessing (256x256)
        rgb_img = self._resize_and_center_crop(rgb_img, target_size=self.img_size)
        depth_img = self._resize_and_center_crop(depth_img, target_size=self.img_size)
        
        # Convert to tensors
        rgb_tensor = TF.to_tensor(rgb_img)  # (3, H, W)
        depth_tensor = TF.to_tensor(depth_img)  # (1, H, W)
        
        # Normalize RGB
        rgb_tensor = self.color_normalize(rgb_tensor)
        
        # Normalize depth (0-1 range, assuming depth is already in reasonable range)
        depth_tensor = depth_tensor / 255.0
        
        return {
            'dish_id': dish_id,
            'rgb': rgb_tensor,
            'depth': depth_tensor,
            'calorie': torch.tensor(calorie, dtype=torch.float32)
        }


def create_train_val_split(csv_path: str, val_ratio: float = 0.15, random_seed: int = 42):
    """
    Create train/validation split CSV files
    """
    # Read original CSV
    df = pd.read_csv(csv_path)    
    
    # Shuffle with fixed seed
    df_shuffled = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
    
    # Split
    val_size = int(len(df_shuffled) * val_ratio)
    train_df = df_shuffled[val_size:]
    val_df = df_shuffled[:val_size]
    
    # Save temporary CSV files
    base_dir = os.path.dirname(csv_path)
    train_csv = os.path.join(base_dir, 'train_split.csv')
    val_csv = os.path.join(base_dir, 'val_split.csv')
    
    train_df.to_csv(train_csv, index=False)
    val_df.to_csv(val_csv, index=False)
    
    return train_csv, val_csv

### Trainer

In [4]:
# Training Utilities - Simplified for Calorie Prediction Only
import math
def get_warmup_cosine_scheduler(optimizer, warmup_steps, total_steps, min_lr_ratio=0.0):
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        else:
            progress = float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
            # Scale from min_lr_ratio to 1.0 instead of 0.0 to 1.0
            return min_lr_ratio + (1.0 - min_lr_ratio) * 0.5 * (1.0 + math.cos(math.pi * progress))
    
    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)


class EarlyStopping:
    """Early stopping to stop training when validation loss stops improving"""
    
    def __init__(self, patience: int = 10, min_delta: float = 0.0, mode: str = 'min'):
        """
        Args:
            patience: Number of epochs with no improvement after which training will be stopped
            min_delta: Minimum change to qualify as an improvement
            mode: 'min' or 'max' - whether lower or higher metric is better
        """
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_epoch = 0
        
    def __call__(self, score, epoch):
        if self.best_score is None:
            self.best_score = score
            self.best_epoch = epoch
            return False
        
        if self.mode == 'min':
            improved = score < (self.best_score - self.min_delta)
        else:
            improved = score > (self.best_score + self.min_delta)
        
        if improved:
            self.best_score = score
            self.best_epoch = epoch
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
                
        return self.early_stop


class Trainer:
    """Training manager for calorie prediction"""
    
    def __init__(
        self,
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        scheduler,
        device,
        output_dir,
        early_stopping_patience=15,
        scheduler_step_on_batch=False
    ):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device
        self.output_dir = output_dir
        self.scheduler_step_on_batch = scheduler_step_on_batch
        
        # Early stopping
        self.early_stopping = EarlyStopping(
            patience=early_stopping_patience,
            min_delta=0.1,
            mode='min'
        )
        
        # Tensorboard
        self.writer = SummaryWriter(log_dir=os.path.join(output_dir, 'tensorboard'))
        
        # Tracking
        self.best_val_loss = float('inf')
        self.train_losses = []
        self.val_losses = []
        self.best_metrics = {}
    
    def train_epoch(self):
        """Train for one epoch"""
        self.model.train()
        total_loss = 0.0
        num_batches = 0
        
        pbar = tqdm(self.train_loader, desc="Training")
        for batch_idx, batch in enumerate(pbar):
            # Move to device
            rgb = batch['rgb'].to(self.device)
            depth = batch['depth'].to(self.device)
            calories = batch['calorie'].to(self.device)
            
            # Forward pass
            self.optimizer.zero_grad()
            calorie_pred = self.model(rgb, depth)
            
            # Compute loss (MSE for calorie prediction)
            loss = self.criterion(calorie_pred.squeeze(), calories)
            
            # Backward pass
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()
            
            # Update learning rate (if step_on_batch)
            if self.scheduler_step_on_batch and self.scheduler:
                self.scheduler.step()
            
            # Track metrics
            total_loss += loss.item()
            num_batches += 1
            
            # Update progress bar
            pbar.set_postfix({'Loss': f'{loss.item():.4f}'})
        
        return total_loss / num_batches
    
    def validate_epoch(self):
        """Validate for one epoch"""
        self.model.eval()
        total_loss = 0.0
        all_predictions = []
        all_targets = []
        
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validation"):
                # Move to device
                rgb = batch['rgb'].to(self.device)
                depth = batch['depth'].to(self.device)
                calories = batch['calorie'].to(self.device)
                
                # Forward pass
                calorie_pred = self.model(rgb, depth)
                
                # Compute loss
                loss = self.criterion(calorie_pred.squeeze(), calories)
                total_loss += loss.item()
                
                # Store predictions and targets for metrics
                all_predictions.extend(calorie_pred.squeeze().cpu().numpy())
                all_targets.extend(calories.cpu().numpy())
        
        # Calculate metrics
        avg_loss = total_loss / len(self.val_loader)
        predictions = np.array(all_predictions)
        targets = np.array(all_targets)
        
        mae = np.mean(np.abs(predictions - targets))
        
        return avg_loss, mae
    
    def train(self, num_epochs):
        """Full training loop"""
        print(f"Starting training for {num_epochs} epochs...")
        
        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch+1}/{num_epochs}")
            
            # Train
            train_loss = self.train_epoch()
            
            # Validate
            val_loss, mae = self.validate_epoch()
            
            # Update learning rate (if not step_on_batch)
            if not self.scheduler_step_on_batch and self.scheduler:
                self.scheduler.step(val_loss)
            
            # Log metrics
            self.writer.add_scalar('Loss/Train', train_loss, epoch)
            self.writer.add_scalar('Loss/Val', val_loss, epoch)
            self.writer.add_scalar('MAE', mae, epoch)
            
            # Save best model
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.best_metrics = {
                    'epoch': epoch + 1,
                    'val_loss': val_loss,
                    'mae': mae,
                }
                
                # Save model checkpoint
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'val_loss': val_loss,
                    'mae': mae,
                }, os.path.join(self.output_dir, 'best_model.pth'))
            
            # Print epoch results
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")
            print(f"MAE: {mae:.2f}")
            
            # Early stopping
            if self.early_stopping(val_loss, epoch):
                print(f"Early stopping triggered after {epoch+1} epochs")
                print(f"Best epoch: {self.early_stopping.best_epoch+1}")
                break
        
        self.writer.close()
        print(f"\nTraining completed!")
        print(f"Best validation loss: {self.best_val_loss:.4f}")


In [5]:
# Configuration - Update these paths to match your setup
DATA_ROOT = '../Nutrition5K/train'  # Path to training data directory
CSV_PATH = '../Nutrition5K/nutrition5k_train.csv'  # Path to training CSV
OUTPUT_DIR = '../experiments'  # Directory to save experiment results

# Global training hyperparameters (learning rate and weight decay set per experiment)
BATCH_SIZE = 32
NUM_EPOCHS = 40
VAL_RATIO = 0.15
IMG_SIZE = 256
NUM_WORKERS = 4

print("Configuration:")
print(f"  Data root: {DATA_ROOT}")
print(f"  CSV path: {CSV_PATH}")
print(f"  Output directory: {OUTPUT_DIR}")
print(f"  Batch size: {BATCH_SIZE}")
print(f"  Number of epochs: {NUM_EPOCHS}")
print(f"  Image size: {IMG_SIZE}")
print(f"  Workers: {NUM_WORKERS}")

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)


Configuration:
  Data root: ../Nutrition5K/train
  CSV path: ../Nutrition5K/nutrition5k_train.csv
  Output directory: ../experiments
  Batch size: 32
  Number of epochs: 40
  Image size: 256
  Workers: 4


### Dataset Loading

In [6]:
# Create train/validation split
print("Creating train/validation split...")
train_csv, val_csv = create_train_val_split(
    CSV_PATH,
    val_ratio=VAL_RATIO,
    random_seed=42
)

print(f"Train CSV: {train_csv}")
print(f"Validation CSV: {val_csv}")

# Load a sample to check data
sample_dataset = Nutrition5KDataset(
    csv_path=train_csv,
    data_root=DATA_ROOT,
    split='train',
    augment=False,  # No augmentation for checking
    img_size=IMG_SIZE,
)

print(f"\nDataset loaded successfully!")
print(f"Training samples: {len(sample_dataset)}")
print(f"RGB shape: {sample_dataset[0]['rgb'].shape}")
print(f"Depth shape: {sample_dataset[0]['depth'].shape}")


Creating train/validation split...
Train CSV: ../Nutrition5K/train_split.csv
Validation CSV: ../Nutrition5K/val_split.csv
Loaded 2804 valid samples out of 2805

Dataset loaded successfully!
Training samples: 2804
RGB shape: torch.Size([3, 256, 256])
Depth shape: torch.Size([1, 256, 256])


## Inception

In [7]:
import torch.nn as nn
import torch
import torch.nn.functional as F
import torchvision.models as models

class InceptionV3Encoder(nn.Module):
    """InceptionV3 encoder as used in the original Nutrition5k paper"""
    
    def __init__(self, pretrained: bool = False, in_channels: int = 3):
        super().__init__()
        
        # Load InceptionV3 model
        inception = models.inception_v3(pretrained=pretrained, aux_logits=False)
        
        # The output of InceptionV3 features is 2048 channels
        self.out_channels = 2048
        
        # Modify first conv if we have different input channels (e.g., 1 for depth)
        if in_channels != 3:
            self.Conv2d_1a_3x3 = nn.Conv2d(
                in_channels, 32, kernel_size=3, stride=2, bias=False
            )
        else:
            self.Conv2d_1a_3x3 = inception.Conv2d_1a_3x3
        
        # Copy all other layers from InceptionV3
        # First block
        self.Conv2d_2a_3x3 = inception.Conv2d_2a_3x3
        self.Conv2d_2b_3x3 = inception.Conv2d_2b_3x3
        self.maxpool1 = inception.maxpool1
        
        # Second block
        self.Conv2d_3b_1x1 = inception.Conv2d_3b_1x1
        self.Conv2d_4a_3x3 = inception.Conv2d_4a_3x3
        self.maxpool2 = inception.maxpool2
        
        # Inception blocks
        self.Mixed_5b = inception.Mixed_5b
        self.Mixed_5c = inception.Mixed_5c
        self.Mixed_5d = inception.Mixed_5d
        self.Mixed_6a = inception.Mixed_6a
        self.Mixed_6b = inception.Mixed_6b
        self.Mixed_6c = inception.Mixed_6c
        self.Mixed_6d = inception.Mixed_6d
        self.Mixed_6e = inception.Mixed_6e
        self.Mixed_7a = inception.Mixed_7a
        self.Mixed_7b = inception.Mixed_7b
        self.Mixed_7c = inception.Mixed_7c
    
    def forward(self, x):
        """
        Args:
            x: Input tensor (B, C, H, W)
        Returns:
            Feature map (B, 2048, H/32, W/32)
        """
        # First block
        x = self.Conv2d_1a_3x3(x)
        x = self.Conv2d_2a_3x3(x)
        x = self.Conv2d_2b_3x3(x)
        x = self.maxpool1(x)
        
        # Second block
        x = self.Conv2d_3b_1x1(x)
        x = self.Conv2d_4a_3x3(x)
        x = self.maxpool2(x)
        
        # Inception blocks
        x = self.Mixed_5b(x)
        x = self.Mixed_5c(x)
        x = self.Mixed_5d(x)
        x = self.Mixed_6a(x)
        x = self.Mixed_6b(x)
        x = self.Mixed_6c(x)
        x = self.Mixed_6d(x)
        x = self.Mixed_6e(x)
        x = self.Mixed_7a(x)
        x = self.Mixed_7b(x)
        x = self.Mixed_7c(x)
        
        return x

# Early Fusion Module (RGB + Depth fused at input level)
class EarlyFusion(nn.Module):
    """
    Early Fusion: Combine RGB and Depth channels at the input level
    before processing through the network
    """
    
    def __init__(self, pretrained: bool = False, fusion_channels: int = 2048, dropout_rate: float = 0.4):
        super().__init__()
        
        # Create a single encoder with 4 input channels (3 RGB + 1 Depth)
        self.encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=4)
        
        # Regression head for calorie prediction
        self.regression_head = RegressionHead(
            in_channels=self.encoder.out_channels,
            dropout_rate=dropout_rate
        )
    
    def forward(self, rgb, depth):
        """
        Args:
            rgb: RGB images (B, 3, H, W)
            depth: Depth images (B, 1, H, W)
        
        Returns:
            Predicted calories (B, 1)
        """
        # Concatenate RGB and depth along channel dimension
        x = torch.cat([rgb, depth], dim=1)  # (B, 4, H, W)
        
        # Process through the encoder
        features = self.encoder(x)
        
        # Predict calories
        calories = self.regression_head(features)
        
        return calories

# Late Fusion Module (RGB + Depth processed separately and fused at regression level)
class LateFusion(nn.Module):
    """
    Late Fusion: Process RGB and Depth streams independently, then fuse at the regression head level
    """
    
    def __init__(self, pretrained: bool = False, fusion_channels: int = 2048, dropout_rate: float = 0.4):
        super().__init__()
        
        # RGB and Depth encoders
        self.rgb_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=3)
        self.depth_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=1)
        
        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Fusion at the feature vector level
        in_features = self.rgb_encoder.out_channels + self.depth_encoder.out_channels
        
        # Fully connected layers for regression
        self.regression_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 1)
        )
    
    def forward(self, rgb, depth):
        """
        Args:
            rgb: RGB images (B, 3, H, W)
            depth: Depth images (B, 1, H, W)
        
        Returns:
            Predicted calories (B, 1)
        """
        # Extract features from both streams
        rgb_features = self.rgb_encoder(rgb)    # (B, 2048, H/32, W/32)
        depth_features = self.depth_encoder(depth)  # (B, 2048, H/32, W/32)
        
        # Apply global average pooling
        rgb_features = self.avgpool(rgb_features)    # (B, 2048, 1, 1)
        depth_features = self.avgpool(depth_features)  # (B, 2048, 1, 1)
        
        # Concatenate feature vectors
        fused = torch.cat([rgb_features, depth_features], dim=1)  # (B, 4096, 1, 1)
        
        # Predict calories
        calories = self.regression_layers(fused)
        
        return calories

class RegressionHead(nn.Module):
    def __init__(self, in_channels: int = 2048, dropout_rate: float = 0.4):
        super().__init__()
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 1)
        )
    
    def forward(self, x):
        x = self.avgpool(x)  # (B, C, 1, 1)
        x = self.fc_layers(x)  # (B, 1)
        return x


class VolumeEstimator(nn.Module):
    """
    Food volume estimation from overhead depth images following the Nutrition5k paper.
    
    Given:
    - Distance between camera and capture plane: 35.9 cm
    - Per-pixel surface area at this distance: 5.957 × 10^-3 cm²
    
    The volume is calculated by:
    1. Computing per-pixel volume (depth × surface_area)
    2. Summing over all food pixels (using binary threshold segmentation)
    """
    
    def __init__(self, 
                 camera_distance: float = 35.9,  # cm
                 pixel_surface_area: float = 5.957e-3,  # cm²
                 depth_threshold: float = 0.1):  # Threshold for simple segmentation
        super().__init__()
        
        self.camera_distance = camera_distance
        self.pixel_surface_area = pixel_surface_area
        self.depth_threshold = depth_threshold
    
    def forward(self, depth_images):
        """
        Args:
            depth_images: Depth images (B, 1, H, W), normalized to [0, 1] range
        
        Returns:
            volume_estimates: Volume in cm³ for each image (B, 1)
        """
        # Simple threshold-based segmentation for foreground/background
        segmentation_mask = (depth_images > self.depth_threshold).float()
        
        # Convert normalized depth back to actual depth values
        # Assuming depth is normalized to [0, 1] and represents distance from camera
        # For simplicity, we assume the depth represents actual distance in cm scaled to [0, 1]
        depth_cm = depth_images * self.camera_distance
        
        # Calculate per-pixel volume: depth × surface_area
        per_pixel_volume = depth_cm * self.pixel_surface_area  # (B, 1, H, W)
        
        # Apply segmentation mask to consider only food pixels
        masked_volume = per_pixel_volume * segmentation_mask
        
        # Sum over all pixels to get total volume
        volume_estimates = masked_volume.sum(dim=[2, 3])  # (B, 1)
        
        return volume_estimates


class RegressionHeadWithVolume(nn.Module):
    """
    Regression head that concatenates volume estimate to InceptionV3 features.
    
    According to the paper: "concatenating the volume estimation value to the output 
    of the InceptionV3 backbone, before the following two fully connected layers"
    with FC layers of 64 and 1 dimension.
    """
    
    def __init__(self, in_channels: int = 2048, dropout_rate: float = 0.4):
        super().__init__()
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Two FC layers as described in the paper (2048+1 -> 64 -> 1)
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels + 1, 64),  # +1 for volume
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)
        )
    
    def forward(self, features, volume):
        """
        Args:
            features: Feature maps from backbone (B, 2048, H, W)
            volume: Volume estimates (B, 1)
        
        Returns:
            Predicted calories (B, 1)
        """
        # Global average pooling
        x = self.avgpool(features)  # (B, 2048, 1, 1)
        x = torch.flatten(x, 1)  # (B, 2048)
        
        # Concatenate volume estimate
        x = torch.cat([x, volume], dim=1)  # (B, 2049)
        
        # Predict calories
        x = self.fc_layers(x)  # (B, 1)
        
        return x

class Nutrition5kModel(nn.Module):
    """
    Implementation of the dual-stream architecture used in the original Nutrition5k paper
    Uses InceptionV3 as the backbone and middle fusion
    """
    
    def __init__(
        self,
        fusion: str = 'middle',
        fusion_channels: int = 2048,
        dropout_rate: float = 0.4,
        pretrained: bool = False,
        use_volume: bool = False
    ):
        super().__init__()
        
        self.use_volume = use_volume
        
        if fusion == 'early':
            self.model = EarlyFusion(
                pretrained=pretrained,
                fusion_channels=fusion_channels,
                dropout_rate=dropout_rate
            )
        elif fusion == 'late':
            self.model = LateFusion(
                pretrained=pretrained,
                fusion_channels=fusion_channels,
                dropout_rate=dropout_rate
            )
        elif fusion == 'image_only':
            # Image-only variant: only RGB is used
            self.rgb_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=3)
            
            # Volume estimator (if enabled)
            if use_volume:
                self.volume_estimator = VolumeEstimator()
                self.regression_head = RegressionHeadWithVolume(
                    in_channels=self.rgb_encoder.out_channels,
                    dropout_rate=dropout_rate
                )
            else:
                self.regression_head = RegressionHead(
                    in_channels=self.rgb_encoder.out_channels,
                    dropout_rate=dropout_rate
                )
        elif fusion == 'image_volume':
            # Image+Volume variant: RGB encoder + volume as additional signal
            self.rgb_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=3)
            self.volume_estimator = VolumeEstimator()
            self.regression_head = RegressionHeadWithVolume(
                in_channels=self.rgb_encoder.out_channels,
                dropout_rate=dropout_rate
            )
            self.use_volume = True  # Always use volume for this variant
        else:  # middle fusion
            # RGB and Depth encoders using InceptionV3
            self.rgb_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=3)
            self.depth_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=1)
            
            # Create middle fusion module
            from_channels = self.rgb_encoder.out_channels + self.depth_encoder.out_channels
            self.fusion_conv = nn.Sequential(
                nn.Conv2d(from_channels, fusion_channels, kernel_size=1, bias=False),
                nn.BatchNorm2d(fusion_channels),
                nn.ReLU(inplace=True)
            )
            
            # Volume estimator (if enabled)
            if use_volume:
                self.volume_estimator = VolumeEstimator()
                self.regression_head = RegressionHeadWithVolume(
                    in_channels=fusion_channels,
                    dropout_rate=dropout_rate
                )
            else:
                self.regression_head = RegressionHead(
                    in_channels=fusion_channels,
                    dropout_rate=dropout_rate
                )
    
    def forward(self, rgb, depth):
        """
        Args:
            rgb: RGB images (B, 3, H, W)
            depth: Depth images (B, 1, H, W)
        
        Returns:
            calorie_pred: Predicted calories (B, 1)
        """
        if hasattr(self, 'model'):
            return self.model(rgb, depth)
        
        # Calculate volume estimate if enabled
        volume = None
        if self.use_volume and hasattr(self, 'volume_estimator'):
            volume = self.volume_estimator(depth)  # (B, 1)
        
        # Image-only or Image+Volume variant
        if hasattr(self, 'rgb_encoder') and not hasattr(self, 'depth_encoder'):
            rgb_features = self.rgb_encoder(rgb)  # (B, 2048, H/32, W/32)
            
            if volume is not None:
                calorie_pred = self.regression_head(rgb_features, volume)
            else:
                calorie_pred = self.regression_head(rgb_features)
            
            return calorie_pred
        
        # Extract features from both streams
        rgb_features = self.rgb_encoder(rgb)      # (B, 2048, H/32, W/32)
        depth_features = self.depth_encoder(depth)  # (B, 2048, H/32, W/32)
        
        # Middle fusion - concatenate and apply 1x1 conv
        fused = torch.cat([rgb_features, depth_features], dim=1)  # (B, 4096, H/32, W/32)
        fused = self.fusion_conv(fused)  # (B, 2048, H/32, W/32)
        
        # Predict calories (with or without volume)
        if volume is not None:
            calorie_pred = self.regression_head(fused, volume)
        else:
            calorie_pred = self.regression_head(fused)
        
        return calorie_pred
    
    def get_num_parameters(self):
        """Get total number of trainable parameters"""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

# Factory function to build Nutrition5k models with different fusion types
def build_nutrition5k_model(fusion='middle', pretrained=False, dropout_rate=0.4, fusion_channels=2048, 
                           use_volume=False, **kwargs):
    """
    Factory function to build models using the Nutrition5k paper architecture (InceptionV3 backbone)
    
    Args:
        fusion: Fusion type ('early', 'middle', 'late', 'image_only', or 'image_volume')
        pretrained: Whether to use pretrained weights for InceptionV3
        dropout_rate: Dropout rate for regression head
        fusion_channels: Number of channels after fusion
        use_volume: Whether to use volume estimation as additional signal (uses simple threshold-based segmentation)
    
    Returns:
        Nutrition5k model with specified configuration
    """
    return Nutrition5kModel(
        fusion=fusion,
        fusion_channels=fusion_channels,
        dropout_rate=dropout_rate,
        pretrained=pretrained,
        use_volume=use_volume
    )


## Experiment

### InceptionV3 - Middle Fusion

In [8]:
# Add a new cell with this code to run the Nutrition5k InceptionV3 experiments

# Import the necessary modules
import sys
sys.path.append('/data/projects/punim0478/setiawand/Computer-Vision-COMP90086/src')

from nutrition5k_inceptionv3_model import build_nutrition5k_model

# Configure experiment settings
BATCH_SIZE = 32
NUM_EPOCHS = 40
DROPOUT_RATE = 0.4
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 15
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05
FUSION_CHANNELS = 2048  # InceptionV3 output channels

def train_nutrition5k_model(fusion_type='middle'):
    """Train the Nutrition5k model with InceptionV3 and specified fusion type"""
    
    print("="*60)
    print(f"TRAINING: Nutrition5k InceptionV3 + {fusion_type.capitalize()} Fusion")
    print("="*60)
    
    # Create datasets
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model with specified fusion type
    model = build_nutrition5k_model(
        fusion=fusion_type,
        pretrained=False,
        dropout_rate=DROPOUT_RATE,
        fusion_channels=FUSION_CHANNELS
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # Loss function
    criterion = nn.MSELoss()
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_fusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run an experiment with middle fusion
middle_fusion_results = train_nutrition5k_model(fusion_type='middle')

TRAINING: Nutrition5k InceptionV3 + Middle Fusion
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 53,143,873
Training samples: 2804
Validation samples: 495
Learning rate: 0.0003
Weight decay: 1e-06
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:15<00:00,  5.72it/s, Loss=128568.6797]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.13it/s]


Train Loss: 99179.5700
Val Loss: 107412.9351
MAE: 240.64

Epoch 2/40


Training: 100%|██████████| 87/87 [00:14<00:00,  6.18it/s, Loss=14551.3506] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.31it/s]


Train Loss: 44613.2511
Val Loss: 22115.9072
MAE: 103.60

Epoch 3/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.24it/s, Loss=8529.9629] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 14990.2540
Val Loss: 15339.2216
MAE: 89.52

Epoch 4/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.24it/s, Loss=10611.9082]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Train Loss: 12844.5238
Val Loss: 14614.1202
MAE: 86.15

Epoch 5/40


Training: 100%|██████████| 87/87 [00:14<00:00,  6.19it/s, Loss=10989.2109]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.62it/s]


Train Loss: 12333.9567
Val Loss: 13903.3691
MAE: 84.82

Epoch 6/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.34it/s, Loss=7037.2222] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.65it/s]


Train Loss: 13701.8433
Val Loss: 15968.9327
MAE: 87.62

Epoch 7/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=9634.6934] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 9885.7110
Val Loss: 13036.7188
MAE: 78.05

Epoch 8/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=15831.9697]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.27it/s]


Train Loss: 13061.2367
Val Loss: 37848.2272
MAE: 150.94

Epoch 9/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=15873.6582]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.62it/s]


Train Loss: 13630.2193
Val Loss: 26709.6769
MAE: 118.80

Epoch 10/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.29it/s, Loss=12883.2695]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 11182.7567
Val Loss: 16951.6092
MAE: 94.59

Epoch 11/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=8178.9229] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 8048.6062
Val Loss: 12620.6396
MAE: 74.06

Epoch 12/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.34it/s, Loss=7492.9854] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 10334.1747
Val Loss: 15044.2647
MAE: 86.57

Epoch 13/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=3736.9170] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 7712.2835
Val Loss: 10110.7316
MAE: 70.29

Epoch 14/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.35it/s, Loss=12397.4609]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 4862.7000
Val Loss: 9211.0626
MAE: 63.99

Epoch 15/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.34it/s, Loss=4223.6538] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.44it/s]


Train Loss: 4543.4875
Val Loss: 9900.7881
MAE: 65.54

Epoch 16/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.34it/s, Loss=2259.6072] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.78it/s]


Train Loss: 4426.4736
Val Loss: 9328.8389
MAE: 66.59

Epoch 17/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.31it/s, Loss=1298.2256] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]


Train Loss: 3814.8753
Val Loss: 8683.5598
MAE: 63.18

Epoch 18/40


Training: 100%|██████████| 87/87 [00:14<00:00,  6.18it/s, Loss=4756.7666] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 4460.9755
Val Loss: 9520.8630
MAE: 66.56

Epoch 19/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.25it/s, Loss=3241.6943] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.34it/s]


Train Loss: 4638.1852
Val Loss: 8618.1980
MAE: 63.57

Epoch 20/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.23it/s, Loss=2759.9041] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.33it/s]


Train Loss: 4441.7072
Val Loss: 10380.2209
MAE: 66.44

Epoch 21/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.29it/s, Loss=8066.5176] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.26it/s]


Train Loss: 4397.2711
Val Loss: 9014.0540
MAE: 64.05

Epoch 22/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.23it/s, Loss=2299.1699] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.47it/s]


Train Loss: 3708.5228
Val Loss: 8564.3510
MAE: 61.92

Epoch 23/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=3101.3701] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 3872.7702
Val Loss: 9195.6286
MAE: 63.51

Epoch 24/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=1211.8209] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 3619.2267
Val Loss: 8286.5531
MAE: 59.58

Epoch 25/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.31it/s, Loss=4860.5879] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 4642.6053
Val Loss: 11539.1972
MAE: 76.86

Epoch 26/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.31it/s, Loss=5091.5918] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.65it/s]


Train Loss: 5310.7747
Val Loss: 9601.1501
MAE: 65.47

Epoch 27/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=3598.9797]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 4402.4465
Val Loss: 8586.3208
MAE: 60.98

Epoch 28/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=3286.5671] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]


Train Loss: 4379.7765
Val Loss: 9463.2135
MAE: 64.45

Epoch 29/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=3635.2405] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.29it/s]


Train Loss: 3820.1549
Val Loss: 7934.8172
MAE: 57.35

Epoch 30/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.23it/s, Loss=9945.4971] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 5754.6514
Val Loss: 10848.7680
MAE: 67.77

Epoch 31/40


Training: 100%|██████████| 87/87 [00:14<00:00,  6.12it/s, Loss=2592.0410] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]


Train Loss: 4085.6081
Val Loss: 8705.7453
MAE: 62.18

Epoch 32/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=1845.0674] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 3637.0483
Val Loss: 8562.5231
MAE: 62.51

Epoch 33/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.27it/s, Loss=5132.6309] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.37it/s]


Train Loss: 3630.7866
Val Loss: 9332.6526
MAE: 62.17

Epoch 34/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.27it/s, Loss=1935.2677] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 3029.1384
Val Loss: 7578.5881
MAE: 56.28

Epoch 35/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.31it/s, Loss=5809.5571] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 4695.0795
Val Loss: 9082.4254
MAE: 66.55

Epoch 36/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=2360.2610] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.70it/s]


Train Loss: 4250.1567
Val Loss: 8774.8670
MAE: 62.02

Epoch 37/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=5048.2212]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.41it/s]


Train Loss: 3479.9186
Val Loss: 9305.3121
MAE: 62.28

Epoch 38/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=1282.8358] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.34it/s]


Train Loss: 3230.0179
Val Loss: 7757.0370
MAE: 56.59

Epoch 39/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=3625.0686] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 4668.2630
Val Loss: 12126.1959
MAE: 82.30

Epoch 40/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=8918.3311] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.40it/s]

Train Loss: 5625.5605
Val Loss: 14431.2035
MAE: 85.84

Training completed!
Best validation loss: 7578.5881

Experiment completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_middle_fusion_20251024_130923





### InceptionV3 - Early Fusion

In [9]:
### EARLY FUSION
# Import the necessary modules
import sys
sys.path.append('/data/projects/punim0478/setiawand/Computer-Vision-COMP90086/src')

from nutrition5k_inceptionv3_model import build_nutrition5k_model

# Configure experiment settings
BATCH_SIZE = 32
NUM_EPOCHS = 40
DROPOUT_RATE = 0.4
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 15
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05
FUSION_CHANNELS = 2048  # InceptionV3 output channels

def train_nutrition5k_model(fusion_type='middle'):
    """Train the Nutrition5k model with InceptionV3 and specified fusion type"""
    
    print("="*60)
    print(f"TRAINING: Nutrition5k InceptionV3 + {fusion_type.capitalize()} Fusion")
    print("="*60)
    
    # Create datasets
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model with specified fusion type
    model = build_nutrition5k_model(
        fusion=fusion_type,
        pretrained=False,
        dropout_rate=DROPOUT_RATE,
        fusion_channels=FUSION_CHANNELS
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # Loss function
    criterion = nn.MSELoss()
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_fusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run an experiment with middle fusion
middle_fusion_results = train_nutrition5k_model(fusion_type='early')

TRAINING: Nutrition5k InceptionV3 + Early Fusion
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 22,966,465
Training samples: 2804
Validation samples: 495
Learning rate: 0.0003
Weight decay: 1e-06
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=66747.9062] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 99325.6495
Val Loss: 107376.2798
MAE: 240.56

Epoch 2/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.57it/s, Loss=22843.2305] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 42928.3323
Val Loss: 19980.8986
MAE: 103.48

Epoch 3/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=10708.5781]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.70it/s]


Train Loss: 17106.7717
Val Loss: 19836.2162
MAE: 95.29

Epoch 4/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=12754.4180]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 17037.9287
Val Loss: 29315.9156
MAE: 112.77

Epoch 5/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=11109.4961]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 12419.4974
Val Loss: 12350.2427
MAE: 78.81

Epoch 6/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.60it/s, Loss=8752.6113] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.47it/s]


Train Loss: 13488.8264
Val Loss: 12729.3925
MAE: 76.98

Epoch 7/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=8419.4541] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.58it/s]


Train Loss: 12929.6196
Val Loss: 21976.7961
MAE: 100.01

Epoch 8/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=4679.3589] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 10146.2328
Val Loss: 10747.8688
MAE: 71.96

Epoch 9/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=5635.6797] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 8792.4451
Val Loss: 10783.5305
MAE: 71.93

Epoch 10/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.62it/s, Loss=5006.7964] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 7525.5879
Val Loss: 11707.3728
MAE: 71.07

Epoch 11/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.63it/s, Loss=4415.2920] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.77it/s]


Train Loss: 8775.2512
Val Loss: 11203.9194
MAE: 70.39

Epoch 12/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.59it/s, Loss=8206.9492] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 7488.8060
Val Loss: 11448.6037
MAE: 70.75

Epoch 13/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.58it/s, Loss=20127.1152]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 7698.7578
Val Loss: 19493.1765
MAE: 90.35

Epoch 14/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=12658.3789]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 8682.6980
Val Loss: 18340.8633
MAE: 85.01

Epoch 15/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.58it/s, Loss=5315.3403] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 7846.1895
Val Loss: 13579.7655
MAE: 80.14

Epoch 16/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.62it/s, Loss=7562.2861] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 8193.6605
Val Loss: 10145.4527
MAE: 69.51

Epoch 17/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=2269.3503] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.70it/s]


Train Loss: 5541.4729
Val Loss: 7903.5556
MAE: 58.91

Epoch 18/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.57it/s, Loss=8156.2046] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 6543.4155
Val Loss: 8652.4755
MAE: 62.92

Epoch 19/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.58it/s, Loss=2912.4287] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 4890.6906
Val Loss: 8625.3224
MAE: 62.41

Epoch 20/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=3772.5403] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 5196.3465
Val Loss: 10165.2951
MAE: 63.64

Epoch 21/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=1960.0081] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 3562.4957
Val Loss: 7652.7168
MAE: 54.76

Epoch 22/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=17051.5156]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 5509.5268
Val Loss: 12240.1665
MAE: 74.17

Epoch 23/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=3583.2676] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 6604.5801
Val Loss: 12108.0193
MAE: 69.45

Epoch 24/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=7632.9282] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 6506.6554
Val Loss: 14720.9593
MAE: 77.30

Epoch 25/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=2055.4907] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 4572.6020
Val Loss: 9063.0943
MAE: 59.88

Epoch 26/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=1919.8328] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 3421.9659
Val Loss: 7674.3759
MAE: 56.12

Epoch 27/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.42it/s, Loss=1848.4658] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 4576.4738
Val Loss: 9714.7449
MAE: 61.45

Epoch 28/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.40it/s, Loss=2453.5884] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 3644.3707
Val Loss: 7877.2721
MAE: 55.54

Epoch 29/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=4258.2334] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 4365.7270
Val Loss: 11066.9588
MAE: 69.82

Epoch 30/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=7192.0464]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 3430.3935
Val Loss: 7680.3667
MAE: 57.24

Epoch 31/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=3328.4053] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 4283.5907
Val Loss: 8796.7368
MAE: 64.82

Epoch 32/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=2363.3750] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.39it/s]


Train Loss: 3478.3367
Val Loss: 7675.7208
MAE: 58.12

Epoch 33/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=5590.2734] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 3638.4839
Val Loss: 9331.1797
MAE: 65.83

Epoch 34/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=4025.5542] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 3395.1593
Val Loss: 7289.9086
MAE: 54.76

Epoch 35/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=1952.4146] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 4409.1664
Val Loss: 10487.2523
MAE: 68.48

Epoch 36/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=5168.2134] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.73it/s]


Train Loss: 3580.7155
Val Loss: 7592.5793
MAE: 55.65

Epoch 37/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=5320.6187] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 4219.9357
Val Loss: 9092.4391
MAE: 63.40

Epoch 38/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=2338.5122]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 2817.4882
Val Loss: 8004.2764
MAE: 54.92

Epoch 39/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=2916.2383]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.73it/s]


Train Loss: 3088.7458
Val Loss: 8371.1610
MAE: 58.62

Epoch 40/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=2650.1313] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]

Train Loss: 2973.2887
Val Loss: 9626.2067
MAE: 60.45

Training completed!
Best validation loss: 7289.9086

Experiment completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_early_fusion_20251024_132025





### InceptionV3 - Late Fusion

In [10]:
### EARLY FUSION
# Import the necessary modules
import sys
sys.path.append('/data/projects/punim0478/setiawand/Computer-Vision-COMP90086/src')

from nutrition5k_inceptionv3_model import build_nutrition5k_model

# Configure experiment settings
BATCH_SIZE = 32
NUM_EPOCHS = 40
DROPOUT_RATE = 0.4
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 15
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05
FUSION_CHANNELS = 2048  # InceptionV3 output channels

def train_nutrition5k_model(fusion_type='middle'):
    """Train the Nutrition5k model with InceptionV3 and specified fusion type"""
    
    print("="*60)
    print(f"TRAINING: Nutrition5k InceptionV3 + {fusion_type.capitalize()} Fusion")
    print("="*60)
    
    # Create datasets
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model with specified fusion type
    model = build_nutrition5k_model(
        fusion=fusion_type,
        pretrained=False,
        dropout_rate=DROPOUT_RATE,
        fusion_channels=FUSION_CHANNELS
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # Loss function
    criterion = nn.MSELoss()
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_fusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run an experiment with middle fusion
middle_fusion_results = train_nutrition5k_model(fusion_type='late')

TRAINING: Nutrition5k InceptionV3 + Late Fusion
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 45,799,745
Training samples: 2804
Validation samples: 495
Learning rate: 0.0003
Weight decay: 1e-06
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.40it/s, Loss=106439.0938]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 98389.8398
Val Loss: 107367.1013
MAE: 240.54

Epoch 2/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.25it/s, Loss=28412.5645]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.45it/s]


Train Loss: 41690.2808
Val Loss: 31997.4932
MAE: 123.44

Epoch 3/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=11473.9180]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 17350.7568
Val Loss: 19154.0181
MAE: 99.54

Epoch 4/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.25it/s, Loss=10099.2783]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.26it/s]


Train Loss: 14514.1249
Val Loss: 18284.8318
MAE: 101.39

Epoch 5/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=8358.5098] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 13459.6902
Val Loss: 13000.7582
MAE: 83.92

Epoch 6/40


Training: 100%|██████████| 87/87 [00:14<00:00,  6.19it/s, Loss=11738.3379]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.38it/s]


Train Loss: 11558.1775
Val Loss: 14764.5721
MAE: 93.13

Epoch 7/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=5837.0171] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 7952.4247
Val Loss: 10476.4639
MAE: 72.17

Epoch 8/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=5708.1650] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 6212.3241
Val Loss: 10303.3420
MAE: 67.74

Epoch 9/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=7737.3613] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 5673.2633
Val Loss: 8837.4902
MAE: 63.97

Epoch 10/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.35it/s, Loss=9406.4307] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]


Train Loss: 5494.6568
Val Loss: 9756.0814
MAE: 69.59

Epoch 11/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.27it/s, Loss=4610.4297] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 5146.7232
Val Loss: 8753.4821
MAE: 63.10

Epoch 12/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.35it/s, Loss=5909.7256] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 4907.9320
Val Loss: 9570.8997
MAE: 65.66

Epoch 13/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=4963.5718] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 4037.6672
Val Loss: 8444.3814
MAE: 61.17

Epoch 14/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=7843.6504] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.30it/s]


Train Loss: 4759.5694
Val Loss: 9681.8932
MAE: 66.88

Epoch 15/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.27it/s, Loss=9072.1475] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 4229.9857
Val Loss: 8599.4233
MAE: 62.60

Epoch 16/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.22it/s, Loss=2737.9417] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.62it/s]


Train Loss: 4172.8494
Val Loss: 10331.9440
MAE: 70.47

Epoch 17/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=3295.8223] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 4371.7250
Val Loss: 8629.5308
MAE: 65.55

Epoch 18/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=4097.1982] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.34it/s]


Train Loss: 4424.5678
Val Loss: 8790.1743
MAE: 65.26

Epoch 19/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=2633.3958] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 4350.4276
Val Loss: 8730.1980
MAE: 62.17

Epoch 20/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.25it/s, Loss=4055.3335] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.38it/s]


Train Loss: 4177.5882
Val Loss: 14836.8188
MAE: 79.11

Epoch 21/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.23it/s, Loss=2228.0410] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.32it/s]


Train Loss: 4229.4994
Val Loss: 9068.2230
MAE: 66.28

Epoch 22/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=1770.4275] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]


Train Loss: 3978.3809
Val Loss: 8781.0372
MAE: 61.05

Epoch 23/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=3093.6758] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 3067.3215
Val Loss: 8732.8710
MAE: 64.11

Epoch 24/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.42it/s, Loss=3645.4475] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.35it/s]


Train Loss: 3630.3558
Val Loss: 8726.5923
MAE: 62.54

Epoch 25/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=1093.2750] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.34it/s]


Train Loss: 3741.8051
Val Loss: 9379.7649
MAE: 62.13

Epoch 26/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=1971.1019] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]


Train Loss: 3466.8906
Val Loss: 8442.0509
MAE: 58.94

Epoch 27/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=4339.6250] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.71it/s]


Train Loss: 4190.1686
Val Loss: 8390.8783
MAE: 63.37

Epoch 28/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=2052.4866] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.62it/s]


Train Loss: 3881.3878
Val Loss: 9256.8487
MAE: 65.92

Epoch 29/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=12281.3994]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 3442.6623
Val Loss: 7819.5536
MAE: 57.46

Epoch 30/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=4406.4668] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Train Loss: 5363.2380
Val Loss: 13414.7968
MAE: 77.36

Epoch 31/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=4942.8740] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.79it/s]


Train Loss: 6517.6254
Val Loss: 24391.3788
MAE: 103.60

Epoch 32/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=4491.8784] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.41it/s]


Train Loss: 5878.0402
Val Loss: 10430.6566
MAE: 65.04

Epoch 33/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.23it/s, Loss=2186.6104] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 4494.0169
Val Loss: 8213.3772
MAE: 59.08

Epoch 34/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=7051.7349] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.73it/s]


Train Loss: 3761.5112
Val Loss: 10694.6189
MAE: 66.91

Epoch 35/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.29it/s, Loss=2668.6448]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.37it/s]


Train Loss: 3711.7728
Val Loss: 7766.9089
MAE: 59.69

Epoch 36/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.28it/s, Loss=3103.6143] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 4424.6941
Val Loss: 12087.1532
MAE: 74.20

Epoch 37/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=3766.8242] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 5107.7292
Val Loss: 16110.6986
MAE: 81.93

Epoch 38/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.27it/s, Loss=6010.6538] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Train Loss: 4091.8209
Val Loss: 8399.1342
MAE: 57.81

Epoch 39/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.40it/s, Loss=2086.4343] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 4500.5079
Val Loss: 9125.9881
MAE: 62.44

Epoch 40/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=1422.8608]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]

Train Loss: 3323.3737
Val Loss: 8567.3797
MAE: 60.29

Training completed!
Best validation loss: 7766.9089

Experiment completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_late_fusion_20251024_133101





### InceptionV3 - Middle + Volume

This section implements the food volume estimation method as described in the Nutrition5k paper. The method:

1. **Estimates food volume from overhead depth images** using:
   - Camera distance: 35.9 cm
   - Per-pixel surface area: 5.957 × 10⁻³ cm²
   
2. **Uses binary foreground/background segmentation** to identify food pixels

3. **Calculates volume** by summing per-pixel volumes (depth × surface_area) over all food pixels

4. **Concatenates volume estimate** to the InceptionV3 backbone output before FC layers

We implement three variants:
- **Image-only**: Uses only RGB images (baseline)
- **Image+Volume**: RGB + volume estimate as additional signal  
- **Middle+Volume**: RGB + Depth fusion + volume estimate


In [None]:
import sys
sys.path.append('/data/projects/punim0478/setiawand/Computer-Vision-COMP90086/src')

from nutrition5k_inceptionv3_model import build_nutrition5k_model

# Configure experiment settings
BATCH_SIZE = 32
NUM_EPOCHS = 40
DROPOUT_RATE = 0.4
LEARNING_RATE = 5e-4
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 15
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05
FUSION_CHANNELS = 2048  # InceptionV3 output channels

# Training function with volume estimation support
def train_nutrition5k_with_volume(fusion_type='image_volume'):
    """
    Train the Nutrition5k model with volume estimation
    
    Args:
        fusion_type: 'image_only', 'image_volume', 'middle', etc.
        use_segmentation: Whether to use learned segmentation for volume estimation
    """
    
    print("="*60)
    print(f"TRAINING: Nutrition5k InceptionV3 + {fusion_type.upper()}")

    print("="*60)
    
    # Create datasets
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model with volume estimation
    use_volume = 'volume' in fusion_type
    
    model = build_nutrition5k_model(
        fusion=fusion_type,
        pretrained=False,
        dropout_rate=DROPOUT_RATE,
        fusion_channels=FUSION_CHANNELS,
        use_volume=use_volume,
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # Loss function
    criterion = nn.MSELoss()
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'use_volume': use_volume,
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics


middle_fusion_results = train_nutrition5k_with_volume(fusion_type='image_volume')


TRAINING: Nutrition5k InceptionV3 + IMAGE_VOLUME
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 21,916,833
Training samples: 2804
Validation samples: 495
Learning rate: 0.0005
Weight decay: 1e-06
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=108259.2578]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 98651.2712
Val Loss: 107381.6948
MAE: 240.57

Epoch 2/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=16013.5312] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 65919.9727
Val Loss: 29319.6537
MAE: 118.82

Epoch 3/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=21910.8262]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.68it/s]


Train Loss: 26365.2399
Val Loss: 25831.0439
MAE: 110.35

Epoch 4/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=28824.6973]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 19369.3317
Val Loss: 26086.2035
MAE: 106.78

Epoch 5/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=12186.3164]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 16064.0011
Val Loss: 22472.0131
MAE: 101.50

Epoch 6/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=7965.6699] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.70it/s]


Train Loss: 14435.9392
Val Loss: 12856.3794
MAE: 76.76

Epoch 7/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.66it/s, Loss=11353.6094]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 14900.7881
Val Loss: 16189.5482
MAE: 85.86

Epoch 8/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=11691.4023]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.78it/s]


Train Loss: 13149.4884
Val Loss: 11781.9545
MAE: 76.72

Epoch 9/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=11914.6602]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 12836.9574
Val Loss: 12081.9814
MAE: 75.09

Epoch 10/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.65it/s, Loss=5833.5947] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 12259.6983
Val Loss: 34023.9789
MAE: 135.16

Epoch 11/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=7867.6704] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 10627.7787
Val Loss: 8857.1537
MAE: 64.40

Epoch 12/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=8124.0371] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 8637.2702
Val Loss: 12734.0982
MAE: 73.18

Epoch 13/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=5287.5591] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.62it/s]


Train Loss: 12329.5391
Val Loss: 11926.0453
MAE: 73.37

Epoch 14/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=4309.2856] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 10885.5472
Val Loss: 13142.9994
MAE: 78.21

Epoch 15/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=20139.8066]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 10598.2898
Val Loss: 14689.3955
MAE: 81.68

Epoch 16/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=7086.2871] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 8293.4044
Val Loss: 10231.8171
MAE: 66.58

Epoch 17/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.57it/s, Loss=12543.9473]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 6399.1024
Val Loss: 9016.7161
MAE: 60.06

Epoch 18/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=5665.5361] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.41it/s]


Train Loss: 5826.5098
Val Loss: 8145.8707
MAE: 57.01

Epoch 19/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.39it/s, Loss=5068.1343] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 7035.3614
Val Loss: 9229.9055
MAE: 61.73

Epoch 20/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=5287.8306] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 6057.8679
Val Loss: 7616.8182
MAE: 56.79

Epoch 21/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=4746.9902] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 7072.8501
Val Loss: 24474.4791
MAE: 99.65

Epoch 22/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=13119.5117]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 7839.2704
Val Loss: 10457.4959
MAE: 65.65

Epoch 23/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.62it/s, Loss=3085.6042] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]


Train Loss: 6021.8229
Val Loss: 7191.0435
MAE: 55.98

Epoch 24/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.61it/s, Loss=6064.8857] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.70it/s]


Train Loss: 7473.2339
Val Loss: 9309.3742
MAE: 63.62

Epoch 25/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=3918.4976] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.78it/s]


Train Loss: 6366.5307
Val Loss: 7951.3516
MAE: 57.45

Epoch 26/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.59it/s, Loss=3958.7363] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 5714.5977
Val Loss: 8676.6589
MAE: 61.97

Epoch 27/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.59it/s, Loss=4025.3110] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 6011.6670
Val Loss: 8741.7768
MAE: 62.08

Epoch 28/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.63it/s, Loss=3402.2810] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.65it/s]


Train Loss: 5449.7801
Val Loss: 9306.1301
MAE: 59.73

Epoch 29/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=3335.1790] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 4545.7973
Val Loss: 7175.3438
MAE: 54.02

Epoch 30/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=10892.3789]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.67it/s]


Train Loss: 7070.0363
Val Loss: 13045.5674
MAE: 77.76

Epoch 31/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=5397.4434] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 6838.4158
Val Loss: 10400.0666
MAE: 64.40

Epoch 32/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=4656.6797] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.39it/s]


Train Loss: 5807.6003
Val Loss: 8157.4585
MAE: 59.51

Epoch 33/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=1558.7113] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.75it/s]


Train Loss: 5879.0825
Val Loss: 8046.5217
MAE: 57.06

Epoch 34/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.62it/s, Loss=7381.2354] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.74it/s]


Train Loss: 6066.1867
Val Loss: 9070.3390
MAE: 60.64

Epoch 35/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=3927.4146] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.77it/s]


Train Loss: 4413.8461
Val Loss: 7681.4189
MAE: 54.37

Epoch 36/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=9086.3516] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]


Train Loss: 5761.2919
Val Loss: 9406.7413
MAE: 65.93

Epoch 37/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=6872.7788] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 4781.9159
Val Loss: 7606.9587
MAE: 55.86

Epoch 38/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=6143.3491] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.68it/s]


Train Loss: 5480.3070
Val Loss: 10385.1789
MAE: 63.28

Epoch 39/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=3758.9685] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 4822.6640
Val Loss: 7814.0743
MAE: 54.46

Epoch 40/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=2849.0405] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]

Train Loss: 5745.1829
Val Loss: 9136.1897
MAE: 59.73

Training completed!
Best validation loss: 7175.3438

Experiment completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_image_volume_20251024_140149





: 

# Experimental Improvements

This section contains experiments to further improve the InceptionV3 + Volume model:

1. **Huber Loss Experiment**: Test Huber loss for better robustness to outliers
2. **Deeper Volume Head**: Try more complex volume processing  
3. **Hyperparameter Tuning**: Grid search for optimal learning rates and dropout


In [16]:
# Configure experiment settings
BATCH_SIZE = 32
NUM_EPOCHS = 45
DROPOUT_RATE = 0.4
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 15
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05
FUSION_CHANNELS = 2048  # InceptionV3 output channels

In [10]:
# EXPERIMENT 1: HUBER LOSS
# Test Huber loss for better robustness to outliers

def train_nutrition5k_huber_loss(fusion_type='image_volume', huber_delta=50.0):
    """
    Train the Nutrition5k model with Huber loss instead of MSE
    
    Args:
        fusion_type: 'image_only', 'image_volume', 'middle', etc.
        huber_delta: Delta parameter for Huber loss (transition point between L1 and L2)
    """
    
    print("="*60)
    print(f"EXPERIMENT 1: HUBER LOSS (delta={huber_delta})")
    print(f"TRAINING: Nutrition5k InceptionV3 + {fusion_type.upper()}")
    print("="*60)
    
    # Create datasets (same as before)
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model
    use_volume = 'volume' in fusion_type
    
    model = build_nutrition5k_model(
        fusion=fusion_type,
        pretrained=False,
        dropout_rate=DROPOUT_RATE,
        fusion_channels=FUSION_CHANNELS,
        use_volume=use_volume,
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # HUBER LOSS instead of MSE
    criterion = nn.HuberLoss(delta=huber_delta)
    print(f"Using Huber Loss with delta={huber_delta}")
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_huber{huber_delta}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'use_volume': use_volume,
        'loss_function': 'huber',
        'huber_delta': huber_delta,
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment 1 (Huber Loss) completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run Huber Loss experiment
print("Starting Huber Loss Experiment...")
huber_results = train_nutrition5k_huber_loss(fusion_type='image_volume', huber_delta=50.0)


Starting Huber Loss Experiment...
EXPERIMENT 1: HUBER LOSS (delta=50.0)
TRAINING: Nutrition5k InceptionV3 + IMAGE_VOLUME
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 21,916,833
Training samples: 2804
Validation samples: 495
Using Huber Loss with delta=50.0
Learning rate: 0.0005
Weight decay: 1e-06
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.24it/s, Loss=11141.7168]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.39it/s]


Train Loss: 10643.9618
Val Loss: 10846.3843
MAE: 240.63

Epoch 2/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=9204.0020] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.58it/s]


Train Loss: 10151.3070
Val Loss: 9631.6149
MAE: 215.63

Epoch 3/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=10808.7344]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 9478.4009
Val Loss: 9529.5171
MAE: 213.65

Epoch 4/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.42it/s, Loss=10174.6729]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 9230.1581
Val Loss: 9379.8687
MAE: 210.64

Epoch 5/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.57it/s, Loss=10531.4971]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 8900.4787
Val Loss: 9118.2562
MAE: 205.48

Epoch 6/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=7289.9009] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.44it/s]


Train Loss: 8332.7923
Val Loss: 8381.4564
MAE: 190.42

Epoch 7/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=7030.3945] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 7072.8830
Val Loss: 7461.7495
MAE: 171.75

Epoch 8/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=6245.6543]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.58it/s]


Train Loss: 4965.8345
Val Loss: 5192.3348
MAE: 124.99

Epoch 9/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=3681.1890]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]


Train Loss: 3773.1540
Val Loss: 3606.7987
MAE: 92.84

Epoch 10/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=2393.3779]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 3159.9073
Val Loss: 3098.9756
MAE: 82.67

Epoch 11/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=1396.4508]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 2947.5262
Val Loss: 3013.1390
MAE: 79.94

Epoch 12/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=2545.4194]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]


Train Loss: 2838.2407
Val Loss: 2768.4688
MAE: 74.87

Epoch 13/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=3589.2791]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 2793.4855
Val Loss: 3229.7859
MAE: 84.37

Epoch 14/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=1428.6729]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 2487.2035
Val Loss: 2538.7700
MAE: 69.70

Epoch 15/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.60it/s, Loss=2851.4927]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.72it/s]


Train Loss: 2608.0847
Val Loss: 3071.9181
MAE: 81.89

Epoch 16/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=1950.3076]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.68it/s]


Train Loss: 2385.1723
Val Loss: 2486.9775
MAE: 68.48

Epoch 17/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.58it/s, Loss=1615.8881]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 2496.5160
Val Loss: 2509.6280
MAE: 69.44

Epoch 18/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=2152.5427]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 2324.5128
Val Loss: 3304.7643
MAE: 86.08

Epoch 19/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=1859.8516]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.68it/s]


Train Loss: 2095.4323
Val Loss: 2286.5796
MAE: 64.52

Epoch 20/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=2066.9907]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.65it/s]


Train Loss: 2333.1052
Val Loss: 2695.4604
MAE: 74.18

Epoch 21/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=2261.9521]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 2179.2359
Val Loss: 2249.1090
MAE: 64.53

Epoch 22/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=2182.2490]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.65it/s]


Train Loss: 2240.3247
Val Loss: 2836.9266
MAE: 75.27

Epoch 23/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=1446.5403]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 2046.7043
Val Loss: 2138.6093
MAE: 62.00

Epoch 24/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.40it/s, Loss=3039.7432]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 2263.7368
Val Loss: 2653.1952
MAE: 73.37

Epoch 25/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=1897.3691]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 2049.3667
Val Loss: 2271.7062
MAE: 63.93

Epoch 26/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=1840.9064]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.75it/s]


Train Loss: 2214.9147
Val Loss: 3012.9296
MAE: 80.01

Epoch 27/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=2094.4614]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 1915.2849
Val Loss: 2072.4470
MAE: 60.07

Epoch 28/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=3338.6953]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.47it/s]


Train Loss: 2232.6491
Val Loss: 2616.8171
MAE: 71.86

Epoch 29/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.52it/s, Loss=2330.6382]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 1983.3744
Val Loss: 2062.5206
MAE: 59.64

Epoch 30/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=2038.4250]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.58it/s]


Train Loss: 2134.1820
Val Loss: 2796.7276
MAE: 75.07

Epoch 31/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.57it/s, Loss=996.3857] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 1812.5660
Val Loss: 2091.8806
MAE: 60.41

Epoch 32/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.50it/s, Loss=2217.8608]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.73it/s]


Train Loss: 1990.8698
Val Loss: 2499.2729
MAE: 70.51

Epoch 33/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=1992.0198]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 1941.7687
Val Loss: 2216.7550
MAE: 62.45

Epoch 34/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=2561.9055]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 1821.1843
Val Loss: 2275.5338
MAE: 64.55

Epoch 35/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=2569.1079]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 1839.2868
Val Loss: 2906.0326
MAE: 78.39

Epoch 36/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=2263.5874]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 1709.4974
Val Loss: 1982.4431
MAE: 57.70

Epoch 37/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=1369.8840]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 1733.9051
Val Loss: 2381.8116
MAE: 66.48

Epoch 38/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=1288.5786]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.47it/s]


Train Loss: 1811.4912
Val Loss: 2180.0294
MAE: 61.85

Epoch 39/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=2111.2681]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 1729.3645
Val Loss: 2212.7875
MAE: 62.56

Epoch 40/40


Training: 100%|██████████| 87/87 [00:13<00:00,  6.36it/s, Loss=1701.7386]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]

Train Loss: 1793.5782
Val Loss: 3240.4079
MAE: 84.43

Training completed!
Best validation loss: 1982.4431

Experiment 1 (Huber Loss) completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_image_volume_huber50.0_20251024_144307





In [11]:
# EXPERIMENT 2: DEEPER VOLUME HEAD
# Try more complex volume processing with deeper neural networks

# First, let's create an enhanced model with deeper volume head
import sys
sys.path.append('/data/projects/punim0478/setiawand/Computer-Vision-COMP90086/src')

# We need to modify the RegressionHeadWithVolume class
class DeepRegressionHeadWithVolume(nn.Module):
    """
    Enhanced regression head with deeper volume processing
    """
    
    def __init__(self, in_channels: int = 2048, dropout_rate: float = 0.4):
        super().__init__()
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Volume preprocessing network
        self.volume_transform = nn.Sequential(
            nn.Linear(1, 16),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(16, 8),
            nn.ReLU(inplace=True),
            nn.Linear(8, 4),
            nn.ReLU(inplace=True)
        )
        
        # Deeper FC layers for feature + volume fusion
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels + 4, 256),  # +4 for processed volume
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.7),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(64, 1)
        )
    
    def forward(self, features, volume):
        """
        Args:
            features: Feature maps from backbone (B, 2048, H, W)
            volume: Volume estimates (B, 1)
        
        Returns:
            Predicted calories (B, 1)
        """
        # Global average pooling
        x = self.avgpool(features)  # (B, 2048, 1, 1)
        x = torch.flatten(x, 1)  # (B, 2048)
        
        # Process volume through deeper network
        volume_processed = self.volume_transform(volume)  # (B, 4)
        
        # Concatenate processed volume
        x = torch.cat([x, volume_processed], dim=1)  # (B, 2052)
        
        # Predict calories
        x = self.fc_layers(x)  # (B, 1)
        
        return x

# Create a version of our model with the deeper volume head
class DeepVolumeNutrition5kModel(nn.Module):
    """
    Nutrition5k model with deeper volume processing
    """
    
    def __init__(
        self,
        fusion: str = 'image_volume',
        fusion_channels: int = 2048,
        dropout_rate: float = 0.4,
        pretrained: bool = False,
        use_volume: bool = True
    ):
        super().__init__()
        
        self.use_volume = use_volume
        self.fusion = fusion
        
        if fusion == 'image_volume':
            # Image+Volume variant with deep volume head
            from nutrition5k_inceptionv3_model import InceptionV3Encoder, VolumeEstimator
            
            self.rgb_encoder = InceptionV3Encoder(pretrained=pretrained, in_channels=3)
            self.volume_estimator = VolumeEstimator()
            self.regression_head = DeepRegressionHeadWithVolume(
                in_channels=self.rgb_encoder.out_channels,
                dropout_rate=dropout_rate
            )
        else:
            raise NotImplementedError(f"Deep volume head only implemented for image_volume fusion")
    
    def forward(self, rgb, depth):
        """
        Args:
            rgb: RGB images (B, 3, H, W)
            depth: Depth images (B, 1, H, W)
        
        Returns:
            calorie_pred: Predicted calories (B, 1)
        """
        # Calculate volume estimate
        volume = self.volume_estimator(depth)  # (B, 1)
        
        # Extract RGB features
        rgb_features = self.rgb_encoder(rgb)  # (B, 2048, H/32, W/32)
        
        # Predict with deep volume processing
        calorie_pred = self.regression_head(rgb_features, volume)
        
        return calorie_pred
    
    def get_num_parameters(self):
        """Get total number of trainable parameters"""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

print("Deep Volume Head model defined successfully!")
print("DeepRegressionHeadWithVolume: Processes volume through 4-layer network")
print("Main improvements: Volume → Linear(16) → Linear(8) → Linear(4) → Concat → Deep FC layers")


Deep Volume Head model defined successfully!
DeepRegressionHeadWithVolume: Processes volume through 4-layer network
Main improvements: Volume → Linear(16) → Linear(8) → Linear(4) → Concat → Deep FC layers


In [17]:
def train_nutrition5k_deep_volume(fusion_type='image_volume'):
    """
    Train the Nutrition5k model with deeper volume head processing
    """
    
    print("="*60)
    print("EXPERIMENT 2: DEEP VOLUME HEAD")
    print(f"TRAINING: Nutrition5k InceptionV3 + Deep Volume Processing")
    print("="*60)
    
    # Create datasets
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    # Build model with deep volume head
    model = DeepVolumeNutrition5kModel(
        fusion=fusion_type,
        fusion_channels=FUSION_CHANNELS,
        dropout_rate=DROPOUT_RATE,
        pretrained=False,
        use_volume=True
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    print("Deep Volume Processing: Volume → 16 → 8 → 4 → Concat with features")
    
    # Loss function (using MSE for comparison)
    criterion = nn.MSELoss()
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )
    
    print(f"Learning rate: {LEARNING_RATE}")
    print(f"Weight decay: {WEIGHT_DECAY}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)
    
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    
    # Create experiment directory
    exp_name = f"inceptionv3_{fusion_type}_deepvolume_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Save experiment configuration
    config = {
        'fusion': fusion_type,
        'use_volume': True,
        'volume_head': 'deep',
        'volume_processing': 'Volume -> Linear(16) -> Linear(8) -> Linear(4)',
        'pretrained': False,
        'dropout_rate': DROPOUT_RATE,
        'fusion_channels': FUSION_CHANNELS,
        'learning_rate': LEARNING_RATE,
        'weight_decay': WEIGHT_DECAY,
        'batch_size': BATCH_SIZE,
        'img_size': IMG_SIZE,
        'num_epochs': NUM_EPOCHS
    }
    
    with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE,
        scheduler_step_on_batch=False
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment 2 (Deep Volume Head) completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run Deep Volume Head experiment
print("Starting Deep Volume Head Experiment...")
deep_volume_results = train_nutrition5k_deep_volume(fusion_type='image_volume')


Starting Deep Volume Head Experiment...
EXPERIMENT 2: DEEP VOLUME HEAD
TRAINING: Nutrition5k InceptionV3 + Deep Volume Processing
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 22,352,557
Training samples: 2804
Validation samples: 495
Deep Volume Processing: Volume → 16 → 8 → 4 → Concat with features
Learning rate: 0.0003
Weight decay: 1e-06
Starting training for 45 epochs...

Epoch 1/45


Training: 100%|██████████| 87/87 [00:14<00:00,  6.21it/s, Loss=80366.6875] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.37it/s]


Train Loss: 99205.7511
Val Loss: 107435.8311
MAE: 240.67

Epoch 2/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=42767.6406] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 62462.6107
Val Loss: 26640.4976
MAE: 111.19

Epoch 3/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=19693.2227]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.44it/s]


Train Loss: 17812.8964
Val Loss: 18368.4536
MAE: 93.13

Epoch 4/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.42it/s, Loss=6996.1182] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.52it/s]


Train Loss: 14158.8240
Val Loss: 13956.9041
MAE: 84.27

Epoch 5/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=36037.9648]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.53it/s]


Train Loss: 17312.7492
Val Loss: 16704.5676
MAE: 89.29

Epoch 6/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=10720.8262]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 14583.0168
Val Loss: 19564.9041
MAE: 93.75

Epoch 7/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=10871.3320]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]


Train Loss: 12611.0272
Val Loss: 14169.0002
MAE: 80.21

Epoch 8/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.49it/s, Loss=9407.8428] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 15297.1831
Val Loss: 20645.2300
MAE: 97.14

Epoch 9/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=19601.9004]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 15043.8423
Val Loss: 17701.0334
MAE: 86.46

Epoch 10/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.43it/s, Loss=13169.5557]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 9831.7051
Val Loss: 10827.0071
MAE: 70.75

Epoch 11/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.31it/s, Loss=8325.3926] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]


Train Loss: 9250.7783
Val Loss: 11669.3208
MAE: 71.77

Epoch 12/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.33it/s, Loss=7180.2148] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.63it/s]


Train Loss: 9096.3642
Val Loss: 13502.6510
MAE: 75.82

Epoch 13/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=21630.8555]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.49it/s]


Train Loss: 11536.0411
Val Loss: 16316.4424
MAE: 88.31

Epoch 14/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=8692.0693] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 10891.4717
Val Loss: 13009.5298
MAE: 79.27

Epoch 15/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=6204.7344] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.34it/s]


Train Loss: 9798.0848
Val Loss: 11309.1967
MAE: 71.77

Epoch 16/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.40it/s, Loss=6890.5459] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 7931.4332
Val Loss: 9796.1152
MAE: 63.66

Epoch 17/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=4032.2761] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.50it/s]


Train Loss: 7630.1566
Val Loss: 10407.0497
MAE: 65.29

Epoch 18/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=3764.5483] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Train Loss: 5977.9989
Val Loss: 8238.3538
MAE: 58.84

Epoch 19/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=10124.8330]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 9647.7231
Val Loss: 25826.2465
MAE: 103.16

Epoch 20/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=7833.0483] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.44it/s]


Train Loss: 7892.4448
Val Loss: 9729.6092
MAE: 63.32

Epoch 21/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=8804.7959] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 6628.5629
Val Loss: 9416.9252
MAE: 62.84

Epoch 22/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=9127.1973] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.14it/s]


Train Loss: 6950.6162
Val Loss: 11039.9025
MAE: 66.52

Epoch 23/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=3264.8909] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.55it/s]


Train Loss: 6053.4310
Val Loss: 8304.7925
MAE: 57.59

Epoch 24/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=4391.6738] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.42it/s]


Train Loss: 9047.5831
Val Loss: 11573.5070
MAE: 72.88

Epoch 25/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=3230.2251] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 6986.5248
Val Loss: 8468.6087
MAE: 58.40

Epoch 26/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.53it/s, Loss=3686.1116] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 7756.2643
Val Loss: 10940.5167
MAE: 68.32

Epoch 27/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=5126.8564] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 6363.6007
Val Loss: 8172.4353
MAE: 59.49

Epoch 28/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=8081.7939] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 8297.1073
Val Loss: 11520.4989
MAE: 69.72

Epoch 29/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.65it/s, Loss=5672.3999] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 6875.8716
Val Loss: 8196.6327
MAE: 57.72

Epoch 30/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.59it/s, Loss=3094.8096] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]


Train Loss: 7093.3065
Val Loss: 11581.6989
MAE: 69.27

Epoch 31/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.35it/s, Loss=5388.1133] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 6088.3377
Val Loss: 8285.9584
MAE: 58.98

Epoch 32/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.37it/s, Loss=4404.3076] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.56it/s]


Train Loss: 7843.6039
Val Loss: 9856.5440
MAE: 65.92

Epoch 33/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.41it/s, Loss=7536.9897] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.69it/s]


Train Loss: 6096.0412
Val Loss: 7832.8084
MAE: 57.39

Epoch 34/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.32it/s, Loss=9356.6445] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 7243.3942
Val Loss: 15842.8336
MAE: 79.96

Epoch 35/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.47it/s, Loss=6985.7808] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]


Train Loss: 6834.2551
Val Loss: 9833.3923
MAE: 66.18

Epoch 36/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.38it/s, Loss=7727.5576] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.60it/s]


Train Loss: 6154.0865
Val Loss: 10103.0405
MAE: 62.84

Epoch 37/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.56it/s, Loss=3225.5237] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 4738.9881
Val Loss: 8308.3131
MAE: 57.40

Epoch 38/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=4491.5337] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 6319.1628
Val Loss: 19218.7817
MAE: 99.64

Epoch 39/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.55it/s, Loss=2597.2593] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.30it/s]


Train Loss: 5711.8545
Val Loss: 7717.9986
MAE: 57.72

Epoch 40/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.54it/s, Loss=13540.6094]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.68it/s]


Train Loss: 7226.7803
Val Loss: 13784.5108
MAE: 75.41

Epoch 41/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.61it/s, Loss=7317.3413] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.39it/s]


Train Loss: 6857.2523
Val Loss: 11458.6540
MAE: 69.23

Epoch 42/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=1913.9406] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.61it/s]


Train Loss: 5378.5506
Val Loss: 8467.2728
MAE: 57.27

Epoch 43/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=6208.5449] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.51it/s]


Train Loss: 5861.9913
Val Loss: 9621.0563
MAE: 63.83

Epoch 44/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.30it/s, Loss=5605.9072] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.48it/s]


Train Loss: 5118.7139
Val Loss: 7799.6044
MAE: 55.56

Epoch 45/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=7294.8413] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.59it/s]

Train Loss: 6115.0432
Val Loss: 8104.9309
MAE: 58.43

Training completed!
Best validation loss: 7717.9986

Experiment 2 (Deep Volume Head) completed! Results saved to: ../experiments/nutrition5k_experiments/inceptionv3_image_volume_deepvolume_20251024_152918





In [None]:
# EXPERIMENT 3: HYPERPARAMETER TUNING
# Grid search for optimal learning rates, dropout, and weight decay

def train_nutrition5k_hyperparam_tuning():
    """
    Run hyperparameter tuning experiments with different configurations
    """
    
    print("="*60)
    print("EXPERIMENT 3: HYPERPARAMETER TUNING")
    print("Testing different LR, Dropout, and Weight Decay combinations")
    print("="*60)
    
    # Hyperparameter grid
    hyperparam_configs = [
        # [learning_rate, dropout_rate, weight_decay, description]
        [1e-4, 0.3, 1e-5, "Lower LR + Lower Dropout + Higher WD"],
        [1e-4, 0.4, 1e-6, "Lower LR + Current Dropout + Current WD"],
        [1e-4, 0.5, 1e-7, "Lower LR + Higher Dropout + Lower WD"],
        [3e-4, 0.3, 1e-5, "Current LR + Lower Dropout + Higher WD"],
        [3e-4, 0.5, 1e-7, "Current LR + Higher Dropout + Lower WD"],
        [7e-4, 0.4, 1e-6, "Higher LR + Current Dropout + Current WD"],
    ]
    
    results = []
    
    for i, (lr, dropout, wd, desc) in enumerate(hyperparam_configs):
        print(f"\n{'='*50}")
        print(f"HYPERPARAMETER CONFIG {i+1}/{len(hyperparam_configs)}")
        print(f"Description: {desc}")
        print(f"Learning Rate: {lr}")
        print(f"Dropout Rate: {dropout}")
        print(f"Weight Decay: {wd}")
        print(f"{'='*50}")
        
        # Create datasets
        train_dataset = Nutrition5KDataset(
            csv_path=train_csv,
            data_root=DATA_ROOT,
            split='train',
            augment=False,
            img_size=IMG_SIZE,
        )
        
        val_dataset = Nutrition5KDataset(
            csv_path=val_csv,
            data_root=DATA_ROOT,
            split='val',
            augment=False,
            img_size=IMG_SIZE,
        )
        
        # Create data loaders
        train_loader = DataLoader(
            train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=NUM_WORKERS,
            pin_memory=True if torch.cuda.is_available() else False,
            drop_last=True
        )
        
        val_loader = DataLoader(
            val_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=NUM_WORKERS,
            pin_memory=True if torch.cuda.is_available() else False
        )
        
        # Build model
        model = build_nutrition5k_model(
            fusion='image_volume',
            pretrained=False,
            dropout_rate=dropout,
            fusion_channels=FUSION_CHANNELS,
            use_volume=True,
        )
        model = model.to(device)
        
        print(f"Model parameters: {model.get_num_parameters():,}")
        
        # Loss function
        criterion = nn.MSELoss()
        
        # Optimizer with tuned parameters
        optimizer = optim.AdamW(
            model.parameters(),
            lr=lr,
            weight_decay=wd
        )
        
        # Learning rate scheduler
        steps_per_epoch = len(train_loader)
        total_steps = NUM_EPOCHS * steps_per_epoch
        warmup_steps = int(total_steps * WARMUP_RATIO)
        
        scheduler = get_warmup_cosine_scheduler(
            optimizer, 
            warmup_steps=warmup_steps, 
            total_steps=total_steps,
            min_lr_ratio=MIN_LR_RATIO
        )
        
        # Create experiment directory
        exp_name = f"inceptionv3_hyperparam_lr{lr}_dr{dropout}_wd{wd}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        exp_dir = os.path.join(OUTPUT_DIR, 'nutrition5k_experiments', exp_name)
        os.makedirs(exp_dir, exist_ok=True)
        
        # Save experiment configuration
        config = {
            'experiment': 'hyperparameter_tuning',
            'config_index': i + 1,
            'description': desc,
            'fusion': 'image_volume',
            'use_volume': True,
            'pretrained': False,
            'dropout_rate': dropout,
            'fusion_channels': FUSION_CHANNELS,
            'learning_rate': lr,
            'weight_decay': wd,
            'batch_size': BATCH_SIZE,
            'img_size': IMG_SIZE,
            'num_epochs': NUM_EPOCHS
        }
        
        with open(os.path.join(exp_dir, 'config.json'), 'w') as f:
            json.dump(config, f, indent=4)
        
        # Create trainer
        trainer = Trainer(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            device=device,
            output_dir=exp_dir,
            early_stopping_patience=EARLY_STOPPING_PATIENCE,
            scheduler_step_on_batch=False
        )
        
        # Train the model
        trainer.train(NUM_EPOCHS)
        
        # Store results
        result = {
            'config_index': i + 1,
            'description': desc,
            'learning_rate': lr,
            'dropout_rate': dropout,
            'weight_decay': wd,
            'best_metrics': trainer.best_metrics,
            'experiment_dir': exp_dir
        }
        results.append(result)
        
        print(f"Config {i+1} completed! Best VAL MAE: {trainer.best_metrics.get('val_mae', 'N/A')}")
    
    # Print summary of all results
    print("\n" + "="*80)
    print("HYPERPARAMETER TUNING RESULTS SUMMARY")
    print("="*80)
    
    # Sort results by validation MAE
    results_sorted = sorted(results, key=lambda x: x['best_metrics'].get('val_mae', float('inf')))
    
    for i, result in enumerate(results_sorted):
        print(f"{i+1}. {result['description']}")
        print(f"   LR: {result['learning_rate']}, Dropout: {result['dropout_rate']}, WD: {result['weight_decay']}")
        print(f"   Best VAL MAE: {result['best_metrics'].get('val_mae', 'N/A')}")
        print(f"   Directory: {result['experiment_dir']}")
        print()
    
    print(f"BEST CONFIGURATION: {results_sorted[0]['description']}")
    print(f"Best VAL MAE: {results_sorted[0]['best_metrics'].get('val_mae', 'N/A')}")
    
    return results_sorted

# Run hyperparameter tuning
print("Starting Hyperparameter Tuning Experiment...")
print("This will train 6 different configurations - it will take a while!")
hyperparam_results = train_nutrition5k_hyperparam_tuning()


Starting Hyperparameter Tuning Experiment...
This will train 6 different configurations - it will take a while!
EXPERIMENT 3: HYPERPARAMETER TUNING
Testing different LR, Dropout, and Weight Decay combinations

HYPERPARAMETER CONFIG 1/6
Description: Lower LR + Lower Dropout + Higher WD
Learning Rate: 0.0001
Dropout Rate: 0.3
Weight Decay: 1e-05
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 21,916,833
Starting training for 45 epochs...

Epoch 1/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.44it/s, Loss=102409.4922]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]


Train Loss: 99224.4542
Val Loss: 107400.1443
MAE: 240.62

Epoch 2/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.45it/s, Loss=121750.7969]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.66it/s]


Train Loss: 96177.4820
Val Loss: 95536.2358
MAE: 225.45

Epoch 3/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.48it/s, Loss=125471.6641]
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.54it/s]


Train Loss: 91769.7808
Val Loss: 98484.9629
MAE: 229.50

Epoch 4/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.46it/s, Loss=57732.0547] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.46it/s]


Train Loss: 83916.4638
Val Loss: 84710.8484
MAE: 215.08

Epoch 5/45


Training: 100%|██████████| 87/87 [00:13<00:00,  6.51it/s, Loss=67963.1016] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  6.57it/s]

## 🧪 How to Run the Experiments

To run any of the experiments above, simply uncomment the appropriate line:

### Experiment 1: Huber Loss
```python
huber_results = train_nutrition5k_huber_loss(fusion_type='image_volume', huber_delta=50.0)
```
- **Expected improvement**: Better robustness to outliers
- **Runtime**: ~40 epochs x 13s = ~8.5 minutes

### Experiment 2: Deep Volume Head  
```python
deep_volume_results = train_nutrition5k_deep_volume(fusion_type='image_volume')
```
- **Expected improvement**: Better volume signal processing
- **Runtime**: Similar to baseline (~8.5 minutes)

### Experiment 3: Hyperparameter Tuning
```python  
hyperparam_results = train_nutrition5k_hyperparam_tuning()
```
- **Expected improvement**: Optimal LR/dropout/weight_decay combination
- **Runtime**: 6 configurations x 8.5 minutes = ~51 minutes

### 📊 Expected Results
Based on your current **MAE of 54.02**, these experiments should achieve:
- **Huber Loss**: 52-54 MAE (better stability)
- **Deep Volume**: 51-53 MAE (better volume processing)  
- **Hyperparameter Tuning**: 49-52 MAE (optimal settings)

### 🎯 Next Steps After Experiments
1. Compare results from all experiments
2. Use the best configuration for ensemble training
3. Apply test-time augmentation for final submission
