In [1]:
import os
import sys
import json
import numpy as np
from datetime import datetime
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import torchvision.transforms as T
import torchvision.transforms.functional as TF
import random

from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
from typing import Tuple, Optional

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seed for reproducibility
torch.manual_seed(789)
np.random.seed(789)
random.seed(789)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(789)


Using device: cuda


# 1. Model and Trainer

## 1.1 Model Definition

In [2]:
# Model Implementation - Dual-stream CNN with Middle Fusion

class SimpleCNN(nn.Module):
    """
    Simple 2-layer CNN encoder
    
    Based on the architecture in CNN.ipynb, converted to PyTorch
    """
    
    def __init__(self, input_channels=3, width1=32, width2=64, dropout_rate=0.25):
        """
        Args:
            input_channels: Number of input channels (3 for RGB, 1 for depth)
            width1: Number of filters in first conv layer
            width2: Number of filters in second conv layer
            dropout_rate: Dropout rate
        """
        super(SimpleCNN, self).__init__()
        
        # First convolutional block
        self.conv1 = nn.Conv2d(input_channels, width1, kernel_size=3, padding=0)
        self.bn1 = nn.BatchNorm2d(width1)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        # Second convolutional block
        self.conv2 = nn.Conv2d(width1, width2, kernel_size=3, padding=0)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn2 = nn.BatchNorm2d(width2)
        self.dropout2 = nn.Dropout(dropout_rate)
        
        self.out_channels = width2
    
    def forward(self, x):
        """
        Args:
            x: Input tensor (B, C, H, W) - expects normalized images [0, 1]
        
        Returns:
            Feature map (B, width2, H', W')
        """
        # First convolutional block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.bn1(x)
        x = self.dropout1(x)
        
        # Second convolutional block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.bn2(x)
        x = self.dropout2(x)
        
        return x


class MiddleFusionModule(nn.Module):
    """Middle fusion: Concatenate RGB and Depth features, then merge with 1x1 conv"""
    
    def __init__(self, rgb_channels: int = 64, depth_channels: int = 64, output_channels: int = 64):
        super().__init__()
        
        # 1x1 convolution to merge features
        self.fusion_conv = nn.Conv2d(
            rgb_channels + depth_channels,
            output_channels,
            kernel_size=1,
            bias=False
        )
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU(inplace=True)
    
    def forward(self, rgb_features, depth_features):
        """
        Args:
            rgb_features: (B, rgb_channels, H, W)
            depth_features: (B, depth_channels, H, W)
        Returns:
            Fused features: (B, output_channels, H, W)
        """
        # Concatenate along channel dimension
        fused = torch.cat([rgb_features, depth_features], dim=1)
        
        # Apply 1x1 conv to reduce channels
        fused = self.fusion_conv(fused)
        fused = self.bn(fused)
        fused = self.relu(fused)
        
        return fused


class RegressionHead(nn.Module):
    """Regression head for calorie prediction (matches ResNet experiments architecture)"""
    
    def __init__(self, in_channels: int = 64, dropout_rate: float = 0.4):
        super().__init__()
        
        # Adaptive average pooling to handle any feature map size
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        # FC layers matching ResNet experiments: in_channels -> 256 -> 128 -> 1
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 1)
        )
    
    def forward(self, x):
        x = self.avgpool(x)  # (B, C, 1, 1)
        x = self.fc_layers(x)  # (B, 1)
        return x


class DualStreamCNN(nn.Module):
    """
    Dual-stream CNN for calorie prediction using RGB and Depth images with middle fusion
    Architecture: Simple CNN encoders + Middle Fusion + Regression head
    """
    
    def __init__(self, width1=32, width2=64, dropout_rate=0.25):
        super().__init__()
        
        # RGB and Depth encoders
        self.rgb_encoder = SimpleCNN(input_channels=3, width1=width1, width2=width2, dropout_rate=dropout_rate)
        self.depth_encoder = SimpleCNN(input_channels=1, width1=width1, width2=width2, dropout_rate=dropout_rate)
        
        # Middle fusion module
        self.fusion = MiddleFusionModule(
            rgb_channels=width2,
            depth_channels=width2,
            output_channels=width2
        )
        
        # Regression head for calorie prediction
        self.regression_head = RegressionHead(
            in_channels=width2,
            dropout_rate=dropout_rate
        )
    
    def forward(self, rgb, depth):
        """
        Args:
            rgb: RGB images (B, 3, H, W)
            depth: Depth images (B, 1, H, W)
        
        Returns:
            calorie_pred: Predicted calories (B, 1)
        """
        # Extract features from both streams
        rgb_features = self.rgb_encoder(rgb)
        depth_features = self.depth_encoder(depth)
        
        # Fuse features
        fused_features = self.fusion(rgb_features, depth_features)
        
        # Predict calories
        calorie_pred = self.regression_head(fused_features)
        
        return calorie_pred
    
    def get_num_parameters(self):
        """Get total number of trainable parameters"""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)


def build_model(width1=32, width2=64, dropout_rate=0.25, **kwargs):
    """
    Factory function to build the dual-stream CNN model with middle fusion
    
    Args:
        width1: Number of filters in first conv layer
        width2: Number of filters in second conv layer
        dropout_rate: Dropout rate
    
    Returns:
        DualStreamCNN model
    """
    return DualStreamCNN(
        width1=width1,
        width2=width2,
        dropout_rate=dropout_rate
    )

## 1.2 Trainer Definition

In [3]:
import math
import math
def get_warmup_cosine_scheduler(optimizer, warmup_steps, total_steps, min_lr_ratio=0.0):
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        else:
            progress = float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
            return min_lr_ratio + (1.0 - min_lr_ratio) * 0.5 * (1.0 + math.cos(math.pi * progress))
    
    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

class EarlyStopping:
    """Early stopping to stop training when validation loss stops improving"""
    
    def __init__(self, patience: int = 10, min_delta: float = 0.0, mode: str = 'min'):
        """
        Args:
            patience: Number of epochs with no improvement after which training will be stopped
            min_delta: Minimum change to qualify as an improvement
            mode: 'min' or 'max' - whether lower or higher metric is better
        """
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_epoch = 0
        
    def __call__(self, score, epoch):
        if self.best_score is None:
            self.best_score = score
            self.best_epoch = epoch
            return False
        
        if self.mode == 'min':
            improved = score < (self.best_score - self.min_delta)
        else:
            improved = score > (self.best_score + self.min_delta)
        
        if improved:
            self.best_score = score
            self.best_epoch = epoch
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
                
        return self.early_stop

class Trainer:
    """Training manager for calorie prediction"""
    
    def __init__(
        self,
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        scheduler,
        device,
        output_dir,
        early_stopping_patience=15
    ):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device
        self.output_dir = output_dir
        
        # Early stopping
        self.early_stopping = EarlyStopping(
            patience=early_stopping_patience,
            min_delta=0.1,
            mode='min'
        )
        
        # Tensorboard
        self.writer = SummaryWriter(log_dir=os.path.join(output_dir, 'tensorboard'))
        
        # Tracking
        self.best_val_loss = float('inf')
        self.train_losses = []
        self.val_losses = []
        self.best_metrics = {}
    
    def train_epoch(self):
        """Train for one epoch"""
        self.model.train()
        total_loss = 0.0
        num_batches = 0
        
        pbar = tqdm(self.train_loader, desc="Training")
        for batch_idx, batch in enumerate(pbar):
            # Move to device (use both RGB and depth for fusion)
            rgb = batch['rgb'].to(self.device)
            depth = batch['depth'].to(self.device)
            calories = batch['calorie'].to(self.device)
            
            # Forward pass
            self.optimizer.zero_grad()
            calorie_pred = self.model(rgb, depth)
            
            # Compute loss (MSE for calorie prediction)
            loss = self.criterion(calorie_pred.squeeze(), calories)
            
            # Backward pass
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()
            
            # Track metrics
            total_loss += loss.item()
            num_batches += 1
            
            # Update progress bar
            pbar.set_postfix({'Loss': f'{loss.item():.4f}'})
        
        return total_loss / num_batches
    
    def validate_epoch(self):
        """Validate for one epoch"""
        self.model.eval()
        total_loss = 0.0
        all_predictions = []
        all_targets = []
        
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validation"):
                # Move to device (use both RGB and depth for fusion)
                rgb = batch['rgb'].to(self.device)
                depth = batch['depth'].to(self.device)
                calories = batch['calorie'].to(self.device)
                
                # Forward pass
                calorie_pred = self.model(rgb, depth)
                
                # Compute loss
                loss = self.criterion(calorie_pred.squeeze(), calories)
                total_loss += loss.item()
                
                # Store predictions and targets for metrics
                all_predictions.extend(calorie_pred.squeeze().cpu().numpy())
                all_targets.extend(calories.cpu().numpy())
        
        # Calculate metrics
        avg_loss = total_loss / len(self.val_loader)
        predictions = np.array(all_predictions)
        targets = np.array(all_targets)
        
        mae = np.mean(np.abs(predictions - targets))
        
        return avg_loss, mae
    
    def train(self, num_epochs):
        """Full training loop"""
        print(f"Starting training for {num_epochs} epochs...")
        
        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch+1}/{num_epochs}")
            
            # Train
            train_loss = self.train_epoch()
            
            # Validate
            val_loss, mae = self.validate_epoch()
            
            # Update learning rate
            if self.scheduler:
                if isinstance(self.scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                    self.scheduler.step(val_loss)
                else:
                    self.scheduler.step()
            
            # Log metrics
            self.writer.add_scalar('Loss/Train', train_loss, epoch)
            self.writer.add_scalar('Loss/Val', val_loss, epoch)
            self.writer.add_scalar('MAE', mae, epoch)
            
            # Save best model
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.best_metrics = {
                    'epoch': epoch + 1,
                    'val_loss': val_loss,
                    'mae': mae,
                }
                
                # Save model checkpoint
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'val_loss': val_loss,
                    'mae': mae,
                }, os.path.join(self.output_dir, 'best_model.pth'))
            
            # Print epoch results
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")
            print(f"MAE: {mae:.2f}")
            
            # Early stopping
            if self.early_stopping(val_loss, epoch):
                print(f"Early stopping triggered after {epoch+1} epochs")
                print(f"Best epoch: {self.early_stopping.best_epoch+1}")
                break
        
        self.writer.close()
        print(f"\nTraining completed!")
        print(f"Best validation loss: {self.best_val_loss:.4f}")

# 2. Dataset

## 2.1 Dataset Definition

In [None]:
# Dataset Implementation
class Nutrition5KDataset(Dataset):
    """
    Dataset class for Nutrition5K with multi-modal inputs (RGB + Depth)
    """
    
    def __init__(
        self,
        csv_path: str,
        data_root: str,
        split: str = 'train',
        augment: bool = True,
        img_size: int = 224,
    ):
        self.data_root = data_root
        self.split = split
        self.augment = augment
        self.img_size = img_size
        
        # Load CSV
        self.df = pd.read_csv(csv_path)
        if 'Value' in self.df.columns and 'calories' not in self.df.columns:
            self.df = self.df.rename(columns={'Value': 'calories'})
        if 'calories' not in self.df.columns:
            raise ValueError("CSV file must contain a 'calories' column or a 'Value' column that can be renamed")
        self.df = self.df[self.df['calories'] < 3000].reset_index(drop=True)
                
        self.color_dir = os.path.join(data_root, 'color')
        self.depth_raw_dir = os.path.join(data_root, 'depth_raw')
        
        self.valid_indices = self._validate_dataset()
        print(f"Loaded {len(self.valid_indices)} valid samples out of {len(self.df)}")
        
        # Color normalization (ImageNet stats as baseline)
        self.color_normalize = T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
        
    def _validate_dataset(self):
        """This method ensure that the code don't break when there are corrupted images."""
        valid_indices = []
        
        for idx in range(len(self.df)):
            dish_id = self.df.iloc[idx]['ID']
            
            rgb_path = os.path.join(self.color_dir, dish_id, 'rgb.png')
            depth_path = os.path.join(self.depth_raw_dir, dish_id, 'depth_raw.png')
            
            # Check if files exist
            if not os.path.exists(rgb_path):
                continue
            if not os.path.exists(depth_path):
                continue
            
            # Try to load images to check for corruption
            try:
                with Image.open(rgb_path) as img:
                    img.verify()
                with Image.open(depth_path) as img:
                    img.verify()
                valid_indices.append(idx)
            except Exception as e:
                continue
                
        return valid_indices
    
    def __len__(self):
        return len(self.valid_indices)
    
    def _load_image_safe(self, path: str, mode: str = 'RGB') -> Optional[Image.Image]:
        """Safely load an image with error handling"""
        try:
            with Image.open(path) as img:
                return img.convert(mode).copy()
        except Exception as e:
            return None
    
    def _apply_augmentation(self, rgb_img, depth_img):
        """Apply geometric augmentation only (no color changes)"""
        if not self.augment:
            return rgb_img, depth_img
        
        # Convert to tensors first
        rgb_tensor = TF.to_tensor(rgb_img)
        depth_tensor = TF.to_tensor(depth_img)
        
        # Random horizontal flip
        if random.random() > 0.5:
            rgb_tensor = TF.hflip(rgb_tensor)
            depth_tensor = TF.hflip(depth_tensor)
        
        # Random rotation (±15 degrees)
        if random.random() > 0.5:
            angle = random.uniform(-15, 15)
            rgb_tensor = TF.rotate(rgb_tensor, angle)
            depth_tensor = TF.rotate(depth_tensor, angle)
        
        # Convert back to PIL
        rgb_img = TF.to_pil_image(rgb_tensor)
        depth_img = TF.to_pil_image(depth_tensor)
        
        return rgb_img, depth_img
    
    def _resize_and_center_crop(self, img, target_size: int = 256):
        """
        Resize and center crop image to target_size x target_size
        Matches the preprocessing in the Nutrition5k paper
        
        Args:
            img: PIL Image
            target_size: Target size (default 256x256 as per paper)
        
        Returns:
            Cropped PIL Image
        """
        # Get original dimensions
        width, height = img.size
        
        # Resize so the shorter side is target_size
        if width < height:
            new_width = target_size
            new_height = int(target_size * height / width)
        else:
            new_height = target_size
            new_width = int(target_size * width / height)
        
        img = img.resize((new_width, new_height), Image.LANCZOS)
        
        # Center crop to target_size x target_size
        left = (new_width - target_size) // 2
        top = (new_height - target_size) // 2
        right = left + target_size
        bottom = top + target_size
        
        img = img.crop((left, top, right, bottom))
        
        return img
    
    def __getitem__(self, idx):
        """Get a single sample"""
        actual_idx = self.valid_indices[idx]
        row = self.df.iloc[actual_idx]
        
        dish_id = row['ID']
        calorie = float(row['calories'])
        
        # Load images
        rgb_path = os.path.join(self.color_dir, dish_id, 'rgb.png')
        depth_path = os.path.join(self.depth_raw_dir, dish_id, 'depth_raw.png')
        
        rgb_img = self._load_image_safe(rgb_path, 'RGB')
        depth_img = self._load_image_safe(depth_path, 'L')  # Grayscale for depth
        
        # Fallback: return a black image
        if rgb_img is None or depth_img is None:
            rgb_img = Image.new('RGB', (self.img_size, self.img_size), (0, 0, 0))
            depth_img = Image.new('L', (self.img_size, self.img_size), 0)
        
        # Apply augmentation
        rgb_img, depth_img = self._apply_augmentation(rgb_img, depth_img)
        
        # Resize and center crop to match paper preprocessing (256x256)
        rgb_img = self._resize_and_center_crop(rgb_img, target_size=self.img_size)
        depth_img = self._resize_and_center_crop(depth_img, target_size=self.img_size)
        
        # Convert to tensors
        rgb_tensor = TF.to_tensor(rgb_img)  # (3, H, W)
        depth_tensor = TF.to_tensor(depth_img)  # (1, H, W)
        
        # Normalize RGB
        rgb_tensor = self.color_normalize(rgb_tensor)
        
        # Normalize depth (0-1 range, assuming depth is already in reasonable range)
        depth_tensor = depth_tensor / 255.0
        
        return {
            'dish_id': dish_id,
            'rgb': rgb_tensor,
            'depth': depth_tensor,
            'calorie': torch.tensor(calorie, dtype=torch.float32)
        }


def create_train_val_split(csv_path: str, val_ratio: float = 0.15, random_seed: int = 42):
    """
    Create train/validation split CSV files
    """
    # Read original CSV
    df = pd.read_csv(csv_path)    
    
    # Shuffle with fixed seed
    df_shuffled = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
    
    # Split
    val_size = int(len(df_shuffled) * val_ratio)
    train_df = df_shuffled[val_size:]
    val_df = df_shuffled[:val_size]
    
    # Save temporary CSV files
    base_dir = os.path.dirname(csv_path)
    train_csv = os.path.join(base_dir, 'train_split.csv')
    val_csv = os.path.join(base_dir, 'val_split.csv')
    
    train_df.to_csv(train_csv, index=False)
    val_df.to_csv(val_csv, index=False)
    
    return train_csv, val_csv

## 2.2 Dataset Loading

In [5]:
# Configuration - Update these paths to match your setup
DATA_ROOT = './Nutrition5K/Nutrition5K/train'  # Path to training data directory
CSV_PATH = './Nutrition5K/Nutrition5K/nutrition5k_train.csv'  # Path to training CSV
OUTPUT_DIR = './experiments'  # Directory to save experiment results

# Global training hyperparameters (matching CNN.ipynb architecture)
BATCH_SIZE = 32
NUM_EPOCHS = 40
VAL_RATIO = 0.15
IMG_SIZE = 128  # Changed to match CNN.ipynb
NUM_WORKERS = 4
DROPOUT_RATE = 0.25  # Match CNN.ipynb
LEARNING_RATE = 0.001
EARLY_STOPPING_PATIENCE = 5

print("Configuration:")
print(f"  Data root: {DATA_ROOT}")
print(f"  CSV path: {CSV_PATH}")
print(f"  Output directory: {OUTPUT_DIR}")
print(f"  Batch size: {BATCH_SIZE}")
print(f"  Number of epochs: {NUM_EPOCHS}")
print(f"  Image size: {IMG_SIZE}")
print(f"  Dropout rate: {DROPOUT_RATE}")
print(f"  Learning rate: {LEARNING_RATE}")
print(f"  Workers: {NUM_WORKERS}")

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)


Configuration:
  Data root: ./Nutrition5K/Nutrition5K/train
  CSV path: ./Nutrition5K/Nutrition5K/nutrition5k_train.csv
  Output directory: ./experiments
  Batch size: 32
  Number of epochs: 40
  Image size: 128
  Dropout rate: 0.25
  Learning rate: 0.001
  Workers: 4


In [6]:
# Create train/validation split
print("Creating train/validation split...")
train_csv, val_csv = create_train_val_split(
    CSV_PATH,
    val_ratio=VAL_RATIO,
    random_seed=42
)

print(f"Train CSV: {train_csv}")
print(f"Validation CSV: {val_csv}")

# Load a sample to check data
sample_dataset = Nutrition5KDataset(
    csv_path=train_csv,
    data_root=DATA_ROOT,
    split='train',
    augment=False,  # No augmentation for checking
    img_size=IMG_SIZE,
)

print(f"\nDataset loaded successfully!")
print(f"Training samples: {len(sample_dataset)}")
print(f"RGB shape: {sample_dataset[0]['rgb'].shape}")
print(f"Calorie value: {sample_dataset[0]['calorie']}")


Creating train/validation split...
Train CSV: ./Nutrition5K/Nutrition5K/train_split.csv
Validation CSV: ./Nutrition5K/Nutrition5K/val_split.csv
Loaded 2804 valid samples out of 2805

Dataset loaded successfully!
Training samples: 2804
RGB shape: torch.Size([3, 128, 128])
Calorie value: 88.54999542236328


# 3. Experiments

## 3.1 Baseline - No Augmentation

In [7]:
### Define Hyperparameteres
DROPOUT_RATE = 0.3
LEARNING_RATE = 2e-2
WEIGHT_DECAY = 1e-6
EARLY_STOPPING_PATIENCE = 7
WARMUP_RATIO = 0.1
MIN_LR_RATIO = 0.05

In [8]:
# Configuration for Simple CNN baseline
def train_simple_cnn():
    """Train simple 2-layer CNN for calorie prediction (matching CNN.ipynb architecture)"""
    
    print("="*60)
    print("TRAINING: Simple 2-Layer CNN (Baseline)")
    print("="*60)
    
    # Create datasets (no augmentation for baseline)
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=False,  # No augmentation for baseline
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,  # Never augment validation
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    model = build_model(
        width1=32,
        width2=64,
        dropout_rate=DROPOUT_RATE
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    # Loss function (MSE for calorie prediction)
    criterion = nn.MSELoss()
    
    learning_rate = LEARNING_RATE
    weight_decay = WEIGHT_DECAY
    
    optimizer = optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )
    
    print(f"Learning rate: {learning_rate}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)

    # Learning rate scheduler: Warmup + Linear Decay
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    # Create experiment directory
    exp_name = f"baseline_simple_cnn_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run the experiment
simple_cnn_results = train_simple_cnn()

TRAINING: Simple 2-Layer CNN (Baseline)
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 96,577
Training samples: 2804
Validation samples: 495
Learning rate: 0.02
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.69it/s, Loss=108093.2422]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.63it/s]


Train Loss: 98864.8370
Val Loss: 107355.1853
MAE: 240.52

Epoch 2/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.79it/s, Loss=69902.6562] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.86it/s]


Train Loss: 99038.0821
Val Loss: 107041.9934
MAE: 239.99

Epoch 3/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.78it/s, Loss=85103.7344] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.70it/s]


Train Loss: 97338.5387
Val Loss: 103530.2859
MAE: 234.57

Epoch 4/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.80it/s, Loss=76401.5781] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.79it/s]


Train Loss: 88918.2979
Val Loss: 91305.2539
MAE: 215.52

Epoch 5/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.74it/s, Loss=28038.8555] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.63it/s]


Train Loss: 67648.6363
Val Loss: 61558.1018
MAE: 171.46

Epoch 6/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.71it/s, Loss=41210.7539]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.63it/s]


Train Loss: 38201.2138
Val Loss: 31379.9893
MAE: 125.35

Epoch 7/40


Training: 100%|██████████| 87/87 [00:10<00:00,  7.92it/s, Loss=29742.9531]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.87it/s]


Train Loss: 24311.5955
Val Loss: 35111.7456
MAE: 128.30

Epoch 8/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.77it/s, Loss=16946.3359]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.78it/s]


Train Loss: 22953.1951
Val Loss: 22969.6235
MAE: 117.02

Epoch 9/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.81it/s, Loss=9095.9141] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.88it/s]


Train Loss: 21169.2973
Val Loss: 37002.9525
MAE: 129.90

Epoch 10/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.82it/s, Loss=14162.1094]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.70it/s]


Train Loss: 20939.9783
Val Loss: 25076.7343
MAE: 126.04

Epoch 11/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.87it/s, Loss=34422.9297]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.68it/s]


Train Loss: 18997.1927
Val Loss: 25395.6118
MAE: 110.69

Epoch 12/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.70it/s, Loss=17347.0664]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.62it/s]


Train Loss: 19351.6856
Val Loss: 24114.6584
MAE: 107.19

Epoch 13/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.82it/s, Loss=16328.1729]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.89it/s]


Train Loss: 18469.6020
Val Loss: 24396.3007
MAE: 114.10

Epoch 14/40


Training: 100%|██████████| 87/87 [00:10<00:00,  7.92it/s, Loss=17174.9844]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.87it/s]


Train Loss: 19699.8777
Val Loss: 24575.5194
MAE: 118.80

Epoch 15/40


Training: 100%|██████████| 87/87 [00:11<00:00,  7.86it/s, Loss=14422.1514]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.68it/s]

Train Loss: 19199.9997
Val Loss: 24363.2964
MAE: 115.57
Early stopping triggered after 15 epochs
Best epoch: 8

Training completed!
Best validation loss: 22969.6235

Experiment completed! Results saved to: ./experiments/baseline_simple_cnn_20251025_224927





## 3.1 Baseline - Augmentation

In [9]:
# Configuration for Simple CNN baseline
def train_simple_cnn():
    """Train simple 2-layer CNN for calorie prediction (matching CNN.ipynb architecture)"""
    
    print("="*60)
    print("TRAINING: Simple 2-Layer CNN (Baseline)")
    print("="*60)
    
    # Create datasets (no augmentation for baseline)
    train_dataset = Nutrition5KDataset(
        csv_path=train_csv,
        data_root=DATA_ROOT,
        split='train',
        augment=True,  # No augmentation for baseline
        img_size=IMG_SIZE,
    )
    
    val_dataset = Nutrition5KDataset(
        csv_path=val_csv,
        data_root=DATA_ROOT,
        split='val',
        augment=False,  # Never augment validation
        img_size=IMG_SIZE,
    )
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True if torch.cuda.is_available() else False
    )
    
    model = build_model(
        width1=32,
        width2=64,
        dropout_rate=DROPOUT_RATE
    )
    model = model.to(device)
    
    print(f"Model parameters: {model.get_num_parameters():,}")
    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    
    criterion = nn.MSELoss()
    
    learning_rate = LEARNING_RATE
    weight_decay = WEIGHT_DECAY
    
    optimizer = optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )
    
    print(f"Learning rate: {learning_rate}")
    
    # Learning rate scheduler
    steps_per_epoch = len(train_loader)
    total_steps = NUM_EPOCHS * steps_per_epoch
    warmup_steps = int(total_steps * WARMUP_RATIO)

    # Learning rate scheduler: Warmup + Linear Decay
    scheduler = get_warmup_cosine_scheduler(
        optimizer, 
        warmup_steps=warmup_steps, 
        total_steps=total_steps,
        min_lr_ratio=MIN_LR_RATIO
    )
    # Create experiment directory
    exp_name = f"baseline_simple_cnn_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    exp_dir = os.path.join(OUTPUT_DIR, exp_name)
    os.makedirs(exp_dir, exist_ok=True)
    
    # Create trainer
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        device=device,
        output_dir=exp_dir,
        early_stopping_patience=EARLY_STOPPING_PATIENCE
    )
    
    # Train the model
    trainer.train(NUM_EPOCHS)
    
    print(f"\nExperiment completed! Results saved to: {exp_dir}")
    return trainer.best_metrics

# Run the experiment
simple_cnn_results = train_simple_cnn()

TRAINING: Simple 2-Layer CNN (Baseline)
Loaded 2804 valid samples out of 2805
Loaded 495 valid samples out of 495
Model parameters: 96,577
Training samples: 2804
Validation samples: 495
Learning rate: 0.02
Starting training for 40 epochs...

Epoch 1/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.31it/s, Loss=152621.5625]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.66it/s]


Train Loss: 98987.3481
Val Loss: 107362.6895
MAE: 240.53

Epoch 2/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.25it/s, Loss=102144.8828]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.77it/s]


Train Loss: 98487.1005
Val Loss: 106996.3491
MAE: 239.92

Epoch 3/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.25it/s, Loss=94819.3750] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.66it/s]


Train Loss: 96606.0635
Val Loss: 103642.0425
MAE: 234.85

Epoch 4/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.28it/s, Loss=124572.1406]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.83it/s]


Train Loss: 88913.4773
Val Loss: 90574.2195
MAE: 214.43

Epoch 5/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.12it/s, Loss=21435.7871] 
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.67it/s]


Train Loss: 66065.7899
Val Loss: 60459.3843
MAE: 170.51

Epoch 6/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.14it/s, Loss=20445.1836]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.91it/s]


Train Loss: 36037.4168
Val Loss: 34607.9143
MAE: 129.54

Epoch 7/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.26it/s, Loss=25664.3711]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.73it/s]


Train Loss: 24656.3940
Val Loss: 27873.2155
MAE: 117.64

Epoch 8/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.25it/s, Loss=20396.9473]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.56it/s]


Train Loss: 22242.6033
Val Loss: 24023.6109
MAE: 110.84

Epoch 9/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.25it/s, Loss=10939.0234]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.77it/s]


Train Loss: 20808.6422
Val Loss: 28673.0519
MAE: 116.72

Epoch 10/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.26it/s, Loss=18166.1328]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.77it/s]


Train Loss: 19530.7289
Val Loss: 21901.0102
MAE: 109.43

Epoch 11/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.24it/s, Loss=28875.2598]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.74it/s]


Train Loss: 19255.6032
Val Loss: 29375.0599
MAE: 121.47

Epoch 12/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.19it/s, Loss=19532.2031]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.79it/s]


Train Loss: 19012.9062
Val Loss: 24327.9830
MAE: 107.79

Epoch 13/40


Training: 100%|██████████| 87/87 [00:17<00:00,  5.10it/s, Loss=19986.4277]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.93it/s]


Train Loss: 18139.0050
Val Loss: 27254.1710
MAE: 112.20

Epoch 14/40


Training: 100%|██████████| 87/87 [00:17<00:00,  5.12it/s, Loss=17413.3535]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.77it/s]


Train Loss: 17981.1191
Val Loss: 110377.8042
MAE: 310.07

Epoch 15/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.24it/s, Loss=24426.4199]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.73it/s]


Train Loss: 18253.1599
Val Loss: 35338.6985
MAE: 127.01

Epoch 16/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.23it/s, Loss=23556.9941]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.70it/s]


Train Loss: 17829.3046
Val Loss: 19898.2485
MAE: 104.10

Epoch 17/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.27it/s, Loss=16335.4248]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.72it/s]


Train Loss: 18050.1026
Val Loss: 20530.7458
MAE: 99.48

Epoch 18/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.26it/s, Loss=22480.8574]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.78it/s]


Train Loss: 18220.5709
Val Loss: 8734556.9062
MAE: 2949.48

Epoch 19/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.27it/s, Loss=19522.8848]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.77it/s]


Train Loss: 16990.2763
Val Loss: 672721.3125
MAE: 803.39

Epoch 20/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.23it/s, Loss=26279.8477]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.82it/s]


Train Loss: 17453.1802
Val Loss: 125554.6660
MAE: 333.12

Epoch 21/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.16it/s, Loss=12423.6445]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.89it/s]


Train Loss: 16656.5246
Val Loss: 36625.5835
MAE: 133.98

Epoch 22/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.24it/s, Loss=13079.5986]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.80it/s]


Train Loss: 16675.4579
Val Loss: 1967012.3203
MAE: 1395.40

Epoch 23/40


Training: 100%|██████████| 87/87 [00:16<00:00,  5.24it/s, Loss=12739.0791]
Validation: 100%|██████████| 16/16 [00:02<00:00,  7.66it/s]

Train Loss: 15551.5802
Val Loss: 25948.6469
MAE: 120.35
Early stopping triggered after 23 epochs
Best epoch: 16

Training completed!
Best validation loss: 19898.2485

Experiment completed! Results saved to: ./experiments/baseline_simple_cnn_20251025_225247



