# Training Nutrition5K Calorie Predictor - ResNet-34

This notebook allows you to quickly train and experiment with different model architectures.

**Quick Start:** Just run all cells to train ResNet-34 with the best configuration found so far!


## 1. Setup


In [None]:
import sys
import os

# Add src to path
sys.path.insert(0, '../src')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import json
from datetime import datetime

# Import our modules
from dataset import Nutrition5KDataset, create_train_val_split
from model import build_model, list_available_components
from train import MultiTaskLoss, EarlyStopping, Trainer

# Report the runtime environment once the imports above have succeeded.
cuda_ready = torch.cuda.is_available()

print("✓ Imports successful!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Only the first visible CUDA device (index 0) is reported.
    print(f"GPU: {torch.cuda.get_device_name(0)}")


In [None]:
# ============================================================================
# MODEL ARCHITECTURE - Change these to experiment!
# ============================================================================
ENCODER = 'resnet34'          # Options: 'resnet18', 'resnet34', 'resnet50'
FUSION = 'middle'             # Options: 'middle', 'middle_attention', 'additive'
REGRESSION_HEAD = 'standard'  # Options: 'standard', 'deep'
SEGMENTATION_HEAD = 'standard' # Options: 'standard', 'light'

# ============================================================================
# HYPERPARAMETERS - Best config from grid search
# ============================================================================
LEARNING_RATE = 0.0005
DROPOUT = 0.4
WEIGHT_DECAY = 0.0001
BATCH_SIZE = 64
CALORIE_WEIGHT = 1.0   # weight of the calorie term in the multi-task loss
SEG_WEIGHT = 0.5       # weight of the segmentation term in the multi-task loss
FUSION_CHANNELS = 512

# ============================================================================
# TRAINING SETTINGS
# ============================================================================
NUM_EPOCHS = 50
EARLY_STOPPING_PATIENCE = 15
VAL_RATIO = 0.15
NUM_WORKERS = 4
IMG_SIZE = 224

# ============================================================================
# REPRODUCIBILITY
# ============================================================================
# Seed the global NumPy and PyTorch generators so that weight initialisation,
# DataLoader shuffling and any NumPy/PyTorch-based randomness repeat on a
# fresh "Restart Kernel -> Run All".
# NOTE(review): this does not force deterministic cuDNN kernels, and code that
# draws from Python's `random` module is not covered by these two seeds.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# ============================================================================
# DATA PATHS
# ============================================================================
DATA_ROOT = '../Nutrition5K/Nutrition5K/train'
CSV_PATH = '../Nutrition5K/Nutrition5K/nutrition5k_train.csv'

# ============================================================================
# OUTPUT
# ============================================================================
# The timestamp makes every execution of this cell write to a new directory.
EXPERIMENT_NAME = f"{ENCODER}_{FUSION}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
OUTPUT_DIR = f'../outputs/notebook_{EXPERIMENT_NAME}'

# Device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Configuration:")
print(f"  Architecture: {ENCODER} + {FUSION} fusion")
print(f"  Heads: {REGRESSION_HEAD} (reg), {SEGMENTATION_HEAD} (seg)")
print(f"  Learning rate: {LEARNING_RATE}")
print(f"  Batch size: {BATCH_SIZE}")
print(f"  Seed: {SEED}")
print(f"  Device: {DEVICE}")
print(f"  Output: {OUTPUT_DIR}")


## 3. Show Available Components

See what architectures you can try!


In [None]:
# Helper imported from the project's `model` module; it is called here without
# arguments. Its result (if any) is the cell's last expression, so the notebook
# will display it.
# NOTE(review): presumably lists the valid ENCODER / FUSION / head names — confirm in model.py.
list_available_components()


## 4. Load Data


In [None]:
# Split the labelled CSV into a training part and a validation part.
# The seed is passed explicitly (presumably so the split is identical on every run).
print("Creating train/val split...")
train_csv, val_csv = create_train_val_split(CSV_PATH, val_ratio=VAL_RATIO, random_seed=42)

print("\nLoading datasets...")
# Both datasets read from the same data root at the same image size; they differ
# only in the CSV they index, the split name and whether augmentation is enabled.
shared_dataset_args = dict(data_root=DATA_ROOT, img_size=IMG_SIZE)

train_dataset = Nutrition5KDataset(
    csv_path=train_csv, split='train', augment=True, **shared_dataset_args
)
val_dataset = Nutrition5KDataset(
    csv_path=val_csv, split='val', augment=False, **shared_dataset_args
)

print(f"\n✓ Training samples: {len(train_dataset)}")
print(f"✓ Validation samples: {len(val_dataset)}")


### Visualize a Sample


In [None]:
# Fetch the first item of the training dataset (built with augment=True).
sample = train_dataset[0]

# Undo the per-channel normalisation so the RGB image can be displayed.
# NOTE(review): these look like the standard ImageNet statistics — confirm they
# match the normalisation applied inside Nutrition5KDataset.
channel_mean = np.array([0.485, 0.456, 0.406])
channel_std = np.array([0.229, 0.224, 0.225])
rgb = np.transpose(sample['rgb'].numpy(), (1, 2, 0))  # channels-first -> channels-last
rgb = np.clip(rgb * channel_std + channel_mean, 0, 1)

# One panel per modality: (title, image, colormap). `None` keeps matplotlib's default.
panels = [
    ('RGB Image', rgb, None),
    ('Depth Image', sample['depth'].numpy()[0], 'viridis'),
    ('Segmentation Mask', sample['mask'].numpy()[0], 'gray'),
]

fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))
for ax, (panel_title, image, colormap) in zip(axes, panels):
    ax.imshow(image, cmap=colormap)
    ax.set_title(panel_title)
    ax.axis('off')

plt.suptitle(f"Dish: {sample['dish_id']} | Calories: {sample['calorie'].item():.0f} kcal", fontsize=14)
plt.tight_layout()
plt.show()


## 5. Create Data Loaders


In [None]:
# Page-locked host memory is requested only when a CUDA device is available.
use_pinned_memory = torch.cuda.is_available()

# Settings shared by the training and validation loaders.
common_loader_args = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
)

# Training: reshuffle every epoch and drop the final incomplete batch.
train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **common_loader_args)

# Validation: fixed order, every sample kept.
val_loader = DataLoader(val_dataset, shuffle=False, **common_loader_args)

print(f"✓ Train batches: {len(train_loader)}")
print(f"✓ Val batches: {len(val_loader)}")


## 6. Build Model


In [None]:
print(f"Building {ENCODER} model...")

# Assemble the network from the components selected in the configuration cell.
model = build_model(
    encoder=ENCODER,
    fusion=FUSION,
    regression_head=REGRESSION_HEAD,
    segmentation_head=SEGMENTATION_HEAD,
    pretrained=False,
    dropout_rate=DROPOUT,
    fusion_channels=FUSION_CHANNELS
)

model = model.to(DEVICE)

print(f"\n✓ Model created!")
print(f"  Total parameters: {model.get_num_parameters():,}")
print(f"  Configuration: {model.get_config()}")

# Smoke-test the forward pass on random RGB (3-channel) and depth (1-channel) inputs.
# The check runs in eval mode under no_grad so that it neither updates
# BatchNorm running statistics with random noise nor builds an autograd graph.
# The module's previous train/eval mode is restored afterwards.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        rgb_test = torch.randn(2, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)
        depth_test = torch.randn(2, 1, IMG_SIZE, IMG_SIZE).to(DEVICE)
        cal_pred, seg_pred = model(rgb_test, depth_test)
finally:
    model.train(was_training)

print(f"\n✓ Forward pass test successful!")
print(f"  Calorie output: {cal_pred.shape}")
print(f"  Segmentation output: {seg_pred.shape}")


## 7. Setup Training Components


In [None]:
# Scheduler settings are named once so the scheduler, the saved config and the
# printed summary cannot drift apart.
scheduler_factor = 0.5
scheduler_patience = 5

# Loss function: weighted combination of the calorie and segmentation terms.
criterion = MultiTaskLoss(
    calorie_weight=CALORIE_WEIGHT,
    seg_weight=SEG_WEIGHT
)

# Optimizer
optimizer = optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

# Learning rate scheduler: multiply the LR by `scheduler_factor` after
# `scheduler_patience` epochs without improvement of the monitored (minimised) metric.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=scheduler_factor,
    patience=scheduler_patience
)

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Save configuration. Besides the original keys, this records the settings
# needed to rebuild the model and reproduce the data pipeline and schedule
# (fusion channels, image size, validation ratio, scheduler settings).
config = {
    'encoder': ENCODER,
    'fusion': FUSION,
    'regression_head': REGRESSION_HEAD,
    'segmentation_head': SEGMENTATION_HEAD,
    'learning_rate': LEARNING_RATE,
    'dropout': DROPOUT,
    'weight_decay': WEIGHT_DECAY,
    'batch_size': BATCH_SIZE,
    'calorie_weight': CALORIE_WEIGHT,
    'seg_weight': SEG_WEIGHT,
    'num_epochs': NUM_EPOCHS,
    'early_stopping_patience': EARLY_STOPPING_PATIENCE,
    'fusion_channels': FUSION_CHANNELS,
    'img_size': IMG_SIZE,
    'val_ratio': VAL_RATIO,
    'scheduler_factor': scheduler_factor,
    'scheduler_patience': scheduler_patience,
}

with open(os.path.join(OUTPUT_DIR, 'config.json'), 'w') as f:
    json.dump(config, f, indent=4)

print("✓ Training components ready!")
print(f"  Loss: Multi-task (calorie={CALORIE_WEIGHT}, seg={SEG_WEIGHT})")
print(f"  Optimizer: AdamW (lr={LEARNING_RATE}, wd={WEIGHT_DECAY})")
print(f"  Scheduler: ReduceLROnPlateau (patience={scheduler_patience})")


## 8. Train the Model

**This will take some time!** A progress bar will show the training status.


In [None]:
# Horizontal rule reused by the start and finish banners.
rule = "=" * 80

# Wire the model, data, loss, optimiser and scheduler into the project's Trainer.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=DEVICE,
    output_dir=OUTPUT_DIR,
    early_stopping_patience=EARLY_STOPPING_PATIENCE,
)

print("\n" + rule)
print("STARTING TRAINING")
print(rule)

# Run for at most NUM_EPOCHS epochs.
# NOTE(review): early stopping is presumably applied inside Trainer — confirm in train.py.
trainer.train(NUM_EPOCHS)

print("\n" + rule)
print("TRAINING COMPLETE!")
print(rule)
print(f"Best validation loss: {trainer.best_val_loss:.4f}")
print(f"Model saved to: {OUTPUT_DIR}/best_model.pth")


## 9. Training Summary

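View TensorBoard for detailed curves. Point `--logdir` at the parent output directory: TensorBoard scans it recursively, whereas a `notebook_*` glob inside `--logdir=...` is not expanded by the shell.
```bash
tensorboard --logdir=../outputs
```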


In [None]:
# Collect the headline facts about this run, then print them between two rules.
rule = "=" * 60
summary_lines = [
    f"Architecture: {ENCODER} + {FUSION}",
    f"Total Parameters: {model.get_num_parameters():,}",
    f"Best Validation Loss: {trainer.best_val_loss:.4f}",
    f"Training Samples: {len(train_dataset)}",
    f"Validation Samples: {len(val_dataset)}",
    f"Output Directory: {OUTPUT_DIR}",
]

print("\nTraining Summary:")
print(rule)
for line in summary_lines:
    print(line)
print(rule)

# Report the checkpoint only if it is actually present on disk.
best_model_path = os.path.join(OUTPUT_DIR, 'best_model.pth')
if os.path.exists(best_model_path):
    print(f"\n✓ Best model saved at: {best_model_path}")
    # Size is reported in decimal megabytes (bytes / 1e6).
    print(f"  File size: {os.path.getsize(best_model_path) / 1e6:.1f} MB")


## Next Steps

1. **Try different architectures**: Change `ENCODER`, `FUSION`, etc. in the configuration cell (the one starting with "MODEL ARCHITECTURE") and rerun from there
2. **View TensorBoard**: Run `tensorboard --logdir=../outputs` (run directories underneath are discovered recursively)
3. **Generate test predictions**: Use `test_inference.py` with your best model
4. **Compare models**: Use `compare_architectures.py`

### Quick Experiments:
```python
# Try ResNet-50
ENCODER = 'resnet50'

# Try attention fusion
FUSION = 'middle_attention'

# Try deep head
REGRESSION_HEAD = 'deep'
```
