Restarted base (Python 3.12.7)

In [7]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# %% [markdown]
# # Food Nutrition Prediction Model
# This notebook implements a CNN-based model for predicting nutritional values from food images.

 ## 1. Imports and Configuration

In [8]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import multiprocessing
from scipy import stats
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor

# Select the 'spawn' start method for any worker processes created later.
# force=True replaces a start method that was already chosen, so re-running
# this cell does not raise RuntimeError.
if __name__ == "__main__":
    multiprocessing.set_start_method(method="spawn", force=True)

 ## 2. Configuration and Hyperparameters

In [9]:
# --- Hardware / data-loading settings ---------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Use roughly 80% of the logical CPUs for data loading, but never fewer than
# one worker. The previous expression used min(..., 1), which capped the value
# at 1 (and produced 0 on a single-core machine). os.cpu_count() may return
# None, hence the `or 1` fallback.
# NOTE(review): if the dataset keeps CUDA tensors in a cache, consider passing
# num_workers=0 to the DataLoader instead of this value - confirm at call site.
NUM_WORKERS = max(1, int((os.cpu_count() or 1) * 0.8))
PIN_MEMORY = torch.cuda.is_available()

if torch.cuda.is_available():
    # Let cuDNN benchmark convolution algorithms for the observed input sizes.
    torch.backends.cudnn.benchmark = True

# --- Dataset locations --------------------------------------------------------
# The dataset root can be overridden with the NUTRITION5K_DIR environment
# variable; the default is the original hard-coded location.
LOCAL_BASE_DIR = os.environ.get(
    "NUTRITION5K_DIR", "/Data/aa/FoodCNN/datasets/nutrition5k"
)
IMAGERY_DIR_LOCAL_FULL = os.path.join(LOCAL_BASE_DIR, "imagery/realsense_overhead")
METADATA_FILE_CAFE1 = os.path.join(LOCAL_BASE_DIR, "metadata/dish_metadata_cafe1.csv")
METADATA_FILE_CAFE2 = os.path.join(LOCAL_BASE_DIR, "metadata/dish_metadata_cafe2.csv")
RGB_IMAGE_FILENAME = "rgb.png"

# --- Training hyperparameters -------------------------------------------------
BATCH_SIZE = 32
LEARNING_RATE = 1e-2
NUM_EPOCHS = 50
TARGET_COLUMNS = ["calories", "weight", "fat", "carbs", "protein"]
RANDOM_STATE = 42
N_SPLITS = 5  # For cross-validation
CONFIDENCE_LEVEL = 0.95  # Confidence level of the bootstrap intervals

 ## 3. Data Loading and Preprocessing

In [10]:
def parse_nutrition_csv(file_path):
    """Parse a dish metadata CSV into a dish table and an ingredient table.

    Each line whose first field starts with ``dish_`` is read as:
    ``dish_id, calories, weight, fat, carbs, protein`` followed by zero or
    more 7-field ingredient groups
    ``ingr_id, name, amount, calories, fat, carbs, protein``.
    Lines that do not start with ``dish_`` are skipped. Ingredient parsing for
    a dish stops at the first group whose id does not start with ``ingr_``;
    an incomplete trailing group is ignored.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        ``(dish_df, ingredient_df)``. Both frames always carry their full
        column set, even when the file contains no matching rows, so callers
        can index columns without hitting ``KeyError``.

    Raises:
        ValueError: If a numeric field cannot be converted to float.
        IndexError: If a dish line has fewer than six fields.
    """
    dish_columns = ["dish_id", "calories", "weight", "fat", "carbs", "protein"]
    ingredient_columns = [
        "dish_id",
        "ingredient_id",
        "ingredient_name",
        "amount",
        "calories",
        "fat",
        "carbs",
        "protein",
    ]
    fields_per_ingredient = 7

    dishes = []
    ingredients_list = []

    with open(file_path, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if not parts[0].startswith("dish_"):
                continue

            dish_id = parts[0]
            dishes.append(
                {
                    "dish_id": dish_id,
                    "calories": float(parts[1]),
                    "weight": float(parts[2]),
                    "fat": float(parts[3]),
                    "carbs": float(parts[4]),
                    "protein": float(parts[5]),
                }
            )

            ingredient_data = parts[6:]
            # Only start a group when all seven of its fields are present.
            last_start = len(ingredient_data) - (fields_per_ingredient - 1)
            for start in range(0, last_start, fields_per_ingredient):
                group = ingredient_data[start:start + fields_per_ingredient]
                if not group[0].startswith("ingr_"):
                    break

                ingredients_list.append(
                    {
                        "dish_id": dish_id,
                        "ingredient_id": group[0],
                        "ingredient_name": group[1],
                        "amount": float(group[2]),
                        "calories": float(group[3]),
                        "fat": float(group[4]),
                        "carbs": float(group[5]),
                        "protein": float(group[6]),
                    }
                )

    # Passing the column lists fixes the schema even for empty inputs.
    dish_df = pd.DataFrame(dishes, columns=dish_columns)
    ingredient_df = pd.DataFrame(ingredients_list, columns=ingredient_columns)

    return dish_df, ingredient_df

class NutritionDataset(Dataset):
    """Dataset yielding ``(image_tensor, label_tensor)`` pairs for dishes.

    Images are read from ``<imagery_dir>/<dish_id>/<rgb_filename>``. When
    ``gpu_cache`` is requested and CUDA is available, every image is loaded,
    transformed and moved to ``DEVICE`` once at construction time; later
    lookups return a clone of the cached tensor.

    Note: the cache stores the *output* of ``transform``, so with caching
    enabled the transform runs once per image rather than once per access.

    Args:
        dish_ids: Sequence of dish identifiers (sub-directory names).
        labels: Sequence of per-dish target values, aligned with ``dish_ids``.
        imagery_dir: Root directory containing one folder per dish.
        rgb_filename: Image file name inside each dish folder.
        transform: Optional callable applied to the PIL image. When omitted,
            ``transforms.ToTensor()`` is used so a tensor is always returned.
        gpu_cache: Request caching of transformed images on ``DEVICE``.
    """

    def __init__(
        self,
        dish_ids,
        labels,
        imagery_dir,
        rgb_filename,
        transform=None,
        gpu_cache=False,
    ):
        self.dish_ids = dish_ids
        self.labels = labels
        self.imagery_dir = imagery_dir
        self.rgb_filename = rgb_filename
        self.transform = transform
        # Caching is silently disabled when no CUDA device is present.
        self.gpu_cache = gpu_cache and torch.cuda.is_available()

        self.image_cache = {}

        if self.gpu_cache:
            print(f"Caching {len(self.dish_ids)} images to GPU VRAM...")
            for dish_id in tqdm(self.dish_ids, desc="Caching images to GPU VRAM"):
                self._load_and_cache_image(dish_id)
            print("Image caching to GPU VRAM completed")

    def __len__(self):
        """Return the number of dishes in the dataset."""
        return len(self.dish_ids)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for the dish at ``idx``."""
        dish_id = self.dish_ids[idx]

        if self.gpu_cache:
            # Populate the cache on a miss instead of transforming twice.
            if dish_id not in self.image_cache:
                self._load_and_cache_image(dish_id)
            image_tensor = self.image_cache[dish_id].clone()
        else:
            image_tensor = self._to_tensor(self._load_and_cache_image(dish_id))

        label_tensor = torch.tensor(self.labels[idx], dtype=torch.float32)

        return image_tensor, label_tensor

    def _to_tensor(self, image):
        """Apply ``self.transform``, falling back to ``ToTensor`` when unset."""
        if self.transform:
            return self.transform(image)
        return transforms.ToTensor()(image)

    def _load_and_cache_image(self, dish_id):
        """Load the RGB image for ``dish_id`` and cache it when caching is on.

        A missing file is reported and replaced by a black 224x224 image so
        that iteration can continue. Other errors propagate.

        Returns:
            The loaded (or placeholder) PIL image, untransformed.
        """
        img_path = os.path.join(self.imagery_dir, dish_id, self.rgb_filename)
        try:
            # The context manager closes the file handle; convert() returns a
            # fully loaded image that stays valid after the file is closed.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
        except FileNotFoundError:
            print(f"ERROR: Image not found at {img_path} for dish_id {dish_id}")
            image = Image.new("RGB", (224, 224), color=(0, 0, 0))

        if self.gpu_cache:
            self.image_cache[dish_id] = self._to_tensor(image).to(
                DEVICE, non_blocking=True
            )

        return image

def check_memory_for_caching(num_images, image_size=224):
    """Decide whether ``num_images`` float32 RGB images fit in GPU memory.

    The cache size is estimated as ``num_images * image_size**2 * 3`` values of
    4 bytes each. Caching is considered viable when that estimate is below 70%
    of the memory currently available on CUDA device 0.

    Available memory is taken from ``torch.cuda.mem_get_info`` (device-wide
    free memory, so usage by other processes is accounted for) plus memory
    that PyTorch has reserved but not allocated. If ``mem_get_info`` is not
    provided by the installed PyTorch, the previous estimate
    ``total - allocated`` is used.

    Args:
        num_images: Number of images that would be cached.
        image_size: Side length in pixels of the (square) cached images.

    Returns:
        True when caching looks viable; False when it does not, when CUDA is
        unavailable, or when the check itself raises.
    """
    bytes_per_mb = 1024 * 1024
    safety_fraction = 0.7  # keep headroom for model weights and activations

    try:
        gpu_caching_viable = False
        if torch.cuda.is_available():
            tensor_size_mb = num_images * image_size * image_size * 3 * 4 / bytes_per_mb

            allocated_bytes = torch.cuda.memory_allocated(0)
            if hasattr(torch.cuda, "mem_get_info"):
                device_free_bytes, _total_bytes = torch.cuda.mem_get_info(0)
                # Reserved-but-unallocated blocks can be reused by PyTorch.
                reusable_bytes = torch.cuda.memory_reserved(0) - allocated_bytes
                free_bytes = device_free_bytes + max(reusable_bytes, 0)
            else:
                total_bytes = torch.cuda.get_device_properties(0).total_memory
                free_bytes = total_bytes - allocated_bytes

            free_gpu_memory_mb = free_bytes / bytes_per_mb
            usable_gpu_memory_mb = free_gpu_memory_mb * safety_fraction

            gpu_caching_viable = usable_gpu_memory_mb > tensor_size_mb

            print(f"Available GPU memory: {free_gpu_memory_mb:.2f} MB")
            print(f"Estimated GPU cache size: {tensor_size_mb:.2f} MB")

        if gpu_caching_viable:
            print("✓ Sufficient GPU memory available, enabling GPU VRAM caching")
            return True

        print("✗ Insufficient GPU memory for image caching")
        return False

    except Exception as e:
        # Best effort: any failure while probing the GPU disables caching.
        print(f"Error checking memory: {e}")
        return False

 ## 4. Model Architecture Definitions

In [11]:
class BaselineCNN(nn.Module):
    """Small convolutional regressor.

    Four Conv-BatchNorm-ReLU-MaxPool stages (3 -> 32 -> 64 -> 128 -> 256
    channels) are followed by global average pooling and a three-layer fully
    connected head with dropout that emits ``num_outputs`` values per image.
    """

    def __init__(self, num_outputs):
        super().__init__()
        self.name = "Baseline CNN"

        # Channel count entering/leaving each convolutional stage.
        channel_plan = [3, 32, 64, 128, 256]

        conv_stack = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            conv_stack.extend(
                [
                    nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                    nn.MaxPool2d(kernel_size=2, stride=2),
                ]
            )
        # Collapse the spatial dimensions to 1x1 regardless of input size.
        conv_stack.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_outputs),
        ]
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_outputs)`` tensor."""
        return self.fc_layers(self.features(x))
    
class SimpleBaseline(nn.Module):
    """Parameter-free baseline that outputs the same stored values for every input."""

    def __init__(self, num_outputs, mean_values):
        super().__init__()
        self.name = "Mean Baseline"
        # A buffer follows the module across devices but is not a trainable parameter.
        stored_means = torch.tensor(mean_values, dtype=torch.float32)
        self.register_buffer('mean_values', stored_means)

    def forward(self, x):
        """Return one copy of the stored values per sample in the batch ``x``."""
        n_rows = x.size(0)
        return self.mean_values[None, :].repeat(n_rows, 1)

 ## 5. Training and Evaluation Framework

In [12]:
class ModelTrainer:
    """Train, evaluate and compare regression models on shared data loaders.

    Every model handed to ``train_and_evaluate`` must expose a ``name``
    attribute; its results are stored in ``self.models_results[name]``.

    Stored result keys per model: ``model``, ``train_losses``, ``val_losses``,
    ``val_metrics``, ``test_metrics``, ``val_predictions``,
    ``test_predictions``, ``val_ground_truth``, ``test_ground_truth``,
    ``mae_values`` (per-target validation MAE) and ``overall_mae`` (their
    mean). The last two feed ``_compare_mae`` and ``_rank_models``; they are
    computed on the validation split so that model ranking does not use the
    test set.
    """

    def __init__(
        self,
        train_loader,
        val_loader,
        test_loader,  # Added test loader
        device,
        learning_rate=LEARNING_RATE,
        num_epochs=NUM_EPOCHS,
        patience=5,
    ):
        """Store loaders and training settings.

        Args:
            train_loader: Loader iterated for optimisation steps.
            val_loader: Loader used for validation loss and validation metrics.
            test_loader: Loader used for the final test metrics.
            device: Device that models and batches are moved to.
            learning_rate: Learning rate passed to Adam.
            num_epochs: Maximum number of training epochs.
            patience: Epochs without validation improvement before stopping.
        """
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.models_results = {}
        self.patience = patience

    @staticmethod
    def _is_trainable(model):
        """Return True when ``model`` has at least one parameter requiring gradients."""
        return any(param.requires_grad for param in model.parameters())

    def compute_comprehensive_metrics(self, predictions, ground_truth):
        """Compute per-target MAE, RMSE, R2, MAPE and a bootstrap CI for the MAE.

        Args:
            predictions: 2-D array, one column per entry of ``TARGET_COLUMNS``.
            ground_truth: Array of the same layout holding the true values.

        Returns:
            Dict mapping each target name to a dict with keys ``mae``,
            ``rmse``, ``r2``, ``mape``, ``mae_ci_lower``, ``mae_ci_upper``.
            ``mape`` is computed over samples whose true value is non-zero
            (NaN when there is none); dividing by a tiny epsilon for zero
            targets would otherwise dominate the mean.
        """
        n_samples = len(predictions)
        n_bootstrap = 1000
        alpha = 1 - CONFIDENCE_LEVEL
        metrics = {}

        for i, target in enumerate(TARGET_COLUMNS):
            pred_col = predictions[:, i]
            true_col = ground_truth[:, i]
            abs_errors = np.abs(true_col - pred_col)

            # Basic metrics
            mae = mean_absolute_error(true_col, pred_col)
            rmse = np.sqrt(mean_squared_error(true_col, pred_col))
            r2 = r2_score(true_col, pred_col)

            # Mean Absolute Percentage Error over non-zero targets only
            nonzero = np.abs(true_col) > 1e-8
            if nonzero.any():
                mape = np.mean(abs_errors[nonzero] / np.abs(true_col[nonzero])) * 100
            else:
                mape = float("nan")

            # Bootstrap the MAE by resampling the per-sample absolute errors
            # with replacement.
            bootstrap_maes = [
                abs_errors[np.random.choice(n_samples, n_samples, replace=True)].mean()
                for _ in range(n_bootstrap)
            ]
            mae_ci_lower = np.percentile(bootstrap_maes, (alpha / 2) * 100)
            mae_ci_upper = np.percentile(bootstrap_maes, (1 - alpha / 2) * 100)

            metrics[target] = {
                'mae': mae,
                'rmse': rmse,
                'r2': r2,
                'mape': mape,
                'mae_ci_lower': mae_ci_lower,
                'mae_ci_upper': mae_ci_upper
            }

        return metrics

    def statistical_significance_test(self, model1_preds, model2_preds, ground_truth):
        """Compare two models' absolute errors per target with a paired t-test.

        The effect size is the mean paired difference (model 1 minus model 2)
        divided by the root of the average of the two error variances.

        Returns:
            Dict mapping each target to ``t_statistic``, ``p_value``,
            ``cohens_d``, ``significant`` (p < 0.05) and ``mean_diff``.
        """
        results = {}

        for i, target in enumerate(TARGET_COLUMNS):
            true_col = ground_truth[:, i]

            # Compute absolute errors for both models
            errors1 = np.abs(model1_preds[:, i] - true_col)
            errors2 = np.abs(model2_preds[:, i] - true_col)

            # Paired t-test on absolute errors
            t_stat, p_value = stats.ttest_rel(errors1, errors2)

            # Effect size (Cohen's d)
            diff = errors1 - errors2
            pooled_std = np.sqrt((np.var(errors1, ddof=1) + np.var(errors2, ddof=1)) / 2)
            cohens_d = np.mean(diff) / pooled_std if pooled_std > 0 else 0

            results[target] = {
                't_statistic': t_stat,
                'p_value': p_value,
                'cohens_d': cohens_d,
                'significant': p_value < 0.05,
                'mean_diff': np.mean(diff)
            }

        return results

    def train_and_evaluate(self, model):
        """Train ``model`` (if it has trainable parameters) and evaluate it.

        Models without trainable parameters are evaluated as-is. Whether to
        train is decided from the parameters rather than from the model name,
        because a name check on 'Baseline' also matches "Baseline CNN".

        Returns:
            The result dict stored under ``self.models_results[model.name]``.
        """
        model_name = model.name
        print(f"\n{'='*50}")
        print(f"Training {model_name}")
        print(f"{'='*50}\n")

        model = model.to(self.device)
        trainable = self._is_trainable(model)

        if trainable:
            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=self.learning_rate)
            # `verbose` is deprecated in recent PyTorch; learning-rate changes
            # are reported by _train_model instead.
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode="min", factor=0.5, patience=3
            )
            train_losses, val_losses = self._train_model(
                model, criterion, optimizer, scheduler
            )
        else:
            print(f"{model_name} has no trainable parameters; skipping training.")
            train_losses, val_losses = [], []

        # Evaluate on validation set
        val_predictions, val_ground_truth, val_mae_values = self._evaluate_model(
            model, self.val_loader
        )
        val_metrics = self.compute_comprehensive_metrics(val_predictions, val_ground_truth)

        # Evaluate on test set
        test_predictions, test_ground_truth, _ = self._evaluate_model(model, self.test_loader)
        test_metrics = self.compute_comprehensive_metrics(test_predictions, test_ground_truth)

        self.models_results[model_name] = {
            "model": model,
            "train_losses": train_losses,
            "val_losses": val_losses,
            "val_metrics": val_metrics,
            "test_metrics": test_metrics,
            "val_predictions": val_predictions,
            "test_predictions": test_predictions,
            "val_ground_truth": val_ground_truth,
            "test_ground_truth": test_ground_truth,
            # Consumed by _compare_mae / _rank_models.
            "mae_values": val_mae_values,
            "overall_mae": float(np.mean(val_mae_values)),
        }

        self._display_comprehensive_results(model_name, val_metrics, test_metrics)
        if trainable:
            self._save_model(model, model_name)

        return self.models_results[model_name]

    def train_multiple_models(self, models):
        """Run ``train_and_evaluate`` on each model and return all results."""
        print(f"\n{'='*50}")
        print(f"Training {len(models)} model architectures")
        print(f"{'='*50}")

        model_pbar = tqdm(
            models, desc="Training models", leave=True, dynamic_ncols=True
        )
        for model in model_pbar:
            model_pbar.set_description(f"Training {model.name}")
            self.train_and_evaluate(model)

        return self.models_results

    def _run_epoch(self, model, data_loader, criterion, desc, optimizer=None):
        """Run one pass over ``data_loader`` and return the mean per-sample loss.

        With an optimizer the model is put in train mode and updated after
        every batch; without one the pass runs in eval mode with gradients
        disabled. The mean is taken over the samples actually seen.
        """
        training = optimizer is not None
        model.train(training)

        total_loss = 0.0
        samples_seen = 0
        pbar = tqdm(data_loader, desc=desc, leave=False, dynamic_ncols=True)

        with torch.set_grad_enabled(training):
            for inputs, labels in pbar:
                inputs = inputs.to(self.device, non_blocking=PIN_MEMORY)
                labels = labels.to(self.device, non_blocking=PIN_MEMORY)

                if training:
                    optimizer.zero_grad(set_to_none=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                if training:
                    loss.backward()
                    optimizer.step()

                batch_size = inputs.size(0)
                loss_value = loss.item()
                total_loss += loss_value * batch_size
                samples_seen += batch_size
                pbar.set_postfix({"loss": f"{loss_value:.4f}"})

        return total_loss / max(samples_seen, 1)

    def _train_model(self, model, criterion, optimizer, scheduler):
        """Train with early stopping and leave the best weights in ``model``.

        After each epoch the validation loss drives the scheduler and the
        early-stopping counter. Whenever the validation loss improves, the
        weights are checkpointed to disk and kept in memory; when training
        ends, the best weights are loaded back so that later evaluation uses
        the best epoch rather than the last one.

        Returns:
            ``(train_losses, val_losses)``, one entry per completed epoch.
        """
        best_val_loss = float("inf")
        best_state = None
        train_losses = []
        val_losses = []
        no_improve_count = 0

        for epoch in range(self.num_epochs):
            train_loss = self._run_epoch(
                model,
                self.train_loader,
                criterion,
                desc=f"Epoch {epoch+1}/{self.num_epochs} [Train]",
                optimizer=optimizer,
            )
            train_losses.append(train_loss)

            val_loss = self._run_epoch(
                model,
                self.val_loader,
                criterion,
                desc=f"Epoch {epoch+1}/{self.num_epochs} [Valid]",
            )
            val_losses.append(val_loss)

            lr_before = optimizer.param_groups[0]["lr"]
            scheduler.step(val_loss)
            lr_after = optimizer.param_groups[0]["lr"]
            if lr_after < lr_before:
                print(f"Learning rate reduced: {lr_before:.2e} -> {lr_after:.2e}")

            if epoch > 0:
                train_improve = train_losses[epoch - 1] - train_loss
                val_improve = val_losses[epoch - 1] - val_loss
                train_indicator = (
                    f"↓ {train_improve:.4f}"
                    if train_improve > 0
                    else f"↑ {-train_improve:.4f}"
                )
                val_indicator = (
                    f"↓ {val_improve:.4f}"
                    if val_improve > 0
                    else f"↑ {-val_improve:.4f}"
                )
            else:
                train_indicator = "---"
                val_indicator = "---"

            is_best = val_loss < best_val_loss

            if is_best:
                best_val_loss = val_loss
                # Clone so that later optimiser steps do not overwrite the snapshot.
                best_state = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }
                self._save_model(model, f"{model.name}_best")
                no_improve_count = 0
                best_indicator = "✓ BEST"
            else:
                no_improve_count += 1
                best_indicator = ""

            early_stop_msg = (
                f"| Early stop: {no_improve_count}/{self.patience}"
                if no_improve_count > 0
                else ""
            )

            print(
                f"Epoch {epoch+1}/{self.num_epochs} | Train Loss: {train_loss:.4f} ({train_indicator}) | Val Loss: {val_loss:.4f} ({val_indicator}) {best_indicator} {early_stop_msg}"
            )

            if no_improve_count >= self.patience:
                print(
                    f"Early stopping triggered after {epoch+1} epochs. No improvement for {self.patience} epochs."
                )
                break

        if best_state is not None:
            model.load_state_dict(best_state)

        return train_losses, val_losses

    def _evaluate_model(self, model, data_loader):
        """Run ``model`` over ``data_loader`` in eval mode.

        Returns:
            ``(predictions, ground_truth, mae_values)`` where the first two are
            the stacked batch outputs/targets as NumPy arrays and
            ``mae_values`` is the mean absolute error per output column.
        """
        model.eval()
        all_predictions = []
        all_ground_truth = []

        eval_pbar = tqdm(
            data_loader, desc="Evaluating", leave=False, dynamic_ncols=True
        )
        with torch.no_grad():
            for inputs, targets in eval_pbar:
                inputs = inputs.to(self.device, non_blocking=PIN_MEMORY)
                targets = targets.to(self.device, non_blocking=PIN_MEMORY)

                outputs = model(inputs)

                all_predictions.append(outputs.cpu().numpy())
                all_ground_truth.append(targets.cpu().numpy())

        all_predictions = np.vstack(all_predictions)
        all_ground_truth = np.vstack(all_ground_truth)
        mae_values = np.mean(np.abs(all_predictions - all_ground_truth), axis=0)

        return all_predictions, all_ground_truth, mae_values

    def _save_model(self, model, model_name):
        """Write ``model.state_dict()`` to ``<sanitised model_name>.pth``.

        The file is created in the current working directory. Characters other
        than letters, digits, '-' and '_' are replaced by '_' and the name is
        lower-cased.

        NOTE(review): this method was called by the class but not defined in
        it; the file naming here is a choice, confirm it matches expectations.

        Returns:
            The path the weights were written to.
        """
        safe_name = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in model_name
        ).lower()
        checkpoint_path = f"{safe_name}.pth"
        torch.save(model.state_dict(), checkpoint_path)
        return checkpoint_path

    def _display_comprehensive_results(self, model_name, val_metrics, test_metrics):
        """Print a per-target metrics table and overall summary for one model."""
        print(f"\n{'='*60}")
        print(f"Results for {model_name}")
        print(f"{'='*60}")

        # Create comprehensive results table
        results_data = []
        for target in TARGET_COLUMNS:
            val_m = val_metrics[target]
            test_m = test_metrics[target]

            results_data.append({
                'Target': target,
                'Val_MAE': f"{val_m['mae']:.3f}",
                'Val_MAE_CI': f"[{val_m['mae_ci_lower']:.3f}, {val_m['mae_ci_upper']:.3f}]",
                'Test_MAE': f"{test_m['mae']:.3f}",
                'Test_MAE_CI': f"[{test_m['mae_ci_lower']:.3f}, {test_m['mae_ci_upper']:.3f}]",
                'Test_RMSE': f"{test_m['rmse']:.3f}",
                'Test_R2': f"{test_m['r2']:.3f}",
                'Test_MAPE': f"{test_m['mape']:.1f}%"
            })

        results_df = pd.DataFrame(results_data)
        print("\nDetailed Metrics:")
        print(results_df.to_string(index=False))

        # Summary statistics
        val_mae_overall = np.mean([val_metrics[t]['mae'] for t in TARGET_COLUMNS])
        test_mae_overall = np.mean([test_metrics[t]['mae'] for t in TARGET_COLUMNS])
        test_r2_overall = np.mean([test_metrics[t]['r2'] for t in TARGET_COLUMNS])

        print(f"\nSummary:")
        print(f"Validation MAE (overall): {val_mae_overall:.3f}")
        print(f"Test MAE (overall): {test_mae_overall:.3f}")
        print(f"Test R² (overall): {test_r2_overall:.3f}")

    def compare_models_statistically(self):
        """Print pairwise significance tests on the test set for all stored models.

        The ground truth of the first model in each pair is used for both,
        which assumes every model was evaluated on the same samples in the
        same order.
        """
        model_names = list(self.models_results.keys())

        if len(model_names) < 2:
            print("Need at least 2 models for comparison.")
            return

        print(f"\n{'='*60}")
        print("Statistical Significance Tests (Test Set)")
        print(f"{'='*60}")

        for i in range(len(model_names)):
            for j in range(i+1, len(model_names)):
                model1 = model_names[i]
                model2 = model_names[j]

                preds1 = self.models_results[model1]['test_predictions']
                preds2 = self.models_results[model2]['test_predictions']
                ground_truth = self.models_results[model1]['test_ground_truth']

                sig_results = self.statistical_significance_test(preds1, preds2, ground_truth)

                print(f"\n{model1} vs {model2}:")
                for target in TARGET_COLUMNS:
                    result = sig_results[target]
                    significance = "***" if result['p_value'] < 0.001 else "**" if result['p_value'] < 0.01 else "*" if result['p_value'] < 0.05 else "ns"
                    print(f"  {target}: p={result['p_value']:.4f} {significance}, d={result['cohens_d']:.3f}, Δ={result['mean_diff']:.3f}")

    def compare_models(self):
        """Plot loss curves, the per-target MAE comparison and the model ranking."""
        if not self.models_results:
            print("No trained models to compare.")
            return
        self._compare_losses()
        self._compare_mae()
        self._rank_models()

    def _compare_losses(self):
        """Plot training and validation loss curves; saves model_comparison_loss.png."""
        fig, (train_ax, val_ax) = plt.subplots(1, 2, figsize=(15, 6))

        panels = (
            (train_ax, "train_losses", "Training Loss Comparison"),
            (val_ax, "val_losses", "Validation Loss Comparison"),
        )
        for ax, loss_key, title in panels:
            for name, results in self.models_results.items():
                ax.plot(results[loss_key], label=name)
            ax.set_xlabel("Epoch")
            ax.set_ylabel("Loss")
            ax.set_title(title)
            ax.legend()

        fig.tight_layout()
        fig.savefig("model_comparison_loss.png")
        plt.show()
        plt.close(fig)

    def _compare_mae(self):
        """Print and plot per-target validation MAE; saves model_comparison_mae.png."""
        model_names = list(self.models_results.keys())
        model_maes = [self.models_results[name]["mae_values"] for name in model_names]

        comparison_df = pd.DataFrame({"Target": TARGET_COLUMNS})

        for name, maes in zip(model_names, model_maes):
            comparison_df[name] = maes

        overall_row = pd.DataFrame({"Target": ["Overall MAE"]})
        for name in model_names:
            overall_row[name] = [self.models_results[name]["overall_mae"]]

        comparison_df = pd.concat([comparison_df, overall_row], ignore_index=True)

        print("MAE Comparison across models:")
        print(comparison_df)

        fig, ax = plt.subplots(figsize=(12, 6))
        x = np.arange(len(TARGET_COLUMNS))
        width = 0.8 / len(model_names) if len(model_names) > 0 else 0.4

        for i, name in enumerate(model_names):
            # Centre the group of bars around each tick.
            offset = (i - len(model_names) / 2 + 0.5) * width
            ax.bar(x + offset, model_maes[i], width, label=name)

        ax.set_xlabel("Target")
        ax.set_ylabel("Mean Absolute Error")
        ax.set_title("MAE Comparison Between Models")
        ax.set_xticks(x)
        ax.set_xticklabels(TARGET_COLUMNS)
        ax.legend()

        fig.tight_layout()
        fig.savefig("model_comparison_mae.png")
        plt.show()
        plt.close(fig)

    def _rank_models(self):
        """Print and plot models ordered by overall validation MAE; saves model_ranking.png."""
        ranked_models = sorted(
            self.models_results.items(), key=lambda x: x[1]["overall_mae"]
        )

        print("\nModel Ranking (based on overall MAE):")
        for i, (name, results) in enumerate(ranked_models):
            print(f"{i+1}. {name}: {results['overall_mae']:.2f}")

        fig, ax = plt.subplots(figsize=(10, 6))
        names = [name for name, _ in ranked_models]
        maes = [results["overall_mae"] for _, results in ranked_models]

        ax.bar(names, maes)
        ax.set_xlabel("Model")
        ax.set_ylabel("Overall MAE")
        ax.set_title("Model Ranking by Overall MAE")
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

        fig.tight_layout()
        fig.savefig("model_ranking.png")
        plt.show()
        plt.close(fig)

 ## 6. Visualization Utilities

In [13]:
def visualize_predictions(model, data_loader, device, num_samples=5):
    """Show images from the first batch next to a table of predicted vs. true targets.

    For up to ``num_samples`` items of the first batch yielded by
    ``data_loader``, a figure is drawn with the image on the left and, on the
    right, a table of ground truth, prediction, absolute error and percentage
    error for every entry of ``TARGET_COLUMNS``.

    Args:
        model: Module called on the image batch after it is moved to ``device``.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device on which the forward pass runs.
        num_samples: Maximum number of samples to display.
    """
    model.eval()

    first_images, first_targets = next(iter(data_loader))
    images = first_images[:num_samples]
    targets = first_targets[:num_samples]

    with torch.no_grad():
        predictions = model(images.to(device)).cpu().numpy()

    # Per-channel statistics used to undo input normalisation before display.
    # NOTE(review): presumably these match the Normalize() step of the input
    # transform - confirm against the transform definition.
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    def _percent_error(abs_error, truth, guess):
        """Percentage error, with fixed values when the true value is ~0."""
        if abs(truth) > 1e-7:
            return (abs_error / abs(truth) * 100).round(1)
        # True value is zero: a (near-)zero prediction counts as exact,
        # anything else is reported as 100%.
        return 0 if abs(guess) < 1e-5 else 100

    for sample_idx in range(len(images)):
        # CHW tensor -> HWC array, then undo the normalisation.
        pixels = images[sample_idx].cpu().permute(1, 2, 0).numpy()
        pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        ax1.imshow(pixels)
        ax1.set_title("Food Image")
        ax1.axis("off")

        target = targets[sample_idx].cpu().numpy()
        pred = predictions[sample_idx]

        abs_errors = np.abs(pred - target).round(1)
        error_pcts = np.zeros_like(target)
        for j, (abs_error, truth, guess) in enumerate(zip(abs_errors, target, pred)):
            error_pcts[j] = _percent_error(abs_error, truth, guess)

        comparison = pd.DataFrame(
            {
                "Target": TARGET_COLUMNS,
                "Ground Truth": target.round(1),
                "Prediction": pred.round(1),
                "Absolute Error": abs_errors,
                "Error %": error_pcts,
            }
        )

        ax2.axis("tight")
        ax2.axis("off")
        table = ax2.table(
            cellText=comparison.values,
            colLabels=comparison.columns,
            rowLabels=None,
            cellLoc="center",
            loc="center",
        )
        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1.2, 1.5)

        plt.tight_layout()
        plt.show()


def plot_model_comparison_with_ci(results):
    """Plot per-target test MAE of every model with confidence-interval error bars.

    One subplot is drawn per entry of ``TARGET_COLUMNS`` (three per row; the
    number of rows grows with the number of targets). The figure is saved as
    ``model_comparison_with_ci.png``.

    Args:
        results: Dict mapping model name to a result dict whose
            ``'test_metrics'`` entry holds, per target, the keys ``mae``,
            ``mae_ci_lower`` and ``mae_ci_upper``.
    """
    n_targets = len(TARGET_COLUMNS)
    n_cols = 3
    n_rows = max(1, -(-n_targets // n_cols))  # ceiling division

    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(18, 6 * n_rows), squeeze=False
    )
    axes = axes.flatten()

    model_names = list(results.keys())
    colors = plt.cm.Set1(np.linspace(0, 1, len(model_names)))
    x_pos = np.arange(len(model_names))

    for i, target in enumerate(TARGET_COLUMNS):
        ax = axes[i]

        target_metrics = [results[name]['test_metrics'][target] for name in model_names]
        maes = np.array([m['mae'] for m in target_metrics])
        ci_lowers = np.array([m['mae_ci_lower'] for m in target_metrics])
        ci_uppers = np.array([m['mae_ci_upper'] for m in target_metrics])

        # Error-bar lengths must be non-negative for matplotlib; clip in case
        # a point estimate falls marginally outside its bootstrap interval.
        yerr_lower = np.clip(maes - ci_lowers, 0, None)
        yerr_upper = np.clip(ci_uppers - maes, 0, None)

        bars = ax.bar(x_pos, maes, yerr=[yerr_lower, yerr_upper],
                      capsize=5, color=colors, alpha=0.7)

        # Keep the label in sync with the configured confidence level.
        ax.set_title(f'{target.title()} - MAE with {CONFIDENCE_LEVEL:.0%} CI')
        ax.set_xlabel('Model')
        ax.set_ylabel('Mean Absolute Error')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(model_names, rotation=45, ha='right')

        # Add value labels just above the upper error bar
        for j, (bar, mae) in enumerate(zip(bars, maes)):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + yerr_upper[j] + 0.01,
                    f'{mae:.3f}', ha='center', va='bottom', fontsize=8)

    # Remove every subplot that has no target assigned
    for unused_ax in axes[n_targets:]:
        unused_ax.remove()

    plt.tight_layout()
    plt.savefig('model_comparison_with_ci.png', dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

 ## 7. Main Training Pipeline

In [14]:
def main():
    """Run the full experiment: load data, build splits, train models, report results.

    Steps, in order:
      1. Seed the torch / numpy RNGs with ``RANDOM_STATE``.
      2. Parse both cafe metadata CSVs and keep only dishes that have an
         ``RGB_IMAGE_FILENAME`` image under ``IMAGERY_DIR_LOCAL_FULL``.
      3. Coerce the target columns to numeric and drop rows with missing targets.
      4. Split into train / validation / test via ``stratified_split_by_bins``.
      5. Build datasets and data loaders (GPU caching is used when
         ``check_memory_for_caching`` allows it).
      6. Train every model in ``MODELS_TO_TEST`` with ``ModelTrainer``, compare
         them, plot the comparison, and visualise predictions of the model
         with the lowest mean validation MAE.

    Raises:
        ValueError: If no dish has both imagery and complete target values.
    """
    # Set random seeds for reproducibility
    torch.manual_seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)
    if torch.cuda.is_available():
        # Seed every visible GPU, not only the current device
        torch.cuda.manual_seed_all(RANDOM_STATE)

    # Data loading and preparation. Only the dish-level frames are used by this
    # pipeline; the ingredient-level frames are discarded.
    dish_df_cafe1, _ = parse_nutrition_csv(METADATA_FILE_CAFE1)
    dish_df_cafe2, _ = parse_nutrition_csv(METADATA_FILE_CAFE2)
    dish_metadata_df = pd.concat([dish_df_cafe1, dish_df_cafe2], ignore_index=True)

    # A dish is usable only if its folder exists and contains the RGB image
    available_dish_ids_in_imagery = [
        dish_id_folder
        for dish_id_folder in os.listdir(IMAGERY_DIR_LOCAL_FULL)
        if os.path.isdir(os.path.join(IMAGERY_DIR_LOCAL_FULL, dish_id_folder))
        and os.path.exists(
            os.path.join(IMAGERY_DIR_LOCAL_FULL, dish_id_folder, RGB_IMAGE_FILENAME)
        )
    ]

    filtered_metadata_df = dish_metadata_df[
        dish_metadata_df["dish_id"].isin(available_dish_ids_in_imagery)
    ].copy()

    # Non-numeric target values become NaN and are dropped right after
    for col in TARGET_COLUMNS:
        filtered_metadata_df[col] = pd.to_numeric(
            filtered_metadata_df[col], errors="coerce"
        )
    filtered_metadata_df = filtered_metadata_df.dropna(subset=TARGET_COLUMNS)

    total_samples = len(filtered_metadata_df)
    print(f"Total samples: {total_samples}")
    if total_samples == 0:
        # Fail early with a clear message instead of a ZeroDivisionError below
        raise ValueError(
            "No usable samples: no dish has both an RGB image and complete target values."
        )

    # Improved data splitting with stratification.
    # NOTE(review): the recorded output of this cell shows
    # "NameError: name 'stratified_split_by_bins' is not defined" - the cell
    # defining that function must be run before this one.
    train_df, val_df, test_df = stratified_split_by_bins(
        filtered_metadata_df, TARGET_COLUMNS,
        test_size=0.2, val_size=0.2, random_state=RANDOM_STATE
    )

    print(f"Train samples: {len(train_df)} ({len(train_df)/total_samples*100:.1f}%)")
    print(f"Validation samples: {len(val_df)} ({len(val_df)/total_samples*100:.1f}%)")
    print(f"Test samples: {len(test_df)} ({len(test_df)/total_samples*100:.1f}%)")

    # Extract data for datasets
    train_dish_ids = train_df["dish_id"].tolist()
    val_dish_ids = val_df["dish_id"].tolist()
    test_dish_ids = test_df["dish_id"].tolist()

    train_labels = train_df[TARGET_COLUMNS].values.astype(np.float32)
    val_labels = val_df[TARGET_COLUMNS].values.astype(np.float32)
    test_labels = test_df[TARGET_COLUMNS].values.astype(np.float32)

    # Compute summary statistics from the training set only
    train_mean = np.mean(train_labels, axis=0)
    train_std = np.std(train_labels, axis=0)

    print("\nTarget statistics (training set):")
    stats_df = pd.DataFrame({
        'Target': TARGET_COLUMNS,
        'Mean': train_mean,
        'Std': train_std,
        'Min': np.min(train_labels, axis=0),
        'Max': np.max(train_labels, axis=0)
    })
    print(stats_df)

    # Augmentation is applied to training images only; validation and test
    # images share the deterministic "val" transform.
    data_transforms = {
        "train": transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(15),
                transforms.ColorJitter(brightness=0.1, contrast=0.1),
                transforms.ToTensor(),
                transforms.Normalize(
                    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
                ),
            ]
        ),
        "val": transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
                ),
            ]
        ),
    }

    # Best effort: if the memory check itself fails, fall back to no caching,
    # but report why and let KeyboardInterrupt / SystemExit propagate.
    try:
        can_gpu_cache = check_memory_for_caching(
            len(train_dish_ids) + len(val_dish_ids) + len(test_dish_ids)
        )
    except Exception as exc:
        can_gpu_cache = False
        print(f"Error checking memory ({exc!r}). Disabling GPU image caching.")

    def _make_dataset(dish_ids, labels, transform_key):
        """Build a NutritionDataset for one split with the shared settings."""
        return NutritionDataset(
            dish_ids=dish_ids,
            labels=labels,
            imagery_dir=IMAGERY_DIR_LOCAL_FULL,
            rgb_filename=RGB_IMAGE_FILENAME,
            transform=data_transforms[transform_key],
            gpu_cache=can_gpu_cache,
        )

    # Create datasets
    train_dataset = _make_dataset(train_dish_ids, train_labels, "train")
    val_dataset = _make_dataset(val_dish_ids, val_labels, "val")
    test_dataset = _make_dataset(test_dish_ids, test_labels, "val")

    if can_gpu_cache:
        loader_workers = 0
        print("Using 0 workers for DataLoader due to GPU caching")
    else:
        loader_workers = NUM_WORKERS
        print(f"Using {loader_workers} workers for DataLoader")

    def _make_loader(dataset, shuffle):
        """Build a DataLoader for one split with the shared worker settings."""
        return DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            shuffle=shuffle,
            num_workers=loader_workers,
            pin_memory=PIN_MEMORY and not can_gpu_cache,
            # These two options are only valid when worker processes are used
            persistent_workers=loader_workers > 0,
            prefetch_factor=2 if loader_workers > 0 else None,
        )

    # Create data loaders (only the training loader shuffles)
    train_loader = _make_loader(train_dataset, shuffle=True)
    val_loader = _make_loader(val_dataset, shuffle=False)
    test_loader = _make_loader(test_dataset, shuffle=False)

    # Define models including baselines
    MODELS_TO_TEST = [
        SimpleBaseline(num_outputs=len(TARGET_COLUMNS), mean_values=train_mean),
        BaselineCNN(num_outputs=len(TARGET_COLUMNS)),
    ]

    trainer = ModelTrainer(
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        device=DEVICE,
        learning_rate=LEARNING_RATE,
        num_epochs=NUM_EPOCHS,
        patience=7,
    )

    results = trainer.train_multiple_models(MODELS_TO_TEST)

    print("\nComparing model performance:")
    trainer.compare_models()

    print("\nPerforming statistical significance tests:")
    trainer.compare_models_statistically()

    # Enhanced visualizations
    plot_model_comparison_with_ci(results)

    # Select best model based on validation performance (lowest MAE averaged
    # over all targets); the test set is only used for the final report.
    def _mean_val_mae(model_name):
        val_metrics = results[model_name]['val_metrics']
        return np.mean([val_metrics[t]['mae'] for t in TARGET_COLUMNS])

    best_model_name = min(results, key=_mean_val_mae)
    best_model = results[best_model_name]["model"]

    print(f"\nBest model (validation): {best_model_name}")
    print("Final test performance:")
    test_metrics = results[best_model_name]['test_metrics']
    for target in TARGET_COLUMNS:
        m = test_metrics[target]
        print(f"  {target}: MAE={m['mae']:.3f} [CI: {m['mae_ci_lower']:.3f}-{m['mae_ci_upper']:.3f}], R²={m['r2']:.3f}")

    # Visualize predictions on test set
    print(f"\nVisualizing predictions for {best_model_name} on test set:")
    visualize_predictions(best_model, test_loader, DEVICE, num_samples=3)

# Entry point: run the whole pipeline when this code executes as the top-level
# module (which is also the case when the cell is run in a notebook kernel).
if __name__ == "__main__":
    main()

Total samples: 3490


NameError: name 'stratified_split_by_bins' is not defined