In [1]:
import os
import time
import io
import copy
import numpy as np
import pandas as pd
import gcsfs
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error

In [2]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
INPUT_SIZE = 224
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]


# Define your GCS variables
project_id = "josiahs-project-475720"
bucket = "calorie-prediction"
os.environ["GOOGLE_CLOUD_PROJECT"] = project_id

SA_KEY_FILE = "/kaggle/input/my-kaggle-gcs-key/josiahs-project-475720-0a3d6753bc40.json"

if os.path.exists(SA_KEY_FILE):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SA_KEY_FILE
    print(f"GOOGLE_APPLICATION_CREDENTIALS set to {SA_KEY_FILE}.")
else:
    print(f"⚠️ Service account key '{SA_KEY_FILE}' not found. Please upload it to your Kaggle environment.")

GOOGLE_APPLICATION_CREDENTIALS set to /kaggle/input/my-kaggle-gcs-key/josiahs-project-475720-0a3d6753bc40.json.


In [3]:
metadata_path = "/kaggle/input/metadata/final_metadata_with_clusters.csv"
clean_df = pd.read_csv(metadata_path)
clean_df['image_file'] = 'gs://' + bucket + '/' + clean_df['image_file']

print("Metadata loaded successfully.")

# Data Split
train_df, temp_df = train_test_split(
    clean_df,
    test_size=0.2,
    stratify=clean_df["meal_type_class"],
    random_state=42,
    shuffle=True
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
    shuffle=True
)
print(f"\nTraining Samples: {len(train_df)}")
print(f"Validation Samples: {len(val_df)}")
print(f"Testing Samples: {len(test_df)}")

Metadata loaded successfully.

Training Samples: 2463
Validation Samples: 308
Testing Samples: 308


In [4]:
clean_df.head()

Unnamed: 0,dish_id,total_weight_g,total_calories,g_fat,g_carb,g_protein,image_file,meal_type_class,meal_type_name
0,dish_1561662216,300.794281,193.0,12.387489,28.21829,18.63397,gs://calorie-prediction/nutrition5k_data/image...,4,Salad
1,dish_1561662054,419.438782,292.0,23.838249,26.351543,25.910593,gs://calorie-prediction/nutrition5k_data/image...,4,Salad
2,dish_1562008979,382.936646,290.0,22.224644,10.17357,35.345387,gs://calorie-prediction/nutrition5k_data/image...,4,Salad
3,dish_1560455030,20.59,103.0,0.148,4.625,0.956,gs://calorie-prediction/nutrition5k_data/image...,2,Dessert
4,dish_1558372433,74.360001,143.0,0.286,0.429,20.02,gs://calorie-prediction/nutrition5k_data/image...,5,Sandwich / Burger


In [5]:

print(train_df['meal_type_class'].value_counts())

import matplotlib.pyplot as plt
for cls in sorted(train_df['meal_type_class'].unique())[:10]:
    row = train_df[train_df['meal_type_class']==cls].sample(1).iloc[0]
    print(cls, row['meal_type_name'], row['image_file'])


meal_type_class
4    528
8    310
3    275
7    272
6    243
0    237
2    222
1    148
5    122
9    106
Name: count, dtype: int64
0 Rice Bowl gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1565107066/rgb.png
1 Pasta gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1574193262/rgb.png
2 Dessert gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1559838402/rgb.png
3 Soup gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1566588016/rgb.png
4 Salad gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1565637548/rgb.png
5 Sandwich / Burger gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1558372650/rgb.png
6 Vegetable Plate gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1559062005/rgb.png
7 Meat / Protein Plate gs://calorie-prediction/nutrition5k_data/imagery/realsense_overhead/dish_1565711154/rgb.png
8 Breakfast Items gs://cal

In [6]:
class NutritionDataset(Dataset):
    def __init__(self, df, bucket, gcs_token=None, transform=None,
                 cal_mean=0.0, cal_std=1.0, macro_means=None, macro_stds=None):

        self.df = df.reset_index(drop=True).copy()
        self.bucket = bucket.replace("gs://", "").strip("/")
        self.gcs_token = gcs_token
        self.transform = transform
        self.fs = None

        # store scaling
        self.cal_mean = cal_mean
        self.cal_std = cal_std
        self.macro_means = np.array(macro_means, dtype=np.float32)
        self.macro_stds = np.array(macro_stds, dtype=np.float32)

    def _init_fs(self):
        if self.fs is None:
            if self.gcs_token:
                self.fs = gcsfs.GCSFileSystem(token=self.gcs_token)
            else:
                self.fs = gcsfs.GCSFileSystem(token="anon")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        self._init_fs()

        row = self.df.iloc[idx]
        img_path = row["image_file"]

        clean = img_path.replace("gs://calorie-prediction/", "").lstrip("/")
        gcs_full_path = f"gs://calorie-prediction/{clean}"

        # Load image
        try:
            with self.fs.open(gcs_full_path, "rb") as f:
                image = Image.open(f).convert("RGB")
        except Exception as e:
            print(f"[WARN] Could not load {gcs_full_path}: {e}")
            raise

        if self.transform:
            image = self.transform(image)

        # ---- SCALE TARGETS HERE ----
        cal_scaled = (float(row["total_calories"]) - self.cal_mean) / self.cal_std

        mac_scaled = np.array([
            (float(row["g_protein"]) - self.macro_means[0]) / self.macro_stds[0],
            (float(row["g_carb"])    - self.macro_means[1]) / self.macro_stds[1],
            (float(row["g_fat"])     - self.macro_means[2]) / self.macro_stds[2],
        ], dtype=np.float32)

        targets = {
            "class": int(row["meal_type_class"]),
            "calories": torch.tensor(cal_scaled, dtype=torch.float32),
            "macros": torch.tensor(mac_scaled, dtype=torch.float32)
        }

        return image, targets

In [7]:
NUM_CLASSES = 10  
BATCH_SIZE = 32
FREEZE_EPOCHS = 4
FINETUNE_EPOCHS = 12
LR_FREEZE = 3e-4
LR_FINETUNE = 1e-5
IMG_SIZE = 384

In [8]:
from google.cloud import storage

In [9]:
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.7,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.08, contrast=0.08, saturation=0.08, hue=0.02),
    transforms.GaussianBlur(kernel_size=(3,3), sigma=(0.1,2.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
val_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.14)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

In [10]:
GCS_TOKEN_PATH = "/kaggle/input/my-kaggle-gcs-key/josiahs-project-475720-0a3d6753bc40.json"

In [11]:
cal_mean = train_df['total_calories'].mean()
cal_std  = train_df['total_calories'].std()
macro_means = train_df[['g_fat','g_carb','g_protein']].mean().values
macro_stds  = train_df[['g_fat','g_carb','g_protein']].std().values

print("cal mean/std:", cal_mean, cal_std)
print("macro means:", macro_means, "macro stds:", macro_stds)

cal mean/std: 192.18026796589524 141.41432514790083
macro means: [10.63356507 16.83611963 14.23756943] macro stds: [12.95014584 15.74015911 17.50638443]


In [12]:
import os, io, time
import numpy as np
import pandas as pd
from PIL import Image
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, accuracy_score
from sklearn.utils.class_weight import compute_class_weight

In [28]:
USE_GCS = True
train_ds = NutritionDataset(train_df,
                           bucket=(bucket if USE_GCS else None),
                           gcs_token=(GCS_TOKEN_PATH if USE_GCS else None),
                           transform=train_tf,
                           cal_mean=cal_mean, cal_std=cal_std,
                           macro_means=macro_means, macro_stds=macro_stds)

val_ds = NutritionDataset(val_df,
                           bucket=(bucket if USE_GCS else None),
                           gcs_token=(GCS_TOKEN_PATH if USE_GCS else None),
                           transform=val_tf,
                           cal_mean=cal_mean, cal_std=cal_std,
                           macro_means=macro_means, macro_stds=macro_stds)

test_ds = NutritionDataset(test_df, 
                           bucket=(bucket if USE_GCS else None),
                           gcs_token=(GCS_TOKEN_PATH if USE_GCS else None),
                           transform=val_tf,
                           cal_mean=cal_mean, cal_std=cal_std,
                           macro_means=macro_means, macro_stds=macro_stds)

class_counts = train_df['meal_type_class'].value_counts().sort_index()

sample_weights = train_df['meal_type_class'].map(lambda c: 1.0/float(class_counts[c])).values
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("Dataloaders ready (BATCH_SIZE=%d)"%BATCH_SIZE)

Dataloaders ready (BATCH_SIZE=24)


In [29]:
import torchvision
model = torchvision.models.resnet50(weights="IMAGENET1K_V2")
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

In [30]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [31]:
from google.cloud import storage
from sklearn.utils.class_weight import compute_class_weight


In [32]:
weights = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(NUM_CLASSES),
    y=train_df["meal_type_class"]
)

weights = torch.tensor(weights).float().to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=weights)

In [33]:
NUM_CLASSES = 10
BATCH_SIZE = 24           
IMG_SIZE = 448      
FREEZE_EPOCHS = 5
FINETUNE_EPOCHS = 18
LR_HEAD = 3e-4
LR_FINETUNE = 1e-5
WEIGHT_DECAY = 1e-5
NUM_WORKERS = 0

In [34]:
print("Sizes -> train:", len(train_df), "val:", len(val_df), "test:", len(test_df))

Sizes -> train: 2463 val: 308 test: 308


In [35]:
GCS_TOKEN_PATH = "/kaggle/input/my-kaggle-gcs-key/josiahs-project-475720-0a3d6753bc40.json" 

In [36]:
class ThreeHeadResNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        base = models.resnet50(pretrained=True)
        self.features = nn.Sequential(*list(base.children())[:-1])
        feature_size = base.fc.in_features

        self.cls_head = nn.Sequential(
            nn.Linear(feature_size, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )
        self.cal_head = nn.Sequential(
            nn.Linear(feature_size, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 1)
        )
        self.mac_head = nn.Sequential(
            nn.Linear(feature_size, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 3)
        )

    def forward(self, x):
        f = self.features(x)            
        f = f.view(f.size(0), -1)       
        c = self.cls_head(f)
        cal = self.cal_head(f).squeeze(1)  
        mac = self.mac_head(f)            
        return c, cal, mac

model = ThreeHeadResNet(num_classes=NUM_CLASSES).to(DEVICE)



In [37]:
cls_weights = compute_class_weight(class_weight='balanced', classes=np.arange(NUM_CLASSES), y=train_df['meal_type_class'])
cls_weights = torch.tensor(cls_weights, dtype=torch.float32).to(DEVICE)
cls_loss_fn = nn.CrossEntropyLoss(weight=cls_weights)

reg_loss_fn = nn.SmoothL1Loss()

CLS_W = 1.0
CAL_W = 3.0  
MAC_W = 2.0  


In [38]:

def unscale_calories(preds_scaled, mean, std):
    return preds_scaled * std + mean
def unscale_macros(preds_scaled, means, stds):
    return preds_scaled * stds + means
def evaluate_model(model, loader, cal_mean, cal_std, macro_means, macro_stds):
    model.eval()
    all_true_cls, all_pred_cls = [], []
    all_true_cal, all_pred_cal = [], []
    all_true_mac, all_pred_mac = [], []
    with torch.no_grad():
        for imgs, targets in loader:
            cls_labels = targets["class"].to(DEVICE)
            cal_scaled = targets["calories"].to(DEVICE)
            mac_scaled = targets["macros"].to(DEVICE)
            imgs = imgs.to(DEVICE)

            c_out, cal_out, mac_out = model(imgs)
            preds_cls = torch.argmax(c_out, dim=1)

            all_true_cls.append(cls_labels.cpu().numpy())
            all_pred_cls.append(preds_cls.cpu().numpy())

            all_true_cal.append(unscale_calories(cal_scaled.cpu().numpy(), cal_mean, cal_std))
            all_pred_cal.append(unscale_calories(cal_out.cpu().numpy(), cal_mean, cal_std))

            all_true_mac.append(unscale_macros(mac_scaled.cpu().numpy(), macro_means, macro_stds))
            all_pred_mac.append(unscale_macros(mac_out.cpu().numpy(), macro_means, macro_stds))
    all_true_cls = np.concatenate(all_true_cls)
    all_pred_cls = np.concatenate(all_pred_cls)
    all_true_cal = np.concatenate(all_true_cal)
    all_pred_cal = np.concatenate(all_pred_cal)
    all_true_mac = np.concatenate(all_true_mac)
    all_pred_mac = np.concatenate(all_pred_mac)
    acc = accuracy_score(all_true_cls, all_pred_cls)
    cal_mae = mean_absolute_error(all_true_cal, all_pred_cal)
    mac_mae = mean_absolute_error(all_true_mac.reshape(-1,3), all_pred_mac.reshape(-1,3))
    return acc, cal_mae, mac_mae, (all_true_cls, all_pred_cls, all_true_cal, all_pred_cal, all_true_mac, all_pred_mac)


In [39]:
SAVE_DIR = "/kaggle/working"
os.makedirs(SAVE_DIR, exist_ok=True)


In [40]:
cls_criterion = nn.CrossEntropyLoss()
cal_criterion = nn.MSELoss() 
mac_criterion = nn.MSELoss()

In [41]:
CLS_W = 1.0
CAL_W = 1.0
MAC_W = 1.0
CLIP_NORM = 1.0
PATIENCE = 6
SAVE_DIR = "/kaggle/working/checkpoints" 
os.makedirs(SAVE_DIR, exist_ok=True)

cls_criterion = nn.CrossEntropyLoss()
reg_criterion = nn.MSELoss()

model = model.to(DEVICE)

scaler = torch.amp.GradScaler('cuda')

# ---- Phase 1: freeze backbone, train heads only ----
for p in model.features.parameters():
    p.requires_grad = False

head_params = list(model.cls_head.parameters()) + \
              list(model.cal_head.parameters()) + \
              list(model.mac_head.parameters())

opt = optim.AdamW(head_params, lr=LR_HEAD, weight_decay=WEIGHT_DECAY)

best_val_loss = float("inf")
no_improve = 0

print("=== Phase 1: training heads only ===")
for epoch in range(1, FREEZE_EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_samples = 0
    start = time.time()

    for imgs, targets in train_loader:
        imgs = imgs.to(DEVICE, non_blocking=True)
        cls_labels = targets["class"].long().to(DEVICE)
        cal_targets = targets["calories"].float().to(DEVICE)
        mac_targets = targets["macros"].float().to(DEVICE)

        opt.zero_grad()
        with torch.amp.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
            cls_pred, cal_pred, mac_pred = model(imgs)
            loss_cls = cls_criterion(cls_pred, cls_labels)
            loss_cal = reg_criterion(cal_pred.squeeze(), cal_targets)
            loss_mac = reg_criterion(mac_pred, mac_targets)
            loss = CLS_W * loss_cls + CAL_W * loss_cal + MAC_W * loss_mac

        scaler.scale(loss).backward()
        # unscale gradients before clipping
        scaler.unscale_(opt)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
        scaler.step(opt)
        scaler.update()

        running_loss += loss.item() * imgs.size(0)
        n_samples += imgs.size(0)

    train_loss = running_loss / max(1, n_samples)

    # Evaluate (unscales internally)
    val_acc, val_cal_mae, val_mac_mae, _ = evaluate_model(
        model,
        val_loader,
        cal_mean=train_ds.cal_mean,
        cal_std=train_ds.cal_std,
        macro_means=train_ds.macro_means,
        macro_stds=train_ds.macro_stds,
    )

    val_loss = CLS_W * 0.0 + CAL_W * val_cal_mae + MAC_W * val_mac_mae

    print(f"[Phase1] E{epoch}/{FREEZE_EPOCHS} train_loss={train_loss:.4f} val_acc={val_acc:.4f} "
          f"val_cal_MAE={val_cal_mae:.2f} val_mac_MAE={val_mac_mae:.2f} time={time.time()-start:.1f}s")

    # Save best heads
    if val_loss < best_val_loss - 1e-6:
        best_val_loss = val_loss
        torch.save({
            "epoch": epoch,
            "model_state": model.state_dict(),
            "opt_state": opt.state_dict(),
            "val_loss": val_loss
        }, os.path.join(SAVE_DIR, "best_heads.pth"))
        no_improve = 0
        print("  Saved best_heads.pth")
    else:
        no_improve += 1
        print(f"  No improvement ({no_improve}/{PATIENCE})")

    if no_improve >= PATIENCE:
        print("Early stopping in Phase 1.")
        break

=== Phase 1: training heads only ===
[Phase1] E1/5 train_loss=3.2090 val_acc=0.6039 val_cal_MAE=76.79 val_mac_MAE=9.45 time=840.0s
  Saved best_heads.pth
[Phase1] E2/5 train_loss=2.0695 val_acc=0.7045 val_cal_MAE=74.66 val_mac_MAE=9.40 time=751.1s
  Saved best_heads.pth
[Phase1] E3/5 train_loss=1.8726 val_acc=0.8084 val_cal_MAE=109.85 val_mac_MAE=9.21 time=756.3s
  No improvement (1/6)
[Phase1] E4/5 train_loss=1.6830 val_acc=0.7143 val_cal_MAE=77.90 val_mac_MAE=9.04 time=707.1s
  No improvement (2/6)
[Phase1] E5/5 train_loss=1.5954 val_acc=0.7727 val_cal_MAE=118.07 val_mac_MAE=9.11 time=756.1s
  No improvement (3/6)


In [None]:
# ---- Phase 2: unfreeze entire model and finetune ----
for p in model.features.parameters():
    p.requires_grad = True

opt = optim.AdamW(model.parameters(), lr=LR_FINETUNE, weight_decay=WEIGHT_DECAY)
# Use ReduceLROnPlateau that reacts to validation composite loss
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=2, verbose=True)

best_val_loss = float("inf")
no_improve = 0

print("=== Phase 2: finetune full model ===")
for epoch in range(1, FINETUNE_EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_samples = 0
    start = time.time()

    for imgs, targets in train_loader:
        imgs = imgs.to(DEVICE, non_blocking=True)
        cls_labels = targets["class"].long().to(DEVICE)
        cal_targets = targets["calories"].float().to(DEVICE)
        mac_targets = targets["macros"].float().to(DEVICE)

        opt.zero_grad()
        with torch.amp.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
            cls_pred, cal_pred, mac_pred = model(imgs)
            loss_cls = cls_criterion(cls_pred, cls_labels)
            loss_cal = reg_criterion(cal_pred.squeeze(), cal_targets)
            loss_mac = reg_criterion(mac_pred, mac_targets)
            loss = CLS_W * loss_cls + CAL_W * loss_cal + MAC_W * loss_mac

        scaler.scale(loss).backward()
        scaler.unscale_(opt)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
        scaler.step(opt)
        scaler.update()

        running_loss += loss.item() * imgs.size(0)
        n_samples += imgs.size(0)

    train_loss = running_loss / max(1, n_samples)

    # Evaluate on validation set (unscaled inside evaluate_model)
    val_acc, val_cal_mae, val_mac_mae, _ = evaluate_model(
        model,
        val_loader,
        cal_mean=train_ds.cal_mean,
        cal_std=train_ds.cal_std,
        macro_means=train_ds.macro_means,
        macro_stds=train_ds.macro_stds,
    )

    val_loss = CLS_W * 0.0 + CAL_W * val_cal_mae + MAC_W * val_mac_mae

    scheduler.step(val_loss)

    print(f"[Finetune] E{epoch}/{FINETUNE_EPOCHS} train_loss={train_loss:.4f} val_acc={val_acc:.4f} "
          f"val_cal_MAE={val_cal_mae:.2f} val_mac_MAE={val_mac_mae:.2f} time={time.time()-start:.1f}s")

    if val_loss < best_val_loss - 1e-6:
        best_val_loss = val_loss
        torch.save({
            "epoch": epoch,
            "model_state": model.state_dict(),
            "opt_state": opt.state_dict(),
            "val_loss": val_loss
        }, os.path.join(SAVE_DIR, "best_full.pth"))
        no_improve = 0
        print("  Saved best_full.pth")
    else:
        no_improve += 1
        print(f"  No improvement ({no_improve}/{PATIENCE})")

    torch.save({
        "epoch": epoch,
        "model_state": model.state_dict(),
        "opt_state": opt.state_dict(),
        "val_loss": val_loss
    }, os.path.join(SAVE_DIR, "last_checkpoint.pth"))

    if no_improve >= PATIENCE:
        print("Early stopping triggered.")
        break

best_ckpt = os.path.join(SAVE_DIR, "best_full.pth")
if os.path.exists(best_ckpt):
    ck = torch.load(best_ckpt, map_location=DEVICE)
    model.load_state_dict(ck["model_state"])
    print("Loaded best_full.pth - training complete.")
else:
    print("No best_full.pth found; training finished with last checkpoint.")



=== Phase 2: finetune full model ===
[Finetune] E1/18 train_loss=1.2496 val_acc=0.8506 val_cal_MAE=59.04 val_mac_MAE=7.53 time=721.4s
  Saved best_full.pth
[Finetune] E2/18 train_loss=1.0757 val_acc=0.8377 val_cal_MAE=60.37 val_mac_MAE=7.19 time=709.0s
  No improvement (1/6)
[Finetune] E3/18 train_loss=0.8954 val_acc=0.8539 val_cal_MAE=54.46 val_mac_MAE=6.85 time=709.8s
  Saved best_full.pth
[Finetune] E4/18 train_loss=0.8343 val_acc=0.8734 val_cal_MAE=54.59 val_mac_MAE=6.85 time=740.1s
  No improvement (1/6)
[Finetune] E5/18 train_loss=0.7574 val_acc=0.8961 val_cal_MAE=50.35 val_mac_MAE=6.71 time=800.6s
  Saved best_full.pth


In [38]:
# DIAGNOSTIC 1: show batch-level stats (scaled targets coming from dataset)
imgs, targets = next(iter(train_loader))
print("SCALED targets (calories) — mean/std/min/max:", 
      targets["calories"].mean().item(), targets["calories"].std().item(),
      targets["calories"].min().item(), targets["calories"].max().item())
print("SCALED targets (macros) shape:", targets["macros"].shape,
      "mean/std:", targets["macros"].mean().item(), targets["macros"].std().item())

# DIAGNOSTIC 2: model outputs on same batch (scaled outputs)
model.eval()
with torch.no_grad():
    imgs = imgs.to(DEVICE)
    c_out, cal_out, mac_out = model(imgs)
    print("MODEL cal_out (scaled) — mean/std/min/max:", 
          cal_out.mean().item(), cal_out.std().item(),
          cal_out.min().item(), cal_out.max().item())
    print("MODEL mac_out (scaled) — shape:", mac_out.shape,
          "mean/std:", mac_out.mean().item(), mac_out.std().item())

# Also show a few unscaled comparisons
cal_mean, cal_std = train_ds.cal_mean, train_ds.cal_std
macro_means, macro_stds = train_ds.macro_means, train_ds.macro_stds
cal_true_unscaled = (targets["calories"].cpu().numpy() * cal_std) + cal_mean
cal_pred_unscaled = (cal_out.cpu().numpy() * cal_std) + cal_mean
print("Example unscaled true cal (first 5):", cal_true_unscaled[:5])
print("Example unscaled pred cal (first 5):", cal_pred_unscaled[:5])


SCALED targets (calories) — mean/std/min/max: 180.29166666666666 168.2328818520086 17.0 706.0
SCALED targets (macros) shape: torch.Size([24, 3]) mean/std: 12.130121231079102 16.383054733276367
MODEL cal_out (scaled) — mean/std/min/max: 183.97021484375 167.00860595703125 28.373851776123047 638.457275390625
MODEL mac_out (scaled) — shape: torch.Size([24, 3]) mean/std: 10.695490837097168 12.281275749206543
Example unscaled true cal (first 5): [21545.7433653   2596.22379548  9808.35437802 21404.32904015
 17020.48496057]
Example unscaled pred cal (first 5): [25963.59    4204.6494  7588.83   20158.121  17797.52  ]


In [79]:
best_model_path = "/kaggle/working/best_full.pth"

checkpoint = torch.load(best_model_path, map_location=DEVICE)
model.load_state_dict(checkpoint)
model.to(DEVICE)
model.eval()

print("Loaded best fine-tuned model ✔️")

Loaded best fine-tuned model ✔️


In [87]:
from sklearn.metrics import accuracy_score, mean_absolute_error
import numpy as np

def evaluate_model(model, test_loader):
    model.eval()

    all_true_cls, all_pred_cls = [], []
    all_true_cal, all_pred_cal = [], []
    all_true_mac, all_pred_mac = [], []

    with torch.no_grad():
        for images, class_labels, calories, macros in test_loader:
            images = images.to(DEVICE)
            class_labels = class_labels.to(DEVICE)
            calories = calories.to(DEVICE)
            macros = macros.to(DEVICE)

            pred_cls, pred_cal, pred_mac = model(images)

            all_true_cls.append(class_labels.cpu().numpy())
            all_pred_cls.append(pred_cls.argmax(1).cpu().numpy())

            all_true_cal.append(calories.cpu().numpy())
            all_pred_cal.append(pred_cal.cpu().numpy())

            all_true_mac.append(macros.cpu().numpy())
            all_pred_mac.append(pred_mac.cpu().numpy())

    all_true_cls = np.concatenate(all_true_cls)
    all_pred_cls = np.concatenate(all_pred_cls)
    all_true_cal = np.concatenate(all_true_cal).reshape(-1)
    all_pred_cal = np.concatenate(all_pred_cal).reshape(-1)
    all_true_mac = np.concatenate(all_true_mac)
    all_pred_mac = np.concatenate(all_pred_mac)

    print("========== TEST RESULTS ==========")
    print(f"Classification Accuracy: {accuracy_score(all_true_cls, all_pred_cls):.4f}")
    print(f"Calorie MAE: {mean_absolute_error(all_true_cal, all_pred_cal):.2f}")
    print(f"Macros MAE: {mean_absolute_error(all_true_mac, all_pred_mac):.2f}")

    return {
        "cls_acc": accuracy_score(all_true_cls, all_pred_cls),
        "cal_mae": mean_absolute_error(all_true_cal, all_pred_cal),
        "mac_mae": mean_absolute_error(all_true_mac, all_pred_mac)
    }


metrics = evaluate_model(model, test_loader)

Classification Accuracy: 0.8377
Calorie MAE: 0.26
Macros MAE: 0.37


In [84]:
sample = test_dataset[0]
print("TYPE:", type(sample))
print("LENGTH:", len(sample))

if isinstance(sample, tuple):
    for i, v in enumerate(sample):
        print(f"  Item {i}:", type(v), v)


TYPE: <class 'tuple'>
LENGTH: 2
  Item 0: <class 'torch.Tensor'> tensor([[[-1.0733, -1.1075, -1.0562,  ...,  0.6221,  0.7419,  0.7762],
         [-1.1418, -1.1418, -1.1075,  ...,  0.6221,  0.7077,  0.7762],
         [-1.2103, -1.1760, -1.1247,  ...,  0.6221,  0.7248,  0.7933],
         ...,
         [-1.5357, -1.5528, -1.5014,  ...,  0.3481,  0.4166,  0.4679],
         [-1.5357, -1.5357, -1.5357,  ...,  0.3309,  0.3823,  0.4679],
         [-1.5014, -1.5014, -1.5014,  ...,  0.3309,  0.3652,  0.4679]],

        [[-1.0903, -1.0728, -1.0203,  ...,  0.7654,  0.8704,  0.9580],
         [-1.1253, -1.1078, -1.0553,  ...,  0.7654,  0.8880,  0.9755],
         [-1.1954, -1.1429, -1.0728,  ...,  0.7654,  0.8880,  0.9930],
         ...,
         [-1.4230, -1.4405, -1.4230,  ...,  0.4678,  0.5378,  0.5903],
         [-1.4230, -1.4230, -1.4230,  ...,  0.4678,  0.5203,  0.5903],
         [-1.3880, -1.3880, -1.4230,  ...,  0.4678,  0.5028,  0.5553]],

        [[-1.1596, -1.1073, -0.9853,  ...,  1.0365,

In [93]:
import random
import torch.nn.functional as F

def show_sample_predictions(model, dataset, num_samples=4):
    model.eval()
    indices = random.sample(range(len(dataset)), num_samples)

    for idx in indices:
        
        image, targets = dataset[idx] 
 
        cls_true = targets["class"]
        
        cal_true = targets["calories"].item() if isinstance(targets["calories"], torch.Tensor) else targets["calories"]
      
        mac_true = targets["macros"].tolist() if isinstance(targets["macros"], torch.Tensor) else targets["macros"]


        img_in = image.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            
            cls_pred, cal_pred_tensor, mac_pred_tensor = model(img_in)
       
            cls_pred = torch.argmax(cls_pred, dim=1).item()
            cal_pred = cal_pred_tensor.item()
            mac_pred = mac_pred_tensor.squeeze().cpu().numpy()

        plt.figure(figsize=(5, 5))

        plt.imshow(image.permute(1, 2, 0).cpu().numpy()) 
        plt.axis("off")
        plt.title(
            f"True class: {cls_true} | Pred: {cls_pred}\n"
            f"Cal True: {cal_true:.1f} | Pred: {cal_pred:.1f}\n"
            f"Macros True: {mac_true}\n"
            f"Macros Pred: {mac_pred.round(1).tolist()}"
        )
        plt.show()

show_sample_predictions(model, test_dataset)

ValueError: not enough values to unpack (expected 4, got 2)