# K-Fold Ensemble Evaluation

Submission 2

In [None]:
import sys

# Directory of the attached Kaggle artifact that is made importable here
# (presumably the one holding the `src` package imported further down — confirm).
# NOTE(review): the requirements cell installs from version 11 of this artifact
# while this path points at version 12.
AML_NOTEBOOK_ROOT = "/kaggle/input/aml-irp/pytorch/default/12/AML-Competition-Notebook"

# Guard the append so that re-running the cell does not stack duplicate entries.
if AML_NOTEBOOK_ROOT not in sys.path:
    sys.path.append(AML_NOTEBOOK_ROOT)

In [None]:
!pip install gdown
folder_id = "1N7KO7zFjJ8PvtwABlRW7Ry5QsBlafTy8"
!gdown --folder $folder_id -O ./checkpoints

In [None]:
!pip install -r /kaggle/input/aml-irp/pytorch/default/11/AML-Competition-Notebook/requirements.txt

In [None]:
# %%
# Add src directory to Python path
import sys
import os
import gc
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import pandas as pd
import joblib

# Make the parent of the working directory importable.
# '__file__' is passed as a plain string here, so dirname() yields '' and the
# result is resolved against the current working directory (the original author
# assumes that directory is 'notebooks').
_parent_dir = os.path.join(os.path.dirname('__file__'), '..')
sys.path.append(os.path.abspath(_parent_dir))

In [None]:
# %%
# --- 1. Import Modules ---
from src.irp_refiner import config
from src.irp_refiner.utils import set_seed
from src.irp_refiner.data_processing import load_and_clean_data
from src.irp_refiner.models.irp import IRPTranslator
from src.irp_refiner.models.mlp import ResidualMLP
from src.irp_refiner.ensembling import EnsembleWrapper
from src.irp_refiner.evaluation import evaluate_retrieval
from src.irp_refiner.baseline_utils import load_data, generate_submission
from src.irp_refiner.training import train_model


In [None]:
# %%
# --- 2. Setup ---
# Pick up the device from the config, then call the project's set_seed helper.
# Its return value is kept because the training DataLoader later receives it
# as `worker_init_fn`.
DEVICE = config.DEVICE
worker_init_fn = set_seed(config.SEED)
print(f"Using device: {DEVICE}")

In [None]:
# K-Fold training: for every fold, fit an IRP translator on randomly drawn anchors,
# translate the fold's inputs with it and train a ResidualMLP on the translated data.
# Each fold writes one IRP translator (.pkl) and one MLP checkpoint (.pth).

# 1. Load and Clean Data
X_train_np_cleaned, Y_train_np_cleaned = load_and_clean_data(
    config.TRAIN_DATA_PATH, config.NOISE_THRESHOLD
)

# Create the checkpoint directory idempotently. A bare `!mkdir "checkpoints"` reports an
# error when the directory already exists (e.g. after the gdown download or on a re-run)
# and always targets ./checkpoints even when config.CHECKPOINT_DIR points elsewhere.
os.makedirs(config.CHECKPOINT_DIR or "checkpoints", exist_ok=True)

# 2. Initialize KFold
kf = KFold(n_splits=config.K_FOLDS, shuffle=True, random_state=config.SEED)

# 3. K-Fold Training Loop
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_np_cleaned)):
    print("\n" + "="*80)
    print(f"=============== FOLD {fold+1}/{config.K_FOLDS} ===============")
    print("="*80)

    # --- Split data for this fold ---
    X_train_fold, X_val_fold = X_train_np_cleaned[train_idx], X_train_np_cleaned[val_idx]
    Y_train_fold, Y_val_fold = Y_train_np_cleaned[train_idx], Y_train_np_cleaned[val_idx]

    # --- IRP Stage ---
    print(f"--- FOLD {fold+1}: IRP Stage ---")
    # Anchors are drawn without replacement from this fold's training rows using the
    # global NumPy RNG; this raises ValueError if K_ANCHORS exceeds the fold size.
    anchor_indices = np.random.choice(len(X_train_fold), config.K_ANCHORS, replace=False)
    X_anchor = X_train_fold[anchor_indices]
    Y_anchor = Y_train_fold[anchor_indices]

    # The scalers are fitted on the anchors only, not on the whole fold.
    scaler_X = StandardScaler().fit(X_anchor)
    scaler_Y = StandardScaler().fit(Y_anchor)

    irp_translator_fold = IRPTranslator(
        scaler_X, scaler_Y,
        omega=config.IRP_OMEGA, delta=config.IRP_DELTA,
        ridge=config.IRP_RIDGE, verbose=False
    )
    irp_translator_fold.fit(X_anchor, Y_anchor)
    print(f"   ✓ IRP translator for fold {fold+1} fitted.")

    # Paths are built by plain concatenation, so CHECKPOINT_DIR is expected to end
    # with a path separator.
    irp_path = f"{config.CHECKPOINT_DIR}irp_translator_fold_{fold}.pkl"
    joblib.dump(irp_translator_fold, irp_path)
    print(f"   ✓ IRP translator saved to {irp_path}")

    X_train_IRP_fold = torch.from_numpy(irp_translator_fold.translate(X_train_fold)).float()
    X_val_IRP_fold = torch.from_numpy(irp_translator_fold.translate(X_val_fold)).float()
    print(f"   ✓ Train and Val data transformed for fold {fold+1}.")

    # --- DataLoader Stage ---
    train_ds_fold = TensorDataset(X_train_IRP_fold, torch.from_numpy(Y_train_fold).float())
    val_ds_fold = TensorDataset(X_val_IRP_fold, torch.from_numpy(Y_val_fold).float())

    train_loader_fold = DataLoader(train_ds_fold, batch_size=config.BATCH_SIZE, shuffle=True, worker_init_fn=worker_init_fn)
    val_loader_fold = DataLoader(val_ds_fold, batch_size=config.BATCH_SIZE, shuffle=False)

    # --- Model Training Stage ---
    print(f"--- FOLD {fold+1}: MLP Refiner Training Stage ---")
    model_fold = ResidualMLP(
        input_dim=config.D_X, output_dim=config.D_Y, hidden_dim=config.HIDDEN_DIM,
        num_hidden_layers=config.NUM_HIDDEN_LAYERS, dropout_p=config.DROPOUT_P
    ).to(config.DEVICE)

    model_path_fold = f"{config.CHECKPOINT_DIR}mlp_fold_{fold}.pth"

    train_model(
        model_fold, train_loader_fold, val_loader_fold, config.DEVICE,
        epochs=config.EPOCHS, lr=config.LR, save_path=model_path_fold,
        patience=config.EARLY_STOP_PATIENCE, min_delta=config.MIN_IMPROVEMENT_DELTA,
        resume=False
    )

    # --- Clean up memory ---
    # Drop the per-fold objects before the next iteration allocates new ones.
    del model_fold, train_loader_fold, val_loader_fold, X_train_IRP_fold, X_val_IRP_fold
    gc.collect()
    torch.cuda.empty_cache()

print("\n" + "="*80)
print("K-Fold Training Complete. All models saved.")
print("="*80)

In [None]:
# %%
# --- 4. Load the K-Fold Ensemble ---
# Builds the per-fold checkpoint paths and constructs the EnsembleWrapper, or sets
# `ensemble_wrapper` to None (with an explanatory message) when files are missing.
print("Loading K-Fold models and IRP translators...")

model_paths = [f"{config.CHECKPOINT_DIR}mlp_fold_{f}.pth" for f in range(config.K_FOLDS)]
irp_paths = [f"{config.CHECKPOINT_DIR}irp_translator_fold_{f}.pkl" for f in range(config.K_FOLDS)]

# Every fold needs both its MLP checkpoint and its IRP translator, so verify all of
# them rather than only the first model file; a partial set is reported up front.
missing_paths = [path for path in model_paths + irp_paths if not os.path.exists(path)]

if missing_paths:
    print("="*80)
    print(f"ERROR: {len(missing_paths)} checkpoint file(s) not found:")
    for path in missing_paths:
        print(f"  - {path}")
    print("Please run 'python scripts/train.py' to train the K-Fold models before running this notebook.")
    print("="*80)
    ensemble_wrapper = None
else:
    ensemble_wrapper = EnsembleWrapper(model_paths, irp_paths, DEVICE)


In [None]:
# %%
# --- 5. Evaluate Ensemble on Validation Set ---
if ensemble_wrapper:
    # No earlier cell defines X_val_np / Y_val_np, so on a fresh kernel this cell
    # failed with NameError. When they are absent, take a validation split from the
    # cleaned training data (reloading it if the training cell was skipped).
    if "X_val_np" not in globals() or "Y_val_np" not in globals():
        if "X_train_np_cleaned" not in globals() or "Y_train_np_cleaned" not in globals():
            X_train_np_cleaned, Y_train_np_cleaned = load_and_clean_data(
                config.TRAIN_DATA_PATH, config.NOISE_THRESHOLD
            )
        # NOTE(review): 0.1 is an arbitrary fallback used only if config has no VAL_SIZE.
        val_split = train_test_split(
            X_train_np_cleaned, Y_train_np_cleaned,
            test_size=getattr(config, "VAL_SIZE", 0.1),
            random_state=config.SEED,
        )
        # train_test_split returns [X_train, X_test, Y_train, Y_test].
        X_val_np, Y_val_np = val_split[1], val_split[3]
        print(
            "WARNING: X_val_np / Y_val_np were not defined; using a split of the cleaned "
            "training data. These rows may have been seen during K-Fold training, so the "
            "metrics below can be optimistic."
        )

    print("\nGenerating ensemble predictions for the validation set...")

    # IMPORTANT: We pass the RAW (non-IRP) validation data.
    # Per the original author, the EnsembleWrapper applies the IRP step for each
    # model internally.
    y_val_pred_ensemble = ensemble_wrapper.translate(X_val_np)

    print("Predictions generated. Running evaluation...")

    # Prepare ground truth: query i is paired with target i.
    gt_indices_val = np.arange(len(Y_val_np))

    # Run evaluation
    results = evaluate_retrieval(
        y_val_pred_ensemble,
        Y_val_np,
        gt_indices_val,
        batch_size=config.BATCH_SIZE
    )

    print("\n--- ENSEMBLE EVALUATION RESULTS (on hold-out set) ---")
    for metric, value in results.items():
        # Metrics whose name contains 'recall' are shown as percentages.
        if 'recall' in metric:
            print(f"  {metric}: {value:.2%}")
        else:
            print(f"  {metric}: {value:.4f}")
    print("-------------------------------------------------------")
else:
    print("Skipping evaluation because ensemble was not loaded.")


## 6. Generate Ensemble Submission File

Now we use the loaded `ensemble_wrapper` to process the actual test data and generate a submission file.


In [None]:
# %%
# Run the loaded ensemble on the test captions and write the submission CSV.
# Nothing is generated when the ensemble could not be loaded.
if ensemble_wrapper:
    print("\n--- 6. Generating Ensemble Submission ---")

    # 1. Load test data
    print("Loading test data...")
    test_data = load_data(config.TEST_DATA_PATH)
    test_embds_raw_np = test_data['captions/embeddings']
    print(f"Test data loaded: {len(test_embds_raw_np)} samples.")

    # 2. Generate predictions using the ensemble (raw embeddings are passed in)
    print("Applying ensemble pipeline to test data... (this may take a moment)")
    pred_embds_ensemble = ensemble_wrapper.translate(test_embds_raw_np)
    print("Ensemble predictions generated.")

    # 3. Save submission file
    submission_filename = 'submission_Ensemble_Notebook.csv'
    generate_submission(test_data['captions/ids'], pred_embds_ensemble, submission_filename)

    print("\n" + "="*50)
    print(f"✅ Submission file '{submission_filename}' generated.")
    print("="*50)
else:
    print("Skipping submission generation because ensemble was not loaded.")

 # Direct Model Experimentation
 
Submission 1

In [None]:
# --- 1. Setup and Imports ---
import sys
import os
import gc
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Expose the parent of the working directory to the import system.
# '__file__' is a plain string here, so the path resolves against the current
# working directory (the original author assumes it is 'notebooks').
_parent_dir = os.path.join(os.path.dirname('__file__'), '..')
sys.path.append(os.path.abspath(_parent_dir))

# Project modules used by the cells below
from src import config, baseline_utils, evaluation, training, ensembling
from src.data_processing import load_and_prep_data_direct
from src.models import mlp_direct
from src.utils import set_seed

print("All modules imported successfully.")


In [None]:
# %%
# --- 2. Configuration ---
# Read the device from the config, call the project's set_seed helper (keeping
# whatever it returns), then report the device and the training-data path.
DEVICE = config.DEVICE
worker_init_fn = set_seed(config.SEED)
print(f"Using device: {DEVICE}")
print(f"Kaggle data path: {config.TRAIN_DATA_PATH}")

In [None]:
# %%
# --- 3. Load, Clean, and Split Data ---
# A single project helper performs the data preparation; per the original author
# it covers merging (if enabled), noise cleaning and the train/validation split.
print("Loading, cleaning, and splitting data...")

_prep_kwargs = dict(
    train_path=config.TRAIN_DATA_PATH,
    coco_path=config.MY_DATA_PATH,
    use_coco=config.USE_COCO_DATASET,
    noise_threshold=config.NOISE_THRESHOLD,
    val_split_ratio=config.VAL_SIZE,
    random_seed=config.SEED,
)
_prepared = load_and_prep_data_direct(**_prep_kwargs)

# The helper yields seven values, unpacked in this order.
(X_train, y_train, X_val, y_val,
 val_text_embd, val_img_embd_unique, val_label_gt) = _prepared

print("\n--- Data Loading Complete ---")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_val (queries) shape: {X_val.shape}")
print(f"val_img_embd_unique (gallery) shape: {val_img_embd_unique.shape}")
print(f"val_label_gt (ground truth) shape: {val_label_gt.shape}")


In [None]:
# %%
# --- 4. Create DataLoaders ---
# Both loaders share the batch size from MODERN_HPARAMS; only the training
# loader shuffles.
_batch_size = config.MODERN_HPARAMS['batch_size']

_train_ds = TensorDataset(X_train, y_train)
_val_ds = TensorDataset(X_val, y_val)

train_dl_std = DataLoader(_train_ds, batch_size=_batch_size, shuffle=True)
val_dl_std = DataLoader(_val_ds, batch_size=_batch_size, shuffle=False)
print("DataLoaders created.")


 ## 5. Model Training

In [None]:

# --- 5. Train the direct models (I, C, Modern ensemble, E ensemble) ---
# Feature sizes are read from the prepared tensors instead of being hard-coded
# (previously the literals 1024 and 1536), so the models always match the data
# that the training loaders are built from.
DIRECT_INPUT_DIM = X_train.shape[-1]
DIRECT_OUTPUT_DIM = y_train.shape[-1]

# Training settings shared by the two single-model runs (I and C).
STANDARD_TRAIN_KWARGS = dict(
    epochs=100,
    lr=config.LR,
    patience=10,
    use_norm_in_loss=False,
    device=DEVICE,
)

# --- 5a. Train Model I (ResidualMLP, No-Norm) ---
# NOTE(review): the label says "No-Norm" while the class is ResidualMLP_BN;
# presumably "No-Norm" refers to use_norm_in_loss=False — confirm.
print("--- 1. Training Modello I (ResidualMLP, No-Norm) ---")
model_I = mlp_direct.ResidualMLP_BN(
    input_dim=DIRECT_INPUT_DIM,
    output_dim=DIRECT_OUTPUT_DIM,
    num_layers=2,
    dropout=0.4
).to(DEVICE)

model_I = training.train_standard_direct(
    model_I, train_dl_std, val_dl_std,
    save_path=f"{config.CHECKPOINT_DIR}model_I_ResidualMLP_NoNorm.pth",
    **STANDARD_TRAIN_KWARGS
)

# %%
# --- 5b. Train Model C (SwiGLU, No-Norm) ---
print("\n--- 2. Training Modello C (SwiGLU, No-Norm) ---")
model_C = mlp_direct.SwiGLUMLP(
    input_dim=DIRECT_INPUT_DIM,
    output_dim=DIRECT_OUTPUT_DIM,
    num_layers=2,
    dropout=0.4
).to(DEVICE)

model_C = training.train_standard_direct(
    model_C, train_dl_std, val_dl_std,
    save_path=f"{config.CHECKPOINT_DIR}model_C_SwiGLU_NoNorm.pth",
    **STANDARD_TRAIN_KWARGS
)

# %%
# --- 5c. Train Model Modern (Ensemble) ---
# One model is trained per seed in config.MODERN_SEEDS; the list forms the ensemble.
print("\n--- 3. Training Modello Modern (Ensemble) ---")
models_Modern = [
    training.train_single_modern_model(
        seed, X_train, y_train, X_val, y_val,
        hparams=config.MODERN_HPARAMS,
        patience=10,
        device=DEVICE
    )
    for seed in config.MODERN_SEEDS
]

# %%
# --- 5d. Train Model E (Diverse Ensemble) ---
print("\n--- 4. Training Modello E (Diverse Ensemble) ---")
# Per the original author, this helper trains 3 different models and returns them in a list.
models_E = training.create_direct_ensemble(
    X_train, y_train, X_val, y_val, DEVICE
)

# %%
print("✅ All model training complete.")

## 6. Model Evaluation

In [None]:
# Comparative evaluation of the four trained models under two retrieval protocols:
# full retrieval against the unique-image gallery and in-batch (paired) retrieval.
print("--- Avvio Valutazione Comparativa ---")

# 1. Create Wrappers for evaluation
wrapper_I = evaluation.MLPWrapper(model_I, DEVICE)
wrapper_C = evaluation.MLPWrapper(model_C, DEVICE)
wrapper_Modern = ensembling.DirectEnsembleWrapper(models_Modern, DEVICE)
wrapper_E = ensembling.DirectEnsembleWrapper(models_E, DEVICE)

# 2. Generate prediction embeddings (once)
print("\n→ Generating prediction embeddings...")
emb_I = wrapper_I.translate(val_text_embd)
emb_C = wrapper_C.translate(val_text_embd)
emb_Modern = wrapper_Modern.translate(val_text_embd)
emb_E = wrapper_E.translate(val_text_embd)

# 3. Normalize Ground Truth embeddings (once)
print("\n→ Normalizing ground truth embeddings...")
# Full gallery for N-vs-M retrieval (L2-normalized along dim 1)
gallery_emb_norm = F.normalize(val_img_embd_unique.float().to(DEVICE), p=2, dim=1)
# Paired gallery for N-vs-N retrieval
paired_emb_norm = F.normalize(y_val.float().to(DEVICE), p=2, dim=1)

# 4. Run Full Retrieval (N vs. M Gallery)
print("\n🔥 VALUTAZIONE FULL RETRIEVAL (CLIP-style, N vs M)")
metrics_I_full = evaluation.evaluate_retrieval_full(emb_I, gallery_emb_norm, val_label_gt, device=DEVICE)
metrics_C_full = evaluation.evaluate_retrieval_full(emb_C, gallery_emb_norm, val_label_gt, device=DEVICE)
metrics_Modern_full = evaluation.evaluate_retrieval_full(emb_Modern, gallery_emb_norm, val_label_gt, device=DEVICE)
metrics_E_full = evaluation.evaluate_retrieval_full(emb_E, gallery_emb_norm, val_label_gt, device=DEVICE)

# 5. Run In-Batch Retrieval (N vs. N)
# The wrappers' outputs go through torch.from_numpy, i.e. they are expected to be NumPy arrays.
print("\n🔥 VALUTAZIONE IN-BATCH (Competizione Style, N vs N)")
metrics_I_ib = evaluation.aml_inbatch_retrieval(torch.from_numpy(emb_I).to(DEVICE), paired_emb_norm)
metrics_C_ib = evaluation.aml_inbatch_retrieval(torch.from_numpy(emb_C).to(DEVICE), paired_emb_norm)
metrics_Modern_ib = evaluation.aml_inbatch_retrieval(torch.from_numpy(emb_Modern).to(DEVICE), paired_emb_norm)
metrics_E_ib = evaluation.aml_inbatch_retrieval(torch.from_numpy(emb_E).to(DEVICE), paired_emb_norm)

# 6. Print Report
print("\n\n" + "="*80)
print("          📊 RIEPILOGO FINALE: FULL vs IN-BATCH")
print("="*80)

def print_full_and_inbatch(name, full, ib):
    print(f"\nðŸ”µ {name}")
    print("- FULL RETRIEVAL (N vs M Gallery)")
    print(f"  Recall@1:   {full.get('recall@1', 0):.4f}")
    print(f"  Recall@5:   {full.get('recall@5', 0):.4f}")
    print(f"  MRR:        {full.get('mrr', 0):.4f}")
    print("- IN-BATCH RETRIEVAL (N vs N)")
    print(f"  Recall@1:   {ib.get('r1', 0):.4f}")
    print(f"  Recall@5:   {ib.get('r5', 0):.4f}")
    print(f"  MRR:        {ib.get('mrr', 0):.4f}")

# Emit one report section per model, in the same order as they were trained.
_report_rows = (
    ("Modello I (ResidualMLP)", metrics_I_full, metrics_I_ib),
    ("Modello C (SwiGLU)", metrics_C_full, metrics_C_ib),
    ("Modello Modern (Ensemble)", metrics_Modern_full, metrics_Modern_ib),
    ("Modello E (Diverse Ensemble)", metrics_E_full, metrics_E_ib),
)
for _title, _full_metrics, _inbatch_metrics in _report_rows:
    print_full_and_inbatch(_title, _full_metrics, _inbatch_metrics)

## 7. Generate Submission

In [None]:
# Generate the submission CSV with the selected direct model.
print("--- Generazione Submission ---")

# --- CHOOSE YOUR WINNING MODEL ---
# The original author reports that the Diverse Ensemble (E) performed best.
# NOTE(review): this choice and the label/filename below are hard-coded; re-check
# them against the evaluation report before submitting.
wrapper_submission = wrapper_E
# ---------------------------------

print("Using model: Diverse Ensemble (E)")

# 1. Load test data
print("Loading test data...")
test_data = baseline_utils.load_data(config.TEST_DATA_PATH)
X_test_np = test_data['captions/embeddings']
test_ids = test_data['captions/ids']
print(f"Test data loaded: {len(X_test_np)} samples.")

# 2. Generate predictions
print("Generating predictions on the test set...")
# Per the original author, the wrapper handles normalization internally.
y_test_pred = wrapper_submission.translate(X_test_np, batch_size=512)

# 3. Save submission file
submission_filename = "submission_direct_ensemble_E.csv"
baseline_utils.generate_submission(test_ids, y_test_pred, submission_filename)

print(f"\n✅ Submission salvata: {submission_filename}")