# K-Fold Ensemble Evaluation Notebook

This notebook trains the K-Fold models and IRP translators in-notebook (they can also be produced by `scripts/train.py`), then loads them to:
1.  Instantiate an `EnsembleWrapper`.
2.  Evaluate the ensemble's performance on a hold-out validation set.
3.  Generate a final `submission.csv` using the ensemble.

In [None]:
import sys

# Root of the attached Kaggle bundle; presumably where the `src` package
# imported further down lives (the bundle contents are not visible here).
PROJECT_ROOT = "/kaggle/input/aml-irp/pytorch/default/9/AML-Competition-Notebook"

# Guard against duplicates so re-running this cell does not keep growing sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
!pip install gdown
folder_id = "1N7KO7zFjJ8PvtwABlRW7Ry5QsBlafTy8"
!gdown --folder $folder_id -O ./checkpoints

In [None]:
!pip install -r /kaggle/input/aml-irp/pytorch/default/6/AML-Competition-Notebook/requirements.txt

In [None]:
# %%
# Add src directory to Python path
import sys
import os
import gc
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import pandas as pd
import joblib

# The notebook is assumed to sit in the project's 'notebooks' directory, so the
# project root is the parent of the current working directory. The path is
# resolved from the working directory, then normalised to an absolute path.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

In [None]:
# %%
# --- 1. Import Modules ---
from src.irp_refiner import config
from src.irp_refiner.utils import set_seed
from src.irp_refiner.data_processing import load_and_clean_data
from src.irp_refiner.models.irp import IRPTranslator
from src.irp_refiner.models.mlp import ResidualMLP
from src.irp_refiner.ensembling import EnsembleWrapper
from src.irp_refiner.evaluation import evaluate_retrieval
from src.irp_refiner.baseline_utils import load_data, generate_submission
from src.irp_refiner.training import train_model


In [None]:
# %%
# --- 2. Setup ---
# Resolve the compute device once; later cells refer to DEVICE directly.
DEVICE = config.DEVICE

# Seed the run. The value returned by set_seed is kept because the training
# DataLoader receives it as its worker_init_fn argument.
worker_init_fn = set_seed(config.SEED)

print(f"Using device: {DEVICE}")

In [None]:
# --- 3. K-Fold Training ---
# For every fold this cell fits an IRPTranslator on randomly chosen anchor
# pairs, translates the fold's train/val inputs with it, and trains a
# ResidualMLP on the translated inputs. Two checkpoints are written per fold,
# both prefixed with config.CHECKPOINT_DIR:
#   irp_translator_fold_{fold}.pkl  - joblib dump of the fitted translator
#   mlp_fold_{fold}.pth             - passed to train_model as save_path

# 1. Load and Clean Data
X_train_np_cleaned, Y_train_np_cleaned = load_and_clean_data(
    config.TRAIN_DATA_PATH, config.NOISE_THRESHOLD
)

# Make sure the directory used by the checkpoint paths below exists.
# exist_ok=True keeps the cell re-runnable (a plain `mkdir` errors when the
# directory is already there) and this follows config.CHECKPOINT_DIR instead
# of a hard-coded "checkpoints" name.
if config.CHECKPOINT_DIR:
    os.makedirs(config.CHECKPOINT_DIR, exist_ok=True)

# 2. Initialize KFold
kf = KFold(n_splits=config.K_FOLDS, shuffle=True, random_state=config.SEED)

# 3. K-Fold Training Loop
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_np_cleaned)):
    # "\n" (not "\\n") so a real blank line precedes the banner.
    print("\n" + "="*80)
    print(f"=============== FOLD {fold+1}/{config.K_FOLDS} ===============")
    print("="*80)

    # --- Split data for this fold ---
    X_train_fold, X_val_fold = X_train_np_cleaned[train_idx], X_train_np_cleaned[val_idx]
    Y_train_fold, Y_val_fold = Y_train_np_cleaned[train_idx], Y_train_np_cleaned[val_idx]

    # --- IRP Stage ---
    print(f"--- FOLD {fold+1}: IRP Stage ---")
    # Anchors are drawn without replacement from this fold's training rows only.
    # np.random.choice uses NumPy's global RNG state.
    # NOTE(review): presumably seeded by set_seed in the setup cell — confirm.
    anchor_indices = np.random.choice(len(X_train_fold), config.K_ANCHORS, replace=False)
    X_anchor = X_train_fold[anchor_indices]
    Y_anchor = Y_train_fold[anchor_indices]

    # The scalers are fitted on the anchors only and handed to the translator.
    scaler_X = StandardScaler().fit(X_anchor)
    scaler_Y = StandardScaler().fit(Y_anchor)

    irp_translator_fold = IRPTranslator(
        scaler_X, scaler_Y,
        omega=config.IRP_OMEGA, delta=config.IRP_DELTA,
        ridge=config.IRP_RIDGE, verbose=False
    )
    irp_translator_fold.fit(X_anchor, Y_anchor)
    print(f"   ✓ IRP translator for fold {fold+1} fitted.")

    irp_path = f"{config.CHECKPOINT_DIR}irp_translator_fold_{fold}.pkl"
    joblib.dump(irp_translator_fold, irp_path)
    print(f"   ✓ IRP translator saved to {irp_path}")

    X_train_IRP_fold = torch.from_numpy(irp_translator_fold.translate(X_train_fold)).float()
    X_val_IRP_fold = torch.from_numpy(irp_translator_fold.translate(X_val_fold)).float()
    print(f"   ✓ Train and Val data transformed for fold {fold+1}.")

    # --- DataLoader Stage ---
    train_ds_fold = TensorDataset(X_train_IRP_fold, torch.from_numpy(Y_train_fold).float())
    val_ds_fold = TensorDataset(X_val_IRP_fold, torch.from_numpy(Y_val_fold).float())

    train_loader_fold = DataLoader(train_ds_fold, batch_size=config.BATCH_SIZE, shuffle=True, worker_init_fn=worker_init_fn)
    val_loader_fold = DataLoader(val_ds_fold, batch_size=config.BATCH_SIZE, shuffle=False)

    # --- Model Training Stage ---
    print(f"--- FOLD {fold+1}: MLP Refiner Training Stage ---")
    model_fold = ResidualMLP(
        input_dim=config.D_X, output_dim=config.D_Y, hidden_dim=config.HIDDEN_DIM,
        num_hidden_layers=config.NUM_HIDDEN_LAYERS, dropout_p=config.DROPOUT_P
    ).to(config.DEVICE)

    model_path_fold = f"{config.CHECKPOINT_DIR}mlp_fold_{fold}.pth"

    train_model(
        model_fold, train_loader_fold, val_loader_fold, config.DEVICE,
        epochs=config.EPOCHS, lr=config.LR, save_path=model_path_fold,
        patience=config.EARLY_STOP_PATIENCE, min_delta=config.MIN_IMPROVEMENT_DELTA,
        resume=False
    )

    # --- Clean up memory ---
    # The TensorDatasets hold references to the translated tensors, so they have
    # to be dropped too; otherwise deleting the tensors' names frees nothing.
    del model_fold, train_loader_fold, val_loader_fold
    del train_ds_fold, val_ds_fold, X_train_IRP_fold, X_val_IRP_fold
    del irp_translator_fold
    gc.collect()
    torch.cuda.empty_cache()

print("\n" + "="*80)
print("K-Fold Training Complete. All models saved.")
print("="*80)

In [None]:
# %%
# --- 4. Load the K-Fold Ensemble ---
# Builds the per-fold checkpoint paths (same naming as the training cell) and
# instantiates the EnsembleWrapper only when every one of them is on disk.
# On failure `ensemble_wrapper` is set to None so the later cells can skip.
print("Loading K-Fold models and IRP translators...")

model_paths = [f"{config.CHECKPOINT_DIR}mlp_fold_{f}.pth" for f in range(config.K_FOLDS)]
irp_paths = [f"{config.CHECKPOINT_DIR}irp_translator_fold_{f}.pkl" for f in range(config.K_FOLDS)]

# Check every model AND translator file, not just the first model: a partially
# finished training run would otherwise only fail inside EnsembleWrapper.
missing_paths = [path for path in model_paths + irp_paths if not os.path.exists(path)]

if missing_paths:
    print("="*80)
    for missing_path in missing_paths:
        print(f"ERROR: Checkpoint file not found at {missing_path}")
    print("Please run the K-Fold training cell above (or 'python scripts/train.py') to train the K-Fold models before running this cell.")
    print("="*80)
    ensemble_wrapper = None
else:
    ensemble_wrapper = EnsembleWrapper(model_paths, irp_paths, DEVICE)


In [None]:
# %%
# --- 5. Evaluate Ensemble on Validation Set ---
# Fraction of the cleaned training pairs used as the evaluation split.
# NOTE(review): 0.1 is an assumption — align with the project's convention.
HOLDOUT_FRACTION = 0.1

if ensemble_wrapper:
    # X_val_np / Y_val_np were never defined anywhere in this notebook, so this
    # cell raised NameError on a fresh kernel. Build the split here so the cell
    # is self-contained (it also works when the training cell was skipped and
    # the checkpoints were downloaded instead).
    #
    # CAVEAT: the training cell runs KFold over the *full* cleaned dataset, so
    # each of these samples was in the training portion of all but one fold
    # model. The numbers below are therefore optimistic; for an unbiased
    # estimate the split has to be carved out before K-Fold training.
    X_all_np, Y_all_np = load_and_clean_data(
        config.TRAIN_DATA_PATH, config.NOISE_THRESHOLD
    )
    _, X_val_np, _, Y_val_np = train_test_split(
        X_all_np, Y_all_np,
        test_size=HOLDOUT_FRACTION, random_state=config.SEED
    )
    del X_all_np, Y_all_np

    print("\nGenerating ensemble predictions for the validation set...")

    # IMPORTANT: We pass the RAW (non-IRP) validation data.
    # The EnsembleWrapper handles the IRP step for each model internally.
    y_val_pred_ensemble = ensemble_wrapper.translate(X_val_np)

    print("Predictions generated. Running evaluation...")

    # Prepare ground truth: row i of the predictions is matched to row i of Y_val_np.
    gt_indices_val = np.arange(len(Y_val_np))

    # Run evaluation
    results = evaluate_retrieval(
        y_val_pred_ensemble,
        Y_val_np,
        gt_indices_val,
        batch_size=config.BATCH_SIZE
    )

    print("\n--- ENSEMBLE EVALUATION RESULTS (on hold-out set) ---")
    for metric, value in results.items():
        # Recall-style metrics are shown as percentages, everything else as a float.
        if 'recall' in metric:
            print(f"  {metric}: {value:.2%}")
        else:
            print(f"  {metric}: {value:.4f}")
    print("-------------------------------------------------------")
else:
    print("Skipping evaluation because ensemble was not loaded.")


## 6. Generate Ensemble Submission File

Now we use the loaded `ensemble_wrapper` to process the actual test data and generate a submission file.


In [None]:
# %%
# Runs the test-set captions through the ensemble and writes the submission
# CSV. Nothing is produced when the ensemble failed to load earlier.
if not ensemble_wrapper:
    print("Skipping submission generation because ensemble was not loaded.")
else:
    print("\n--- 6. Generating Ensemble Submission ---")

    # Step 1: read the test split and pull out the raw caption embeddings.
    print("Loading test data...")
    test_data = load_data(config.TEST_DATA_PATH)
    test_embds_raw_np = test_data['captions/embeddings']
    print(f"Test data loaded: {len(test_embds_raw_np)} samples.")

    # Step 2: run the raw embeddings through the ensemble wrapper.
    print("Applying ensemble pipeline to test data... (this may take a moment)")
    pred_embds_ensemble = ensemble_wrapper.translate(test_embds_raw_np)
    print("Ensemble predictions generated.")

    # Step 3: write caption ids + predicted embeddings to the submission file.
    submission_filename = 'submission_Ensemble_Notebook.csv'
    generate_submission(
        test_data['captions/ids'],
        pred_embds_ensemble,
        submission_filename,
    )

    banner = "="*50
    print("\n" + banner)
    print(f"✅ Submission file '{submission_filename}' generated.")
    print(banner)