In [1]:
# Notebook configuration: everything a reader is expected to tune lives in this cell.
# Kaggle dataset id that holds the trained checkpoints; it is passed to get_models_save_path() to locate them.
EXISTING_CHECKPOINT_KAGGLE_DATASET_ID = "hsm-models"
DATA_PREPARATION_VOTE_METHOD = "max_vote_window" # "max_vote_window" or "sum_and_normalize". Decides how to aggregate the votes of the overlapping windows; also used as the checkpoint sub-directory name.
DATA_SOURCE = "cv" # "cv" or "test". On Kaggle, this is automatically set to "test"
WEIGHTS = [] # Pre-computed weights, one per entry of model_configs (same order). For CV, the weights are always computed automatically. For test, they must be provided here.

In [2]:
import os
import sys

from tqdm import tqdm
import torch

import numpy as np
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader
from scipy.optimize import minimize


# Make the project's `src` package importable before the `from src...` imports run.
# A non-empty KAGGLE_URL_BASE environment variable means we are running on Kaggle,
# where the sources are mounted as a dataset; locally the repository root is three
# directories above the current working directory.
sys.path.insert(
  0,
  "/kaggle/input/hsm-source-files"
  if os.environ.get("KAGGLE_URL_BASE", "")
  else os.path.abspath(os.path.join(os.getcwd(), "..", "..", "..")),
)

from src.utils.utils import get_raw_data_dir, get_processed_data_dir, get_submission_csv_path, get_models_save_path, set_seeds, running_in_kaggle
from src.utils.constants import Constants

from src.utils.k_folds_creator import KFoldCreator
from src.datasets.multi_spectrogram import MultiSpectrogramDataset
from src.datasets.eeg_dataset_montage import EEGDatasetMontage
from src.models.base_cnn import BaseCNN
from src.models.gru_convolution_attention import NodeAttentionModel
from src.utils.eeg_spectrogram_creator import EEGSpectrogramGenerator
from src.datasets.eeg_processor import EEGDataProcessor

# Seed once, before any dataset/model object is created.
# NOTE(review): set_seeds is a project helper; presumably it seeds Python, NumPy and PyTorch — confirm in src.utils.utils.
set_seeds(Constants.SEED)

# On Kaggle the notebook always runs against the test set, so the value chosen in the
# configuration cell is overridden here.
if running_in_kaggle():
  DATA_SOURCE = "test"

2025-11-29 15:03:38,180 :: root :: INFO :: Initialising Utils
2025-11-29 15:03:38,951 :: root :: INFO :: Initialising Datasets
2025-11-29 15:03:39,992 :: root :: INFO :: Initialising Models


In [3]:
def create_eeg_spectrograms(eeg_spectrograms_path, raw_eegs_path, data_df):
  """Generate and cache one CWT spectrogram ``.npy`` file per unique EEG id.

  Only ids whose ``<eeg_id>.npy`` file is missing from ``eeg_spectrograms_path``
  are generated. Existence is checked per id rather than by comparing file
  counts, because a count is fooled by stale files of unrelated ids and forces
  a full regeneration when a single file is missing.

  Args:
    eeg_spectrograms_path: Output directory (``str`` or path-like); created if absent.
    raw_eegs_path: Directory containing the raw ``<eeg_id>.parquet`` files.
    data_df: DataFrame with an ``eeg_id`` column listing the recordings needed.

  Returns:
    None. Spectrograms are written to disk as a side effect.
  """
  os.makedirs(eeg_spectrograms_path, exist_ok=True)

  eeg_ids = data_df["eeg_id"].unique()
  missing_ids = [
    eeg_id
    for eeg_id in eeg_ids
    if not os.path.exists(os.path.join(eeg_spectrograms_path, f"{eeg_id}.npy"))
  ]

  if not missing_ids:
    print("EEG Spectrograms already created.")
    return

  spectrogram_creator = EEGSpectrogramGenerator(["cwt"])
  for eeg_id in tqdm(missing_ids, desc="Generating EEG Spectrograms"):
    eeg_path = os.path.join(raw_eegs_path, f"{eeg_id}.parquet")
    eeg = pd.read_parquet(eeg_path)
    spectrograms = spectrogram_creator.generate(eeg)
    # os.path.join (instead of the `/` operator) keeps plain-string directories working too.
    np.save(os.path.join(eeg_spectrograms_path, f"{eeg_id}.npy"), spectrograms['cwt'])

In [4]:
# Resolve the raw-data root once; both branches and the dataset factories read it.
DATA_PATH = get_raw_data_dir()

if DATA_SOURCE == "cv":
  # CV mode: rebuild the processed training frame with the configured vote
  # aggregation, then assign every row to one of 5 folds, stratified on
  # `expert_consensus` and grouped by `patient_id`.
  EEG_SPECT_PATH = get_processed_data_dir() / "eeg_spectrograms" / "train" / "cwt"
  processor = EEGDataProcessor(raw_data_path=DATA_PATH, processed_data_path=get_processed_data_dir())
  data_df = processor.process_data(vote_method=DATA_PREPARATION_VOTE_METHOD, skip_parquet=True)

  # n_splits must stay in sync with the 5 fold checkpoints the inference functions look for.
  fold_creator = KFoldCreator(n_splits=5, seed=Constants.SEED)
  data_df = fold_creator.create_folds(data_df, stratify_col='expert_consensus', group_col='patient_id')
else:
  # Test mode: read the test manifest and make sure a CWT spectrogram exists for every EEG in it.
  EEG_SPECT_PATH = get_processed_data_dir() / "eeg_spectrograms" / "test" / "cwt"
  data_df = pd.read_csv(DATA_PATH / "test.csv")
  create_eeg_spectrograms(EEG_SPECT_PATH, DATA_PATH / "test_eegs", data_df)

# Last expression of the cell: show a preview of the frame.
data_df.head()

Processor initialized.
Raw data path: '/home/david/git/aicomp/data'
Processed data path: '/home/david/git/aicomp/data/processed'
Starting EEG Data Processing Pipeline
Skipping Parquet file creation as requested.
Using 'max_vote_window' vote aggregation strategy.

Processed train data saved to '/home/david/git/aicomp/data/processed/train_processed.csv'.
Shape of the final dataframe: (17089, 12)

Pipeline finished successfully!


Unnamed: 0,eeg_id,spectrogram_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,min_offset,max_offset,fold
0,568657,789577333,20654,Other,0.0,0.0,0.25,0.0,0.166667,0.583333,0.0,16.0,1
1,582999,1552638400,20230,LPD,0.0,0.857143,0.0,0.071429,0.0,0.071429,0.0,38.0,1
2,642382,14960202,5955,Other,0.0,0.0,0.0,0.0,0.0,1.0,1008.0,1032.0,1
3,751790,618728447,38549,GPD,0.0,0.0,1.0,0.0,0.0,0.0,908.0,908.0,1
4,778705,52296320,40955,Other,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,4


In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [6]:
# Settings for the spectrogram CNN ensemble member. `batch_size` / `num_workers`
# are read by the inference DataLoaders, `img_size` by the dataset factory in
# `model_configs`, and the remaining entries by the model constructor below.
multi_spect_config = dict(
  batch_size=64,
  num_workers=8,
  pretrained_model_name="inception_v3",
  target_size=6,
  img_size=(128, 256),
  dropout_p=0.1,
  image_alignment="stacked",
  augmentations=[],
)

multi_spect_model = BaseCNN(
  multi_spect_config["pretrained_model_name"],
  num_classes=multi_spect_config["target_size"],
  dropout_p=multi_spect_config["dropout_p"],
  image_alignment=multi_spect_config["image_alignment"],
  # No pretrained weights requested: the inference functions load each fold's checkpoint into this instance.
  pretrained=False,
)

In [7]:
# Settings for the GRU/convolution montage ensemble member. `batch_size` /
# `num_workers` are read by the inference DataLoaders, `downsample_factor` by
# the dataset factory in `model_configs`, the rest by the model constructor.
gru_conv_montage_config = dict(
  batch_size=32,
  num_workers=8,
  num_nodes=19,
  node_embed_size=256,
  hidden_size=256,
  num_layers=1,
  target_size=6,
  num_cnn_blocks=3,
  dropout=0.4,
  downsample_factor=1,
  use_inception=True,
  augmentations=[],
)

# Every constructor argument except `num_classes` shares its name with a config
# key, so those are forwarded by name; `num_classes` comes from `target_size`.
gru_conv_montage_model = NodeAttentionModel(
  num_classes=gru_conv_montage_config["target_size"],
  **{
    option: gru_conv_montage_config[option]
    for option in (
      "num_nodes",
      "node_embed_size",
      "hidden_size",
      "num_layers",
      "num_cnn_blocks",
      "dropout",
      "use_inception",
    )
  },
)

In [8]:
# Registry of ensemble members, consumed in this order by the inference functions.
# Each entry provides:
#   identifier            - label used in progress/log output
#   config                - the member's settings dict (batch_size, num_workers, augmentations, ...)
#   model                 - the model instance that each fold checkpoint is loaded into
#   model_checkpoints_dir - directory containing best_model_fold{i}.pth
#   autocast_enabled      - whether inference runs under torch.autocast
#   dataset_creator       - factory (df, mode, augmentations) -> dataset
# The lambdas look up DATA_PATH / EEG_SPECT_PATH when they are called, not when
# this cell runs, so they always see the current module-level values.
model_configs = [
  {
    "identifier": "multi-spect-cnn",
    "config": multi_spect_config,
    "model": multi_spect_model,
    "model_checkpoints_dir": get_models_save_path(EXISTING_CHECKPOINT_KAGGLE_DATASET_ID) / "multi_spec_cnn" / "inception_v3" / DATA_PREPARATION_VOTE_METHOD,
    "autocast_enabled": False,
    "dataset_creator": lambda df, mode, augmentations: MultiSpectrogramDataset(
      df=df,
      targets=Constants.TARGETS,
      data_path=DATA_PATH,
      img_size=multi_spect_config["img_size"],
      eeg_spec_path=EEG_SPECT_PATH,
      mode=mode,
      apply_augmentations=augmentations
    )
  },
  {
    "identifier": "gru_conv_montage",
    "config": gru_conv_montage_config,
    "model": gru_conv_montage_model,
    "model_checkpoints_dir": get_models_save_path(EXISTING_CHECKPOINT_KAGGLE_DATASET_ID) / "GRUConvModel" / DATA_PREPARATION_VOTE_METHOD,
    # Mixed precision is only switched on when running on a CUDA device.
    "autocast_enabled": device.type == 'cuda',
    "dataset_creator": lambda df, mode, augmentations: EEGDatasetMontage(
      df=df,
      data_path=DATA_PATH,
      mode=mode,
      downsample_factor=gru_conv_montage_config["downsample_factor"],
      augmentations=augmentations
    )
  }
]

In [9]:
def get_ensemble_predictions(all_model_predictions, weights):
    """Return the weighted average of several models' prediction arrays.

    Args:
        all_model_predictions: Sequence of equally shaped array-likes, one per model.
        weights: One non-negative weight per model. They are normalized to sum
            to 1 here, so only their ratios matter. If they sum to 0 the
            normalization divides by zero and the result is NaN.

    Returns:
        np.ndarray shaped like a single model's predictions. Floating-point
        inputs keep their dtype; any other dtype (e.g. integers) yields float64.

    Raises:
        ValueError: If no predictions are given or the number of weights does
            not match the number of models.
    """
    if len(all_model_predictions) == 0:
        raise ValueError("At least one model's predictions are required")

    weights = np.asarray(weights, dtype=np.float64)
    if weights.ndim != 1 or weights.shape[0] != len(all_model_predictions):
        raise ValueError(
            f"Expected {len(all_model_predictions)} weights (one per model), got shape {weights.shape}"
        )
    weights = weights / np.sum(weights)  # normalize weights to sum to 1

    first = np.asarray(all_model_predictions[0])
    result_dtype = first.dtype if np.issubdtype(first.dtype, np.floating) else np.float64

    # Accumulate in float64: an accumulator with an integer dtype would reject
    # the in-place addition of float-weighted predictions.
    ensemble_pred = np.zeros(first.shape, dtype=np.float64)
    for weight, preds in zip(weights, all_model_predictions):
        ensemble_pred += weight * np.asarray(preds)
    return ensemble_pred.astype(result_dtype, copy=False)

In [10]:
def calculate_kl_score(true_labels, pred_labels):
  """Return the batch-mean KL divergence of predicted vs. true distributions.

  Args:
    true_labels: Tensor of target probabilities.
    pred_labels: Tensor of predicted probabilities (not log-probabilities).

  Returns:
    The divergence as a Python float.
  """
  # A small epsilon keeps the logarithm finite for zero-probability predictions.
  log_predictions = (pred_labels + 1e-8).log()
  divergence = F.kl_div(log_predictions, true_labels, reduction='batchmean')
  return divergence.item()

In [52]:
def optimize_ensemble_weights(all_model_predictions, true_labels, method, max_iterations):
    """Find ensemble weights that minimize the KL divergence to the true labels.

    The search starts from equal weights and uses ``scipy.optimize.minimize``
    with every weight bounded to [0, 1]. No gradient is supplied, so
    gradient-based methods estimate it by finite differences.

    Args:
        all_model_predictions: List of per-model prediction arrays of identical shape.
        true_labels: Array-like of target probabilities with that same shape.
        method: Solver name forwarded to ``scipy.optimize.minimize``.
        max_iterations: Forwarded as the solver's ``maxiter`` option.

    Returns:
        tuple: ``(optimal_weights, best_score)`` - weights normalized to sum to
        1 and the KL score reached with them.
    """
    n_models = len(all_model_predictions)

    # The labels never change between evaluations, so convert them once.
    # float64 is deliberate: the finite-difference probes move a weight by
    # roughly 1e-8, a change that float32 rounds away. In float32 the estimated
    # gradient is exactly zero and the solver stops at the starting point.
    true_tensor = torch.tensor(true_labels, dtype=torch.float64)

    def objective(weights):
        # Weighted average of predictions
        ensemble_pred = get_ensemble_predictions(all_model_predictions, weights)

        # Calculate KL divergence
        pred_tensor = torch.tensor(ensemble_pred, dtype=torch.float64)
        return calculate_kl_score(true_tensor, pred_tensor)

    # Initial weights (equal for all models)
    initial_weights = np.ones(n_models) / n_models
    initial_score = objective(initial_weights)

    # Bounds: each weight between 0 and 1
    bounds = [(0, 1) for _ in range(n_models)]

    # Optimize
    print(f"Optimizing weights for {n_models} models...")
    print(f"Initial equal weights: {initial_weights}")
    print(f"Initial KL score: {initial_score:.6f}")

    result = minimize(
        objective,
        initial_weights,
        method=method,
        bounds=bounds,
        options={"maxiter": max_iterations}
    )

    print(result)

    weight_total = result.x.sum()
    if np.isfinite(weight_total) and weight_total > 0:
        optimal_weights = result.x / weight_total  # Normalize weights to sum to 1
        best_score = result.fun
    else:
        # An all-zero or non-finite solution cannot be normalized; keep the equal-weight baseline.
        print("Optimizer returned weights that cannot be normalized; falling back to equal weights.")
        optimal_weights = initial_weights
        best_score = initial_score

    print(f"Optimized weights: {optimal_weights}")
    print(f"Optimized KL score: {best_score:.6f}")
    print(f"Improvement: {initial_score - best_score:.6f}")

    return optimal_weights, best_score

In [54]:
def grid_search_weights(all_model_predictions, true_labels, n_steps=500):
    """Brute force grid search over the blend weights of exactly 2 models.

    Evaluates the ``n_steps + 1`` weight pairs ``(i / n_steps, 1 - i / n_steps)``
    and keeps the pair with the lowest KL divergence to ``true_labels``.

    Args:
        all_model_predictions: List of exactly two prediction arrays of identical shape.
        true_labels: Array-like of target probabilities with that same shape.
        n_steps: Number of grid intervals; must be at least 1.

    Returns:
        tuple: ``(best_weights, best_score)``. ``best_weights`` is a length-2
        array, or ``None`` if no grid point produced a comparable score
        (e.g. every score was NaN).

    Raises:
        ValueError: If the number of models is not 2 or ``n_steps`` is below 1.
    """
    n_models = len(all_model_predictions)

    if n_models != 2:
        raise ValueError("Grid search only implemented for 2 models")
    if n_steps < 1:
        # Without this guard the first grid point would divide by zero.
        raise ValueError("n_steps must be at least 1")

    best_score = float('inf')
    best_weights = None

    # The labels are the same for every grid point, so convert them once.
    true_tensor = torch.tensor(true_labels, dtype=torch.float32)

    print(f"Grid searching with {n_steps} steps...")

    for i in range(n_steps + 1):
        w1 = i / n_steps
        w2 = 1 - w1
        weights = np.array([w1, w2])

        ensemble_pred = get_ensemble_predictions(all_model_predictions, weights)
        pred_tensor = torch.tensor(ensemble_pred, dtype=torch.float32)
        score = calculate_kl_score(true_tensor, pred_tensor)

        if score < best_score:
            best_score = score
            best_weights = weights
            print(f"New best - Weights: [{w1:.2f}, {w2:.2f}], Score: {score:.6f}")

    print(f"\nBest weights: {best_weights}")
    print(f"Best score: {best_score:.6f}")

    return best_weights, best_score

In [58]:
def run_ensemble_inference_cv():
  """Run out-of-fold (OOF) inference on the CV data and blend the models.

  For every entry of ``model_configs`` and every fold, the fold's checkpoint
  (``best_model_fold{i}.pth``) predicts only the rows of ``data_df`` assigned
  to that fold. The per-model OOF matrices are then blended with the weights
  found by ``grid_search_weights``.

  Reads the module-level ``data_df``, ``model_configs`` and ``device``.

  Returns:
    tuple: ``(submission, all_model_predictions)``. ``submission`` is a
    DataFrame with ``eeg_id`` plus one column per ``Constants.TARGETS`` holding
    the blended probabilities; ``all_model_predictions`` is a list with one
    ``(len(data_df), len(Constants.TARGETS))`` array per model.
  """
  n_folds = 5  # one checkpoint per fold; matches the 5 splits used to build data_df
  fold_of_row = data_df['fold'].to_numpy()
  all_model_predictions = []

  for config in model_configs:
    print(f"\n========== Loading {config['identifier']} ==========")

    model_checkpoints_dir = config["model_checkpoints_dir"]

    # Initialize array to store OOF predictions for this model
    model_oof_preds = np.zeros((len(data_df), len(Constants.TARGETS)))
    skipped_folds = []

    for fold_idx in range(n_folds):
      print(f"\n========== Inferencing with Fold {fold_idx} Model ==========")
      model_path = os.path.join(model_checkpoints_dir, f'best_model_fold{fold_idx}.pth')

      if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}. Skipping this fold.")
        skipped_folds.append(fold_idx)
        continue

      # Row *positions* of this fold's validation samples. Positions (not index
      # labels) are required because they address rows of the NumPy OOF matrix;
      # labels are only correct while data_df carries a default RangeIndex.
      valid_positions = np.flatnonzero(fold_of_row == fold_idx)
      if len(valid_positions) == 0:
        print(f"No rows assigned to fold {fold_idx}. Skipping this fold.")
        continue
      valid_df = data_df.iloc[valid_positions].reset_index(drop=True)

      # NOTE(review): mode='train' is kept from the original; presumably it makes the
      # dataset return (input, target) pairs, hence the tuple unwrapping below. Confirm
      # it does not enable any train-only randomness.
      fold_dataset = config['dataset_creator'](valid_df, mode='train', augmentations=config['config']['augmentations'])

      fold_loader = DataLoader(
        fold_dataset,
        batch_size=config['config']['batch_size'],
        shuffle=False,  # prediction order must match valid_positions
        num_workers=config['config']['num_workers']
      )

      # Load model. map_location lets checkpoints saved on a GPU load on a CPU-only machine.
      model = config['model']
      model.load_state_dict(torch.load(model_path, map_location=device))
      model.to(device)
      model.eval()

      fold_preds = []
      with torch.no_grad():
        for x in tqdm(fold_loader, desc=f"{config['identifier']} Fold {fold_idx}"):
          if isinstance(x, (list, tuple)):
            x = x[0]

          x = x.to(device)

          with torch.autocast(enabled=config["autocast_enabled"], device_type=device.type):
            outputs = model(x)
            probs = F.softmax(outputs, dim=1).cpu().numpy()
          fold_preds.append(probs)

      # Store predictions at the correct rows
      fold_preds = np.concatenate(fold_preds)
      model_oof_preds[valid_positions] = fold_preds

      print(f"Completed fold {fold_idx} for {config['identifier']}: {fold_preds.shape}")

    if skipped_folds:
      print(
        f"WARNING: no checkpoint for fold(s) {skipped_folds} of {config['identifier']}; "
        "their OOF rows stay all-zero and will distort the weight search and CV score."
      )

    all_model_predictions.append(model_oof_preds)
    print(f"Completed {config['identifier']}: {model_oof_preds.shape}")

  true_labels = data_df[Constants.TARGETS].values
  optimal_weights, _ = grid_search_weights(
    all_model_predictions,
    true_labels,
    n_steps=100
  )

  # Weighted average across all models, using the grid-searched weights
  print("\n========== Combining Model Predictions ==========")
  final_predictions = get_ensemble_predictions(all_model_predictions, weights=optimal_weights)

  submission = pd.DataFrame({"eeg_id": data_df["eeg_id"]})
  submission[Constants.TARGETS] = final_predictions

  return submission, all_model_predictions


def run_ensemble_inference_test(weights):
  """Run inference on the test data and write the blended submission CSV.

  Every model in ``model_configs`` predicts the full test set once per
  available fold checkpoint; the fold predictions are averaged per model and
  the models are then blended with ``weights``.

  Reads the module-level ``data_df``, ``model_configs`` and ``device``.

  Args:
    weights: Required pre-computed ensemble weights, one per entry of
      ``model_configs`` in the same order.

  Returns:
    The submission DataFrame (``eeg_id`` plus one column per
    ``Constants.TARGETS``); it is also saved to ``get_submission_csv_path()``.

  Raises:
    ValueError: If ``weights`` is missing, empty, or its length differs from
      the number of models.
    FileNotFoundError: If a model has no fold checkpoint at all.
  """
  # Validate before any expensive inference. The previous assert used `or`,
  # which let an empty list through and raised TypeError for None.
  if weights is None or len(weights) == 0:
    raise ValueError("Weights must be provided for test inference")
  if len(weights) != len(model_configs):
    raise ValueError(
      f"Expected {len(model_configs)} weights (one per model), got {len(weights)}"
    )

  all_model_predictions = []

  for config in model_configs:
    print(f"\n========== Loading {config['identifier']} ==========")

    model_checkpoints_dir = config["model_checkpoints_dir"]
    model_paths = [os.path.join(model_checkpoints_dir, f'best_model_fold{i}.pth') for i in range(5)]

    dataset = config['dataset_creator'](data_df, mode='test', augmentations=config['config']['augmentations'])

    data_loader = DataLoader(
      dataset,
      batch_size=config['config']['batch_size'],
      shuffle=False,  # prediction order must match data_df
      num_workers=config['config']['num_workers']
    )

    # Get predictions from all folds for this model
    fold_predictions = []
    for i, path in enumerate(model_paths):
      print(f"\n========== Inferencing with Fold {i} Model ==========")
      if not os.path.exists(path):
        print(f"Model file not found: {path}. Skipping this fold.")
        continue

      # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
      model = config['model']
      model.load_state_dict(torch.load(path, map_location=device))
      model.to(device)
      model.eval()

      current_fold_preds = []
      with torch.no_grad():
        for x in tqdm(data_loader, desc=f"{config['identifier']} Fold {i}"):
          # Same unwrapping as the CV path, in case the dataset yields (input, ...) tuples.
          if isinstance(x, (list, tuple)):
            x = x[0]

          x = x.to(device)

          with torch.autocast(enabled=config["autocast_enabled"], device_type=device.type):
            outputs = model(x)
            probs = F.softmax(outputs, dim=1).cpu().numpy()
          current_fold_preds.append(probs)

      fold_predictions.append(np.concatenate(current_fold_preds))

    if not fold_predictions:
      # Averaging an empty list would only produce NaN and a confusing failure later.
      raise FileNotFoundError(
        f"No fold checkpoints found for {config['identifier']} in {model_checkpoints_dir}"
      )

    # Average across folds for this model
    model_avg = np.mean(fold_predictions, axis=0)
    all_model_predictions.append(model_avg)
    print(f"Completed {config['identifier']}: {model_avg.shape}")

  # Calculate weighted mean across all models
  print("\n========== Combining Model Predictions ==========")
  final_predictions = get_ensemble_predictions(all_model_predictions, weights)

  submission = pd.DataFrame({"eeg_id": data_df["eeg_id"]})
  submission[Constants.TARGETS] = final_predictions
  submission.to_csv(get_submission_csv_path(), index=False)

  return submission

In [59]:
# Entry point of the notebook. In CV mode the per-model out-of-fold predictions are
# kept as well, so ensemble weights can be re-examined in later cells; in test mode
# the weights from the configuration cell are used and the function writes the
# submission CSV itself.
if DATA_SOURCE == "cv":
  submission, all_model_predictions = run_ensemble_inference_cv()
else:
  submission = run_ensemble_inference_test(WEIGHTS)





multi-spect-cnn Fold 0: 100%|██████████| 64/64 [01:15<00:00,  1.17s/it]


Completed fold 0 for multi-spect-cnn: (4067, 6)



multi-spect-cnn Fold 1: 100%|██████████| 58/58 [01:09<00:00,  1.20s/it]


Completed fold 1 for multi-spect-cnn: (3658, 6)



multi-spect-cnn Fold 2: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


Completed fold 2 for multi-spect-cnn: (3381, 6)



multi-spect-cnn Fold 3: 100%|██████████| 42/42 [00:52<00:00,  1.24s/it]


Completed fold 3 for multi-spect-cnn: (2625, 6)



multi-spect-cnn Fold 4: 100%|██████████| 53/53 [01:08<00:00,  1.30s/it]


Completed fold 4 for multi-spect-cnn: (3358, 6)
Completed multi-spect-cnn: (17089, 6)




gru_conv_montage Fold 0: 100%|██████████| 128/128 [00:42<00:00,  3.04it/s]


Completed fold 0 for gru_conv_montage: (4067, 6)



gru_conv_montage Fold 1: 100%|██████████| 115/115 [00:29<00:00,  3.92it/s]


Completed fold 1 for gru_conv_montage: (3658, 6)



gru_conv_montage Fold 2: 100%|██████████| 106/106 [00:28<00:00,  3.68it/s]


Completed fold 2 for gru_conv_montage: (3381, 6)



gru_conv_montage Fold 3: 100%|██████████| 83/83 [00:27<00:00,  3.00it/s]


Completed fold 3 for gru_conv_montage: (2625, 6)



gru_conv_montage Fold 4: 100%|██████████| 105/105 [01:04<00:00,  1.64it/s]


Completed fold 4 for gru_conv_montage: (3358, 6)
Completed gru_conv_montage: (17089, 6)
Optimizing weights for 2 models...
Initial equal weights: [0.5 0.5]
Initial KL score: 0.551284


  result = minimize(
  result = minimize(


  message: CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL
  success: True
   status: 0
      fun: 0.5512837767601013
        x: [ 5.000e-01  5.000e-01]
      nit: 0
      jac: [ 0.000e+00  0.000e+00]
     nfev: 3
     njev: 1
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
Optimized weights: [0.5 0.5]
Optimized KL score: 0.551284
Improvement: 0.000000



In [60]:
# true_labels = data_df[Constants.TARGETS].values

# # optimal_weights, _ = optimize_ensemble_weights(
# #     all_model_predictions, 
# #     true_labels,
# #     method="SLSQP",
# #     max_iterations=100
# # )

# optimal_weights, _ = grid_search_weights(
#     all_model_predictions, 
#     true_labels,
#     n_steps=100
# )

Grid searching with 100 steps...
New best - Weights: [0.00, 1.00], Score: 0.588282
New best - Weights: [0.01, 0.99], Score: 0.583548
New best - Weights: [0.02, 0.98], Score: 0.579754
New best - Weights: [0.03, 0.97], Score: 0.576476
New best - Weights: [0.04, 0.96], Score: 0.573566
New best - Weights: [0.05, 0.95], Score: 0.570944
New best - Weights: [0.06, 0.94], Score: 0.568559
New best - Weights: [0.07, 0.93], Score: 0.566375
New best - Weights: [0.08, 0.92], Score: 0.564368
New best - Weights: [0.09, 0.91], Score: 0.562516
New best - Weights: [0.10, 0.90], Score: 0.560805
New best - Weights: [0.11, 0.89], Score: 0.559222
New best - Weights: [0.12, 0.88], Score: 0.557756
New best - Weights: [0.13, 0.87], Score: 0.556398
New best - Weights: [0.14, 0.86], Score: 0.555141
New best - Weights: [0.15, 0.85], Score: 0.553979
New best - Weights: [0.16, 0.84], Score: 0.552905
New best - Weights: [0.17, 0.83], Score: 0.551914
New best - Weights: [0.18, 0.82], Score: 0.551004
New best - Weight

In [24]:
# Score the blended predictions against the ground-truth vote distributions.
# Only meaningful for CV data, where the target columns are available.
if DATA_SOURCE == "cv":
  true_labels, pred_labels = (
    torch.tensor(frame[Constants.TARGETS].values, dtype=torch.float32)
    for frame in (data_df, submission)
  )
  kl_score = calculate_kl_score(true_labels, pred_labels)
  print(f"KL Divergence Score on CV Data: {kl_score}")

KL Divergence Score on CV Data: 0.5512837767601013
