In [1]:

# Used as the last path component of InfCFG.MODEL_DIR when running outside
# Kaggle, i.e. it selects which "GRUModel/<method>" checkpoint folder is loaded.
# NOTE(review): presumably the vote-preparation scheme the checkpoints were
# trained with — confirm against the training notebook.
DATA_PREPARATION_VOTE_METHOD = "sum_and_normalize" 

import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
import sys

# Put the directory that holds the project's `src` package (imported right
# below) at the front of the module search path. On Kaggle (detected through
# the KAGGLE_URL_BASE environment variable) that is the attached input
# dataset; otherwise it is the directory three levels above the current
# working directory.
sys.path.insert(
    0,
    "/kaggle/input/hsm-source-files"
    if os.environ.get("KAGGLE_URL_BASE", "")
    else os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir)),
)

from src.utils.utils import get_models_save_path, get_submission_csv_path, running_in_kaggle, get_raw_data_dir
from src.utils.constants import Constants
from src.datasets.eeg_dataset import EEGDataset
from src.models.gru import GRUModel


2025-10-10 16:27:56,661 :: root :: INFO :: Initialising Utils
2025-10-10 16:27:57,143 :: root :: INFO :: Initialising Datasets
2025-10-10 16:27:57,147 :: root :: INFO :: Initialising Models


In [2]:
# Directory the raw data is read from: run_inference() loads 'test.csv' from
# here and hands the same path to EEGDataset.
DATA_PATH = get_raw_data_dir()
class InfCFG:
    """Static configuration for the GRU inference run (attributes only, never instantiated here)."""

    model_name = 'GRU'

    # Architecture arguments forwarded to GRUModel; they have to match the
    # saved checkpoints for load_state_dict to succeed.
    hidden_units = 128   # -> GRUModel(hidden_size=...)
    num_layers = 2       # -> GRUModel(num_layers=...)
    target_size = 6      # -> GRUModel(num_classes=...)
    num_channels = 20    # -> GRUModel(input_size=...)

    # Where test.csv is read from, and how many fold checkpoints are expected.
    data_path = DATA_PATH
    n_splits = 5

    # DataLoader settings.
    batch_size = 32
    num_workers = 0

    # Folder holding best_model_fold{i}.pth: a fixed Kaggle input dataset when
    # running on Kaggle, otherwise the local models directory for this
    # model / vote-method combination.
    MODEL_DIR = (
        '/kaggle/input/gru-sum-votes/'
        if running_in_kaggle()
        else get_models_save_path() / "GRUModel" / DATA_PREPARATION_VOTE_METHOD
    )

# One checkpoint path per fold, attached to the config after the class body
# (a comprehension inside the class body could not see MODEL_DIR / n_splits).
InfCFG.model_paths = [
    os.path.join(InfCFG.MODEL_DIR, f'best_model_fold{fold_idx}.pth')
    for fold_idx in range(InfCFG.n_splits)
]

# Column names the averaged predictions are written to in the submission.
TARGETS = Constants.TARGETS




In [3]:
# Display the resolved data directory as a quick sanity check.
DATA_PATH

PosixPath('/home/maiko/Documents/HSLU/AICOMP/HSLU.AICOMP.HMS/data')

In [4]:
def _predict_probabilities(model, loader, device, desc):
    """Return the softmax of `model`'s outputs for every batch in `loader`.

    Args:
        model: Module already moved to `device` and switched to eval mode.
        loader: Iterable yielding one batch of input signals per step.
        device: Device each batch is moved to before the forward pass.
        desc: Label shown on the tqdm progress bar.

    Returns:
        One NumPy array with the per-batch probabilities concatenated along
        the first axis, in loader order.
    """
    batch_probs = []
    # Inference only, so no autograd graph is recorded.
    with torch.no_grad():
        for signals in tqdm(loader, desc=desc):
            outputs = model(signals.to(device))
            # Normalise across dim=1 (assumes outputs are (batch, num_classes)
            # — TODO confirm against GRUModel.forward).
            batch_probs.append(F.softmax(outputs, dim=1).cpu().numpy())
    return np.concatenate(batch_probs)


def run_inference():
    """Predict with every available fold checkpoint and write the averaged submission.

    Steps:
      1. Read ``test.csv`` from ``InfCFG.data_path`` and wrap it in an
         ``EEGDataset`` (``mode='test'``) served by a non-shuffling DataLoader,
         so prediction rows stay aligned with ``test_df``.
      2. For each path in ``InfCFG.model_paths``: build a ``GRUModel`` from the
         ``InfCFG`` hyper-parameters, load the checkpoint and collect softmax
         probabilities for the whole test set. Checkpoints missing on disk are
         reported and skipped.
      3. Average the probabilities over the folds that were actually loaded,
         write them under the ``TARGETS`` columns next to ``eeg_id`` to
         ``get_submission_csv_path()``, and print the first rows.

    Returns:
        None. If no checkpoint was found, a message is printed and no file is
        written.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    test_df = pd.read_csv(os.path.join(InfCFG.data_path, 'test.csv'))

    test_dataset = EEGDataset(df=test_df, data_path=InfCFG.data_path, mode='test')
    test_loader = DataLoader(
        test_dataset,
        batch_size=InfCFG.batch_size,
        shuffle=False,  # keep prediction order identical to test_df row order
        num_workers=InfCFG.num_workers
    )

    all_fold_predictions = []

    for i, path in enumerate(InfCFG.model_paths):
        print(f"\n========== Inferencing with Fold {i} Model ==========")
        if not os.path.exists(path):
            print(f"Model file not found: {path}. Skipping this fold.")
            continue

        model = GRUModel(
            input_size=InfCFG.num_channels,
            hidden_size=InfCFG.hidden_units,
            num_layers=InfCFG.num_layers,
            num_classes=InfCFG.target_size
        )
        # Only a state dict is needed, so restrict unpickling to tensors and
        # plain containers instead of executing arbitrary pickled objects.
        state_dict = torch.load(path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()

        all_fold_predictions.append(
            _predict_probabilities(model, test_loader, device, desc=f"Predicting Fold {i}")
        )

    if not all_fold_predictions:
        print("No models were found for inference. Aborting.")
        return

    # Element-wise mean over the fold axis -> one probability row per test sample.
    avg_predictions = np.mean(all_fold_predictions, axis=0)

    submission = pd.DataFrame({"eeg_id": test_df["eeg_id"]})
    submission[TARGETS] = avg_predictions
    submission.to_csv(get_submission_csv_path(), index=False)

    print(submission.head())

# In a notebook kernel __name__ is '__main__', so inference runs when this
# cell is executed; the guard only prevents it from firing if this code is
# imported as a module.
if __name__ == '__main__':
    run_inference()

Using device: cuda



Predicting Fold 0:   0%|          | 0/1 [00:00<?, ?it/s]




Predicting Fold 1:   0%|          | 0/1 [00:00<?, ?it/s]




Predicting Fold 2:   0%|          | 0/1 [00:00<?, ?it/s]




Predicting Fold 3:   0%|          | 0/1 [00:00<?, ?it/s]




Predicting Fold 4:   0%|          | 0/1 [00:00<?, ?it/s]

       eeg_id  seizure_vote  lpd_vote  gpd_vote  lrda_vote  grda_vote  \
0  3911565283      0.072843  0.082108  0.027828   0.028282   0.030526   

   other_vote  
0    0.758413  
