# Setup Notebook and Repository

In [1]:
import os
import shutil

# Constants
# Folder the project's Python sources are copied from, and the folder they
# are copied into.
SRC_FOLDER = "/kaggle/input/repository24"
DST_FOLDER = "/kaggle/working"

# Output folders that are created before anything else runs (one per line).
DIRS_TO_CREATE = [
    '/kaggle/working/logs',
    '/kaggle/working/data/cache',
    '/kaggle/working/checkpoints/other_models',
    '/kaggle/working/checkpoints/best_models',
]

def copy_all_python_files(src, dst):
    """Mirror every ``.py`` file found under ``src`` into ``dst``.

    The directory layout of ``src`` is reproduced under ``dst``: every
    directory visited is created (even when it contains no Python files) and
    each ``.py`` file is copied to the same relative location, overwriting any
    file already present there. Progress is reported on stdout.

    Args:
        src (str): Root folder that is scanned recursively.
        dst (str): Root folder that receives the copies.

    Raises:
        FileNotFoundError: If ``src`` is not an existing directory.
    """
    # os.walk() yields nothing for a missing root, so without this check a
    # wrong/unattached source folder would pass silently and only surface
    # later as an unrelated import error.
    if not os.path.isdir(src):
        raise FileNotFoundError(f"Source folder does not exist: {src}")

    for root, _, files in os.walk(src):
        rel_dir = os.path.relpath(root, src)
        if rel_dir == os.curdir:
            # relpath() returns "." for the root itself; use dst directly so
            # paths do not end up looking like "<dst>/./file.py".
            dest_path = dst or os.curdir
        else:
            dest_path = os.path.join(dst, rel_dir)
        os.makedirs(dest_path, exist_ok=True)
        print(f"Created/checked directory: {dest_path}")

        for file in files:
            if not file.endswith(".py"):
                continue
            src_file = os.path.join(root, file)
            dst_file = os.path.join(dest_path, file)
            shutil.copyfile(src_file, dst_file)
            print(f"Copied {src_file} to {dst_file}")

def create_directories(dirs):
    """Make sure each path in ``dirs`` exists as a directory.

    Missing parent folders are created as needed and already existing
    directories are left untouched. One status line per path is printed.

    Args:
        dirs (iterable of str): Directory paths to create.
    """
    def _ensure(path):
        # exist_ok=True keeps repeated runs from failing on existing folders.
        os.makedirs(path, exist_ok=True)
        print(f"Created/checked directory: {path}")

    for path in dirs:
        _ensure(path)

# Execution
# Copy the .py files from SRC_FOLDER into DST_FOLDER, then create the
# output folders listed in DIRS_TO_CREATE.
copy_all_python_files(src=SRC_FOLDER, dst=DST_FOLDER)
create_directories(dirs=DIRS_TO_CREATE)

Created/checked directory: /kaggle/working/.
Copied /kaggle/input/repository24/generics.py to /kaggle/working/./generics.py
Created/checked directory: /kaggle/working/utils
Copied /kaggle/input/repository24/utils/inference_utils.py to /kaggle/working/utils/inference_utils.py
Copied /kaggle/input/repository24/utils/failure_utils.py to /kaggle/working/utils/failure_utils.py
Copied /kaggle/input/repository24/utils/mspca_utils.py to /kaggle/working/utils/mspca_utils.py
Copied /kaggle/input/repository24/utils/loader_utils.py to /kaggle/working/utils/loader_utils.py
Copied /kaggle/input/repository24/utils/ica_utils.py to /kaggle/working/utils/ica_utils.py
Copied /kaggle/input/repository24/utils/grid_search_utils.py to /kaggle/working/utils/grid_search_utils.py
Copied /kaggle/input/repository24/utils/signal_preprocessing_utils.py to /kaggle/working/utils/signal_preprocessing_utils.py
Copied /kaggle/input/repository24/utils/eeg_processing_utils.py to /kaggle/working/utils/eeg_processing_utils.py

In [2]:
import sys
# Name of the input dataset folder that holds the checkpoint to load.
best_checkpoints_folder_name = 'efficientnetb0config-big-weight-decay'


class Paths:
    """Filesystem locations used by this notebook, kept as class-level constants."""

    # Pre-loaded .npy inputs.
    PRE_LOADED_EEGS = "/kaggle/input/brain-eeg-spectrograms/eeg_specs.npy"
    PRE_LOADED_SPECTROGRAMS = "/kaggle/input/brain-spectrograms/specs.npy"

    # Files and folders of the hms-harmful-brain-activity-classification input.
    TRAIN_CSV = "/kaggle/input/hms-harmful-brain-activity-classification/train.csv"
    TEST_CSV = "/kaggle/input/hms-harmful-brain-activity-classification/test.csv"
    TEST_EEGS = "/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/"
    TRAIN_EEGS = "/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/"
    TRAIN_SPECTROGRAMS = "/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/"
    TEST_SPECTROGRAMS = "/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/"

    # Checkpoints: "other" models live in the working directory, the best
    # model is looked up in the input folder named above.
    OTHER_MODEL_CHECKPOINTS = "/kaggle/working/checkpoints/other_models/"
    BEST_MODEL_CHECKPOINTS = f"/kaggle/input/{best_checkpoints_folder_name}/"

    # Cache location; reads and writes share the same folder.
    CACHE_PATH_READ = "/kaggle/working/data/cache/"
    CACHE_PATH_WRITE = "/kaggle/working/data/cache/"

    # TensorBoard folders.
    TENSORBOARD = "/kaggle/working/tensorboard/"
    TENSORBOARD_MODELS = "/kaggle/working/tensorboard/models/"
    TENSORBOARD_TRAINING = "/kaggle/working/tensorboard/training/"
    TENSORBOARD_DATASETS = "/kaggle/working/tensorboard/datasets/"
    TENSORBOARD_INFERENCE = "/kaggle/working/tensorboard/inference/"

    # Log files.
    LOG_PATH = "/kaggle/working/logs/"

# Replace the `Paths` attribute of the copied `generics` module with the
# notebook's own `Paths` class, so that the `from generics import Paths`
# statements executed in later cells resolve to the paths defined here.
# This has to run before those later imports.
import generics
generics.Paths = Paths
# Store the patched module object under its name in the import cache.
# NOTE(review): after a plain `import generics` this entry presumably already
# refers to the same object, which would make this line redundant - confirm.
sys.modules['generics'] = generics


In [3]:
import torch
from generics import Generics
# Record the compute device on the shared Generics class: "cuda" when torch
# reports an available CUDA device, "cpu" otherwise.
if torch.cuda.is_available():
    Generics.DEVICE = "cuda"
else:
    Generics.DEVICE = "cpu"

# Inference

In [4]:
from datasets.data_loader import CustomDataset
from datasets.data_loader_configs import BaseDataConfig
from generics import Paths
from models.CustomModel import CustomModel
from models.custom_model_configs import BaseModelConfig, EfficientNetB0Config_Big, EfficientNetB0Config_Big_Weight_Decay_FROZEN_32, EfficientNetB0Config_Big_Weight_Decay_Only_Custom_spectrograms
from utils.data_preprocessing_utils import filter_by_agreement, filter_by_annotators
from utils.general_utils import get_logger
from utils.grid_search_utils import grid_search
from utils.inference_utils import perform_inference
from utils.training_utils import train
from utils.loader_utils import load_main_dfs
from generics import Generics
import numpy as np
import pandas as pd

2024-03-04 14:33:52.823049: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-04 14:33:52.823186: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-04 14:33:52.953101: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
def create_submission(test_df, predictions, target_columns, submission_file):
    """
    Creates a submission file from the predictions.

    Every row of ``predictions`` is rescaled so that it sums to one, stored as
    float32 next to the matching ``eeg_id`` and written to ``submission_file``
    as CSV (no index column, 32 decimals per float).

    Args:
        test_df (pd.DataFrame): The test DataFrame containing 'eeg_id'.
        predictions (np.ndarray): The predictions from the models, one row per
            test sample and one column per entry of ``target_columns``.
        target_columns (list): List of target column names.
        submission_file (str): Path to the submission file.

    Returns:
        pd.DataFrame: The created submission DataFrame ('eeg_id' first,
        followed by the target columns).

    Raises:
        AssertionError: If ``predictions`` and ``test_df`` differ in length.
        TypeError: If a row cannot be normalised to sum to one (for example an
            all-zero row or a row containing NaN).

    Note:
        As a side effect the global pandas option ``display.float_format`` is
        set to 32 decimals.
    """
    # Raised explicitly (instead of a bare `assert`) so the check is not
    # stripped when Python runs with -O.
    if len(test_df) != len(predictions):
        raise AssertionError("Mismatch in number of predictions and number of test samples")

    target_columns = list(target_columns)
    predictions = np.asarray(predictions, dtype=np.float32)

    # Normalise each row. Division problems (zero / NaN rows) are detected by
    # the sum check right below, so the numpy warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        predictions = predictions / predictions.sum(axis=1, keepdims=True)

    # float32 carries about 7 significant digits, so the row sums can only be
    # compared to one with a tolerance well above machine epsilon (~1.2e-7);
    # a tolerance such as 1e-32 degenerates into an exact-equality test and
    # rejects correctly normalised rows.
    if not np.allclose(predictions.sum(axis=1), 1.0, rtol=0.0, atol=1e-5):
        raise TypeError(f'Predictions must sum to one! Predictions: {predictions}')

    # Create a DataFrame for submission with 'eeg_id' as the first column.
    submission_df = pd.DataFrame(predictions, columns=target_columns, dtype='float32')
    submission_df.insert(0, 'eeg_id', test_df['eeg_id'].to_numpy())
    # Explicit 64-bit ids: the builtin `int` maps to a 32-bit dtype on some
    # platforms, which is too small for 10-digit eeg ids.
    submission_df = submission_df.astype({'eeg_id': 'int64'})

    pd.options.display.float_format = '{:,.32f}'.format

    # Save the submission file
    submission_df.to_csv(submission_file, index=False, float_format='%.32f')

    print(f'Submission shape: {submission_df.shape}')
    return submission_df

In [6]:
class Config_Normalize_Group_Raw_ICA(BaseDataConfig):
    """Data-loading configuration used for the test dataset in this notebook.

    Sets the class attributes below on top of ``BaseDataConfig``. What each
    flag does is decided by the project code that reads it, which is not
    visible from this notebook.
    """
    NORMALIZE_EEG_SPECTROGRAMS = True
    NORMALIZE_INDIVIDUALLY = False
    APPLY_ICA_RAW_EEG = True
    # Both "preloaded" switches are off.
    # NOTE(review): presumably this makes the loader read from the test
    # folders instead of the pre-loaded .npy files - confirm in the loader code.
    USE_PRELOADED_EEG_SPECTROGRAMS = False
    USE_PRELOADED_SPECTROGRAMS = False
    BATCH_SIZE_TEST = 1


class EfficientNetB0Config_Big_Weight_Decay(BaseModelConfig):
    """Model configuration passed to ``CustomModel`` in this notebook.

    ``MODEL`` and the inherited ``NAME`` attribute are also used further down
    to build the checkpoint file name. ``NAME`` is not set here, so it has to
    come from ``BaseModelConfig``.
    """
    MODEL = "tf_efficientnet_b0"
    FREEZE = False
    EPOCHS = 20
    GRADIENT_ACCUMULATION_STEPS = 1
    WEIGHT_DECAY = 0.01
    # NOTE(review): "SCHEDULERER" looks like a misspelling of "SCHEDULER".
    # If BaseModelConfig spells the attribute differently, this value is never
    # read - verify against the base class before renaming.
    MAX_LEARNING_RATE_SCHEDULERER = 0.001
    USE_KAGGLE_SPECTROGRAMS = True

# Select the configuration classes (the classes themselves, not instances)
# used for this inference run.
data_loader_config = Config_Normalize_Group_Raw_ICA
model_config = EfficientNetB0Config_Big_Weight_Decay
    
# load_main_dfs returns three values; only the third one is kept and used as
# the test frame.
# NOTE(review): a train/val split is still passed although the first two
# results are discarded - confirm whether it is needed for inference.
_, _, test_df = load_main_dfs(data_loader_config, train_val_split=(0.8, 0.2))

# Test-mode dataset built from the test frame, with cache=False.
test_dataset = CustomDataset(config=data_loader_config, main_df = test_df, mode="test", cache=False)

# pretrained=False: presumably the weights come from the checkpoint file
# below instead - confirm in perform_inference.
model = CustomModel(model_config, pretrained = False)

# Despite its name, modelDir is the full path of a single .pth file inside
# Paths.BEST_MODEL_CHECKPOINTS; the file name combines the model name and the
# NAME attributes of both configuration classes.
modelDir = f"{Paths.BEST_MODEL_CHECKPOINTS}best_{model_config.MODEL}_{model_config.NAME}_{data_loader_config.NAME}.pth"

preds = perform_inference(test_dataset, model, modelDir)

# Writes "submission.csv"; as the last expression of the cell, the returned
# DataFrame is also shown as the cell output.
create_submission(test_df, preds, Generics.LABEL_COLS, "submission.csv")

2024-03-04 14:34:09,563 - data_loader.log - INFO - Processing and caching new dataset
2024-03-04 14:34:09,571 - eeg_data_loader.log - INFO - Loading 1 EEGs out of 1 available in dataset


Train non-overlapp eeg_id shape: (17089, 12)


100%|██████████| 1/1 [00:00<00:00,  6.33it/s]
Processing EEG Data: 100%|██████████| 1/1 [00:10<00:00, 10.43s/it]
2024-03-04 14:34:20,176 - spectrogram_loader.log - INFO - Loading 1 spectrograms out of 1 available in dataset
Loading Spectrograms: 100%|██████████| 1/1 [00:00<00:00, 10.09it/s]
2024-03-04 14:34:20,280 - data_loader.log - INFO - Dataset loaded: test mode, 1 samples, with config Config_Normalize_Group_Raw_ICA
2024-03-04 14:34:20,284 - models/models.CustomModel - INFO - Using device: cuda
2024-03-04 14:34:21,142 - models/models.CustomModel - INFO - tf_efficientnet_b0 initialized with config EfficientNetB0Config_Big_Weight_Decay
Inference: 100%|██████████| 1/1 [00:00<00:00,  1.21test_batch/s]

Submission shape: (1, 7)





Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.0143048949539661,0.0102349780499935,0.0072324499487876,0.0160406958311796,0.0309787075966596,0.9212083220481871
