# <b><span style='color:#F1A424'>|</span> HMS: <span style='color:#F1A424'>Harmful Brain Activity Classification</span><span style='color:#ABABAB'> [Inference]</span></b> 

***

**Consider upvoting this notebook if you find it useful 🙌🏼**

- [Train Notebook](https://www.kaggle.com/alejopaullier/hms-efficientnetb0-pytorch-train)


Your goal in this competition is to detect and classify seizures and other types of harmful brain activity. You will develop a model trained on electroencephalography (EEG) signals recorded from critically ill hospital patients.

This notebook was adapted from an `efficientnet` PyTorch inference notebook. In its current form it runs inference with CatBoost models on features engineered from the test spectrograms and EEG signals. Hope you enjoy it and find it useful.

### <b><span style='color:#F1A424'>Table of Contents</span></b> <a class='anchor' id='top'></a>
<div style=" background-color:#3b3745; padding: 13px 13px; border-radius: 8px; color: white">
<li><a href="#import_libraries">Import Libraries</a></li>
<li><a href="#configuration">Configuration</a></li>
<li><a href="#utils">Utils</a></li>
<li><a href="#load_data">Load Data</a></li>
<li><a href="#dataset">Dataset</a></li>
<li><a href="#dataloader">DataLoader</a></li>
<li><a href="#model">Model</a></li>
<li><a href="#inference_function">Inference Function</a></li>
<li><a href="#infer">Infer</a></li>
<li><a href="#submission">Save Submission</a></li>
</div>

# <b><span style='color:#F1A424'>|</span> Import Libraries</b><a class='anchor' id='import_libraries'></a> [↑](#top) 

***

Import all the required libraries for this notebook.

In [1]:
import albumentations as A
import gc
import librosa
import matplotlib.pyplot as plt
import math
import multiprocessing
import numpy as np
import os
import pandas as pd
import pywt
import random
import time
import timm
import torch
import torch.nn as nn


from albumentations.pytorch import ToTensorV2
from glob import glob
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from typing import Dict, List

# Limit CUDA to device indices 0 and 1, then pick the compute device:
# "cuda:0" when CUDA is usable, otherwise the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using {torch.cuda.device_count()} GPU(s)")

Using 0 GPU(s)


# <b><span style='color:#F1A424'>|</span> Configuration</b><a class='anchor' id='configuration'></a> [↑](#top) 

***

In [None]:
class config:
    """Namespace of notebook-level settings, read as plain class attributes."""

    # Loader / model settings
    BATCH_SIZE = 64
    MODEL = "tf_efficientnet_b0"
    NUM_WORKERS = 0  # alternative left by the author: multiprocessing.cpu_count()

    # Logging, reproducibility and plotting switches
    PRINT_FREQ = 20
    SEED = 20
    VISUALIZE = False
    
    
class paths:
    """Namespace of filesystem locations, read as plain class attributes."""

    # Model checkpoint and output directory
    MODEL_WEIGHTS = "/kaggle/input/hms-multi-class-image-classification-train/tf_efficientnet_b0_epoch_3.pth"
    OUTPUT_DIR = "/kaggle/working/"

    # Competition test inputs
    TEST_CSV = "/kaggle/input/hms-harmful-brain-activity-classification/test.csv"
    TEST_EEGS = "/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/"
    TEST_SPECTROGRAMS = "/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/"
    
# List every `.cat` file under /kaggle/input/cat-boost/. glob() returns its
# matches in arbitrary order, so sort them to get a reproducible listing.
model_weights = sorted(glob("/kaggle/input/cat-boost/*.cat"))
model_weights


#model_weights = [x for x in glob("/kaggle/input/hms-efficientnetb0-5-folds/*.pth")]


# <b><span style='color:#F1A424'>|</span> Load Data</b><a class='anchor' id='load_data'></a> [↑](#top) 

***

Load the competition's data.

In [None]:
# Read the test metadata table, report its shape and preview the first rows.
test = pd.read_csv(
    '/kaggle/input/hms-harmful-brain-activity-classification/test.csv'
)
print(f'Test shape {test.shape}')
test.head()


In [None]:
### Infer Test (https://www.kaggle.com/code/cdeotte/catboost-starter-lb-0-60?scriptVersionId=159895287)
 
import pywt, librosa

# Wavelet name handed to denoise(); a falsy value (None) skips denoising.
USE_WAVELET = None

# Electrode chains keyed by their label. Insertion order matters because
# NAMES[k] and FEATS[k] are indexed together when building spectrograms.
_CHAINS = {
    'LL': ['Fp1', 'F7', 'T3', 'T5', 'O1'],
    'LP': ['Fp1', 'F3', 'C3', 'P3', 'O1'],
    'RP': ['Fp2', 'F8', 'T4', 'T6', 'O2'],
    'RR': ['Fp2', 'F4', 'C4', 'P4', 'O2'],
}

NAMES = list(_CHAINS)
FEATS = [list(chain) for chain in _CHAINS.values()]

# DENOISE FUNCTION
def maddest(d, axis=None):
    """Return the mean absolute deviation of ``d`` about its mean.

    Args:
        d: Array-like of numbers.
        axis: Axis (or tuple of axes) to reduce over. ``None`` reduces over
            every element.

    Returns:
        A scalar when ``axis`` is None, otherwise an array with the reduced
        axis removed.
    """
    d = np.asarray(d)
    # keepdims=True keeps the mean broadcastable against `d` for any axis.
    # Without it, reducing over a non-leading axis either raises or subtracts
    # the means along the wrong axis (silently, when the array is square).
    centre = np.mean(d, axis=axis, keepdims=True)
    return np.mean(np.absolute(d - centre), axis=axis)

def denoise(x, wavelet='haar', level=1):
    """Hard-threshold the wavelet detail coefficients of ``x`` and rebuild it.

    Args:
        x: 1-D signal to denoise.
        wavelet: Wavelet name passed to ``pywt.wavedec`` / ``pywt.waverec``.
        level: Selects ``coeff[-level]`` as the band used to estimate the
            noise scale.

    Returns:
        The signal reconstructed from the thresholded coefficients.
    """
    bands = pywt.wavedec(x, wavelet, mode="per")

    # Noise scale from the chosen detail band, then a threshold that grows
    # with the log of the signal length.
    sigma = (1/0.6745) * maddest(bands[-level])
    uthresh = sigma * np.sqrt(2*np.log(len(x)))

    # The approximation band (index 0) is kept as-is; every detail band is
    # hard-thresholded.
    thresholded = [bands[0]]
    for band in bands[1:]:
        thresholded.append(pywt.threshold(band, value=uthresh, mode='hard'))

    return pywt.waverec(thresholded, wavelet, mode='per')

def spectrogram_from_eeg(parquet_path, display=False):
    """Build a 4-channel log-mel spectrogram image from one EEG parquet file.

    For each of the four electrode chains in ``FEATS``, the differences
    between consecutive electrodes are converted to log-mel spectrograms, and
    the four spectrograms of a chain are averaged into one image channel.

    Args:
        parquet_path: Path of the EEG parquet file. Its file stem is used as
            the EEG id in plot titles.
        display: When true, plot the four spectrogram channels and a signal
            trace with matplotlib.

    Returns:
        ``float32`` array of shape ``(128, 256, 4)``; channel ``k``
        corresponds to ``NAMES[k]``.
    """
    # Take the id shown in plot titles from the file name instead of relying
    # on a notebook-global `eeg_id` being set by the calling loop.
    eeg_id = os.path.splitext(os.path.basename(parquet_path))[0]

    # LOAD MIDDLE 50 SECONDS OF EEG SERIES (10_000 rows)
    eeg = pd.read_parquet(parquet_path)
    middle = (len(eeg)-10_000)//2
    eeg = eeg.iloc[middle:middle+10_000]

    # VARIABLE TO HOLD SPECTROGRAM
    img = np.zeros((128,256,4),dtype='float32')

    if display: plt.figure(figsize=(10,7))
    signals = []
    for k in range(4):
        COLS = FEATS[k]

        for kk in range(4):

            # COMPUTE PAIR DIFFERENCES
            x = eeg[COLS[kk]].values - eeg[COLS[kk+1]].values

            # FILL NANS: use the mean of the valid samples, or zeros when
            # every sample is NaN. The mean is only computed when at least one
            # valid sample exists, so no empty-slice warning is raised.
            if np.isnan(x).mean()<1:
                x = np.nan_to_num(x,nan=np.nanmean(x))
            else:
                x[:] = 0

            # DENOISE
            if USE_WAVELET:
                x = denoise(x, wavelet=USE_WAVELET)
            signals.append(x)

            # RAW SPECTROGRAM
            mel_spec = librosa.feature.melspectrogram(y=x, sr=200, hop_length=len(x)//256,
                  n_fft=1024, n_mels=128, fmin=0, fmax=20, win_length=128)

            # LOG TRANSFORM (width is trimmed down to a multiple of 32 frames)
            width = (mel_spec.shape[1]//32)*32
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max).astype(np.float32)[:,:width]

            # STANDARDIZE TO -1 TO 1
            mel_spec_db = (mel_spec_db+40)/40
            img[:,:,k] += mel_spec_db

        # AVERAGE THE 4 MONTAGE DIFFERENCES
        img[:,:,k] /= 4.0

        if display:
            plt.subplot(2,2,k+1)
            plt.imshow(img[:,:,k],aspect='auto',origin='lower')
            plt.title(f'EEG {eeg_id} - Spectrogram {NAMES[k]}')

    if display:
        plt.show()
        plt.figure(figsize=(10,5))
        offset = 0
        # NOTE(review): `signals` holds 16 pair differences (4 chains x 4
        # pairs), but only the first four are drawn here, and the labels and
        # offsets use index 3-k. Kept as-is; confirm the intended mapping.
        for k in range(4):
            if k>0: offset -= signals[3-k].min()
            plt.plot(range(10_000),signals[k]+offset,label=NAMES[3-k])
            offset += signals[3-k].max()
        plt.legend()
        plt.title(f'EEG {eeg_id} Signals')
        plt.show()
        print(); print('#'*25); print()

    return img

### <b><span style='color:#F1A424'>Read Spectrograms</span></b>

In [None]:
# CREATE ONE SPECTROGRAM IMAGE PER UNIQUE TEST EEG ID
PATH2 = '/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/'
DISPLAY = 0  # the first DISPLAY recordings are also plotted
EEG_IDS2 = test.eeg_id.unique()
all_eegs2 = {}

print('Converting Test EEG to Spectrograms...\n')
for i, eeg_id in enumerate(EEG_IDS2):
    # eeg_id is both the parquet file stem and the key of the result mapping
    img = spectrogram_from_eeg(f'{PATH2}{eeg_id}.parquet', display=i < DISPLAY)
    all_eegs2[eeg_id] = img

## Feature Engineering the test set


In [None]:
# (from https://www.kaggle.com/code/cdeotte/catboost-starter-lb-0-60?scriptVersionId=159895287)
# ENGINEER FEATURES
import warnings

# Silence every warning from this point on.
warnings.filterwarnings('ignore')

PATH = '/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/'

# FEATURE NAMES
# Spectrogram column names come from one training file; its first column is skipped.
SPEC_COLS = pd.read_parquet(f'{PATH}1000086677.parquet').columns[1:]

# Spectrogram features: for each window (10m, then 20s) the mean names come
# first, then the min names, each running over every spectrogram column.
FEATURES = [
    f'{col}_{stat}_{window}'
    for window in ('10m', '20s')
    for stat in ('mean', 'min')
    for col in SPEC_COLS
]

# EEG-spectrogram features: 512 names per statistic, in mean/min/max/std order.
FEATURES += [
    f'eeg_{stat}_f{idx}_10s'
    for stat in ('mean', 'min', 'max', 'std')
    for idx in range(512)
]


In [None]:
# FEATURE ENGINEER TEST - variables slightly adapted (https://www.kaggle.com/code/cdeotte/catboost-starter-lb-0-60?scriptVersionId=159895287)
PATH2 = '/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/'
data = np.zeros((len(test),len(FEATURES)))

for k in range(len(test)):
    row = test.iloc[k]
    spec = pd.read_parquet(f'{PATH2}{int(row.spectrogram_id)}.parquet')

    # Spectrogram values without the first column: every row for the
    # 10 minute window, rows 145..154 for the 20 second window.
    full_window = spec.iloc[:,1:].values
    short_window = spec.iloc[145:155,1:].values

    # 10 MINUTE WINDOW FEATURES
    data[k,:400] = np.nanmean(full_window, axis=0)
    data[k,400:800] = np.nanmin(full_window, axis=0)

    # 20 SECOND WINDOW FEATURES
    data[k,800:1200] = np.nanmean(short_window, axis=0)
    data[k,1200:1600] = np.nanmin(short_window, axis=0)

    # RESHAPE EEG SPECTROGRAMS 128x256x4 => 512x256 (channels stacked along rows)
    channels = all_eegs2[row.eeg_id]
    eeg_spec = np.concatenate([channels[:,:,j] for j in range(4)], axis=0)

    # 10 SECOND WINDOW FROM EEG SPECTROGRAMS (drop 100 frames at each end)
    eeg_window = eeg_spec.T[100:-100,:]
    data[k,1600:2112] = np.nanmean(eeg_window, axis=0)
    data[k,2112:2624] = np.nanmin(eeg_window, axis=0)
    data[k,2624:3136] = np.nanmax(eeg_window, axis=0)
    data[k,3136:3648] = np.nanstd(eeg_window, axis=0)

test[FEATURES] = data
print(f'New test shape {test.shape}')

In [None]:
# ## including possible exceptions in test data --> was not necessary after all 

# # FEATURE ENGINEER TEST (https://www.kaggle.com/code/cdeotte/catboost-starter-lb-0-60?scriptVersionId=159895287)
# PATH2 = '/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/'
# data = np.zeros((len(test), len(FEATURES)))

# for k in range(len(test)):
#     try:
#         row = test.iloc[k]
#         s = int(row.spectrogram_id)
#         spec = pd.read_parquet(f'{PATH2}{s}.parquet')

#         # 10 MINUTE WINDOW FEATURES
#         x = np.nanmean(spec.iloc[:, 1:].values, axis=0)
#         data[k, :400] = x
#         x = np.nanmin(spec.iloc[:, 1:].values, axis=0)
#         data[k, 400:800] = x

#         # 20 SECOND WINDOW FEATURES
#         x = np.nanmean(spec.iloc[145:155, 1:].values, axis=0)
#         data[k, 800:1200] = x
#         x = np.nanmin(spec.iloc[145:155, 1:].values, axis=0)
#         data[k, 1200:1600] = x

#         # RESHAPE EEG SPECTROGRAMS 128x256x4 => 512x256
#         eeg_spec = np.zeros((512, 256), dtype='float32')
#         xx = all_eegs2[row.eeg_id]
#         for j in range(4): eeg_spec[128 * j:128 * (j + 1), ] = xx[:, :, j]

#         # 10 SECOND WINDOW FROM EEG SPECTROGRAMS
#         x = np.nanmean(eeg_spec.T[100:-100, :], axis=0)
#         data[k, 1600:2112] = x
#         x = np.nanmin(eeg_spec.T[100:-100, :], axis=0)
#         data[k, 2112:2624] = x
#         x = np.nanmax(eeg_spec.T[100:-100, :], axis=0)
#         data[k, 2624:3136] = x
#         x = np.nanstd(eeg_spec.T[100:-100, :], axis=0)
#         data[k, 3136:3648] = x

#     except Exception as e:
#         # Handle exceptions here (set values to zero, log the error, etc.)
#         print(f"Exception occurred at index {k}: {e}")
#         data[k, :] = 0  # Setting all values to zero in case of an exception

# test[FEATURES] = data
# print('New test shape', test.shape)


# <b><span style='color:#F1A424'>|</span> Infer</b><a class='anchor' id='infer'></a> [↑](#top) 

***

In [None]:
# INFER CATBOOST ON TEST - slightly adapted (https://www.kaggle.com/code/cdeotte/catboost-starter-lb-0-60?scriptVersionId=159895287)
import catboost as cat
from catboost import CatBoostClassifier, Pool

# The feature matrix is the same for every fold, so build the Pool once
# instead of once per model.
test_pool = Pool(
    data = test[FEATURES]
)

# Collect class probabilities from each of the five fold models.
preds = []

for i in range(5):
    print(i,', ',end='')
    model = CatBoostClassifier(task_type='CPU')
    model.load_model(f'/kaggle/input/cat-boost/CAT_v_1_f{i}.cat')
    preds.append(model.predict_proba(test_pool))

# Average the per-fold probabilities into the final prediction.
pred = np.mean(preds,axis=0)
print()
print('Test preds shape',pred.shape)

# <b><span style='color:#F1A424'>|</span> Save Submission</b><a class='anchor' id='submission'></a> [↑](#top) 

***

In [None]:
TARGETS = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']

# One row per test row: the id column first, then one probability column per target.
sub = pd.DataFrame(pred, columns=TARGETS)
sub.insert(0, 'eeg_id', test.eeg_id.values)

sub.to_csv('submission.csv', index=False)
print(f'submission shape {sub.shape}')
sub.head()

In [None]:
# SANITY CHECK: THE SIX TARGET PROBABILITIES OF EVERY ROW SHOULD SUM TO ONE
sub[TARGETS].sum(axis=1)