In [None]:
import numpy as np, pandas as pd, tensorflow as tf

In [None]:
# The six expert-vote columns that form the prediction target.
label_cols = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
# Columns read from train.csv: the two recording identifiers plus the votes.
cols = ['eeg_id', 'spectrogram_id'] + label_cols

train_df = pd.read_csv(
    '/kaggle/input/hms-harmful-brain-activity-classification/train.csv',
    usecols=cols,
)
train_df.head()

In [None]:
# Sum the votes of all annotation rows that share an (eeg_id, spectrogram_id) pair.
data = train_df.groupby(['eeg_id', 'spectrogram_id'], as_index=False).sum()

# Turn the summed vote counts into per-row vote fractions (each row sums to 1).
vote_totals = data[label_cols].sum(axis=1)
data[label_cols] = data[label_cols].div(vote_totals, axis=0)

# Build the parquet file path of each recording from its identifier.
data['eeg_path'] = data['eeg_id'].map(
    lambda x: '/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/'+str(x)+'.parquet'
)
data['spectrogram_path'] = data['spectrogram_id'].map(
    lambda x: '/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/'+str(x)+'.parquet'
)

# The identifiers are no longer needed once the paths exist.
data = data.drop(columns=['eeg_id', 'spectrogram_id'])
data.head()

In [None]:
# Positional 80/20 split: the first 80% of rows train, the remainder validates.
train_num = int(0.8 * len(data))
train_data = data.iloc[:train_num]
val_data = data.iloc[train_num:]

In [None]:
from scipy.signal import butter, lfilter

def butter_lowpass_filter(data, cutoff_freq=20, sampling_rate=200, order=4, axis=1):
    """Apply a causal digital Butterworth low-pass filter along one axis.

    Args:
        data: array-like signal(s) to filter.
        cutoff_freq: cut-off frequency, in the same unit as ``sampling_rate``.
        sampling_rate: sampling rate of ``data`` *as passed in*. If the signal
            was decimated beforehand, pass the decimated rate; otherwise the
            effective cut-off is scaled by the decimation factor.
        order: order of the Butterworth filter.
        axis: axis of ``data`` along which to filter. Defaults to 1, i.e. the
            second axis of a ``(batch, time, channel)`` array; pass ``-1`` or
            ``0`` for 1-D signals.

    Returns:
        ndarray with the same shape as ``data`` holding the filtered signal.

    Raises:
        ValueError: raised by ``scipy.signal.butter`` when
            ``cutoff_freq / (sampling_rate / 2)`` is not strictly between 0 and 1.
    """
    nyquist = 0.5 * sampling_rate
    # scipy expects the digital cut-off as a fraction of the Nyquist frequency.
    normal_cutoff = cutoff_freq / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return lfilter(b, a, data, axis=axis)

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that preloads all EEG recordings and serves batches.

    For every row of ``tmp_df`` the constructor reads the parquet file at
    ``row['eeg_path']``, keeps the centred 10,000-row window, takes every 5th
    row (2,000 rows) and turns the 8 raw electrode columns into 8
    electrode-pair differences. The differences are clipped to [-1024, 1024],
    divided by 32 and low-pass filtered with ``butter_lowpass_filter``.

    Args:
        tmp_df: DataFrame with an ``eeg_path`` column and, unless
            ``mode == 'test'``, the ``label_cols`` target columns.
        batch_size: number of recordings per batch (the last batch may be
            smaller).
        shuffle: when true, the sample order is permuted at construction and
            after every epoch. Ignored when ``mode == 'test'`` so that
            predictions always stay in the row order of ``tmp_df``.
        mode: ``'test'`` means no labels are read and all-zero placeholder
            targets are returned; any other value reads labels from ``tmp_df``.

    Raises:
        ValueError: if a recording has fewer than 10,000 rows.
    """

    # Raw electrode columns read from each parquet file, in this order.
    FEATS = ['Fp1','T3','C3','O1','Fp2','C4','T4','O2']
    # Output channel k = FEATS[MONTAGE[k][0]] - FEATS[MONTAGE[k][1]].
    MONTAGE = [(0, 1), (1, 3), (0, 2), (2, 3), (4, 5), (5, 7), (4, 6), (6, 7)]
    WINDOW = 10000  # rows kept from the centre of every recording
    STEP = 5        # keep every STEP-th row of the window

    def __init__(self, tmp_df, batch_size=32, shuffle=False, mode='train'):
        # Harmless for plain-object base classes; newer Keras dataset base
        # classes expect their constructor to be called.
        super().__init__()

        self.batch_size = batch_size
        self.mode = mode
        # Never shuffle in test mode: callers match predictions to rows by position.
        self.shuffle = bool(shuffle) and mode != 'test'

        eeg_data = []
        for _, row in tmp_df.iterrows():
            eeg_tmp = pd.read_parquet(row['eeg_path'], columns=self.FEATS).fillna(0).values.astype('float32')
            if len(eeg_tmp) < self.WINDOW:
                # A negative offset would silently select the wrong rows and
                # produce a ragged batch; fail loudly instead.
                raise ValueError(
                    f"EEG {row['eeg_path']} has {len(eeg_tmp)} rows; at least {self.WINDOW} are required"
                )
            eeg_offset = (len(eeg_tmp) - self.WINDOW) // 2
            eeg_data.append(eeg_tmp[eeg_offset:(eeg_offset + self.WINDOW):self.STEP])

        eeg_data = np.array(eeg_data)

        # Filled one channel at a time to keep peak memory low; float64 is
        # kept for the clipping/filtering arithmetic below.
        eeg_data_agg = np.empty(shape=(eeg_data.shape[0], self.WINDOW // self.STEP, len(self.MONTAGE)))
        for out_ch, (pos, neg) in enumerate(self.MONTAGE):
            eeg_data_agg[:, :, out_ch] = eeg_data[:, :, pos] - eeg_data[:, :, neg]

        del eeg_data

        eeg_data_agg = np.clip(eeg_data_agg, -1024, 1024)
        eeg_data_agg = np.nan_to_num(eeg_data_agg, nan=0) / 32.0
        # NOTE(review): the filter is called with its defaults (cutoff_freq=20,
        # sampling_rate=200) on data already decimated by STEP. If the raw
        # recordings are sampled at 200 Hz, the effective cut-off is
        # 20 / STEP = 4 Hz rather than 20 Hz -- confirm this is intended.
        eeg_data_agg = butter_lowpass_filter(eeg_data_agg)
        # Store as float32: halves resident memory for the preloaded data.
        self.eeg_data_agg = eeg_data_agg.astype('float32')

        if self.mode != 'test':
            self.labels = tmp_df[label_cols].values.astype('float32')

        # Order in which samples are served; permuted when shuffling.
        self.indexes = np.arange(len(self.eeg_data_agg))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (last one may be partial)."""
        return int(np.ceil(len(self.eeg_data_agg) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, y)``.

        ``X`` holds the preprocessed EEG of the batch; ``y`` holds the matching
        label rows, or zeros with one row per sample in test mode.
        """
        batch_idx = self.indexes[idx*self.batch_size:(idx+1)*self.batch_size]
        X = self.eeg_data_agg[batch_idx]
        if self.mode != 'test':
            y = self.labels[batch_idx]
        else:
            # Sized to X so the final partial batch has matching lengths.
            y = np.zeros((len(X), 6), dtype='float32')
        return X, y

    def on_epoch_end(self):
        """Re-draw the sample order when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [None]:
from tensorflow.keras.layers import Input, Dense, Multiply, Add, Conv1D, Concatenate

def wave_block(x, filters, kernel_size, n):
    """Stack ``n`` gated, dilated 1-D convolutions and sum their outputs.

    The input is first projected to ``filters`` channels with a kernel-size-1
    convolution. Then, for dilation rates 1, 2, 4, ..., 2**(n-1), a tanh
    convolution and a sigmoid convolution of the current tensor are multiplied
    element-wise, passed through another kernel-size-1 convolution, and the
    result is both fed to the next stage and added to a running sum.

    Args:
        x: input Keras tensor.
        filters: number of output channels of every convolution.
        kernel_size: kernel size of the dilated convolutions.
        n: number of dilated stages.

    Returns:
        The running sum of the initial projection and every stage output.
    """
    def _dilated_conv(inputs, activation, rate):
        # One 'same'-padded dilated convolution with the given activation.
        return Conv1D(filters=filters,
                      kernel_size=kernel_size,
                      padding='same',
                      activation=activation,
                      dilation_rate=rate)(inputs)

    x = Conv1D(filters=filters, kernel_size=1, padding='same')(x)
    skip_sum = x
    for power in range(n):
        rate = 2 ** power
        stage_in = x
        filter_branch = _dilated_conv(stage_in, 'tanh', rate)
        gate_branch = _dilated_conv(stage_in, 'sigmoid', rate)
        gated = Multiply()([filter_branch, gate_branch])
        x = Conv1D(filters=filters, kernel_size=1, padding='same')(gated)
        skip_sum = Add()([skip_sum, x])
    return skip_sum

In [None]:
def build_model():
    """Build and compile the 8-channel EEG classifier.

    A single-channel backbone (four stacked ``wave_block`` calls) is shared by
    all 8 input channels. Each channel's backbone output is global-average
    pooled; channels are then averaged in consecutive pairs (0-1, 2-3, 4-5,
    6-7), the four pair vectors are concatenated and passed through a 64-unit
    ReLU layer and a 6-way softmax.

    Returns:
        A ``tf.keras.Model`` taking ``(2000, 8)`` inputs, compiled with Adam
        (learning rate 1e-3) and a KL-divergence loss.
    """
    inp = tf.keras.Input(shape=(2_000,8))

    # Shared backbone operating on one channel at a time.
    inp2 = tf.keras.Input(shape=(2_000,1))
    x = inp2
    for filters, n_stages in ((8, 12), (16, 8), (32, 4), (64, 1)):
        x = wave_block(x, filters, 3, n_stages)
    model2 = tf.keras.Model(inputs=inp2, outputs=x)

    # Pool every channel, then average each consecutive pair of channels.
    pair_features = []
    for first_ch in range(0, 8, 2):
        pooled = []
        for ch in (first_ch, first_ch + 1):
            features = model2(inp[:,:,ch:ch+1])
            pooled.append(tf.keras.layers.GlobalAveragePooling1D()(features))
        pair_features.append(tf.keras.layers.Average()(pooled))

    y = tf.keras.layers.Concatenate()(pair_features)
    y = tf.keras.layers.Dense(64, activation='relu')(y)
    y = tf.keras.layers.Dense(6,activation='softmax', dtype='float32')(y)

    model = tf.keras.Model(inputs=inp, outputs=y)
    model.compile(
        loss=tf.keras.losses.KLDivergence(),
        optimizer=tf.keras.optimizers.Adam(learning_rate = 1e-3),
    )

    return model

In [None]:
import os
# Expose at most the first two GPUs to TensorFlow.
# NOTE(review): this presumably only takes effect if no GPU has been
# initialised earlier in the session -- confirm.
os.environ['CUDA_VISIBLE_DEVICES']='0,1'
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    # Several GPUs: replicate the model and split every batch across them.
    strategy = tf.distribute.MirroredStrategy()
elif len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device='/gpu:0')
else:
    # No GPU visible: place everything on the CPU explicitly instead of
    # requesting a '/gpu:0' device that does not exist.
    strategy = tf.distribute.OneDeviceStrategy(device='/cpu:0')

In [None]:
# Both generators load and preprocess all of their EEGs up front.
train_gen = DataGenerator(train_data, batch_size=32, shuffle=True, mode='train')
val_gen = DataGenerator(val_data, batch_size=32, shuffle=False, mode='val')

# Variables must be created inside the strategy scope.
with strategy.scope():
    model = build_model()

model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=5,
    verbose=1,
)

In [None]:
test_df = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/test.csv')
test_df['eeg_path'] = test_df['eeg_id'].apply(lambda x: '/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/'+str(x)+'.parquet')
test_df['spectrogram_path'] = test_df['spectrogram_id'].apply(lambda x: '/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/'+str(x)+'.parquet')

# shuffle must be False here: the predictions are later attached to the rows of
# test_df by position, so the generator has to serve samples in test_df order.
test_gen = DataGenerator(test_df, shuffle=False, batch_size=32, mode='test')
test_preds = model.predict(test_gen)

In [None]:
# One row per test recording: its id followed by the six predicted probabilities.
submission = test_df[['eeg_id']].copy()
submission[label_cols] = test_preds
submission.to_csv('submission.csv', index=False)
submission