In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, walking that directory with os.walk('/kaggle/input') lists every file under the input directory

import os
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
import torch
import timm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [7]:
# Rebuild the resnet34d architecture (6 output classes, 3 input channels) without
# pretrained weights, then load the fine-tuned checkpoint into it.
model_path = "/kaggle/input/resnet34d_hmsbrain_5e_512i/pytorch/1/1/resnet34d_5E_512.pth"
model = timm.create_model('resnet34d', pretrained=False, num_classes=6, in_chans=3)
# map_location remaps tensors that were saved from a GPU, so the checkpoint also
# loads when `device` resolved to "cpu" instead of raising a CUDA deserialization error.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted
# source (consider weights_only=True where the installed torch version supports it).
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
# eval() switches layers such as BatchNorm to inference behaviour (running statistics).
model.eval()

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
   

In [15]:
def get_data(paths):
    """
    Load spectrogram parquet files and turn them into one stacked model-input tensor.

    For every path the parquet file is read, missing values are replaced,
    the first column is dropped, and the remaining values are transposed,
    clipped to [exp(-6), exp(10)], log-scaled and standardised with the
    file's own global mean and standard deviation. The result gets a leading
    channel axis and is passed through ``Config.image_transform``.

    paths: iterable of parquet file paths.

    return: ``torch.stack`` of the per-file tensors, in the order of ``paths``.
    """
    eps = 1e-6  # keeps the division finite when the standard deviation is 0
    lower, upper = np.exp(-6), np.exp(10)  # clip bounds; the lower bound avoids log(0)

    def _prepare(parquet_path):
        # Read one file, drop its first column and put the remaining columns on axis 0.
        frame = pd.read_parquet(parquet_path)
        spectrogram = frame.fillna(-1).values[:, 1:].T

        # Filled -1 values fall below the lower bound, so they are clipped before the log.
        log_spec = np.log(np.clip(spectrogram, lower, upper))

        # Standardise with statistics taken over the whole 2-D array.
        centre = log_spec.mean(axis=(0, 1))
        spread = log_spec.std(axis=(0, 1))
        standardised = (log_spec - centre) / (spread + eps)

        # Add the leading channel axis, then resize to the model's input size.
        image = torch.Tensor(standardised).unsqueeze(0)
        return Config.image_transform(image)

    return torch.stack([_prepare(path) for path in paths])

In [25]:
def submission_ready(single_path):
    """
    Run the classifier on a single spectrogram parquet file.

    single_path: path of one parquet file, forwarded to ``get_data``.

    return: the model's raw (pre-softmax) output for that file, as a batch of
    one; the tensor carries no autograd history.
    """
    data = get_data([single_path])
    # The model was created with in_chans=3, so the single channel is repeated three times.
    data = torch.cat([data, data, data], dim = 1)
    # Inference only: skip autograd bookkeeping so no graph is built or kept alive.
    with torch.no_grad():
        pred = model(data.to(device))
    return pred

In [27]:
import pandas as pd
import torch.nn.functional as F
import torchvision.transforms as transforms
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from torch/torchvision/pandas.
warnings.filterwarnings("ignore")

In [23]:
class Config:
    """Notebook-wide settings; only ``image_transform`` is read by the visible cells."""
    # Not referenced by the visible cells — presumably the training seed; TODO confirm.
    seed = 42
    # Applied by get_data to each single-channel spectrogram tensor: resize to 512x512.
    image_transform = transforms.Resize((512, 512))
    # Not referenced by the visible cells — presumably carried over from training; TODO confirm.
    batch_size = 16
    # Not referenced by the visible cells — presumably carried over from training; TODO confirm.
    num_epochs = 5

In [9]:
# Load the competition's test metadata; later cells read its spectrogram_id and eeg_id columns.
test_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/test.csv")
test_df

Unnamed: 0,spectrogram_id,eeg_id,patient_id
0,853520,3911565283,6885


In [14]:
# Pair every test spectrogram id with the parquet file that stores it.
df = test_df[["spectrogram_id"]].copy()
df["path"] = [
    f"/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/{spectrogram_id}.parquet"
    for spectrogram_id in df["spectrogram_id"]
]
df

Unnamed: 0,spectrogram_id,path
0,853520,/kaggle/input/hms-harmful-brain-activity-class...


In [12]:
# Vote-count column names, one per class, in the order
# seizure, lpd, gpd, lrda, grda, other.
y_features = [
    f"{label}_vote_sum"
    for label in ("seizure", "lpd", "gpd", "lrda", "grda", "other")
]

In [36]:

# Accumulators filled by the loop: one eeg_id and six class probabilities per test row.
# The cell that builds the submission frame reads these lists by name.
eeg = []
seizure_vote_sum = []
lpd_vote_sum = []
gpd_vote_sum = []
lrda_vote_sum = []
grda_vote_sum = []
other_vote_sum = []

# df was copied from test_df, so position `index` refers to the same row in both frames.
for index, path in enumerate(df["path"]):
    logits = submission_ready(path)
    # Softmax over the class axis turns the model output into probabilities;
    # [0] unwraps the batch of one.
    pred_list = F.softmax(logits, dim = 1).tolist()[0]
    # Select the column first, then the position: test_df.iloc[index] would build a
    # whole-row Series upcast to a common dtype, so any float column in test_df
    # would silently turn the integer eeg_id into a float.
    eeg_id = test_df["eeg_id"].iloc[index]
    eeg.append(eeg_id)
    seizure_vote_sum.append(pred_list[0])
    lpd_vote_sum.append(pred_list[1])
    gpd_vote_sum.append(pred_list[2])
    lrda_vote_sum.append(pred_list[3])
    grda_vote_sum.append(pred_list[4])
    other_vote_sum.append(pred_list[5])

In [43]:
# Collect the per-row ids and class probabilities under the submission column
# names, then write them to submission.csv without the index column.
data = dict(
    eeg_id=eeg,
    seizure_vote=seizure_vote_sum,
    lpd_vote=lpd_vote_sum,
    gpd_vote=gpd_vote_sum,
    lrda_vote=lrda_vote_sum,
    grda_vote=grda_vote_sum,
    other_vote=other_vote_sum,
)
sub_df = pd.DataFrame(data)
sub_df.to_csv("submission.csv", index=False)

In [44]:
# Show the submission frame as the cell output.
sub_df

Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.143057,0.151177,0.13985,0.260793,0.126878,0.178246
