In [None]:
import os, gc, random
import numpy as np
import pandas as pd 
from pathlib import Path
import matplotlib.pyplot as plt
from typing import List, Dict
from tqdm.notebook import tqdm
from time import time, ctime

from sklearn.model_selection import KFold, GroupKFold

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import v2
from torch.optim.lr_scheduler import OneCycleLR,  CosineAnnealingWarmRestarts
from torch.optim import Adam, AdamW
from torch.cuda.amp import autocast, GradScaler

from scipy.stats import entropy
from scipy.special import rel_entr, softmax

In [None]:
class KagglePaths:
    OUTPUT_DIR = "/kaggle/working/"
    PRE_LOADED_EEGS = '/kaggle/input/brain-eeg-spectrograms/eeg_specs.npy'
    PRE_LOADED_SPECTROGRAMS = '/kaggle/input/brain-spectrograms/specs.npy'
    TRAIN_CSV = "/kaggle/input/hms-harmful-brain-activity-classification/train.csv"
    TRAIN_EEGS = "/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/"
    TRAIN_SPECTROGRAMS = "/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/"
    TEST_CSV = "/kaggle/input/hms-harmful-brain-activity-classification/test.csv"
    TEST_SPECTROGRAMS = "/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/"
    TEST_EEGS = "/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/"

class LocalPaths:
    OUTPUT_DIR = "./outputs/"
    PRE_LOADED_EEGS = './inputs/brain-eeg-spectrograms/eeg_specs.npy'
    PRE_LOADED_SPECTROGRAMS = './inputs/brain-spectrograms/specs.npy'
    TRAIN_CSV = "./inputs/hms-harmful-brain-activity-classification/train.csv"
    TRAIN_EEGS = "./inputs/hms-harmful-brain-activity-classification/train_eegs"
    TRAIN_SPECTROGRAMS = "./inputs/hms-harmful-brain-activity-classification/train_spectrograms"
    TEST_CSV = "./inputs/hms-harmful-brain-activity-classification/test.csv"
    TEST_SPECTROGRAMS = "./inputs/hms-harmful-brain-activity-classification/test_spectrograms"
    TEST_EEGS = "./inputs/hms-harmful-brain-activity-classification/test_eegs"

PATHS = KagglePaths if os.path.exists("/kaggle") else LocalPaths

print("Output Dir: ", PATHS.OUTPUT_DIR)

EEG_FEAT_ALL = [
    'Fp1', 'F3', 'C3', 'P3', 
    'F7', 'T3', 'T5', 'O1', 
    'Fz', 'Cz', 'Pz', 'Fp2', 
    'F4', 'C4', 'P4', 'F8', 
    'T4', 'T6', 'O2', 'EKG'
    ]

EEG_FEAT_USE =  ['Fp1','T3','C3','O1','Fp2','C4','T4','O2']
EEG_FEAT_INDEX = {x:y for x,y in zip(EEG_FEAT_USE, range(len(EEG_FEAT_USE)))}

BRAIN_ACTIVITY = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']
TARGETS = [f"{lb}_vote" for lb in BRAIN_ACTIVITY]
TARGETS_PRED = [f"{lb}_pred" for lb in BRAIN_ACTIVITY]

print(EEG_FEAT_INDEX)

In [None]:
def get_logger(log_dir, logger_name="train_model.log"):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger_file = os.path.join(log_dir, logger_name)
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=logger_file, mode="a+")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger


def seed_everything(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [None]:
class ModelConfig:
    SEED = 20
    SPLIT_ENTROPY = 5.5
    MODEL_NAME = "ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized"
    MODEL_BACKBONE = "reset_gru"
    BATCH_SIZE = 32
    EPOCHS = 20
    EARLY_STOP_ROUNDS = 5
    GRADIENT_ACCUMULATION_STEPS = 1
    DROP_RATE = 0.15 # default: 0.1
    DROP_PATH_RATE = 0.25 # default: 0.2
    WEIGHT_DECAY = 0.01
    AMP = True
    PRINT_FREQ = 100
    NUM_WORKERS = 0 
    MAX_GRAD_NORM = 1e7
    REGULARIZATION = 0.15
    RESNET_GRU_BANDPASS = (0.5, 20) #(0.5, 20)
    RESNET_GRU_LOWPASS = 20
    RESNET_GRU_NOTCH = (60, 30) # (Freq, Quality)
    RESNET_GRU_IN_CHANNELS = 8
    RESNET_GRU_KERNELS = [3, 5, 7, 9, 11]
    RESNET_GRU_FIXED_KERNEL_SIZE = 5
    RESNET_GRU_DOWNSAMPLE = 5
    RESNET_GRU_HIDDEN_SIZE = 304 
    RESNET_GRU_DILATED = False
    

In [None]:
seed_everything(ModelConfig.SEED)
logger = get_logger(PATHS.OUTPUT_DIR, f"{ModelConfig.MODEL_NAME}_train.log")

# Load Data

In [None]:
def eeg_from_parquet(parquet_path: str, use_feature=EEG_FEAT_USE, display: bool = False) -> np.ndarray:
    # === Extract full length EEG Sequence ===
    # fill missing values with mean
    # first fill missing values with mean of each column
    # then if all values are missing, fill with 0
    eeg = pd.read_parquet(parquet_path, columns=use_feature)
    eeg = eeg.fillna(eeg.mean(skipna=True)).fillna(0)
    data = eeg.values.astype(np.float32)
    
    # rows = len(eeg)
    # offset = (rows - 10_000) // 2 # 50 * 200 = 10_000
    # data = data[offset:offset+10_000, :]

    if display:
        fig, ax = plt.subplots(len(use_feature), 1, figsize=(10, 2*len(use_feature)), sharex=True)
        
        for i, feat in enumerate(use_feature):
            ax[i].plot(data[:, i], label=feat)
            ax[i].legend()
            ax[i].grid()
       
        name = parquet_path.split('/')[-1].split('.')[0]
        ax[0].set_title(f'EEG {name}',size=16)
        fig.tight_layout()
        plt.show()    
    return data

In [None]:
%%time
CREATE_EEGS = True
ALL_EEG_SIGNALS = {}
eeg_paths = list(Path(PATHS.TRAIN_EEGS).glob('*.parquet'))
preload_eegs_path = Path('./inputs/eegs_full.npy')

if CREATE_EEGS:
    count = 0
    for parquet_path in tqdm(eeg_paths, total=len(eeg_paths)):
        eeg_id = int(parquet_path.stem)
        eeg_path = str(parquet_path)
        data = eeg_from_parquet(eeg_path, display=False)
        ALL_EEG_SIGNALS[eeg_id] = data
        count += 1
    np.save("./inputs/eegs_full.npy", ALL_EEG_SIGNALS)
else:
    ALL_EEG_SIGNALS = np.load(preload_eegs_path, allow_pickle=True).item()

In [None]:
def gen_non_overlap_samples(df_csv, targets):
    # Reference Discussion:
    # https://www.kaggle.com/competitions/hms-harmful-brain-activity-classification/discussion/467021

    tgt_list = targets.tolist()
    brain_activity = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']

    agg_dict = {
        'spectrogram_id': 'first',
        'spectrogram_label_offset_seconds': ['min', 'max'],
        'patient_id': 'first',
        'expert_consensus': 'first'
    }

    groupby = df_csv.groupby(['eeg_id'] + tgt_list)
    train = groupby.agg(agg_dict)
    train = train.reset_index()
    train.columns = ['_'.join(col).strip() for col in train.columns.values]
    train.columns = ["eeg_id"] + tgt_list + ['spectrogram_id', 'min', 'max', 'patient_id', 'target']

    # df_eeg_offsets = groupby.agg({'eeg_label_offset_seconds': lambda x: x.tolist()}).reset_index()
    # df_eeg_offsets.columns = ["eeg_id"] + tgt_list + ['eeg_offset_list']
    # df_eeg_offsets['n_segments'] = df_eeg_offsets['eeg_offset_list'].apply(len)

    # train = train.merge(df_eeg_offsets, on=['eeg_id']+tgt_list, how='left')
    
    train['total_votes'] = train[tgt_list].sum(axis=1)
    train[tgt_list] = train[tgt_list].div(train['total_votes'], axis=0)
    
    return train

In [None]:
# Original Split 

train_csv = pd.read_csv(PATHS.TRAIN_CSV)
targets = train_csv.columns[-6:]

print("targets: ", targets.to_list())

train_csv['total_votes'] = train_csv[targets].sum(axis=1)
train_csv[targets] = train_csv[targets].astype('float32')

targets_prob = [f"{t.split('_')[0]}_prob" for t in targets]
train_csv[targets_prob] = train_csv[targets].div(train_csv['total_votes'], axis=0)

hard_csv = train_csv[train_csv['total_votes'] >= 6].copy().reset_index(drop=True)

train_all = gen_non_overlap_samples(train_csv, targets)
train_hard = gen_non_overlap_samples(hard_csv, targets)

# def get_offset_seconds(x):
#     tmp = x['eeg_offset_list']
#     if x['target'] == 'Other':
#         return [tmp[len(tmp)//2]]
#     else:
#         rand_idx = np.random.randint(0, len(tmp), len(tmp)//2)
#         return [tmp[idx] for idx in set(rand_idx)]
    
# train_all['offset_seconds'] = train_all[['target', 'eeg_offset_list']].apply(get_offset_seconds, axis=1)
# train_hard['offset_seconds'] = train_hard[['target', 'eeg_offset_list']].apply(get_offset_seconds, axis=1)

print("train_all.shape = ", train_all.shape)
print("train_all nan_count: ", train_all.isnull().sum().sum())
display(train_all.head())

print(" ")

print("train_hard.shape = ", train_hard.shape)
print("train_hard nan_count: ", train_hard.isnull().sum().sum())
display(train_hard.head())

In [None]:
train_all[train_all['offset_seconds'].apply(len) == 0] 

# Dataset

In [None]:
from scipy.signal import butter, lfilter, iirnotch 
from scipy.fft import fft, fftfreq

def plot_fft(ax, data, title, fs=200):
    n = len(data)
    yf = fft(data)
    xf = fftfreq(n, 1/fs)
    ax.plot(xf[:n//2], 2.0/n * np.abs(yf[0:n//2]))
    ax.set_title(title)
    ax.grid()

def bandpass_filter(data, lowcut=0.5, highcut=20, fs=200, order=2):
    low = 2 * lowcut / fs 
    high = 2 * highcut / fs
    b, a = butter(order, [low, high], btype='band')
    y = lfilter(b, a, data, axis=0)
    return y

def butter_lowpass_filter(data, cutoff_freq=20, sampling_rate=200, order=4):
    nyquist = 0.5 * sampling_rate
    normal_cutoff = cutoff_freq / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    filtered_data = lfilter(b, a, data, axis=0)
    return filtered_data

def notch_filter(data, fs=200, f0=60, Q=30):
    b, a = iirnotch(f0, Q, fs)
    y = lfilter(b, a, data, axis=0)
    return y

def filter_eeg(eeg_seq, bandpass=(0.5, 20), lowpass=20, notch=(60, 30), fs=200):

    if bandpass is not None:
        eeg_seq = bandpass_filter(eeg_seq, lowcut=bandpass[0], highcut=bandpass[1], fs=fs, order=2)
    if lowpass is not None:
        eeg_seq = butter_lowpass_filter(eeg_seq, cutoff_freq=lowpass, sampling_rate=fs, order=4)
    if notch is not None:
        eeg_seq = notch_filter(eeg_seq, fs=fs, f0=notch[0], Q=notch[1])

    return eeg_seq

In [None]:
class EEGSeqDataset(Dataset):
    def __init__(self, df, config, eegs, mode='train', verbose=False):
        self.df = df
        self.mode = mode
        self.eegs = eegs
        self.verbose = verbose
        self.downsample = config.RESNET_GRU_DOWNSAMPLE
        self.bandpass = config.RESNET_GRU_BANDPASS
        self.lowpass = config.RESNET_GRU_LOWPASS
        self.notch = config.RESNET_GRU_NOTCH
    
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        X, y_prob = self.__data_generation(idx)
        
        if self.downsample is not None:
            X = X[::self.downsample, :]
        
        return torch.tensor(X, dtype=torch.float32), torch.tensor(y_prob, dtype=torch.float32)
    
    def __data_generation(self, index):
        row = self.df.iloc[index]
        
        if self.verbose:
            print(f"Row {index}", row[['eeg_id', 'target']].tolist())

        X = np.zeros((10_000, 8), dtype='float32')

        # if self.mode == 'train':
        #     offset_seconds = row['offset_seconds']
        #     if len(offset_seconds) == 0:
        #         offset_sec = 0
        #     else:
        #         offset_sec = int(random.choice(offset_seconds))
        # else:
        #     offset_sec = 0
        # data = self.eegs[row.eeg_id][offset_sec*200:offset_sec*200+10_000, :]
        
        data = self.eegs[row.eeg_id][:, :]

        # === Feature engineering ===
        X[:,0] = data[:,EEG_FEAT_INDEX['Fp1']] - data[:,EEG_FEAT_INDEX['T3']]
        X[:,1] = data[:,EEG_FEAT_INDEX['T3']] - data[:,EEG_FEAT_INDEX['O1']]

        X[:,2] = data[:,EEG_FEAT_INDEX['Fp1']] - data[:,EEG_FEAT_INDEX['C3']]
        X[:,3] = data[:,EEG_FEAT_INDEX['C3']] - data[:,EEG_FEAT_INDEX['O1']]

        X[:,4] = data[:,EEG_FEAT_INDEX['Fp2']] - data[:,EEG_FEAT_INDEX['C4']]
        X[:,5] = data[:,EEG_FEAT_INDEX['C4']] - data[:,EEG_FEAT_INDEX['O2']]

        X[:,6] = data[:,EEG_FEAT_INDEX['Fp2']] - data[:,EEG_FEAT_INDEX['T4']]
        X[:,7] = data[:,EEG_FEAT_INDEX['T4']] - data[:,EEG_FEAT_INDEX['O2']]

        # === Standarize ===
        X = np.clip(X, -1024, 1024)
        X = np.nan_to_num(X, nan=0) / 32.0

        # === Butter Low-pass Filter ===
        # ??? change to bandpass filter (low=0.5, hight=20, order=2) ???
        X = filter_eeg(X, bandpass=self.bandpass, lowpass=self.lowpass, notch=self.notch, fs=200)
        
        if self.mode != 'test':
            y_prob = row[TARGETS].values.astype(np.float32)
        else:
            y_prob = np.zeros(6, dtype='float32')

        return X, y_prob 

In [None]:
# row = train_all.iloc[132]
# print(row[['eeg_id', 'min', 'max', 'target']].tolist())

# eeg_seq = ALL_EEG_SIGNALS[row.eeg_id]#[:, EEG_FEAT_INDEX['Fp1']]
# print("eeg_seq.shape: ", eeg_seq.shape)

# diff_raw = eeg_seq[:, EEG_FEAT_INDEX['Fp1']] - eeg_seq[:, EEG_FEAT_INDEX['T3']]
# diff_filt = filter_eeg(diff_raw, bandpass=(0.25, 40), lowpass=None, notch=None, fs=200)
# diff_low = filter_eeg(diff_raw, bandpass=(0.25, 40), lowpass=40, notch=None, fs=200)
# diff_notch = filter_eeg(diff_raw, bandpass=(0.25, 40), lowpass=None, notch=(60, 30), fs=200)

# fig, axes = plt.subplots(1, 4, figsize=(12, 3))
# titles = ['Raw', 'Bandpass', 'Lowpass', 'Notch']
# for i, data in enumerate([diff_raw, diff_filt, diff_low, diff_notch]):
#     plot_fft(axes[i], data, titles[i])
#     axes[i].grid(True)
# fig.tight_layout()
# plt.show()


# fig, ax = plt.subplots(1, 1, figsize=(12, 3))
# x_slice = np.arange(2000, 4000, 5)
# ax.plot(diff_raw[x_slice], label='Raw')
# ax.plot(diff_filt[x_slice], label='Bandpass')
# ax.plot(diff_low[x_slice], label='Lowpass')
# ax.plot(diff_notch[x_slice], label='Notch')
# ax.legend()
# fig.tight_layout()
# plt.show()


In [None]:
# visualize the dataset
train_dataset = EEGSeqDataset(train_all, ModelConfig, ALL_EEG_SIGNALS, mode="train", verbose=False)
train_loader = DataLoader(train_dataset, drop_last=True, batch_size=16, num_workers=4, pin_memory=True, shuffle=False)

for batch in train_loader:
    X, y = batch
    print(f"X shape: {X.shape}")
    print(f"y shape: {y.shape}")
    
    fig, axes = plt.subplots(4, 1, figsize=(20, 20))
    ax_idx = 0
    for item in range(4): #np.random.choice(range(X.shape[0]), 4):
        offset = 0
        for col in range(X.shape[-1]):
            if col != 0:
                offset -= X[item,:,col].min()
            axes[ax_idx].plot(np.arange(X.shape[1]), X[item,:,col]+offset, label=f'feature {col+1}')
            offset += X[item,:,col].max()
        print(y[item])
        # axes[ax_idx].set_title(f'Weight = {weights[item]}',size=14)
        axes[ax_idx].legend()
        ax_idx += 1
    fig.tight_layout()
    plt.show()
    break

del train_dataset, train_loader
torch.cuda.empty_cache()
gc.collect()

# Model

In [None]:
class ResNet_1D_Block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, downsampling, dropout=0.0, dilation=1):
        super(ResNet_1D_Block, self).__init__()
        self.block = nn.Sequential(
            nn.BatchNorm1d(num_features=in_channels),
            nn.Hardswish(), #nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding, dilation=dilation, bias=False),
            nn.BatchNorm1d(num_features=out_channels),
            nn.Hardswish(), #nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Conv1d(out_channels, out_channels, kernel_size, stride, padding, dilation=dilation, bias=False),
        )
        
        self.downsampling = downsampling

    def forward(self, x):
        identity = x
        out = self.block(x)
        
        if self.downsampling is not None:
            identity = self.downsampling(identity)
            out = self.downsampling(out)
            
        out += identity
        return out

class SelfAttentionPooling(nn.Module):
    """
    Implementation of SelfAttentionPooling 
    Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition
    https://arxiv.org/pdf/2008.01077v1.pdf
    """
    def __init__(self, input_dim):
        super(SelfAttentionPooling, self).__init__()
        self.W = nn.Linear(input_dim, 1)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, batch_rep):
        """
        input:
            batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension
        attention_weight:
            att_w : size (N, T, 1)
        return:
            utter_rep: size (N, H)
        """
        att_w = self.softmax(self.W(batch_rep).squeeze(-1)).unsqueeze(-1)
        utter_rep = torch.sum(batch_rep * att_w, dim=1)

        return utter_rep

class ResNetGRU(nn.Module):
    def __init__(self, config=ModelConfig, num_classes=6):
        super(ResNetGRU, self).__init__()

        self.planes = 24
        self.kernels = config.RESNET_GRU_KERNELS
        self.in_channels = config.RESNET_GRU_IN_CHANNELS
        self.use_dilation = config.RESNET_GRU_DILATED

        self.conv_in_size = self.planes * len(self.kernels)

        fixed_kernel_size = config.RESNET_GRU_FIXED_KERNEL_SIZE
        rnn_hidden_size = 128
        hidden_size = self.conv_in_size + rnn_hidden_size*2 #config.RESNET_GRU_HIDDEN_SIZE

        # Define the separate convolutional layers
        self.parallel_conv = self._make_parallel_conv_layers()
        # Define the ResNet part of the model
        self.resnet_part = self._make_resnet_part(fixed_kernel_size, n_blocks=9)
        # Define the GRU part of the model
        self.rnn = nn.GRU(input_size=self.in_channels, hidden_size=rnn_hidden_size, num_layers=1, bidirectional=True)
        
        # Define the pooling layers
        self.pooling_resnet = SelfAttentionPooling(self.conv_in_size)
        self.pooling_rnn = SelfAttentionPooling(rnn_hidden_size * 2)
        
        # Define the final fully connected layer
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def _make_parallel_conv_layers(self):
        
        return nn.ModuleList([
            nn.Conv1d(
                in_channels=self.in_channels, 
                out_channels=self.planes, 
                kernel_size=kernel_size,
                stride=1, 
                padding=kernel_size//2, 
                bias=False
            ) for kernel_size in self.kernels
        ])

    def _make_resnet_part(self, fixed_kernel_size, n_blocks=9):
        # prepare resnet layers
        if self.use_dilation:
            dilation_rates = [1, 2, 2, 2, 2, 4, 4, 4, 4] #[1] * n_blocks
        else:
            dilation_rates = [1] * n_blocks
        paddings = [fixed_kernel_size//2 * rate for rate in dilation_rates]
        
        ds_layer = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        ds_layers = [ds_layer if i % 2 == 0 else None for i in range(n_blocks-1)] + [None]
        
        resnet_layers = [
            ResNet_1D_Block(
                in_channels=self.conv_in_size,
                out_channels=self.conv_in_size, 
                kernel_size=fixed_kernel_size, 
                stride=1, 
                padding=paddings[i], 
                downsampling=ds_layers[i],
                dropout=0.0,
                dilation=dilation_rates[i])
            for i in range(n_blocks)
        ]
        
        # return the resnet encoder
        return nn.Sequential(
            nn.BatchNorm1d(num_features=self.conv_in_size),
            nn.SiLU(), #nn.ReLU(inplace=False),
            nn.Conv1d( # -> downsample by stride=2
                in_channels=self.conv_in_size, 
                out_channels=self.conv_in_size,
                kernel_size=fixed_kernel_size, 
                stride=1, 
                padding=fixed_kernel_size//2, 
                bias=False
            ), 
            *resnet_layers,
            nn.BatchNorm1d(num_features=self.conv_in_size),
            nn.SiLU(), #nn.ReLU(inplace=False),
            nn.AvgPool1d(kernel_size=5, stride=5, padding=0)
        )
    
    def forward(self, x):
        
        x = x.permute(0, 2, 1)

        cov_sep = [conv(x) for conv in self.parallel_conv]
        cov_out = torch.cat(cov_sep, dim=1) # Vstack -> (N, sum(C_i), L)
        
        # - pass through the resnet part
        resnet_out = self.resnet_part(cov_out)
        out_1 = self.pooling_resnet(resnet_out.permute(0, 2, 1))
        
        # - extract features using rnn
        rnn_out, _ = self.rnn(x.permute(0, 2, 1))
        out_2 = self.pooling_rnn(rnn_out)

        # - concat features and pass to FC
        feat = torch.cat([out_1, out_2], dim=1) 
        result = self.fc(feat)  
        
        return result


In [None]:
train_dataset = EEGSeqDataset(train_all, ModelConfig, ALL_EEG_SIGNALS, mode="train")
train_loader = DataLoader(train_dataset, drop_last=True, batch_size=4, num_workers=4, pin_memory=True, shuffle=False)

model = ResNetGRU(config=ModelConfig, num_classes=6)

for i, batch in enumerate(train_loader):
    X, y = batch

    print(f"X shape: {X.shape}")
    print(f"y shape: {y.shape}")
    
    y_pred = model(X)
    print(y_pred.shape)
    break 

del model, train_dataset, train_loader, X, y
gc.collect()

In [None]:
!nvidia-smi

# Train

In [None]:
import warnings
warnings.filterwarnings("ignore")

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
class Trainer:

    def __init__(self, model, config, logger):

        self.model = model
        self.logger = logger
        self.config = config
        
        self.early_stop_rounds = config.EARLY_STOP_ROUNDS
        self.early_stop_counter = 0
        
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.kl_div_loss = nn.KLDivLoss(reduction='batchmean')
        self.ce_loss = nn.CrossEntropyLoss()
        self.gamma = config.REGULARIZATION
        
        # self.criterion = nn.KLDivLoss(reduction="batchmean")
    
    def criterion(self, y_pred, y_true, weights=None, mode='train'):
        kl_loss = self.kl_div_loss(F.log_softmax(y_pred, dim=1), y_true)
        if (self.gamma is not None) & (mode == 'train'):
            softmax_probs = F.softmax(y_pred, dim=1)  # Compute softmax probabilities
            entropy_loss = -(softmax_probs * torch.log(softmax_probs + 1e-9)).sum(dim=1).mean(dim=0) # Compute entropy, add epsilon to avoid log(0)
            return kl_loss - self.gamma * entropy_loss
        else:
            return kl_loss
        
    def train(self, train_loader, valid_loader, from_checkpoint=None):

        self.optimizer = AdamW(self.model.parameters(), lr=8e-3, weight_decay=self.config.WEIGHT_DECAY)

        # CosineAnnealingWarmRestarts( 
        #     self.optimizer,
        #     T_0=20,
        #     eta_min=1e-6,
        #     T_mult=1,
        #     last_epoch=-1
        # )
        self.scheduler =  OneCycleLR(
            self.optimizer,
            max_lr=1e-4,
            epochs=self.config.EPOCHS,
            steps_per_epoch=len(train_loader),
            pct_start=0.1,
            anneal_strategy="cos",
            final_div_factor=100,
        )

        if from_checkpoint is not None:
            self.model.load_state_dict(torch.load(from_checkpoint, map_location=self.device))

        self.model.to(self.device)
        best_weights, best_preds, best_loss = None, None, float("inf")
        loss_records = {"train": [], "valid": []}

        for epoch in range(self.config.EPOCHS):
            start_epoch = time()

            train_loss, _ = self._train_or_valid_epoch(epoch, train_loader, is_train=True)
            valid_loss, valid_preds = self._train_or_valid_epoch(epoch, valid_loader, is_train=False)

            loss_records["train"].append(train_loss)
            loss_records["valid"].append(valid_loss)

            elapsed = time() - start_epoch

            info = f"{'-' * 100}\nEpoch {epoch + 1} - "
            info += f"Average Loss: (train) {train_loss:.4f}; (valid) {valid_loss:.4f} | Time: {elapsed:.2f}s"
            self.logger.info(info)

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_weights = self.model.state_dict()
                best_preds = valid_preds
                self.logger.info(f"Best model found in epoch {epoch + 1} | valid loss: {best_loss:.4f}")
                self.early_stop_counter = 0
            
            else:
                self.early_stop_counter += 1
                if self.early_stop_counter >= self.early_stop_rounds:
                    self.logger.info(f"Early stopping at epoch {epoch + 1}")
                    break

        return best_weights, best_preds, loss_records

    def _train_or_valid_epoch(self, epoch_id, dataloader, is_train=True):

        self.model.train() if is_train else self.model.eval()
        mode = "Train" if is_train else "Valid"

        len_loader = len(dataloader)
        scaler = GradScaler(enabled=self.config.AMP)
        loss_meter, predicts_record = AverageMeter(), []

        start = time()
        pbar = tqdm(dataloader, total=len(dataloader), unit="batch", desc=f"{mode} [{epoch_id}]")
        for step, (X, y) in enumerate(pbar):
            X, y = X.to(self.device), y.to(self.device)

            if is_train:
                with autocast(enabled=self.config.AMP):
                    y_pred = self.model(X)
                    loss = self.criterion(y_pred, y)
                if self.config.GRADIENT_ACCUMULATION_STEPS > 1:
                    loss = loss / self.config.GRADIENT_ACCUMULATION_STEPS
                scaler.scale(loss).backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.MAX_GRAD_NORM)
                if (step + 1) % self.config.GRADIENT_ACCUMULATION_STEPS == 0:
                    scaler.step(self.optimizer)
                    scaler.update()
                    self.optimizer.zero_grad()
                    self.scheduler.step()
            else:
                with torch.no_grad():
                    y_pred = self.model(X)
                    loss = self.criterion(y_pred, y, mode='valid')
                if self.config.GRADIENT_ACCUMULATION_STEPS > 1:
                    loss = loss / self.config.GRADIENT_ACCUMULATION_STEPS
                
                predicts_record.append(y_pred.to('cpu').numpy())
            
            loss_meter.update(loss.item(), y.size(0))
            end = time()

            if (step % self.config.PRINT_FREQ == 0) or (step == (len_loader - 1)):
                lr = self.scheduler.get_last_lr()[0]
                info = f"Epoch {epoch_id + 1} [{step}/{len_loader}] | {mode} Loss: {loss_meter.avg:.4f}"
                if is_train:
                    info += f" Grad: {grad_norm:.4f} LR: {lr:.4e}"
                info += f" | Elapse: {end - start:.2f}s"
                print(info)

        if not is_train:
            predicts_record = np.concatenate(predicts_record)
            
        return loss_meter.avg, predicts_record


In [None]:
def train_fold(model, fold_id, train_folds, valid_folds, logger, stage=1, checkpoint=None):

    train_dataset = EEGSeqDataset(train_folds, ModelConfig, ALL_EEG_SIGNALS, mode="train")
    valid_dataset = EEGSeqDataset(valid_folds, ModelConfig, ALL_EEG_SIGNALS, mode="valid")

    # ======== DATALOADERS ==========
    loader_kwargs = {
        "batch_size": ModelConfig.BATCH_SIZE,
        "num_workers": ModelConfig.NUM_WORKERS,
        "pin_memory": True,
        "shuffle": False,
    }

    train_loader = DataLoader(train_dataset, drop_last=True, collate_fn=None, **loader_kwargs)
    valid_loader = DataLoader(valid_dataset, drop_last=False, collate_fn=None, **loader_kwargs)

    if checkpoint is not None:
        print(f"Loading model from checkpoint: {checkpoint}")

    trainer = Trainer(model, ModelConfig, logger)
    best_weights, best_preds, loss_records = trainer.train(
        train_loader, valid_loader, from_checkpoint=checkpoint)

    save_model_name = f"{ModelConfig.MODEL_NAME}_fold_{fold_id}_stage_{stage}.pth"
    torch.save(best_weights, os.path.join(PATHS.OUTPUT_DIR, save_model_name))

    del train_dataset, valid_dataset, train_loader, valid_loader
    torch.cuda.empty_cache()
    gc.collect()

    return best_preds, loss_records

def evaluate_oof(oof_df):
    '''
    Evaluate the out-of-fold dataframe using KL Divergence (torch and kaggle)
    '''
    kl_loss = nn.KLDivLoss(reduction="batchmean")
    labels = torch.tensor(oof_df[TARGETS].values.astype('float32'))
    preds = F.log_softmax(
        torch.tensor(oof_df[TARGETS_PRED].values.astype('float32'), requires_grad=False),
        dim=1
    )
    kl_torch = kl_loss(preds, labels).item()

    return kl_torch

def prepare_k_fold(df, k_folds=5):

    kf = KFold(n_splits=k_folds, shuffle=True, random_state=ModelConfig.SEED)
    unique_spec_id = df['spectrogram_id'].unique()
    df['fold'] = k_folds

    for fold, (train_index, valid_index) in enumerate(kf.split(unique_spec_id)):
        df.loc[df['spectrogram_id'].isin(unique_spec_id[valid_index]), 'fold'] = fold

    return df

In [22]:
from kl_divergence import score as kaggle_score 
from sklearn.metrics import confusion_matrix
import seaborn as sns

TARGET2ID = {'Seizure': 0, 'LPD': 1, 'GPD': 2, 'LRDA': 3, 'GRDA': 4, 'Other': 5}

def calc_kaggle_score(oof_df):
    submission_df = oof_df[['eeg_id']+TARGETS_PRED].copy()
    submission_df.columns = ['eeg_id'] + TARGETS
    solution_df = oof_df[['eeg_id']+TARGETS].copy()
    return kaggle_score(solution_df, submission_df, 'eeg_id')

def analyze_oof(oof_csv):

    kl_criteria = nn.KLDivLoss(reduction='batchmean')
    softmax = nn.Softmax(dim=1)

    oof_df = pd.read_csv(oof_csv)
    oof_df['target_pred'] = oof_df[TARGETS_PRED].apply(lambda x: np.argmax(x), axis=1)
    oof_df['target_id'] = oof_df[TARGETS].apply(lambda x: np.argmax(x), axis=1)
    
    oof_df["kl_loss"] = oof_df.apply(
    lambda row: 
        kl_criteria(
            F.log_softmax(
                    torch.tensor(row[TARGETS_PRED].values.astype(np.float32)).unsqueeze(0)
                , dim=1
                ), 
            torch.tensor(row[TARGETS].values.astype(np.float32))
            ).numpy(),
    axis=1)

    oof_df["kl_loss"] = oof_df['kl_loss'].astype(np.float32)

    oof_df[TARGETS_PRED] = softmax( torch.tensor(oof_df[TARGETS_PRED].values.astype(np.float32)) )

    oof_df.head()

    return oof_df

In [24]:
# Major Train Loop
# ================== Logger ==================
logger.info(f"{'*' * 100}")
logger.info(f"Script Start: {ctime()}")
logger.info(f"Model Configurations:")
for key, value in ModelConfig.__dict__.items():
    if not key.startswith("__"):
        logger.info(f"{key}: {value}")
logger.info(f"{'*' * 100}")

# ================== Prepare Training ==================
oof_stage_1, oof_stage_2 = pd.DataFrame(), pd.DataFrame()
loss_history_1, loss_history_2 = [], []
t_start = time()

K_FOLDS = 5
train_all = prepare_k_fold(train_all, k_folds=K_FOLDS)

for fold in range(0, K_FOLDS):
    tik_total = time()
    

    valid_folds = train_all[(train_all['fold'] == fold) ].reset_index(drop=True)
    train_folds = train_all[(train_all['fold'] != fold) ].reset_index(drop=True)
    train_size, valid_size = train_folds.shape[0], valid_folds.shape[0]

    # ================== Stage 1: Train ====================
    tik = time()
    ## STAGE 1
    model = ResNetGRU(config=ModelConfig, num_classes=6)

    logger.info(f"{'=' * 100}\nFold: {fold}\n{'=' * 100}")
    logger.info(f"- Stage 1 | Train: {train_size}; Valid: {valid_size} -")
    valid_predicts, loss_records = train_fold(
        model, fold, train_folds, valid_folds, logger, stage=1, checkpoint=None)

    loss_history_1.append(loss_records)
    valid_folds[TARGETS_PRED] = valid_predicts
    kl_loss_torch = evaluate_oof(valid_folds)
    info = f"{'=' * 100}\nFold {fold} Valid Loss: {kl_loss_torch}\n"
    info += f"Elapse: {(time() - tik) / 60:.2f} min \n{'=' * 100}"
    logger.info(info)

    oof_stage_1 = pd.concat([oof_stage_1, valid_folds], axis=0).reset_index(drop=True)
    oof_stage_1.to_csv(os.path.join(PATHS.OUTPUT_DIR, f"{ModelConfig.MODEL_NAME}_oof_1.csv"), index=False)

    # ================== Stage 2: Train ====================
    tik = time()

    ModelConfig.REGULARIZATION = 0.2 

    model = ResNetGRU(config=ModelConfig, num_classes=6)
    
    train_folds_2 = train_hard[~train_hard['eeg_id'].isin(valid_folds['eeg_id'])].reset_index(drop=True)
    valid_folds_2 = train_hard[ train_hard['eeg_id'].isin(valid_folds['eeg_id'])].reset_index(drop=True)
    train_size = train_folds_2.shape[0]
    valid_size = valid_folds_2.shape[0]
    
    ## STAGE 2
    logger.info(f"- Stage 2 | Train: {train_size}; Valid: {valid_size} | Reg = {ModelConfig.REGULARIZATION} -")

    # model_dir = "/home/shiyi/kaggle_hms/outputs/ResnetGRU_Originalsplit/Reg015"
    # checkpoint = list(Path(model_dir).glob(f"*_fold_{fold}_stage_1.pth"))[0]
    checkpoint = list(Path(PATHS.OUTPUT_DIR).glob(f"{ModelConfig.MODEL_NAME}_fold_{fold}_stage_1.pth"))[0]

    valid_predicts, loss_records = train_fold(
        model, fold, train_folds_2, valid_folds_2, logger, stage=2, checkpoint=checkpoint)
    
    loss_history_2.append(loss_records)
    valid_folds_2[TARGETS_PRED] = valid_predicts
    kl_loss_torch = evaluate_oof(valid_folds_2)
    info = f"{'=' * 100}\nFold {fold} Valid Loss: {kl_loss_torch}\n"
    info += f"Elapse: {(time() - tik) / 60:.2f} min \n{'=' * 100}"
    logger.info(info)

    oof_stage_2 = pd.concat([oof_stage_2, valid_folds_2], axis=0).reset_index(drop=True)
    oof_stage_2.to_csv(os.path.join(PATHS.OUTPUT_DIR, f"{ModelConfig.MODEL_NAME}_oof_2.csv"), index=False)

    logger.info(f"Fold {fold} Elapse: {(time() - tik_total) / 60:.2f} min")

info = f"{'=' * 100}\nTraining Complete!\n"
cv_results_1 = evaluate_oof(oof_stage_1)
cv_results_2 = evaluate_oof(oof_stage_2)
info += f"CV Result: Stage 1: {cv_results_1} | Stage 2: {cv_results_2}\n"
info += f"Elapse: {(time() - t_start) / 60:.2f} min \n{'=' * 100}"
logger.info(info)

****************************************************************************************************
Script Start: Sat Apr  6 23:44:12 2024
Model Configurations:
SEED: 20
SPLIT_ENTROPY: 5.5
MODEL_NAME: ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized
MODEL_BACKBONE: reset_gru
BATCH_SIZE: 32
EPOCHS: 20
EARLY_STOP_ROUNDS: 5
GRADIENT_ACCUMULATION_STEPS: 1
DROP_RATE: 0.15
DROP_PATH_RATE: 0.25
WEIGHT_DECAY: 0.01
AMP: True
PRINT_FREQ: 100
NUM_WORKERS: 0
MAX_GRAD_NORM: 10000000.0
REGULARIZATION: None
RESNET_GRU_BANDPASS: (0.5, 20)
RESNET_GRU_LOWPASS: 20
RESNET_GRU_NOTCH: (60, 30)
RESNET_GRU_IN_CHANNELS: 8
RESNET_GRU_KERNELS: [3, 5, 7, 9, 11]
RESNET_GRU_FIXED_KERNEL_SIZE: 5
RESNET_GRU_DOWNSAMPLE: 5
RESNET_GRU_HIDDEN_SIZE: 304
RESNET_GRU_DILATED: False
****************************************************************************************************
- Stage 2 | Train: 5215; Valid: 1277 | Reg = 0.2 -


Loading model from checkpoint: outputs/ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized_fold_0_stage_1.pth


Train [0]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 1 [0/162] | Train Loss: 0.2301 Grad: 1341501.8750 LR: 4.0023e-06 | Elapse: 0.11s
Epoch 1 [100/162] | Train Loss: 0.1275 Grad: 417967.6875 LR: 2.5357e-05 | Elapse: 10.03s
Epoch 1 [161/162] | Train Loss: 0.1129 Grad: 208554.8281 LR: 5.2233e-05 | Elapse: 16.06s


Valid [0]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 1 [0/40] | Valid Loss: 0.3930 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Loss: (train) 0.1129; (valid) 0.4271 | Time: 19.76s
Best model found in epoch 1 | valid loss: 0.4271


Epoch 1 [39/40] | Valid Loss: 0.4271 | Elapse: 3.70s


Train [1]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 2 [0/162] | Train Loss: 0.0116 Grad: 300951.2812 LR: 5.2700e-05 | Elapse: 0.10s
Epoch 2 [100/162] | Train Loss: 0.0534 Grad: 287319.0938 LR: 9.2056e-05 | Elapse: 10.01s
Epoch 2 [161/162] | Train Loss: 0.0506 Grad: 241645.5312 LR: 1.0000e-04 | Elapse: 16.05s


Valid [1]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 2 [0/40] | Valid Loss: 0.4319 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Loss: (train) 0.0506; (valid) 0.4270 | Time: 19.76s
Best model found in epoch 2 | valid loss: 0.4270


Epoch 2 [39/40] | Valid Loss: 0.4270 | Elapse: 3.71s


Train [2]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 3 [0/162] | Train Loss: -0.0096 Grad: 220587.2344 LR: 1.0000e-04 | Elapse: 0.10s
Epoch 3 [100/162] | Train Loss: 0.0214 Grad: 294426.5000 LR: 9.9699e-05 | Elapse: 9.97s
Epoch 3 [161/162] | Train Loss: 0.0179 Grad: 177267.7812 LR: 9.9231e-05 | Elapse: 15.98s


Valid [2]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 3 [0/40] | Valid Loss: 0.3719 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Loss: (train) 0.0179; (valid) 0.4071 | Time: 19.70s
Best model found in epoch 3 | valid loss: 0.4071


Epoch 3 [39/40] | Valid Loss: 0.4071 | Elapse: 3.71s


Train [3]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 4 [0/162] | Train Loss: -0.0392 Grad: 187071.4688 LR: 9.9222e-05 | Elapse: 0.10s
Epoch 4 [100/162] | Train Loss: 0.0004 Grad: 231185.2188 LR: 9.7992e-05 | Elapse: 9.96s
Epoch 4 [161/162] | Train Loss: 0.0001 Grad: 135300.9844 LR: 9.6967e-05 | Elapse: 15.96s


Valid [3]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 4 [0/40] | Valid Loss: 0.3711 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Loss: (train) 0.0001; (valid) 0.4186 | Time: 19.67s


Epoch 4 [39/40] | Valid Loss: 0.4186 | Elapse: 3.71s


Train [4]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 5 [0/162] | Train Loss: -0.0521 Grad: 149109.0625 LR: 9.6949e-05 | Elapse: 0.10s
Epoch 5 [100/162] | Train Loss: -0.0113 Grad: 191243.2344 LR: 9.4828e-05 | Elapse: 9.97s
Epoch 5 [161/162] | Train Loss: -0.0118 Grad: 133959.0312 LR: 9.3277e-05 | Elapse: 15.98s


Valid [4]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 5 [0/40] | Valid Loss: 0.3542 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Loss: (train) -0.0118; (valid) 0.4077 | Time: 19.70s


Epoch 5 [39/40] | Valid Loss: 0.4077 | Elapse: 3.71s


Train [5]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 6 [0/162] | Train Loss: -0.0690 Grad: 114816.6406 LR: 9.3250e-05 | Elapse: 0.10s
Epoch 6 [100/162] | Train Loss: -0.0257 Grad: 152110.4062 LR: 9.0302e-05 | Elapse: 9.97s
Epoch 6 [161/162] | Train Loss: -0.0285 Grad: 342313.6562 LR: 8.8272e-05 | Elapse: 15.98s


Valid [5]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 6 [0/40] | Valid Loss: 0.3744 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Loss: (train) -0.0285; (valid) 0.4146 | Time: 19.70s


Epoch 6 [39/40] | Valid Loss: 0.4146 | Elapse: 3.71s


Train [6]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 7 [0/162] | Train Loss: -0.0749 Grad: 108748.0547 LR: 8.8238e-05 | Elapse: 0.10s
Epoch 7 [100/162] | Train Loss: -0.0384 Grad: 220981.6719 LR: 8.4553e-05 | Elapse: 9.95s
Epoch 7 [161/162] | Train Loss: -0.0418 Grad: 168694.1719 LR: 8.2105e-05 | Elapse: 15.95s


Valid [6]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 7 [0/40] | Valid Loss: 0.3502 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 7 - Average Loss: (train) -0.0418; (valid) 0.4327 | Time: 19.67s


Epoch 7 [39/40] | Valid Loss: 0.4327 | Elapse: 3.71s


Train [7]:   0%|          | 0/162 [00:00<?, ?batch/s]

Epoch 8 [0/162] | Train Loss: -0.0767 Grad: 140662.9375 LR: 8.2064e-05 | Elapse: 0.10s
Epoch 8 [100/162] | Train Loss: -0.0471 Grad: 224700.3438 LR: 7.7754e-05 | Elapse: 9.96s
Epoch 8 [161/162] | Train Loss: -0.0506 Grad: 123958.4375 LR: 7.4963e-05 | Elapse: 15.97s


Valid [7]:   0%|          | 0/40 [00:00<?, ?batch/s]

Epoch 8 [0/40] | Valid Loss: 0.3718 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 8 - Average Loss: (train) -0.0506; (valid) 0.4214 | Time: 19.68s
Early stopping at epoch 8
Fold 0 Valid Loss: 0.4070611894130707
Elapse: 2.63 min 


Epoch 8 [39/40] | Valid Loss: 0.4214 | Elapse: 3.71s


Fold 0 Elapse: 2.63 min
- Stage 2 | Train: 5248; Valid: 1244 | Reg = 0.2 -


Loading model from checkpoint: outputs/ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized_fold_1_stage_1.pth


Train [0]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 1 [0/164] | Train Loss: 0.1561 Grad: 348455.7188 LR: 4.0022e-06 | Elapse: 0.11s
Epoch 1 [100/164] | Train Loss: 0.1710 Grad: 293488.8750 LR: 2.4879e-05 | Elapse: 10.01s
Epoch 1 [163/164] | Train Loss: 0.1453 Grad: 260240.0312 LR: 5.2231e-05 | Elapse: 16.24s


Valid [0]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 1 [0/39] | Valid Loss: 0.3753 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Loss: (train) 0.1453; (valid) 0.4255 | Time: 19.87s
Best model found in epoch 1 | valid loss: 0.4255


Epoch 1 [38/39] | Valid Loss: 0.4255 | Elapse: 3.62s


Train [1]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 2 [0/164] | Train Loss: 0.0660 Grad: 201749.2812 LR: 5.2692e-05 | Elapse: 0.10s
Epoch 2 [100/164] | Train Loss: 0.0884 Grad: 239460.4375 LR: 9.1734e-05 | Elapse: 9.98s
Epoch 2 [163/164] | Train Loss: 0.0777 Grad: 189415.7969 LR: 1.0000e-04 | Elapse: 16.21s


Valid [1]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 2 [0/39] | Valid Loss: 0.3645 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Loss: (train) 0.0777; (valid) 0.4093 | Time: 19.84s
Best model found in epoch 2 | valid loss: 0.4093


Epoch 2 [38/39] | Valid Loss: 0.4093 | Elapse: 3.62s


Train [2]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 3 [0/164] | Train Loss: 0.0458 Grad: 203458.6094 LR: 1.0000e-04 | Elapse: 0.10s
Epoch 3 [100/164] | Train Loss: 0.0465 Grad: 152300.3750 LR: 9.9706e-05 | Elapse: 10.03s
Epoch 3 [163/164] | Train Loss: 0.0400 Grad: 219552.7812 LR: 9.9231e-05 | Elapse: 16.30s


Valid [2]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 3 [0/39] | Valid Loss: 0.3627 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Loss: (train) 0.0400; (valid) 0.4118 | Time: 19.93s


Epoch 3 [38/39] | Valid Loss: 0.4118 | Elapse: 3.63s


Train [3]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 4 [0/164] | Train Loss: -0.0011 Grad: 190450.8125 LR: 9.9222e-05 | Elapse: 0.10s
Epoch 4 [100/164] | Train Loss: 0.0184 Grad: 193391.5156 LR: 9.8011e-05 | Elapse: 10.06s
Epoch 4 [163/164] | Train Loss: 0.0128 Grad: 191615.1094 LR: 9.6968e-05 | Elapse: 16.33s


Valid [3]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 4 [0/39] | Valid Loss: 0.3686 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Loss: (train) 0.0128; (valid) 0.4143 | Time: 19.96s


Epoch 4 [38/39] | Valid Loss: 0.4143 | Elapse: 3.63s


Train [4]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 5 [0/164] | Train Loss: -0.0204 Grad: 161533.7500 LR: 9.6949e-05 | Elapse: 0.10s
Epoch 5 [100/164] | Train Loss: -0.0006 Grad: 234850.5781 LR: 9.4857e-05 | Elapse: 9.98s
Epoch 5 [163/164] | Train Loss: -0.0029 Grad: 236423.5938 LR: 9.3277e-05 | Elapse: 16.19s


Valid [4]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 5 [0/39] | Valid Loss: 0.3502 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Loss: (train) -0.0029; (valid) 0.4150 | Time: 19.82s


Epoch 5 [38/39] | Valid Loss: 0.4150 | Elapse: 3.63s


Train [5]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 6 [0/164] | Train Loss: -0.0228 Grad: 153229.0156 LR: 9.3251e-05 | Elapse: 0.10s
Epoch 6 [100/164] | Train Loss: -0.0079 Grad: 181432.3906 LR: 9.0342e-05 | Elapse: 9.99s
Epoch 6 [163/164] | Train Loss: -0.0125 Grad: 148336.5781 LR: 8.8273e-05 | Elapse: 16.22s


Valid [5]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 6 [0/39] | Valid Loss: 0.3571 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Loss: (train) -0.0125; (valid) 0.4203 | Time: 19.84s


Epoch 6 [38/39] | Valid Loss: 0.4203 | Elapse: 3.62s


Train [6]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 7 [0/164] | Train Loss: -0.0513 Grad: 160649.1719 LR: 8.8238e-05 | Elapse: 0.10s
Epoch 7 [100/164] | Train Loss: -0.0229 Grad: 273481.8750 LR: 8.4601e-05 | Elapse: 10.01s
Epoch 7 [163/164] | Train Loss: -0.0270 Grad: 208186.0312 LR: 8.2106e-05 | Elapse: 16.25s


Valid [6]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 7 [0/39] | Valid Loss: 0.3526 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 7 - Average Loss: (train) -0.0270; (valid) 0.4220 | Time: 19.87s
Early stopping at epoch 7
Fold 1 Valid Loss: 0.40928131341934204
Elapse: 2.32 min 


Epoch 7 [38/39] | Valid Loss: 0.4220 | Elapse: 3.61s


Fold 1 Elapse: 2.32 min
- Stage 2 | Train: 5070; Valid: 1422 | Reg = 0.2 -


Loading model from checkpoint: outputs/ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized_fold_2_stage_1.pth


Train [0]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 1 [0/158] | Train Loss: 0.1443 Grad: 342099.6562 LR: 4.0024e-06 | Elapse: 0.11s
Epoch 1 [100/158] | Train Loss: 0.1718 Grad: 451266.7188 LR: 2.6361e-05 | Elapse: 10.01s
Epoch 1 [157/158] | Train Loss: 0.1453 Grad: 263898.1562 LR: 5.2239e-05 | Elapse: 15.65s


Valid [0]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 1 [0/45] | Valid Loss: 0.3327 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Loss: (train) 0.1453; (valid) 0.4113 | Time: 19.78s
Best model found in epoch 1 | valid loss: 0.4113


Epoch 1 [44/45] | Valid Loss: 0.4113 | Elapse: 4.12s


Train [1]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 2 [0/158] | Train Loss: 0.0655 Grad: 230695.9844 LR: 5.2718e-05 | Elapse: 0.10s
Epoch 2 [100/158] | Train Loss: 0.0933 Grad: 213591.6406 LR: 9.2706e-05 | Elapse: 9.99s
Epoch 2 [157/158] | Train Loss: 0.0814 Grad: 211467.8594 LR: 1.0000e-04 | Elapse: 15.63s


Valid [1]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 2 [0/45] | Valid Loss: 0.3469 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Loss: (train) 0.0814; (valid) 0.4178 | Time: 19.75s


Epoch 2 [44/45] | Valid Loss: 0.4178 | Elapse: 4.12s


Train [2]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 3 [0/158] | Train Loss: 0.0283 Grad: 161698.4844 LR: 1.0000e-04 | Elapse: 0.10s
Epoch 3 [100/158] | Train Loss: 0.0498 Grad: 159215.9375 LR: 9.9683e-05 | Elapse: 9.99s
Epoch 3 [157/158] | Train Loss: 0.0421 Grad: 145639.5938 LR: 9.9231e-05 | Elapse: 15.64s


Valid [2]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 3 [0/45] | Valid Loss: 0.3263 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Loss: (train) 0.0421; (valid) 0.4238 | Time: 19.77s


Epoch 3 [44/45] | Valid Loss: 0.4238 | Elapse: 4.12s


Train [3]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 4 [0/158] | Train Loss: -0.0082 Grad: 137965.3750 LR: 9.9221e-05 | Elapse: 0.11s
Epoch 4 [100/158] | Train Loss: 0.0287 Grad: 182930.0000 LR: 9.7953e-05 | Elapse: 10.04s
Epoch 4 [157/158] | Train Loss: 0.0215 Grad: 211547.4531 LR: 9.6967e-05 | Elapse: 15.70s


Valid [3]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 4 [0/45] | Valid Loss: 0.3239 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Loss: (train) 0.0215; (valid) 0.4129 | Time: 19.83s


Epoch 4 [44/45] | Valid Loss: 0.4129 | Elapse: 4.13s


Train [4]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 5 [0/158] | Train Loss: -0.0237 Grad: 142882.4531 LR: 9.6948e-05 | Elapse: 0.10s
Epoch 5 [100/158] | Train Loss: 0.0087 Grad: 128822.1797 LR: 9.4766e-05 | Elapse: 10.02s
Epoch 5 [157/158] | Train Loss: 0.0036 Grad: 229787.9062 LR: 9.3276e-05 | Elapse: 15.68s


Valid [4]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 5 [0/45] | Valid Loss: 0.3271 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Loss: (train) 0.0036; (valid) 0.4167 | Time: 19.81s


Epoch 5 [44/45] | Valid Loss: 0.4167 | Elapse: 4.13s


Train [5]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 6 [0/158] | Train Loss: -0.0243 Grad: 168302.9531 LR: 9.3249e-05 | Elapse: 0.10s
Epoch 6 [100/158] | Train Loss: -0.0050 Grad: 120276.2656 LR: 9.0220e-05 | Elapse: 10.02s
Epoch 6 [157/158] | Train Loss: -0.0104 Grad: 166148.0781 LR: 8.8271e-05 | Elapse: 15.68s


Valid [5]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 6 [0/45] | Valid Loss: 0.3156 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Loss: (train) -0.0104; (valid) 0.4091 | Time: 19.82s
Best model found in epoch 6 | valid loss: 0.4091


Epoch 6 [44/45] | Valid Loss: 0.4091 | Elapse: 4.14s


Train [6]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 7 [0/158] | Train Loss: -0.0462 Grad: 140026.8594 LR: 8.8236e-05 | Elapse: 0.10s
Epoch 7 [100/158] | Train Loss: -0.0183 Grad: 214689.2500 LR: 8.4452e-05 | Elapse: 10.03s
Epoch 7 [157/158] | Train Loss: -0.0208 Grad: 113912.5469 LR: 8.2104e-05 | Elapse: 15.68s


Valid [6]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 7 [0/45] | Valid Loss: 0.2893 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 7 - Average Loss: (train) -0.0208; (valid) 0.3913 | Time: 19.82s
Best model found in epoch 7 | valid loss: 0.3913


Epoch 7 [44/45] | Valid Loss: 0.3913 | Elapse: 4.14s


Train [7]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 8 [0/158] | Train Loss: -0.0596 Grad: 136450.7812 LR: 8.2062e-05 | Elapse: 0.10s
Epoch 8 [100/158] | Train Loss: -0.0322 Grad: 112427.7891 LR: 7.7638e-05 | Elapse: 10.01s
Epoch 8 [157/158] | Train Loss: -0.0348 Grad: 172993.5469 LR: 7.4962e-05 | Elapse: 15.66s


Valid [7]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 8 [0/45] | Valid Loss: 0.2755 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 8 - Average Loss: (train) -0.0348; (valid) 0.3855 | Time: 19.80s
Best model found in epoch 8 | valid loss: 0.3855


Epoch 8 [44/45] | Valid Loss: 0.3855 | Elapse: 4.14s


Train [8]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 9 [0/158] | Train Loss: -0.0653 Grad: 166284.2812 LR: 7.4914e-05 | Elapse: 0.10s
Epoch 9 [100/158] | Train Loss: -0.0414 Grad: 192084.9375 LR: 6.9985e-05 | Elapse: 10.04s
Epoch 9 [157/158] | Train Loss: -0.0436 Grad: 122131.5234 LR: 6.7062e-05 | Elapse: 15.71s


Valid [8]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 9 [0/45] | Valid Loss: 0.2960 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 9 - Average Loss: (train) -0.0436; (valid) 0.3952 | Time: 19.85s


Epoch 9 [44/45] | Valid Loss: 0.3952 | Elapse: 4.14s


Train [9]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 10 [0/158] | Train Loss: -0.0835 Grad: 119627.0156 LR: 6.7010e-05 | Elapse: 0.10s
Epoch 10 [100/158] | Train Loss: -0.0529 Grad: 158803.7031 LR: 6.1725e-05 | Elapse: 10.05s
Epoch 10 [157/158] | Train Loss: -0.0545 Grad: 141877.2344 LR: 5.8645e-05 | Elapse: 15.71s


Valid [9]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 10 [0/45] | Valid Loss: 0.3302 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 10 - Average Loss: (train) -0.0545; (valid) 0.3962 | Time: 19.85s


Epoch 10 [44/45] | Valid Loss: 0.3962 | Elapse: 4.14s


Train [10]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 11 [0/158] | Train Loss: -0.0607 Grad: 212022.7969 LR: 5.8590e-05 | Elapse: 0.10s
Epoch 11 [100/158] | Train Loss: -0.0627 Grad: 109874.4141 LR: 5.3110e-05 | Elapse: 10.03s
Epoch 11 [157/158] | Train Loss: -0.0638 Grad: 89026.4375 LR: 4.9965e-05 | Elapse: 15.68s


Valid [10]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 11 [0/45] | Valid Loss: 0.3265 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 11 - Average Loss: (train) -0.0638; (valid) 0.4059 | Time: 19.80s


Epoch 11 [44/45] | Valid Loss: 0.4059 | Elapse: 4.13s


Train [11]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 12 [0/158] | Train Loss: -0.0667 Grad: 125734.0469 LR: 4.9910e-05 | Elapse: 0.10s
Epoch 12 [100/158] | Train Loss: -0.0713 Grad: 107682.2188 LR: 4.4401e-05 | Elapse: 10.00s
Epoch 12 [157/158] | Train Loss: -0.0716 Grad: 88025.4297 LR: 4.1287e-05 | Elapse: 15.64s


Valid [11]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 12 [0/45] | Valid Loss: 0.3288 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 12 - Average Loss: (train) -0.0716; (valid) 0.4022 | Time: 19.76s


Epoch 12 [44/45] | Valid Loss: 0.4022 | Elapse: 4.12s


Train [12]:   0%|          | 0/158 [00:00<?, ?batch/s]

Epoch 13 [0/158] | Train Loss: -0.0875 Grad: 106796.6719 LR: 4.1232e-05 | Elapse: 0.10s
Epoch 13 [100/158] | Train Loss: -0.0769 Grad: 76809.3984 LR: 3.5862e-05 | Elapse: 9.99s
Epoch 13 [157/158] | Train Loss: -0.0774 Grad: 102842.4766 LR: 3.2874e-05 | Elapse: 15.63s


Valid [12]:   0%|          | 0/45 [00:00<?, ?batch/s]

Epoch 13 [0/45] | Valid Loss: 0.3604 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 13 - Average Loss: (train) -0.0774; (valid) 0.4077 | Time: 19.78s
Early stopping at epoch 13
Fold 2 Valid Loss: 0.3854878842830658
Elapse: 4.29 min 


Epoch 13 [44/45] | Valid Loss: 0.4077 | Elapse: 4.15s


Fold 2 Elapse: 4.29 min
- Stage 2 | Train: 5266; Valid: 1226 | Reg = 0.2 -


Loading model from checkpoint: outputs/ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized_fold_3_stage_1.pth


Train [0]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 1 [0/164] | Train Loss: 0.1896 Grad: 382419.0625 LR: 4.0022e-06 | Elapse: 0.11s
Epoch 1 [100/164] | Train Loss: 0.1595 Grad: 293448.5000 LR: 2.4879e-05 | Elapse: 10.13s
Epoch 1 [163/164] | Train Loss: 0.1319 Grad: 219974.9375 LR: 5.2231e-05 | Elapse: 16.45s


Valid [0]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 1 [0/39] | Valid Loss: 0.4923 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Loss: (train) 0.1319; (valid) 0.4362 | Time: 20.06s
Best model found in epoch 1 | valid loss: 0.4362


Epoch 1 [38/39] | Valid Loss: 0.4362 | Elapse: 3.61s


Train [1]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 2 [0/164] | Train Loss: 0.0723 Grad: 260126.5000 LR: 5.2692e-05 | Elapse: 0.10s
Epoch 2 [100/164] | Train Loss: 0.0812 Grad: 257746.7188 LR: 9.1734e-05 | Elapse: 10.11s
Epoch 2 [163/164] | Train Loss: 0.0709 Grad: 203656.3281 LR: 1.0000e-04 | Elapse: 16.41s


Valid [1]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 2 [0/39] | Valid Loss: 0.4345 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Loss: (train) 0.0709; (valid) 0.4223 | Time: 20.01s
Best model found in epoch 2 | valid loss: 0.4223


Epoch 2 [38/39] | Valid Loss: 0.4223 | Elapse: 3.60s


Train [2]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 3 [0/164] | Train Loss: 0.0159 Grad: 218330.9062 LR: 1.0000e-04 | Elapse: 0.10s
Epoch 3 [100/164] | Train Loss: 0.0402 Grad: 220019.6094 LR: 9.9706e-05 | Elapse: 10.15s
Epoch 3 [163/164] | Train Loss: 0.0327 Grad: 197471.7500 LR: 9.9231e-05 | Elapse: 16.47s


Valid [2]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 3 [0/39] | Valid Loss: 0.4270 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Loss: (train) 0.0327; (valid) 0.4049 | Time: 20.07s
Best model found in epoch 3 | valid loss: 0.4049


Epoch 3 [38/39] | Valid Loss: 0.4049 | Elapse: 3.60s


Train [3]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 4 [0/164] | Train Loss: -0.0068 Grad: 191319.3750 LR: 9.9222e-05 | Elapse: 0.10s
Epoch 4 [100/164] | Train Loss: 0.0174 Grad: 190423.0312 LR: 9.8011e-05 | Elapse: 10.11s
Epoch 4 [163/164] | Train Loss: 0.0112 Grad: 224385.8125 LR: 9.6968e-05 | Elapse: 16.42s


Valid [3]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 4 [0/39] | Valid Loss: 0.4683 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Loss: (train) 0.0112; (valid) 0.4197 | Time: 20.02s


Epoch 4 [38/39] | Valid Loss: 0.4197 | Elapse: 3.60s


Train [4]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 5 [0/164] | Train Loss: -0.0317 Grad: 169913.3594 LR: 9.6949e-05 | Elapse: 0.10s
Epoch 5 [100/164] | Train Loss: 0.0016 Grad: 251048.9531 LR: 9.4857e-05 | Elapse: 10.12s
Epoch 5 [163/164] | Train Loss: -0.0046 Grad: 136287.1406 LR: 9.3277e-05 | Elapse: 16.44s


Valid [4]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 5 [0/39] | Valid Loss: 0.4261 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Loss: (train) -0.0046; (valid) 0.4180 | Time: 20.05s


Epoch 5 [38/39] | Valid Loss: 0.4180 | Elapse: 3.60s


Train [5]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 6 [0/164] | Train Loss: -0.0196 Grad: 179183.4062 LR: 9.3251e-05 | Elapse: 0.10s
Epoch 6 [100/164] | Train Loss: -0.0157 Grad: 188963.0156 LR: 9.0342e-05 | Elapse: 10.15s
Epoch 6 [163/164] | Train Loss: -0.0198 Grad: 136569.5781 LR: 8.8273e-05 | Elapse: 16.47s


Valid [5]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 6 [0/39] | Valid Loss: 0.4611 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Loss: (train) -0.0198; (valid) 0.4245 | Time: 20.07s


Epoch 6 [38/39] | Valid Loss: 0.4245 | Elapse: 3.60s


Train [6]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 7 [0/164] | Train Loss: -0.0453 Grad: 185233.7500 LR: 8.8238e-05 | Elapse: 0.10s
Epoch 7 [100/164] | Train Loss: -0.0275 Grad: 131715.2031 LR: 8.4601e-05 | Elapse: 10.13s
Epoch 7 [163/164] | Train Loss: -0.0298 Grad: 158688.2812 LR: 8.2106e-05 | Elapse: 16.44s


Valid [6]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 7 [0/39] | Valid Loss: 0.4694 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 7 - Average Loss: (train) -0.0298; (valid) 0.4265 | Time: 20.06s


Epoch 7 [38/39] | Valid Loss: 0.4265 | Elapse: 3.61s


Train [7]:   0%|          | 0/164 [00:00<?, ?batch/s]

Epoch 8 [0/164] | Train Loss: -0.0632 Grad: 128500.2812 LR: 8.2065e-05 | Elapse: 0.10s
Epoch 8 [100/164] | Train Loss: -0.0401 Grad: 130477.8750 LR: 7.7810e-05 | Elapse: 10.11s
Epoch 8 [163/164] | Train Loss: -0.0442 Grad: 127944.2422 LR: 7.4964e-05 | Elapse: 16.42s


Valid [7]:   0%|          | 0/39 [00:00<?, ?batch/s]

Epoch 8 [0/39] | Valid Loss: 0.5111 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 8 - Average Loss: (train) -0.0442; (valid) 0.4133 | Time: 20.02s
Early stopping at epoch 8
Fold 3 Valid Loss: 0.4049282371997833
Elapse: 2.68 min 


Epoch 8 [38/39] | Valid Loss: 0.4133 | Elapse: 3.60s


Fold 3 Elapse: 2.68 min
- Stage 2 | Train: 5169; Valid: 1323 | Reg = 0.2 -


Loading model from checkpoint: outputs/ResnetGRU_v2_Vstack_Filter_05_20_20_60_30_Normalized_fold_4_stage_1.pth


Train [0]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 1 [0/161] | Train Loss: 0.0906 Grad: 371371.5000 LR: 4.0023e-06 | Elapse: 0.11s
Epoch 1 [100/161] | Train Loss: 0.1490 Grad: 332485.1562 LR: 2.5602e-05 | Elapse: 10.08s
Epoch 1 [160/161] | Train Loss: 0.1267 Grad: 291067.3438 LR: 5.2235e-05 | Elapse: 16.05s


Valid [0]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 1 [0/42] | Valid Loss: 0.5104 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 1 - Average Loss: (train) 0.1267; (valid) 0.4113 | Time: 19.92s
Best model found in epoch 1 | valid loss: 0.4113


Epoch 1 [41/42] | Valid Loss: 0.4113 | Elapse: 3.87s


Train [1]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 2 [0/161] | Train Loss: 0.0152 Grad: 187093.3281 LR: 5.2705e-05 | Elapse: 0.10s
Epoch 2 [100/161] | Train Loss: 0.0710 Grad: 221137.3594 LR: 9.2218e-05 | Elapse: 10.05s
Epoch 2 [160/161] | Train Loss: 0.0619 Grad: 227268.0312 LR: 1.0000e-04 | Elapse: 16.02s


Valid [1]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 2 [0/42] | Valid Loss: 0.5399 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 2 - Average Loss: (train) 0.0619; (valid) 0.4076 | Time: 19.88s
Best model found in epoch 2 | valid loss: 0.4076


Epoch 2 [41/42] | Valid Loss: 0.4076 | Elapse: 3.86s


Train [2]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 3 [0/161] | Train Loss: 0.0107 Grad: 188398.2812 LR: 1.0000e-04 | Elapse: 0.10s
Epoch 3 [100/161] | Train Loss: 0.0323 Grad: 187855.5625 LR: 9.9695e-05 | Elapse: 10.03s
Epoch 3 [160/161] | Train Loss: 0.0268 Grad: 180547.2969 LR: 9.9231e-05 | Elapse: 15.97s


Valid [2]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 3 [0/42] | Valid Loss: 0.5431 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 3 - Average Loss: (train) 0.0268; (valid) 0.4084 | Time: 19.84s


Epoch 3 [41/42] | Valid Loss: 0.4084 | Elapse: 3.86s


Train [3]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 4 [0/161] | Train Loss: -0.0276 Grad: 145875.9844 LR: 9.9222e-05 | Elapse: 0.10s
Epoch 4 [100/161] | Train Loss: 0.0113 Grad: 152068.9062 LR: 9.7982e-05 | Elapse: 10.03s
Epoch 4 [160/161] | Train Loss: 0.0062 Grad: 241710.2344 LR: 9.6967e-05 | Elapse: 15.98s


Valid [3]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 4 [0/42] | Valid Loss: 0.5830 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 4 - Average Loss: (train) 0.0062; (valid) 0.4110 | Time: 19.84s


Epoch 4 [41/42] | Valid Loss: 0.4110 | Elapse: 3.86s


Train [4]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 5 [0/161] | Train Loss: -0.0270 Grad: 178581.8906 LR: 9.6949e-05 | Elapse: 0.10s
Epoch 5 [100/161] | Train Loss: -0.0070 Grad: 205974.2500 LR: 9.4813e-05 | Elapse: 10.03s
Epoch 5 [160/161] | Train Loss: -0.0118 Grad: 197538.0469 LR: 9.3277e-05 | Elapse: 15.97s


Valid [4]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 5 [0/42] | Valid Loss: 0.5780 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 5 - Average Loss: (train) -0.0118; (valid) 0.4189 | Time: 19.84s


Epoch 5 [41/42] | Valid Loss: 0.4189 | Elapse: 3.87s


Train [5]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 6 [0/161] | Train Loss: -0.0332 Grad: 200052.5625 LR: 9.3250e-05 | Elapse: 0.10s
Epoch 6 [100/161] | Train Loss: -0.0208 Grad: 136281.3438 LR: 9.0282e-05 | Elapse: 10.01s
Epoch 6 [160/161] | Train Loss: -0.0243 Grad: 138190.3906 LR: 8.8272e-05 | Elapse: 15.95s


Valid [5]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 6 [0/42] | Valid Loss: 0.5908 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 6 - Average Loss: (train) -0.0243; (valid) 0.4074 | Time: 19.82s
Best model found in epoch 6 | valid loss: 0.4074


Epoch 6 [41/42] | Valid Loss: 0.4074 | Elapse: 3.86s


Train [6]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 7 [0/161] | Train Loss: -0.0538 Grad: 136303.6094 LR: 8.8237e-05 | Elapse: 0.10s
Epoch 7 [100/161] | Train Loss: -0.0353 Grad: 106480.9688 LR: 8.4528e-05 | Elapse: 10.01s
Epoch 7 [160/161] | Train Loss: -0.0364 Grad: 111008.4219 LR: 8.2105e-05 | Elapse: 15.95s


Valid [6]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 7 [0/42] | Valid Loss: 0.5587 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 7 - Average Loss: (train) -0.0364; (valid) 0.4101 | Time: 19.82s


Epoch 7 [41/42] | Valid Loss: 0.4101 | Elapse: 3.87s


Train [7]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 8 [0/161] | Train Loss: -0.0731 Grad: 110793.2266 LR: 8.2063e-05 | Elapse: 0.10s
Epoch 8 [100/161] | Train Loss: -0.0464 Grad: 117960.8125 LR: 7.7725e-05 | Elapse: 10.02s
Epoch 8 [160/161] | Train Loss: -0.0483 Grad: 123862.5078 LR: 7.4963e-05 | Elapse: 15.97s


Valid [7]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 8 [0/42] | Valid Loss: 0.5641 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 8 - Average Loss: (train) -0.0483; (valid) 0.4023 | Time: 19.83s
Best model found in epoch 8 | valid loss: 0.4023


Epoch 8 [41/42] | Valid Loss: 0.4023 | Elapse: 3.87s


Train [8]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 9 [0/161] | Train Loss: -0.0760 Grad: 112058.4766 LR: 7.4916e-05 | Elapse: 0.10s
Epoch 9 [100/161] | Train Loss: -0.0567 Grad: 116133.3516 LR: 7.0081e-05 | Elapse: 10.03s
Epoch 9 [160/161] | Train Loss: -0.0581 Grad: 112845.5859 LR: 6.7063e-05 | Elapse: 15.97s


Valid [8]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 9 [0/42] | Valid Loss: 0.5279 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 9 - Average Loss: (train) -0.0581; (valid) 0.4035 | Time: 19.84s


Epoch 9 [41/42] | Valid Loss: 0.4035 | Elapse: 3.87s


Train [9]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 10 [0/161] | Train Loss: -0.0883 Grad: 88579.8594 LR: 6.7012e-05 | Elapse: 0.10s
Epoch 10 [100/161] | Train Loss: -0.0645 Grad: 162720.9844 LR: 6.1827e-05 | Elapse: 10.03s
Epoch 10 [160/161] | Train Loss: -0.0660 Grad: 125196.5312 LR: 5.8646e-05 | Elapse: 15.98s


Valid [9]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 10 [0/42] | Valid Loss: 0.5125 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 10 - Average Loss: (train) -0.0660; (valid) 0.4049 | Time: 19.85s


Epoch 10 [41/42] | Valid Loss: 0.4049 | Elapse: 3.87s


Train [10]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 11 [0/161] | Train Loss: -0.0794 Grad: 104415.9375 LR: 5.8592e-05 | Elapse: 0.10s
Epoch 11 [100/161] | Train Loss: -0.0726 Grad: 142375.1406 LR: 5.3215e-05 | Elapse: 9.87s
Epoch 11 [160/161] | Train Loss: -0.0734 Grad: 89555.8047 LR: 4.9966e-05 | Elapse: 15.72s


Valid [10]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 11 [0/42] | Valid Loss: 0.5597 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 11 - Average Loss: (train) -0.0734; (valid) 0.4104 | Time: 19.51s


Epoch 11 [41/42] | Valid Loss: 0.4104 | Elapse: 3.79s


Train [11]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 12 [0/161] | Train Loss: -0.0891 Grad: 97061.4688 LR: 4.9912e-05 | Elapse: 0.10s
Epoch 12 [100/161] | Train Loss: -0.0794 Grad: 100993.2188 LR: 4.4505e-05 | Elapse: 9.86s
Epoch 12 [160/161] | Train Loss: -0.0799 Grad: 81971.7578 LR: 4.1288e-05 | Elapse: 15.71s


Valid [11]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 12 [0/42] | Valid Loss: 0.5162 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 12 - Average Loss: (train) -0.0799; (valid) 0.3971 | Time: 19.51s
Best model found in epoch 12 | valid loss: 0.3971


Epoch 12 [41/42] | Valid Loss: 0.3971 | Elapse: 3.80s


Train [12]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 13 [0/161] | Train Loss: -0.0990 Grad: 101625.8047 LR: 4.1234e-05 | Elapse: 0.10s
Epoch 13 [100/161] | Train Loss: -0.0845 Grad: 86026.7188 LR: 3.5963e-05 | Elapse: 9.85s
Epoch 13 [160/161] | Train Loss: -0.0857 Grad: 100102.4766 LR: 3.2875e-05 | Elapse: 15.69s


Valid [12]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 13 [0/42] | Valid Loss: 0.5606 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 13 - Average Loss: (train) -0.0857; (valid) 0.4064 | Time: 19.47s


Epoch 13 [41/42] | Valid Loss: 0.4064 | Elapse: 3.77s


Train [13]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 14 [0/161] | Train Loss: -0.0905 Grad: 123599.8438 LR: 3.2824e-05 | Elapse: 0.10s
Epoch 14 [100/161] | Train Loss: -0.0876 Grad: 85881.5234 LR: 2.7848e-05 | Elapse: 9.84s
Epoch 14 [160/161] | Train Loss: -0.0889 Grad: 90172.1797 LR: 2.4983e-05 | Elapse: 15.68s


Valid [13]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 14 [0/42] | Valid Loss: 0.5759 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 14 - Average Loss: (train) -0.0889; (valid) 0.4095 | Time: 19.45s


Epoch 14 [41/42] | Valid Loss: 0.4095 | Elapse: 3.77s


Train [14]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 15 [0/161] | Train Loss: -0.0918 Grad: 111460.2969 LR: 2.4936e-05 | Elapse: 0.10s
Epoch 15 [100/161] | Train Loss: -0.0922 Grad: 82625.2266 LR: 2.0406e-05 | Elapse: 9.83s
Epoch 15 [160/161] | Train Loss: -0.0936 Grad: 72339.0078 LR: 1.7852e-05 | Elapse: 15.72s


Valid [14]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 15 [0/42] | Valid Loss: 0.5520 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 15 - Average Loss: (train) -0.0936; (valid) 0.4042 | Time: 19.54s


Epoch 15 [41/42] | Valid Loss: 0.4042 | Elapse: 3.82s


Train [15]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 16 [0/161] | Train Loss: -0.0944 Grad: 94664.5000 LR: 1.7811e-05 | Elapse: 0.10s
Epoch 16 [100/161] | Train Loss: -0.0977 Grad: 68758.0625 LR: 1.3865e-05 | Elapse: 9.90s
Epoch 16 [160/161] | Train Loss: -0.0994 Grad: 68247.5859 LR: 1.1698e-05 | Elapse: 15.78s


Valid [15]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 16 [0/42] | Valid Loss: 0.5190 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 16 - Average Loss: (train) -0.0994; (valid) 0.4009 | Time: 19.59s


Epoch 16 [41/42] | Valid Loss: 0.4009 | Elapse: 3.82s


Train [16]:   0%|          | 0/161 [00:00<?, ?batch/s]

Epoch 17 [0/161] | Train Loss: -0.1021 Grad: 92672.2266 LR: 1.1664e-05 | Elapse: 0.10s
Epoch 17 [100/161] | Train Loss: -0.1021 Grad: 63732.2422 LR: 8.4218e-06 | Elapse: 9.90s
Epoch 17 [160/161] | Train Loss: -0.1034 Grad: 75050.6016 LR: 6.7090e-06 | Elapse: 15.77s


Valid [16]:   0%|          | 0/42 [00:00<?, ?batch/s]

Epoch 17 [0/42] | Valid Loss: 0.5143 | Elapse: 0.10s


----------------------------------------------------------------------------------------------------
Epoch 17 - Average Loss: (train) -0.1034; (valid) 0.4009 | Time: 19.58s
Early stopping at epoch 17
Fold 4 Valid Loss: 0.39711377024650574
Elapse: 5.59 min 


Epoch 17 [41/42] | Valid Loss: 0.4009 | Elapse: 3.81s


Fold 4 Elapse: 5.59 min


KeyError: "None of [Index(['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote',\n       'other_vote'],\n      dtype='object')] are in the [columns]"

In [25]:
# nfo = f"{'=' * 100}\nTraining Complete!\n"
# cv_results_1 = None 
# #evaluate_oof(oof_stage_1)
# cv_results_2 = evaluate_oof(oof_stage_2)
# info += f"CV Result: Stage 1: {cv_results_1} | Stage 2: {cv_results_2}\n"
# info += f"Elapse: {(time() - t_start) / 60:.2f} min \n{'=' * 100}"
# logger.info(info)

Training Complete!
CV Result: Stage 1: None | Stage 2: 0.40033119916915894
Elapse: 20.79 min 


In [None]:
# plot loss history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=False)

for i, loss in enumerate(loss_history_1):
    # ax1.plot(loss['train'], marker="*", ls="-", label=f"Fold {i} Train")
    ax1.plot(loss['valid'], marker="o", ls=":", label=f"Fold {i} Valid")

for i, loss in enumerate(loss_history_2):
    # ax2.plot(loss['train'], marker="*", ls="-", label=f"Fold {i} Train")
    ax2.plot(loss['valid'], marker="o", ls=":", label=f"Fold {i} Valid")

ax1.set_title("Stage 1 Loss")
ax2.set_title("Stage 2 Loss")

for ax in (ax1, ax2):
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Loss")
    ax.legend()
    ax.grid(True)

fig.tight_layout()
fig.savefig(Path(PATHS.OUTPUT_DIR) / f"{ModelConfig.MODEL_NAME}_loss_history.png")
plt.show()

In [None]:
csv_path = f'./outputs/{ModelConfig.MODEL_NAME}_oof_1.csv'
print("CSV Path: ", csv_path)

oof_df = analyze_oof(csv_path)

print("Kaggle Score: ", calc_kaggle_score(oof_df))
print("Average KL Loss: ", oof_df["kl_loss"].mean())

display(oof_df.head())

# plot confusion matrix
cm = confusion_matrix(oof_df['target_id'], oof_df['target_pred']) # (y_true, y_pred)
cm = cm / cm.sum(axis=1)[:, np.newaxis]

fig = plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=TARGET2ID.keys(), yticklabels=TARGET2ID.keys())
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('True', fontsize=12)
plt.title(csv_path.split('/')[-1].split('.')[0], fontsize=12)
fig.tight_layout()
fig.savefig(f"./outputs/{csv_path.split('/')[-1].split('.')[0]}_CM.png")
plt.show()

In [None]:
csv_path = f'./outputs/{ModelConfig.MODEL_NAME}_oof_2.csv'
print("CSV Path: ", csv_path)

oof_df = analyze_oof(csv_path)

print("Kaggle Score: ", calc_kaggle_score(oof_df))
print("Average KL Loss: ", oof_df["kl_loss"].mean())

display(oof_df.head())

# plot confusion matrix
cm = confusion_matrix(oof_df['target_id'], oof_df['target_pred']) # (y_true, y_pred)
cm = cm / cm.sum(axis=1)[:, np.newaxis]

fig = plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=TARGET2ID.keys(), yticklabels=TARGET2ID.keys())
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('True', fontsize=12)
plt.title(csv_path.split('/')[-1].split('.')[0], fontsize=12)
fig.tight_layout()
fig.savefig(f"./outputs/{csv_path.split('/')[-1].split('.')[0]}_CM.png")
plt.show()

In [None]:
fig, axes = plt.subplots(5, 5, figsize=(15, 15), sharex=True, sharey=True)

oof_samples = oof_df.loc[0:len(oof_df):250]

for i, ax in enumerate(axes.flatten()):
    row = oof_samples.iloc[i]
    x = np.arange(6)
    ax.plot(x, row[TARGETS].T, marker="o", ls="-", label="True")
    ax.plot(x, row[TARGETS_PRED].T, marker="*", ls="--", label="Predicted")
    ax.set_title(f"{row['target']} | KL Loss: {row['kl_loss']:.4f}")
    ax.legend()
    
fig.tight_layout()
fig.savefig(f"./outputs/{csv_path.split('/')[-1].split('.')[0]}_samples.png")
plt.show()