In [None]:
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import CustomModel, JobConfig, ModelConfig

import torch
from torch import nn
import torch.nn.functional as F

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Call the project's seeding helper with the job-level seed before anything else runs.
seed_everything(JobConfig.SEED)

# Experiment settings for this run, written onto ModelConfig as attributes.
model_overrides = {
    'EPOCHS': 6,
    'MODEL_BACKBONE': 'tf_efficientnet_b2',
    'MODEL_NAME': "ENet_b2_softmax",
    'AUGMENT': True,
    'USE_KAGGLE_SPECTROGRAMS': True,
    'USE_EEG_SPECTROGRAMS': True,
    'REGULARIZATION': None,
    'AUGMENTATIONS': ['xy_masking'],
}
for option_name, option_value in model_overrides.items():
    setattr(ModelConfig, option_name, option_value)

# The predictor receives both config objects and is reused by the cells below.
hms_predictor = HMSPredictor(JobConfig, ModelConfig)

In [None]:
# Load the two training splits together with the all_specs / all_eegs objects
# returned by the predictor.
train_easy, train_hard, all_specs, all_eegs = hms_predictor.load_train_data()

train_splits = (train_easy, train_hard)

# Shape of each split.
for split_df in train_splits:
    print(split_df.shape)

# Total number of missing cells in each split (0 means no NaN anywhere).
for split_df in train_splits:
    print(split_df.isnull().sum().sum())

# Preview the first rows of both splits, separated by a blank-ish line.
display(train_easy.head())
print(" ")
display(train_hard.head())

In [None]:
# Run the predictor's fold training on both training splits.
# all_specs / all_eegs are passed through exactly as returned by load_train_data()
# (presumably the spectrogram and EEG lookups; confirm in engine_hms_trainer).
hms_predictor.train_folds(train_easy, train_hard, all_specs, all_eegs)

In [None]:
# Smoke test: take one sample from the dataset and push it through a freshly
# built model to check that input and output shapes line up.
dataset = CustomDataset(train_easy, TARGETS, ModelConfig, all_specs, all_eegs, mode='test')

X, y = dataset[0]
print(X.shape, y.shape)

model = CustomModel(ModelConfig, num_classes=6, pretrained=True)

# Only the output shape is inspected, so there is no reason to record an
# autograd graph for this forward pass.
with torch.no_grad():
    y_pred = model(X.unsqueeze(0))  # unsqueeze(0) adds a leading batch axis of size 1

print(y_pred.shape)

In [None]:
# Never truncate columns when pandas renders a wide DataFrame.
pd.options.display.max_columns = None

# Module-level torch modules: softmax over dim 1 and a batch-mean KL divergence loss.
SOFTMAX = nn.Softmax(dim=1)
KL_CRITERION = nn.KLDivLoss(reduction='batchmean')

In [None]:
# Maps each label name to its integer class id; ids follow the order listed here.
TARGET2ID = {
    label: class_id
    for class_id, label in enumerate(('Seizure', 'LPD', 'GPD', 'LRDA', 'GRDA', 'Other'))
}

from kl_divergence import score as kaggle_score 

def calc_kaggle_score(solution, submission):
    """Score a single prediction row against its ground-truth row.

    Each argument is converted to a one-row DataFrame via ``to_frame().T``,
    its TARGETS columns are cast to float32, and both frames are handed to
    ``kaggle_score`` with ``'eeg_id'`` as the row-id column name.

    Args:
        solution: Row (anything exposing ``to_frame()``, e.g. a pandas
            Series) that already carries the TARGETS labels.
        submission: Row whose entries are relabelled positionally to
            ``['eeg_id'] + TARGETS`` before scoring.

    Returns:
        Whatever ``kaggle_score`` returns for the two one-row frames.
    """
    def _single_row_frame(row, columns=None):
        # Series -> (1, n) DataFrame, optionally relabelled, targets as float32.
        frame = row.to_frame().T
        if columns is not None:
            frame.columns = columns
        frame[TARGETS] = frame[TARGETS].astype(np.float32)
        return frame

    truth = _single_row_frame(solution)
    pred = _single_row_frame(submission, columns=['eeg_id'] + TARGETS)
    return kaggle_score(truth, pred, 'eeg_id')


def calc_kl_div(p, q):
    """Return the KL divergence between one prediction and one target vector.

    Args:
        p: 1-D array-like of prediction scores (ndarray, list, tuple or
            pandas Series). The values are treated as logits: ``log_softmax``
            is applied to them before the loss is evaluated.
            NOTE(review): if callers pass already-normalised probabilities,
            the softmax is applied on top of them; confirm that is intended.
        q: 1-D array-like of target probabilities with the same length as ``p``.

    Returns:
        The divergence as a Python float, using the ``batchmean`` reduction
        over a batch of exactly one row.
    """
    # np.asarray handles lists, Series and object-dtype arrays alike, whereas
    # calling .astype directly only works on objects that provide it.
    logits = torch.tensor(np.asarray(p, dtype=np.float32)).unsqueeze(0)
    target = torch.tensor(np.asarray(q, dtype=np.float32)).unsqueeze(0)

    # Functional form of nn.KLDivLoss(reduction='batchmean'); keeps the helper
    # independent of module-level state defined in another cell.
    kl_score = F.kl_div(F.log_softmax(logits, dim=1), target, reduction='batchmean')
    return kl_score.item()

# def analyze_oof(oof_csv_path):
    
#     oof_df = pd.read_csv(oof_csv_path)
#     oof_df
    
    # y_pred = oof_df[TARGETS].values.astype(np.float32)
    # y_pred_smax = SOFTMAX(torch.tensor(y_pred)).numpy()
    # oof_df[TARGETS_PRED] = y_pred_smax #- y_pred_smax.min(axis=1)[:, np.newaxis]
    # oof_df[TARGETS_PRED] = oof_df[TARGETS_PRED].div(oof_df[TARGETS_PRED].sum(axis=1), axis=0)
    
    # y_pred = oof_df[TARGETS].values.astype(np.float32)
    # y_pred_min = y_pred.min(axis=1)[:, np.newaxis]
    # y_pred_max = y_pred.max(axis=1)[:, np.newaxis]
    # y_pred_norm = (y_pred - y_pred_min) / (y_pred_max - y_pred_min)  
    
    # oof_df[TARGETS_PRED] = y_pred_norm
    # oof_df[TARGETS_PRED] = oof_df[TARGETS_PRED].div(oof_df[TARGETS_PRED].sum(axis=1), axis=0)
    
    # cv = evaluate_oof(oof_df)

    # oof_df["kl_loss"] = oof_df.apply(lambda row: calc_kl_div(row[TARGETS_PRED].values, row[TARGETS].values), axis=1)

    # print(f"KL Loss Mean: {oof_df['kl_loss'].mean()}")

    # oof_df['target_pred'] = oof_df[TARGETS_PRED].apply(lambda x: np.argmax(x), axis=1)
    # oof_df['target_id'] = oof_df['target'].map(TARGET2ID)
    # cv=0
    # return oof_df, cv


In [None]:
# The (1, 2) comparison figure is only needed by the commented-out plotting
# code further down in this cell. Creating it while that code is disabled
# just renders two empty axes, so it stays disabled with it.
# fig, axes = plt.subplots(1, 2, figsize=(10, 5))

oof_df = pd.read_csv("./outputs/ENet_b2_xymasking_remove_less/ENet_b2_xymasking_remove_less_oof_2.csv")

# Boolean mask: rows whose TARGETS columns contain more than two distinct values.
display_rows = oof_df[TARGETS].nunique(axis=1) > 2
display(oof_df[display_rows].head(10))

# Spot-check every row stored for one specific eeg_id.
display(oof_df[oof_df['eeg_id'] == 4279600028])

# plot_idx = 20163 #np.random.choice(oof_df[display_rows].index, 1)                 
# y_pred = oof_df[TARGETS].values.astype(np.float32)

# print(plot_idx, oof_df.loc[plot_idx][TARGETS].values)

# y_pred_smax = SOFTMAX(torch.tensor(y_pred)).numpy()
# oof_df[TARGETS_PRED] = y_pred_smax
# axes[0].plot(oof_df.loc[plot_idx][TARGETS].values, label='True')
# axes[0].plot(oof_df.loc[plot_idx][TARGETS_PRED].values , label='Pred')
# print(f"Softmax pred: {oof_df.loc[plot_idx][TARGETS_PRED].values }")

# y_pred_new = (y_pred - y_pred.min(axis=1)[:, np.newaxis]) / (y_pred.max(axis=1)[:, np.newaxis] - y_pred.min(axis=1)[:, np.newaxis])
# y_pred_norm = y_pred_new / y_pred_new.sum(axis=1)[:, np.newaxis]
# oof_df[TARGETS_PRED] = y_pred_norm
# axes[1].plot(oof_df.loc[plot_idx][TARGETS].values , label='True')
# axes[1].plot(oof_df.loc[plot_idx][TARGETS_PRED].values , label='Pred')
# print(f"Normalized pred: {oof_df.loc[plot_idx][TARGETS_PRED].values }")


# for ax in axes:
#     ax.legend()
#     ax.grid(True)

# plt.show()




In [None]:
# Show every OOF row stored for this eeg_id.
oof_df.loc[oof_df['eeg_id'].eq(11127485)]

In [None]:
# Rows whose eeg_id already appeared earlier in the frame (the first occurrence is not flagged).
# Scratch alternatives kept from the original cell:
#   oof_df.shape
#   .groupby('eeg_id')['patient_id'].agg(['nunique', 'count']).sort_values(by='count', ascending=False).head(10)
oof_df.loc[oof_df.duplicated(subset='eeg_id')]

In [None]:
# NOTE(review): oof_df2 is not assigned in any visible cell, so this cell fails on a
# fresh kernel unless it is created elsewhere; confirm where it comes from.
def _score_row(row):
    """Kaggle score of one OOF row: its true target columns vs. its predicted ones."""
    truth = row[['eeg_id'] + TARGETS]
    pred = row[['eeg_id'] + TARGETS_PRED]
    return calc_kaggle_score(truth, pred)

# .loc[:10] is a label-based slice, so the row labelled 10 is included.
score_kaggle = oof_df2.loc[:10].apply(_score_row, axis=1)
score_kaggle

In [None]:
# NOTE(review): oof_df2 is not assigned in any visible cell; confirm where it comes from.
truth_cols = ['eeg_id'] + TARGETS
prediction_cols = ['eeg_id'] + TARGETS_PRED

# Ground truth keeps its own column names.
solution_df = oof_df2[truth_cols].copy()

# Predictions are relabelled positionally so both frames share the same header.
submission_df = oof_df2[prediction_cols].set_axis(truth_cols, axis=1)

score_value = kaggle_score(solution_df, submission_df, 'eeg_id')

score_value

In [None]:
# oof_df1, cv_1 = analyze_oof("./outputs/ENet_b2_xymasking_remove_less/ENet_b2_xymasking_remove_less_oof_1.csv")
# print(cv_1)
# oof_df1.head()

In [None]:
# NOTE(review): oof_df2 (including its 'target_id' / 'target_pred' columns) is not
# created in any visible cell; confirm where it comes from.
plot_oof = oof_df2.copy()

# plot confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

class_names = list(TARGET2ID)
class_ids = list(TARGET2ID.values())

# labels= pins the matrix to one row/column per known class id, so it always
# lines up with the tick labels even if a class never occurs in the data.
# normalize='true' divides each row by its true-sample count and yields 0
# (not NaN) for a class that has no true samples.
cm = confusion_matrix(
    plot_oof['target_id'],    # y_true
    plot_oof['target_pred'],  # y_pred
    labels=class_ids,
    normalize='true',
)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted', fontsize=12)
ax.set_ylabel('True', fontsize=12)
plt.show()

In [None]:
# Grid of examples: one row of panels per class id, each panel overlaying the
# true and predicted target values of one randomly drawn row of plot_oof.
n_classes, n_examples = 6, 5
fig, axes = plt.subplots(n_classes, n_examples, figsize=(18, 16), sharex=True, sharey=True)

for class_id in range(n_classes):
    class_index = plot_oof.index[plot_oof['target_id'] == class_id]

    # Draw without replacement so the same sample is never shown twice in a
    # row. A class with fewer than n_examples rows (or none at all) leaves the
    # remaining panels empty instead of raising inside np.random.choice.
    n_draw = min(n_examples, len(class_index))
    picks = np.random.choice(class_index, size=n_draw, replace=False) if n_draw else []

    axes[class_id, 0].set_ylabel(BRAIN_ACTIVITY[class_id], fontsize=12)

    for col in range(n_examples):
        ax = axes[class_id, col]
        ax.set_xticks(range(len(BRAIN_ACTIVITY)))
        ax.set_xticklabels(BRAIN_ACTIVITY)
        ax.grid(True)

        if col >= n_draw:
            ax.set_title("no sample")
            continue

        idx = picks[col]
        sample = plot_oof.loc[idx]
        ax.plot(sample[TARGETS].values, label='True')
        ax.plot(sample[TARGETS_PRED].values, label='Pred')
        ax.set_title(f"{idx} | KL: {sample['kl_loss']:.4f}")
        ax.legend()

fig.tight_layout()
plt.show()