In [None]:
import os
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from engine_hms_trainer import *
from engine_hms_model import KagglePaths, LocalPaths, ModelConfig

# Kept after the wildcard import so these explicit names win over anything
# the wildcard may export under the same identifiers.
import torch
from torch import nn
import torch.nn.functional as F

warnings.filterwarnings('ignore')

# Choose the path configuration: use KagglePaths when its OUTPUT_DIR exists on
# this machine, otherwise fall back to LocalPaths.
# `os` is imported explicitly at the top of the notebook rather than relying on
# the wildcard import from engine_hms_trainer to provide it.
if os.path.exists(KagglePaths.OUTPUT_DIR):
    paths = KagglePaths
else:
    paths = LocalPaths
print("Output Dir: ", paths.OUTPUT_DIR)

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

In [None]:
# Load the training table (returned as two frames, "easy" and "hard") together
# with the pre-loaded spectrograms and EEGs.
# NOTE(review): the easy/hard split is presumably driven by `split_entropy`;
# confirm against load_kaggle_data.
train_easy, train_hard, all_specs, all_eegs = load_kaggle_data(
    paths.TRAIN_CSV,
    paths.PRE_LOADED_SPECTOGRAMS,
    paths.PRE_LOADED_EEGS,
    split_entropy=ModelConfig.SPLIT_ENTROPY,
)

# Shape of each split.
for _split in (train_easy, train_hard):
    print(_split.shape)

# Total count of missing cells in each split (0 means the frame has no NaN).
for _split in (train_easy, train_hard):
    print(_split.isnull().sum().sum())

# Preview the first rows of both splits, separated by a blank line.
display(train_easy.head())
print(" ")
display(train_hard.head())

In [None]:
# Debug-run overrides written onto ModelConfig, applied in the order listed.
_debug_overrides = {
    "EPOCHS": 1,
    "BATCH_SIZE": 16,
    "GRADIENT_ACCUMULATION_STEPS": 2,
    "MODEL_BACKBONE": 'dual_encoder',
    "MODEL_NAME": "DualEncoder_Default_debug",
    "USE_KAGGLE_SPECTROGRAMS": True,
    "USE_EEG_SPECTROGRAMS": True,
    "REGULARIZATION": None,
    "AUGMENT": False,
    "AUGMENTATIONS": [],
}
for _option, _setting in _debug_overrides.items():
    setattr(ModelConfig, _option, _setting)

In [None]:
# Work on copies so the frames returned by the loading cell stay untouched.
valid_df_1 = train_easy.copy()
valid_df_2 = train_hard.copy()

# Stack easy rows followed by hard rows under a fresh 0..n-1 index.
valid_df = pd.concat([valid_df_1, valid_df_2], axis=0, ignore_index=True)

dataset = CustomDataset(
    valid_df, TARGETS, ModelConfig, all_specs, all_eegs, mode="valid"
)

# shuffle=False keeps batch order identical to valid_df row order, which the
# later cell relies on when it assigns predictions back onto valid_df.
loader_kwargs = dict(
    batch_size=ModelConfig.BATCH_SIZE,
    num_workers=ModelConfig.NUM_WORKERS,
    pin_memory=True,
    shuffle=False,
)

# drop_last=False so the final partial batch is still predicted.
valid_loader = DataLoader(dataset, drop_last=False, **loader_kwargs)

In [None]:
# Build the dual-encoder model from ModelConfig without pretrained weights.
# NOTE(review): num_classes=6 presumably equals len(TARGETS) -- confirm.
model = DualEncoderModel(ModelConfig, num_classes=6, pretrained=False)

In [None]:
# Module-level logger handed to the Trainer below, with its level set to INFO.
# NOTE(review): no handler is attached in this cell; whether INFO records are
# actually shown depends on logging configuration done elsewhere -- confirm.
from logging import getLogger, INFO
logger = getLogger(__name__)
logger.setLevel(INFO)

In [None]:
# Wrap the model in a Trainer. The cells visible below call `model` directly
# and never use `trainer`.
# NOTE(review): any side effects of Trainer.__init__ on `model` (e.g. device
# placement) are not visible here -- confirm.
trainer = Trainer(model, logger, ModelConfig)

In [None]:
# Shape of the copy of train_easy.
valid_df_1.shape

In [None]:
# Number of batches the loader yields (it was built with drop_last=False, so
# this includes the final partial batch).
len(valid_loader)

In [None]:
# Run the model over every validation batch and collect its raw outputs.
#
# The model is explicitly placed on DEVICE (inputs are moved there too) and put
# in evaluation mode so dropout / batch-norm layers behave deterministically
# during prediction.
# NOTE(review): assumes `model` is a torch.nn.Module; `.to()` is a no-op if it
# already lives on DEVICE. The model is left in eval mode afterwards, so call
# model.train() before any later training in this notebook.
model.to(DEVICE)
model.eval()

preds_list = []
pbar = tqdm(valid_loader, total=len(valid_loader), unit="batch")

# Gradients are not needed for prediction; disabling them saves memory.
with torch.no_grad():
    # Each batch is an (inputs, targets) pair; targets are not used here.
    for x, _ in pbar:
        x = x.to(DEVICE)
        y_pred = model(x)
        # Move to host memory so batches can be concatenated with NumPy.
        preds_list.append(y_pred.cpu().numpy())

In [None]:
# Stack the per-batch outputs along the batch axis into a single array.
preds = np.concatenate(preds_list, axis=0)
preds.shape

In [None]:
# Store the model outputs in the TARGETS_PRED columns of valid_df (in place).
# Row i of `preds` lines up with row i of valid_df because the loader was
# created with shuffle=False and drop_last=False.
# NOTE(review): presumably len(TARGETS_PRED) equals preds.shape[1] -- confirm.
valid_df[TARGETS_PRED] = preds 
valid_df

In [None]:
# KL divergence between the label distributions and the model predictions,
# averaged over rows (reduction="batchmean").
kl_loss = nn.KLDivLoss(reduction="batchmean")

# Targets as float32.
# NOTE(review): KLDivLoss expects the target to hold probabilities; assumes
# the TARGETS columns are already normalised per row -- confirm.
labels = torch.tensor(valid_df[TARGETS].values.astype('float32'))

# KLDivLoss expects log-probabilities as input, so the stored model outputs
# are passed through log_softmax. The class axis is given explicitly (dim=1,
# one row per sample) instead of relying on the deprecated implicit-dim form.
model_preds = F.log_softmax(
    torch.tensor(valid_df[TARGETS_PRED].values.astype('float32')),
    dim=1,
)

kl_torch = kl_loss(model_preds, labels).item()
kl_torch