# About

In this notebook, I'll share an image classification approach for the given spectrograms.  

I tried several experiments, but didn't obtain good results :(


* **version 1**: naive approach
* **version 2**: For comparing with [Chris's EfficientNetB2 Starter](https://www.kaggle.com/code/cdeotte/efficientnetb2-starter-lb-0-57), I added **log transform** and **LR scheduling**.

## Experimental Settings

### model
* backbone: resnet34d (use the pretrained model provided by [timm](https://github.com/huggingface/pytorch-image-models))
* head classifier: one linear layer
* num of input channels: 1

### data augmentation
* implemented by [albumentations](https://albumentations.ai/)
* Train
    * Resize
* Val, Test
    * Resize
    
### learning settings
* CV Strategy: Stratified Group KFold (K=5)
    * y: `expert_consensus`
    * group: `patient_id`
* max epochs: 9
* data:
    * input image size: 1x512x512
    * batch size: 32
* loss: [KLDivLoss](https://pytorch.org/docs/stable/generated/torch.nn.KLDivLoss.html)
* optimizer: AdamW
    * learning rate: 1.0e-03
    * weight decay: 1.0e-02
    
* lr scheduler: OneCycleLR
    * max lr: 1.0e-03
    * min lr: 1.0e-04
    
### NOTE: I normalized spectrograms per image
```python
img = np.load(path)  # shape: (Hz, Time) = (400, 300)
eps = 1e-6
img_mean = img.mean(axis=(0, 1))
img = img - img_mean
img_std = img.std(axis=(0, 1))
img = img / (img_std + eps)
```

# Prepare

## import

In [1]:
import sys
import os
import gc
import copy
import yaml
import random
import shutil
from time import time
import typing as tp
from pathlib import Path

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedGroupKFold

import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler
from torch.cuda import amp

import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2



In [2]:
# Restrict the CUDA devices visible to this process to device index 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
# Project layout: every directory is resolved relative to the parent of the
# current working directory.
ROOT = Path.cwd().parent
INPUT = ROOT.joinpath("input")
OUTPUT = ROOT.joinpath("output")
SRC = ROOT.joinpath("src")

# Competition data and its spectrogram sub-directories.
DATA = INPUT.joinpath("hms-harmful-brain-activity-classification")
TRAIN_SPEC = DATA.joinpath("train_spectrograms")
TEST_SPEC = DATA.joinpath("test_spectrograms")

# Scratch directories, created on the spot if they do not exist yet.
TMP = ROOT.joinpath("tmp")
TRAIN_SPEC_SPLIT = TMP.joinpath("train_spectrograms_split")
TEST_SPEC_SPLIT = TMP.joinpath("test_spectrograms_split")
TMP.mkdir(exist_ok=True)
TRAIN_SPEC_SPLIT.mkdir(exist_ok=True)
TEST_SPEC_SPLIT.mkdir(exist_ok=True)


# NOTE: the spelling "RANDAM" is kept because a later cell refers to this name.
RANDAM_SEED = 1086
CLASSES = [
    "seizure_vote",
    "lpd_vote",
    "gpd_vote",
    "lrda_vote",
    "grda_vote",
    "other_vote",
]
N_CLASSES = len(CLASSES)
FOLDS = list(range(5))
N_FOLDS = len(FOLDS)

## Read Data, Split Folds, Split Spectrograms

In [5]:
# Load the label table of the competition.
train = pd.read_csv(DATA.joinpath("train.csv"))

# Turn the per-class vote columns into a probability distribution by dividing
# every row by its own vote total.
train[CLASSES] = train[CLASSES].div(train[CLASSES].sum(axis=1), axis=0)

print(train.shape)

(106800, 15)


### NOTE: I used the **first** `spectrogram_sub_id` for each `spectrogram_id` in order to train model faster.

In [6]:
# Keep only the first row of every `spectrogram_id` group and renumber the
# index from 0 so that row labels and row positions coincide afterwards.
train = (
    train
    .groupby("spectrogram_id")
    .head(1)
    .reset_index(drop=True)
)
print(train.shape)

(11138, 15)


### split folds

In [7]:
# Splitter stratified on `expert_consensus` and grouped by `patient_id`.
sgkf = StratifiedGroupKFold(
    n_splits=N_FOLDS,
    shuffle=True,
    random_state=RANDAM_SEED,
)

# -1 is a placeholder; the loop below writes the validation fold id per row.
train["fold"] = -1

for fold_id, (_, val_idx) in enumerate(
    sgkf.split(
        X=train,
        y=train["expert_consensus"],
        groups=train["patient_id"],
    )
):
    # Only the validation indices are needed: they determine the row's fold.
    train.loc[val_idx, "fold"] = fold_id

In [8]:
# Sum of the normalised class columns per fold, displayed to compare the folds' class mass.
train.groupby("fold")[CLASSES].sum()

Unnamed: 0_level_0,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,407.87897,240.84782,262.474513,142.304068,286.40759,800.087038
1,360.427388,231.931854,193.738,173.763906,333.566517,1166.572336
2,441.934721,328.255479,237.291923,163.192668,355.493987,926.831222
3,425.68598,195.568155,182.017264,148.850582,259.828026,864.049993
4,392.391708,234.916737,120.355588,129.112045,258.598367,873.625556


### split spectrogram files

In [9]:
# For every spectrogram file, cut out one 300-column window per label row and
# store it as `<label_id>.npy` in TRAIN_SPEC_SPLIT.
for spec_id, df in tqdm(train.groupby("spectrogram_id")):
    spec = pd.read_parquet(TRAIN_SPEC.joinpath(f"{spec_id}.parquet"))

    # Missing values -> 0, first column dropped, then transposed so that the
    # file's rows become the array's columns. The original comment describes
    # the layout as (Hz, Time).
    spec_arr = spec.fillna(0).values[:, 1:].T.astype("float32")

    for spec_offset, label_id in (
        df[["spectrogram_label_offset_seconds", "label_id"]].astype(int).values
    ):
        # The offset is given in seconds; halving turns it into a column index
        # (presumably one spectrogram column per 2 seconds -- TODO confirm).
        spec_offset //= 2
        split_spec_arr = spec_arr[:, spec_offset:spec_offset + 300]
        np.save(TRAIN_SPEC_SPLIT.joinpath(f"{label_id}.npy"), split_spec_arr)

  0%|          | 0/11138 [00:00<?, ?it/s]

## Definition: Model, Dataset, Metric

### model

In [10]:
class HMSHBACSpecModel(nn.Module):
    """Thin ``nn.Module`` wrapper around a model built by ``timm.create_model``.

    Args:
        model_name: architecture name forwarded to ``timm.create_model``.
        pretrained: forwarded to ``timm.create_model`` as ``pretrained``.
        in_channels: forwarded to ``timm.create_model`` as ``in_chans``.
        num_classes: forwarded to ``timm.create_model`` as ``num_classes``.
    """

    def __init__(
            self,
            model_name: str,
            pretrained: bool,
            in_channels: int,
            num_classes: int,
        ):
        super().__init__()
        timm_kwargs = dict(
            model_name=model_name,
            pretrained=pretrained,
            num_classes=num_classes,
            in_chans=in_channels,
        )
        self.model = timm.create_model(**timm_kwargs)

    def forward(self, x):
        """Return the wrapped model's output for ``x`` unchanged."""
        return self.model(x)

### dataset

In [11]:
FilePath = tp.Union[str, Path]
Label = tp.Union[int, float, np.ndarray]


class HMSHBACSpecDataset(torch.utils.data.Dataset):
    """Dataset that loads ``.npy`` spectrograms and standardises them per image.

    Each item is loaded with ``np.load``, clipped, log-transformed, shifted to
    zero mean and scaled to unit standard deviation, given a trailing channel
    axis and finally passed through ``transform``.

    The image is kept in ``float32`` throughout. The clip bounds are plain
    Python floats on purpose: ``np.exp(-4)`` is an ``np.float64`` scalar, and
    under NumPy 2 promotion rules clipping a float32 array with it promotes
    the whole image to float64.

    Args:
        image_paths: one ``.npy`` path per sample.
        labels: one label per sample, index-aligned with ``image_paths``.
        transform: callable invoked as ``transform(image=img)`` that returns a
            mapping with an ``"image"`` entry (e.g. an ``albumentations.Compose``).
    """

    # Bounds applied before the log transform: exp(-4) and exp(8), so the
    # log-transformed values lie in [-4, 8].
    _CLIP_MIN = float(np.exp(-4))
    _CLIP_MAX = float(np.exp(8))

    def __init__(
        self,
        image_paths: tp.Sequence[FilePath],
        labels: tp.Sequence[Label],
        transform: tp.Callable[..., tp.Mapping[str, tp.Any]],
    ):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, index: int):
        """Return ``{"data": transformed image, "target": label}`` for ``index``."""
        img_path = self.image_paths[index]
        label = self.labels[index]

        # The preprocessing cell of this notebook stores float32 crops described
        # as (Hz, Time); the cast is a no-op then and a safeguard otherwise.
        img = np.load(img_path).astype(np.float32, copy=False)

        # log transform (clipping first keeps log() away from zero)
        img = np.clip(img, self._CLIP_MIN, self._CLIP_MAX)
        img = np.log(img)

        # normalize per image; eps guards against a constant image (std == 0)
        eps = 1e-6
        img_mean = img.mean(axis=(0, 1))
        img = img - img_mean
        img_std = img.std(axis=(0, 1))
        img = img / (img_std + eps)

        img = img[..., None]  # append a trailing channel axis: (H, W) -> (H, W, 1)
        img = self._apply_transform(img)

        return {"data": img, "target": label}

    def _apply_transform(self, img: np.ndarray):
        """Run ``self.transform`` on the image and return its ``"image"`` entry."""
        transformed = self.transform(image=img)
        return transformed["image"]

### loss

In [12]:
class KLDivLossWithLogits(nn.KLDivLoss):
    """``nn.KLDivLoss`` (``reduction="batchmean"``) that accepts raw logits."""

    def __init__(self):
        super().__init__(reduction="batchmean")

    def forward(self, y, t):
        """Apply ``log_softmax`` over ``dim=1`` of ``y`` and return the KL loss against ``t``."""
        log_probs = nn.functional.log_softmax(y, dim=1)
        return super().forward(log_probs, t)


class KLDivLossWithLogitsForVal(nn.KLDivLoss):
    """Accumulating KL-divergence metric for validation.

    ``forward`` only stores the batch's log-probabilities and labels as NumPy
    arrays; ``compute`` evaluates ``nn.KLDivLoss(reduction="batchmean")`` once
    over everything stored so far and then clears the buffers. The result is
    therefore the mean over all accumulated samples, independent of how they
    were batched.
    """

    def __init__(self):
        super().__init__(reduction="batchmean")
        self.log_prob_list = []
        self.label_list = []

    def forward(self, y, t):
        """Store ``log_softmax(y, dim=1)`` and ``t`` for a later ``compute()``.

        Returns nothing. Inputs may live on any device and may track gradients:
        they are detached and moved to the CPU before the NumPy conversion.
        """
        log_prob = nn.functional.log_softmax(y, dim=1)
        self.log_prob_list.append(log_prob.detach().cpu().numpy())
        self.label_list.append(t.detach().cpu().numpy())

    def compute(self):
        """Return the KL divergence over all stored batches as a float and reset.

        Raises:
            ValueError: if no batch has been accumulated since the last reset.
        """
        if not self.log_prob_list:
            raise ValueError(
                "compute() was called before any batch was accumulated")

        log_prob = np.concatenate(self.log_prob_list, axis=0)
        label = np.concatenate(self.label_list, axis=0)
        final_metric = super().forward(
            torch.from_numpy(log_prob),
            torch.from_numpy(label)
        ).item()

        # Start from a clean slate for the next evaluation round.
        self.log_prob_list = []
        self.label_list = []

        return final_metric

# Training

## Config

In [13]:
class CFG:
    """Hyper-parameters and runtime switches read by the training and inference cells."""

    # model / input
    model_name = "resnet34d"
    img_size = 512

    # optimisation
    max_epoch = 9
    batch_size = 32
    lr = 1e-3
    weight_decay = 1e-2
    es_patience = 5

    # reproducibility and runtime
    seed = 1086
    deterministic = True
    enable_amp = True
    device = "cuda"

## Functions for training

In [14]:
def set_random_seed(seed: int = 42, deterministic: bool = False):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN determinism flag.

    Args:
        seed: value passed to every seeding function and written (as a string)
            to the ``PYTHONHASHSEED`` environment variable.
        deterministic: value assigned to ``torch.backends.cudnn.deterministic``.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,  # type: ignore
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = deterministic  # type: ignore
    
def to_device(
    tensors: tp.Union[tp.Tuple[torch.Tensor], tp.Dict[str, torch.Tensor]],
    device: torch.device, *args, **kwargs
):
    """Move a tensor, or a tuple/list/dict of tensors, to ``device``.

    Args:
        tensors: a single tensor, a tuple or list of tensors, or a dict whose
            values are tensors.
        device: target device.
        *args, **kwargs: forwarded unchanged to every ``Tensor.to`` call.

    Returns:
        The same kind of container that was passed in (tuple -> tuple,
        list -> list, dict -> dict with the same keys), or a single tensor.
        A tuple input yields a real tuple, not a one-shot generator, so the
        result can be indexed and iterated more than once.
    """
    if isinstance(tensors, tuple):
        return tuple(t.to(device, *args, **kwargs) for t in tensors)
    if isinstance(tensors, list):
        return [t.to(device, *args, **kwargs) for t in tensors]
    if isinstance(tensors, dict):
        return {
            k: t.to(device, *args, **kwargs) for k, t in tensors.items()}
    return tensors.to(device, *args, **kwargs)

In [15]:
def get_path_label(val_fold, train_all: pd.DataFrame):
    """Split ``train_all`` into train/validation file paths and labels for one fold.

    Args:
        val_fold: value of the ``"fold"`` column that marks validation rows;
            every other row is a training row.
        train_all: frame with a ``"fold"`` column, a ``"label_id"`` column and
            the ``CLASSES`` columns.

    Returns:
        ``(train_data, val_data, train_idx, val_idx)``. The two dicts hold
        ``"image_paths"`` (``TRAIN_SPEC_SPLIT / "<label_id>.npy"``) and
        ``"labels"`` (float32 arrays of the ``CLASSES`` columns). The two index
        arrays are row *positions* in ``train_all``, so they can index NumPy
        arrays built from the frame whatever its index looks like.
    """
    # Positions, not index labels: the lists below and the arrays callers
    # build from the frame are positional. For a 0..n-1 index both coincide.
    is_val = (train_all["fold"] == val_fold).values
    train_idx = np.flatnonzero(~is_val)
    val_idx = np.flatnonzero(is_val)

    labels = train_all[CLASSES].values
    img_paths = [
        TRAIN_SPEC_SPLIT / f"{label_id}.npy"
        for label_id in train_all["label_id"].values
    ]

    def _subset(positions):
        """Collect the paths and float32 labels at the given row positions."""
        return {
            "image_paths": [img_paths[idx] for idx in positions],
            "labels": [labels[idx].astype("float32") for idx in positions]}

    train_data = _subset(train_idx)
    val_data = _subset(val_idx)

    return train_data, val_data, train_idx, val_idx


def get_transforms(CFG):
    """Build the training and validation transforms.

    Both pipelines are the same: a resize to ``CFG.img_size`` x ``CFG.img_size``
    followed by ``ToTensorV2``. Two separate ``A.Compose`` objects are returned
    as ``(train_transform, val_transform)``.
    """
    def _resize_to_tensor():
        side = CFG.img_size
        return A.Compose([
            A.Resize(p=1.0, height=side, width=side),
            ToTensorV2(p=1.0),
        ])

    train_transform = _resize_to_tensor()
    val_transform = _resize_to_tensor()
    return train_transform, val_transform

In [16]:
def train_one_fold(CFG, val_fold, train_all, output_path):
    """Train one cross-validation fold and checkpoint every improving epoch.

    Args:
        CFG: config object; reads ``seed``, ``deterministic``, ``device``,
            ``batch_size``, ``model_name``, ``lr``, ``weight_decay``,
            ``max_epoch``, ``enable_amp``, ``es_patience`` (and ``img_size``
            through ``get_transforms``).
        val_fold: fold id used for validation; all other folds are trained on.
        train_all: full training frame, passed on to ``get_path_label``.
        output_path: directory (``Path``) that receives
            ``snapshot_epoch_<epoch>.pth`` whenever the validation loss improves.

    Returns:
        ``(val_fold, best_epoch, best_val_loss)``.
    """
    set_random_seed(CFG.seed, deterministic=CFG.deterministic)
    # cuDNN autotuning chooses kernels by timing them, so the choice (and with
    # it the numerical result) can differ from run to run. Only enable it when
    # a deterministic run was not requested.
    torch.backends.cudnn.benchmark = not CFG.deterministic
    device = torch.device(CFG.device)

    train_path_label, val_path_label, _, _ = get_path_label(val_fold, train_all)
    train_transform, val_transform = get_transforms(CFG)

    train_dataset = HMSHBACSpecDataset(**train_path_label, transform=train_transform)
    val_dataset = HMSHBACSpecDataset(**val_path_label, transform=val_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=True, drop_last=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=False, drop_last=False)

    # One output per entry of CLASSES, single-channel input.
    model = HMSHBACSpecModel(
        model_name=CFG.model_name, pretrained=True, num_classes=N_CLASSES, in_channels=1)
    model.to(device)

    optimizer = optim.AdamW(params=model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    # pct_start=0.0 means no warm-up phase: the LR starts at max_lr and is
    # annealed down to (max_lr / div_factor) / final_div_factor = max_lr / 10.
    scheduler = lr_scheduler.OneCycleLR(
        optimizer=optimizer, epochs=CFG.max_epoch,
        pct_start=0.0, steps_per_epoch=len(train_loader),
        max_lr=CFG.lr, div_factor=25, final_div_factor=4.0e-01
    )

    loss_func = KLDivLossWithLogits()
    loss_func.to(device)
    loss_func_val = KLDivLossWithLogitsForVal()

    use_amp = CFG.enable_amp
    scaler = amp.GradScaler(enabled=use_amp)

    best_val_loss = 1.0e+09
    best_epoch = 0

    for epoch in range(1, CFG.max_epoch + 1):
        epoch_start = time()

        # ---- training ----
        train_loss = 0.0
        model.train()
        for batch in train_loader:
            batch = to_device(batch, device)
            x, t = batch["data"], batch["target"]

            optimizer.zero_grad()
            with amp.autocast(use_amp):
                y = model(x)
                loss = loss_func(y, t)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # The scheduler is stepped once per batch (steps_per_epoch above).
            scheduler.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # ---- validation ----
        model.eval()
        for batch in val_loader:
            x, t = batch["data"], batch["target"]
            x = to_device(x, device)
            with torch.no_grad(), amp.autocast(use_amp):
                y = model(x)
            # The validation metric accumulates on the CPU in float32.
            y = y.detach().cpu().to(torch.float32)
            loss_func_val(y, t)
        val_loss = loss_func_val.compute()

        if val_loss < best_val_loss:
            best_epoch = epoch
            best_val_loss = val_loss
            torch.save(model.state_dict(), str(output_path / f'snapshot_epoch_{epoch}.pth'))

        elapsed_time = time() - epoch_start
        print(
            f"[epoch {epoch}] train loss: {train_loss: .6f}, val loss: {val_loss: .6f}, elapsed_time: {elapsed_time: .3f}")

        # Stop once more than `es_patience` epochs have passed without improvement.
        if epoch - best_epoch > CFG.es_patience:
            print("Early Stopping!")
            break

    return val_fold, best_epoch, best_val_loss

## Run Training

In [17]:
# One result tuple per fold, as returned by train_one_fold:
# (val_fold, best_epoch, best_val_loss).
score_list = []
for fold_id in FOLDS:
    # Per-fold directory (relative to the CWD) that receives the epoch snapshots.
    output_path = Path(f"fold{fold_id}")
    output_path.mkdir(exist_ok=True)
    print(f"[fold{fold_id}]")
    score_list.append(
        train_one_fold(
            CFG=CFG,
            val_fold=fold_id,
            train_all=train,
            output_path=output_path,
        )
    )

[fold0]


model.safetensors:   0%|          | 0.00/87.4M [00:00<?, ?B/s]

[epoch 1] train loss:  0.882599, val loss:  0.851408, elapsed_time:  136.889
[epoch 2] train loss:  0.698157, val loss:  0.820204, elapsed_time:  124.286
[epoch 3] train loss:  0.608675, val loss:  0.744616, elapsed_time:  123.949
[epoch 4] train loss:  0.528086, val loss:  0.753391, elapsed_time:  124.226
[epoch 5] train loss:  0.427589, val loss:  0.781515, elapsed_time:  124.063
[epoch 6] train loss:  0.318129, val loss:  0.756746, elapsed_time:  124.139
[epoch 7] train loss:  0.224556, val loss:  0.803439, elapsed_time:  124.015
[epoch 8] train loss:  0.153463, val loss:  0.786738, elapsed_time:  123.968
[epoch 9] train loss:  0.116854, val loss:  0.824401, elapsed_time:  124.217
Early Stopping!
[fold1]




[epoch 1] train loss:  0.904280, val loss:  0.938235, elapsed_time:  121.299
[epoch 2] train loss:  0.720165, val loss:  0.900330, elapsed_time:  121.501
[epoch 3] train loss:  0.638118, val loss:  0.706957, elapsed_time:  121.144
[epoch 4] train loss:  0.552333, val loss:  0.737890, elapsed_time:  121.325
[epoch 5] train loss:  0.457910, val loss:  0.774971, elapsed_time:  121.119
[epoch 6] train loss:  0.350029, val loss:  0.703400, elapsed_time:  121.281
[epoch 7] train loss:  0.252007, val loss:  0.714577, elapsed_time:  121.121
[epoch 8] train loss:  0.175336, val loss:  0.723147, elapsed_time:  121.234
[epoch 9] train loss:  0.132709, val loss:  0.776623, elapsed_time:  121.364
[fold2]




[epoch 1] train loss:  0.898212, val loss:  0.886631, elapsed_time:  122.104
[epoch 2] train loss:  0.710584, val loss:  0.811041, elapsed_time:  121.316
[epoch 3] train loss:  0.627898, val loss:  0.830818, elapsed_time:  121.113
[epoch 4] train loss:  0.543182, val loss:  0.865721, elapsed_time:  120.968
[epoch 5] train loss:  0.456368, val loss:  0.702043, elapsed_time:  121.464
[epoch 6] train loss:  0.351186, val loss:  0.767019, elapsed_time:  121.270
[epoch 7] train loss:  0.249169, val loss:  0.813521, elapsed_time:  121.358
[epoch 8] train loss:  0.169040, val loss:  0.833246, elapsed_time:  121.285
[epoch 9] train loss:  0.128260, val loss:  0.853226, elapsed_time:  121.222
[fold3]




[epoch 1] train loss:  0.908737, val loss:  0.935427, elapsed_time:  124.786
[epoch 2] train loss:  0.706002, val loss:  0.724434, elapsed_time:  124.586
[epoch 3] train loss:  0.618819, val loss:  0.730945, elapsed_time:  124.886
[epoch 4] train loss:  0.535117, val loss:  0.739224, elapsed_time:  124.880
[epoch 5] train loss:  0.452238, val loss:  0.709114, elapsed_time:  124.696
[epoch 6] train loss:  0.344210, val loss:  0.767912, elapsed_time:  124.793
[epoch 7] train loss:  0.243417, val loss:  0.813730, elapsed_time:  124.765
[epoch 8] train loss:  0.166547, val loss:  0.848325, elapsed_time:  124.783
[epoch 9] train loss:  0.124330, val loss:  0.883598, elapsed_time:  124.695
[fold4]
[epoch 1] train loss:  0.892550, val loss:  1.235761, elapsed_time:  126.615
[epoch 2] train loss:  0.702810, val loss:  0.771075, elapsed_time:  125.348
[epoch 3] train loss:  0.614388, val loss:  0.837626, elapsed_time:  125.135
[epoch 4] train loss:  0.535080, val loss:  0.723097, elapsed_time: 

# Inference Out Of Fold

## Copy best models

In [18]:
# Show the per-fold results collected by the training loop:
# one (val_fold, best_epoch, best_val_loss) tuple per fold.
print(score_list)

[(0, 3, 0.7446164488792419), (1, 6, 0.703400194644928), (2, 5, 0.7020434141159058), (3, 5, 0.7091140747070312), (4, 4, 0.7230966091156006)]


In [19]:
best_log_list = []
for fold_id, best_epoch, _ in score_list:
    exp_dir_path = Path(f"fold{fold_id}")

    # Copy the best epoch's snapshot to a fixed file name in the working directory.
    best_model_path = exp_dir_path.joinpath(f"snapshot_epoch_{best_epoch}.pth")
    copy_to = f"./best_model_fold{fold_id}.pth"
    shutil.copy(src=best_model_path, dst=copy_to)

    # Then delete every snapshot left in the fold directory, including the one
    # that was just copied.
    for p in exp_dir_path.glob("*.pth"):
        p.unlink()

## Inference OOF

In [20]:
def run_inference_loop(model, loader, device):
    """Run ``model`` over ``loader`` and return the softmax outputs as one array.

    The model is moved to ``device`` and put in eval mode. Each batch's
    ``"data"`` entry is moved to ``device``, the model output is passed through
    a softmax over ``dim=1``, and the per-batch results are concatenated along
    the first axis in loader order.
    """
    model.to(device)
    model.eval()
    with torch.no_grad():
        batch_probs = [
            model(to_device(batch["data"], device))
            .softmax(dim=1)
            .detach()
            .cpu()
            .numpy()
            for batch in tqdm(loader)
        ]

    return np.concatenate(batch_probs)

In [21]:
# Out-of-fold prediction: every row of `train` is predicted by the model of the
# fold in which that row was held out for validation.
label_arr = train[CLASSES].values  # not read by the cells below; kept for inspection
oof_pred_arr = np.zeros((len(train), N_CLASSES))
# NOTE: `score_list` is deliberately left alone here. The "Copy best models"
# cell reads the training results from it, and resetting it to an empty list
# would make that cell a silent no-op when it is re-run.

device = torch.device(CFG.device)

for fold_id in range(N_FOLDS):
    print(f"\n[fold {fold_id}]")

    # validation loader of this fold
    _, val_path_label, _, val_idx = get_path_label(fold_id, train)
    _, val_transform = get_transforms(CFG)
    val_dataset = HMSHBACSpecDataset(**val_path_label, transform=val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=CFG.batch_size, num_workers=4, shuffle=False, drop_last=False)

    # best checkpoint of this fold; the output size matches oof_pred_arr's width
    model_path = f"./best_model_fold{fold_id}.pth"
    model = HMSHBACSpecModel(
        model_name=CFG.model_name, pretrained=False, num_classes=N_CLASSES, in_channels=1)
    model.load_state_dict(torch.load(model_path, map_location=device))

    # inference; shuffle=False keeps predictions aligned with val_idx
    val_pred = run_inference_loop(model, val_loader, device)
    oof_pred_arr[val_idx] = val_pred

    # release per-fold objects before the next model is loaded
    del val_idx, val_path_label
    del model, val_loader
    torch.cuda.empty_cache()
    gc.collect()


[fold 0]


  0%|          | 0/67 [00:00<?, ?it/s]


[fold 1]


  0%|          | 0/77 [00:00<?, ?it/s]


[fold 2]


  0%|          | 0/77 [00:00<?, ?it/s]


[fold 3]


  0%|          | 0/65 [00:00<?, ?it/s]


[fold 4]


  0%|          | 0/63 [00:00<?, ?it/s]

## Calculate OOF score

In [22]:
# `kaggle_kl_div` is loaded from the directory appended to sys.path below, so
# the path has to be registered before the import.
import sys
sys.path.append('/kaggle/input/kaggle-kl-div')
from kaggle_kl_div import score

# Ground truth: row id plus the normalised class columns.
true = train[["label_id", *CLASSES]].copy()

# Predictions in the same layout: `label_id` first, then one column per class.
oof = pd.DataFrame(data=oof_pred_arr, columns=CLASSES)
oof.insert(loc=0, column="label_id", value=train["label_id"])

cv_score = score(
    solution=true,
    submission=oof,
    row_id_column_name='label_id',
)
print('CV Score KL-Div for ResNet34d', cv_score)

CV Score KL-Div for ResNet34d 0.715760646315781


# EOF