### Classification notebook for Kaggle RSNA competition

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import os
import gc
import random
from glob import glob
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold,StratifiedGroupKFold
import warnings
import pickle
import json
import re
import time
import sys
from requests import get
import multiprocessing
import joblib
import torch
from torch.utils.data import Dataset, DataLoader
import transformers
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import timm
from sklearn.preprocessing import minmax_scale
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2,torchvision
from ipyexperiments.ipyexperiments import IPyExperimentsPytorch
from timm.optim.optim_factory import create_optimizer_v2
from timm import utils
from fastprogress.fastprogress import format_time
from fastai.vision.all import *
from torch.utils.data import WeightedRandomSampler
from sklearn.metrics import roc_auc_score

class CFG:
    # Central experiment configuration; read as class attributes (never instantiated here).
    seed = 46          # seed for random / numpy / PYTHONHASHSEED and set_seed
    n_splits = 4       # number of cross-validation folds
    SZ = (1536, 960)   # passed to A.Resize as (SZ[0], SZ[1])
    debug = False      # when True, train is subsampled to 1% of rows
    BS = 10            # training batch size (validation uses BS * 2)
    EP = 12            # number of training epochs
    # Embedded in KERNEL_TYPE and used by get_rsna_classification_model.
    # NOTE(review): Net builds tf_efficientnetv2_s directly and does not read this value - confirm intended.
    MODEL = 'efficientnet_b3'
    LR = 4e-04         # optimizer lr and OneCycleLR max_lr
    WD = 1e-08         # weight decay value (also embedded in KERNEL_TYPE)

# Seed the Python and NumPy RNGs and set the hash seed env var from the shared config seed.
random.seed(CFG.seed)
os.environ["PYTHONHASHSEED"] = str(CFG.seed)
np.random.seed(CFG.seed)
# Notebook-wide cosmetics: larger plot font, and silence all warnings.
plt.rcParams["font.size"] = 13
warnings.filterwarnings('ignore')

In [None]:
# set_seed is not defined in this file; presumably it comes from the fastai star import - TODO confirm.
set_seed(CFG.seed)

In [None]:
# Filesystem layout: competition root, the PNG image folder, and the CSV folder.
root_dir = '///mnt/c/Personal/Competitions/Kaggle/rsna'
image_dir = f'{root_dir}/data/8bit'
DIR = '///mnt/c/Personal/Competitions/Kaggle/rsna/data/'
# Competition tables loaded from DIR.
submit = pd.read_csv(os.path.join(DIR,'sample_submission.csv'))
train = pd.read_csv(os.path.join(DIR,'Train.csv'))
test_df = pd.read_csv(os.path.join(DIR,'Test.csv'))

# Debug mode keeps a 1% random sample; the index is reset to 0..n-1.
if CFG.debug:
    train = train.sample(frac=0.01).reset_index(drop=True)
    
# Run naming: checkpoints go under runs/<VERSION>/, file names start with KERNEL_TYPE.
VERSION = "Multitask_EB3"
MODEL_FOLDER = Path(f"{root_dir}/runs/{VERSION}/")
os.makedirs(MODEL_FOLDER,exist_ok=True)
# '-' is stripped from the LR / WD strings (e.g. exponent signs) before they enter the name.
KERNEL_TYPE = f"{CFG.MODEL}_{CFG.SZ[0]}_{CFG.SZ[1]}_bs{CFG.BS}_ep{CFG.EP}_lr{str(CFG.LR).replace('-','')}_wd{str(CFG.WD).replace('-','')}"

print(MODEL_FOLDER)
print(KERNEL_TYPE)

In [None]:
# Cast the difficult-negative flag to int.
train['difficult_negative_case'] = train['difficult_negative_case'].astype(int)
# Integer-encode laterality (2 codes) and view (6 codes).
# Values missing from a mapping become NaN under Series.map.
train['laterality_enc'] = train['laterality'].map(dict({'L':0,'R':1}))
train['view_enc'] = train['view'].map(dict({'CC':0,'MLO':1,'ML':2,'LM':3,'AT':4,'LMO':5}))

In [None]:
# Auxiliary columns inspected below; this cell only counts missing values per column.
aux_labels = ['laterality_enc','implant','view_enc'] #'biopsy','invasive'
train[aux_labels].isna().sum()

In [None]:
# Number of distinct values per auxiliary column.
train[aux_labels].nunique()

### Get kfolds

In [None]:
# Patient-grouped, cancer-stratified CV split; each row gets the id of the fold
# in which it serves as validation data.
mskf = StratifiedGroupKFold(n_splits=CFG.n_splits, shuffle=True, random_state=121)
train['fold'] = 0

# Keep only the validation indices of every split, in split order.
fold_ids = [
    test_index
    for _, test_index in mskf.split(train, train['cancer'].values, train['patient_id'].values)
]

for fld, valIx in enumerate(fold_ids):
    train.loc[valIx, 'fold'] = fld

#### Data loader

In [None]:
def read_data(d):
    """Load the PNG for one metadata row.

    Args:
        d: row-like object exposing ``patient_id`` and ``image_id``; the file
            is looked up as ``<image_dir>/<patient_id>_<image_id>.png``.

    Returns:
        The image array as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: if the file is missing or cannot be decoded.
    """
    path = os.path.join(image_dir, f'{d.patient_id}_{d.image_id}.png')
    image = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the path rather than later with an opaque AttributeError.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return image

class RsnaDataset(Dataset):
    """Dataset over a metadata frame with one image per row.

    Args:
        df: DataFrame with at least ``patient_id`` and ``image_id``; in
            non-test modes it must also provide ``cancer``, ``laterality_enc``,
            ``biopsy``, ``view_enc`` and ``implant``.
        augs: optional callable invoked as ``augs(image=image)['image']``.
        mode: ``'test'`` yields ``(image, patient_id)``; any other value
            yields the image plus all targets.
    """

    def __init__(self, df, augs=None,mode='train'):
        self.length = len(df)
        self.df = df
        self.augs = augs
        self.mode = mode

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]
        image = read_data(d)
        # Scale pixel values by 1/255 into float32.
        image = image.astype(np.float32)/255
        if self.augs is not None:
            image = self.augs(image=image)['image']
        patient_id = d.patient_id

        # Return before touching label columns: a test frame need not carry
        # them, and reading them first would raise on such a frame.
        if self.mode=='test':
            return image,patient_id

        aux1 = torch.tensor(d.laterality_enc).float()
        aux2 = torch.tensor(d.biopsy).float()
        # View is a multi-class target, hence long instead of float.
        aux3 = torch.tensor(d.view_enc).long()
        aux4 = torch.tensor(d.implant).float()
        cancer = torch.tensor(d.cancer).float()

        return image,cancer,aux1,aux2,aux3,aux4,patient_id

In [None]:
def worker_init_fn(worker_id):
    """
    Handles PyTorch x Numpy seeding issues.

    Re-seeds NumPy's global RNG with the first word of the current RNG state
    plus the worker id, so workers do not share an identical NumPy state.

    Args:
        worker_id (int): Id of the worker.
    """
    base_seed = int(np.random.get_state()[1][0])
    # np.random.seed only accepts values in [0, 2**32 - 1]; wrap the sum so a
    # large state word plus the worker id cannot overflow that range.
    np.random.seed((base_seed + worker_id) % (2 ** 32))

### Augmentations

In [None]:
# Training pipeline: random shift/scale/rotate (rotation limit 15, applied with
# p=0.7), random horizontal flip, then resize to CFG.SZ and convert to a tensor.
TRAIN_AUG = A.Compose([
    A.ShiftScaleRotate(rotate_limit=15, p=0.7),
    A.HorizontalFlip(p = 0.5),
    A.Resize(CFG.SZ[0],CFG.SZ[1]),
    # A.Normalize(mean=0,std=1),
    ToTensorV2(),
])

# Validation pipeline: deterministic resize + tensor conversion only.
VALID_AUG = A.Compose([
    A.Resize(CFG.SZ[0],CFG.SZ[1]),
    # A.Normalize(mean=0,std=1),
    ToTensorV2(),
])

### Visualization

In [None]:
# Pull one augmented batch of 8 and display it as an image grid, titled with the cancer targets.
dataset_show = RsnaDataset(train, augs=TRAIN_AUG, mode='train')
loader_show = torch.utils.data.DataLoader(dataset_show, batch_size=8)
img,target,aux_targ1,aux_targ2,aux_targ3,aux_targ4,_ = next(iter(loader_show))

grid = torchvision.utils.make_grid(img, normalize=True, padding=2)
# Move the channel axis last for plotting.
grid = grid.permute(1, 2, 0)
# show_image is not defined in this file; presumably from the fastai star import - TODO confirm.
show_image(grid, figsize=(15,8),title=[x for x in target.numpy()]);

### Model

In [None]:
def get_rsna_classification_model(model_name, pretrained=True, **kwargs):
    """Create a headless timm backbone.

    Args:
        model_name: timm architecture name to instantiate.
        pretrained: whether to load pretrained weights.
        **kwargs: forwarded unchanged to ``timm.create_model``.

    Returns:
        The timm model created with ``num_classes=0``.
    """
    # Honour the requested architecture; the argument used to be ignored in
    # favour of CFG.MODEL.
    return timm.create_model(model_name, pretrained=pretrained, num_classes=0, **kwargs)

In [None]:
from timm.models.efficientnet import *
class Net(nn.Module):
    """Multi-task classifier: shared tf_efficientnetv2_s encoder with five linear heads.

    Heads (all fed the same pooled feature vector):
        cancer: 1 logit (main target)
        aux1, aux2, aux4: 1 logit each
        aux3: 6 logits (multi-class)
    """

    def load_pretrain(self, ):
        # Intentionally a no-op; pretrained weights are loaded in __init__.
        pass

    def __init__(self,):
        super().__init__()

        self.encoder = tf_efficientnetv2_s(pretrained=True)#,drop_rate = 0.15, drop_path_rate = 0.15)
        # Feature width is taken from the encoder's own classifier layer. If
        # the classifier is a container without ``in_features``, fall back to
        # its second element. Only AttributeError is expected here, so other
        # errors are no longer swallowed.
        try:
            k = self.encoder.classifier.in_features
        except AttributeError:
            k = self.encoder.classifier[1].in_features

        self.cancer = nn.Linear(k,1)
        self.aux1 = nn.Linear(k,1)
        self.aux2 = nn.Linear(k,1)
        self.aux3 = nn.Linear(k,6)
        self.aux4 = nn.Linear(k,1)

    def forward(self, x):
        """Return ``(cancer, aux1, aux2, aux3, aux4)`` logits for a batch.

        The single-logit heads are flattened with ``reshape(-1)``; ``aux3``
        keeps its per-class axis.
        """
        # Encoder feature map -> global average pool to 1x1 -> flatten dims 1..3.
        # assumes forward_features returns a 4-D map with k channels - TODO confirm
        x = self.encoder.forward_features(x)
        x = F.adaptive_avg_pool2d(x,1)
        feature = torch.flatten(x,1,3)

        cancer = self.cancer(feature).reshape(-1)
        aux1 = self.aux1(feature).reshape(-1)
        aux2 = self.aux2(feature).reshape(-1)
        aux3 = self.aux3(feature)
        aux4 = self.aux4(feature).reshape(-1)
        return cancer, aux1,aux2,aux3,aux4


In [None]:
# Smoke test: draw one shuffled batch of 2 from the training dataset.
dl = DataLoader(RsnaDataset(train, augs=TRAIN_AUG, mode='train'),
                          batch_size=2,
                          shuffle=True,
                          num_workers=8,
                          drop_last=True,
                        worker_init_fn=worker_init_fn)

# Batch layout follows RsnaDataset.__getitem__; the fifth item (view target) is kept as aux31.
image,cancer,aux1,aux2,aux31,aux4,patient_id = next(iter(dl))
# a.shape,b.shape,c.shape
# a.shape,b.shape,c.shape

In [None]:
# m = get_rsna_classification_model(CFG.MODEL)
# Smoke test: one forward pass. aux1/aux2/aux4 are rebound to model outputs here,
# replacing the target tensors of the same names from the loader cell.
m = Net()
cancer1, aux1,aux2,aux3,aux4 = m(image)
# NOTE(review): this prints the `cancer` target tensor itself, not a shape - confirm intended.
print(cancer, aux1.shape,aux2.shape,aux3.shape,aux4.shape)

In [None]:
# BCEWithLogitsLoss takes (input_logits, target): logits come from the model
# (cancer1), targets from the loader (cancer). They were previously swapped.
nn.BCEWithLogitsLoss()(cancer1, cancer)

In [None]:
# Smoke test for the multi-class head: aux3 holds the logits, aux31 the integer class targets.
nn.CrossEntropyLoss()(aux3,aux31)  ## nn.CrossEntropyLoss()(pred,target)

### Sampling

In [None]:
class BalanceSampler(Sampler):
    """Sampler yielding repeating groups of one positive followed by ``ratio - 1`` negatives.

    Negatives are shuffled and used at most once per pass; positives are drawn
    with replacement (``np.random.choice``), one per group.

    Args:
        dataset: object exposing ``df.cancer`` (label > 0 means positive).
        ratio: group size, i.e. 1 positive + (ratio - 1) negatives. Must be >= 2.

    Raises:
        ValueError: if ``ratio`` is smaller than 2.
    """

    def __init__(self, dataset, ratio=8):
        if ratio < 2:
            # ratio - 1 negatives per group; ratio == 1 used to divide by zero.
            raise ValueError(f"ratio must be >= 2, got {ratio}")
        self.r = ratio-1
        self.dataset = dataset
        self.pos_index = np.where(dataset.df.cancer>0)[0]
        self.neg_index = np.where(dataset.df.cancer==0)[0]

        # Number of negatives used per pass: the largest multiple of r available.
        self.length = self.r * (len(self.neg_index) // self.r)

    def __iter__(self):
        pos_index = self.pos_index.copy()
        neg_index = self.neg_index.copy()
        np.random.shuffle(pos_index)
        np.random.shuffle(neg_index)

        # One row per group: column 0 is the positive, the rest are negatives.
        neg_index = neg_index[:self.length].reshape(-1,self.r)
        pos_index = np.random.choice(pos_index, self.length//self.r).reshape(-1,1)

        index = np.concatenate([pos_index,neg_index],-1).reshape(-1)
        return iter(index)

    def __len__(self):
        # Report what __iter__ actually yields (groups * group size); the old
        # value counted the negatives only, under-reporting the epoch length.
        return (self.length // self.r) * (self.r + 1)

### Custom Loss

In [None]:
### Custom loss functions
class FocalLoss(nn.Module):
    """
    Binary focal loss on sigmoid probabilities, used against class imbalance.

    Args:
        alpha: constant weight applied to every element's loss.
        gamma: focusing exponent applied to ``(1 - pt)``.
    """

    def __init__(self, alpha=0.25, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        # Added to probabilities before the log so log(0) cannot produce NaN/inf.
        self.epsilon = 1e-12

    def forward(self, logits, target):
        """
        Return the mean focal loss; ``logits`` and ``target`` must have matching shapes.
        """
        eps = self.epsilon
        p = torch.sigmoid(logits)
        log_p = torch.log(p + eps)
        log_not_p = torch.log((1.0 - p) + eps)
        # Log-probability assigned to the true class of each element.
        log_pt = target * log_p + (1.0 - target) * log_not_p
        # Down-weight well-classified elements by (1 - pt) ** gamma.
        modulator = (1 - torch.exp(log_pt)) ** self.gamma
        per_element = -1.0 * (self.alpha * modulator) * log_pt
        return torch.mean(per_element)
    
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives ``1 - smoothing``; every other class receives
    ``smoothing / (classes - 1)``.

    Args:
        classes: number of classes along ``dim``.
        smoothing: probability mass spread over the non-target classes.
        dim: class axis of ``pred``.
    """

    def __init__(self, classes=1, smoothing=0.0, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        Args:
            pred: raw logits with the class axis at ``self.dim``.
            target: integer class indices, shaped like ``pred`` without the class axis.
        """
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # With a single class there are no "other" classes to smooth over;
            # the default classes=1 used to divide by zero here.
            off_value = self.smoothing / (self.cls - 1) if self.cls > 1 else 0.0
            true_dist = torch.full_like(pred, off_value)
            # Scatter along the configured class axis instead of a hard-coded
            # axis 1, so a non-default dim stays consistent with log_softmax.
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
    
    
class CustomAuxLoss(nn.Module):
    """
    Weighted sum of the main BCE loss and the mean of four auxiliary losses.

    ``loss = alpha * main + (1 - alpha) * mean(aux1..aux4)`` where aux 1, 2 and
    4 use BCE-with-logits and aux 3 uses cross-entropy.
    """

    def __init__(self, alpha=0.5):
        super().__init__()
        self.alpha = alpha
        self.epsilon = 1e-12  # stored but not read by forward

    def forward(self, logits, target, auxLogits1, auxtarget1, auxLogits2, auxtarget2,
               auxLogits3, auxtarget3,auxLogits4, auxtarget4):
        """
        Return the combined scalar loss for one batch of (logits, target) pairs.
        """
        bce = F.binary_cross_entropy_with_logits

        main_term = bce(logits, target)
        aux_terms = (
            bce(auxLogits1, auxtarget1),
            bce(auxLogits2, auxtarget2),
            nn.CrossEntropyLoss()(auxLogits3, auxtarget3),  # multi-class head
            bce(auxLogits4, auxtarget4),
        )
        aux_mean = (aux_terms[0] + aux_terms[1] + aux_terms[2] + aux_terms[3]) / 4

        combined = main_term * self.alpha + aux_mean * (1 - self.alpha)
        return torch.mean(combined)

### Train & Validation Function

In [None]:
def pfbeta(labels, preds, beta=1,clip=True):
    """Probabilistic F-beta score.

    Args:
        labels: array/tensor of 0/1 ground-truth values.
        preds: array/tensor of scores, same shape as ``labels``.
        beta: recall weight of the F-score.
        clip: when True, scores are clipped into [0, 1] first.

    Returns:
        The F-beta value, or ``torch.tensor(0.0)`` when precision or recall is
        not strictly positive (this includes NaN from empty denominators).
    """
    scores = preds.clip(0, 1) if clip else preds

    # Soft counts: score mass on positives / negatives.
    true_pos = scores[labels == 1].sum()
    false_pos = scores[labels == 0].sum()
    n_positive = labels.sum()

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / n_positive

    if not (precision > 0 and recall > 0):
        return torch.tensor(0.0)

    b2 = beta * beta
    return (1 + b2) * (precision * recall) / (b2 * precision + recall)
    
def pfbeta_thresh(labels, preds, beta=1, thresh=0.1):
    """F-beta score after binarising the predictions at a threshold.

    Args:
        labels: array/tensor of 0/1 ground-truth values.
        preds: array/tensor of scores, same shape as ``labels``.
        beta: recall weight of the F-score.
        thresh: scores strictly above this value count as positive. Defaults
            to 0.1, the value that used to be hard-coded.

    Returns:
        The F-beta value, or ``torch.tensor(0.0)`` when precision or recall is
        not strictly positive.
    """
    preds = preds > thresh
    y_true_count = labels.sum()
    ctp = preds[labels==1].sum()
    cfp = preds[labels==0].sum()
    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if (c_precision > 0 and c_recall > 0):
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return torch.tensor(0.0)
    
def optimal_f1(labels, predictions):
    """Scan 100 evenly spaced thresholds in [0, 1] and return the best F1.

    Args:
        labels: tensor of 0/1 ground-truth values.
        predictions: tensor of scores.

    Returns:
        ``(best_score, best_threshold)``; the first maximum wins on ties.
    """
    y_true = labels.cpu().numpy()
    y_score = predictions.cpu().numpy()

    candidates = np.linspace(0, 1, 100)
    scores = []
    for cut in candidates:
        # Hard 0/1 predictions, so clipping inside pfbeta is unnecessary.
        scores.append(pfbeta(y_true, y_score > cut, clip=False))

    best = np.argmax(scores)
    return scores[best], candidates[best]

In [None]:
def train_one_epoch(
    model: nn.Module,
    loader: Iterable,
    loss_fn: Callable,
    optimizer: torch.optim.Optimizer,
    lr_scheduler: torch.optim.lr_scheduler._LRScheduler = None,
    mixup_fn: Callable = None,
    grad_scaler: torch.cuda.amp.GradScaler = None,
    mbar: master_bar = None,
):
    """Run one training epoch on the GPU.

    Args:
        model: network returning ``(output, aux1, aux2, aux3, aux4)``.
        loader: yields ``(input, target, aux_t1, aux_t2, aux_t3, aux_t4, _)``.
        loss_fn: called with the five (output, target) pairs interleaved.
        optimizer: optimizer stepped once per batch.
        lr_scheduler: optional scheduler stepped once per batch.
        mixup_fn: accepted for interface compatibility; not used.
        grad_scaler: optional AMP gradient scaler. When None, training runs
            without autocast and without loss scaling.
        mbar: optional parent progress bar.

    Returns:
        OrderedDict with the sample-weighted average ``"loss"``.
    """
    model.train()

    # The scaler used to be dereferenced unconditionally although it defaults
    # to None; mixed precision is now enabled only when a scaler is supplied.
    use_amp = grad_scaler is not None

    losses_m = utils.AverageMeter()

    pbar = progress_bar(loader, parent=mbar, leave=False)
    pbar.update(0)

    for batch_idx, (input, target,aux_target1,aux_target2,aux_target3,aux_target4,_) in enumerate(loader):
        input, target = input.cuda(), target.cuda()
        aux_target1, aux_target2 = aux_target1.cuda(), aux_target2.cuda()
        aux_target3, aux_target4 = aux_target3.cuda(), aux_target4.cuda()

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            output,aux_output1,aux_output2,aux_output3,aux_output4 = model(input)
            loss = loss_fn(output, target,aux_output1,aux_target1,aux_output2,aux_target2,aux_output3,aux_target3,aux_output4,aux_target4)
        losses_m.update(loss.item(), input.size(0))

        if use_amp:
            grad_scaler.scale(loss).backward()
            grad_scaler.step(optimizer)
            grad_scaler.update()
        else:
            loss.backward()
            optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        pbar.update(batch_idx + 1)
        pbar.comment = f"{losses_m.avg:.4f}"

    pbar.on_iter_end()
    return OrderedDict([("loss", losses_m.avg)])


@torch.inference_mode()
def validate(model: nn.Module, loader: Iterable, loss_fn: Callable, mbar: master_bar):
    """Evaluate the model on a validation loader.

    Args:
        model: network whose first output is the cancer logit.
        loader: yields 7-tuples whose first two items are (input, target).
        loss_fn: called as ``loss_fn(output, target)``.
        mbar: parent progress bar.

    Returns:
        OrderedDict with ``"loss"`` (sample-weighted mean), ``"metric"``
        (probabilistic F1) and ``"metric_thresh"`` (best thresholded F1).
    """
    model.eval()

    losses_m = utils.AverageMeter()

    pbar = progress_bar(loader, parent=mbar, leave=False)
    pbar.update(0)

    all_targets, all_probs = [], []
    for batch_idx, (input, target,_,_,_,_,_) in enumerate(loader):

        input, target = input.cuda(), target.cuda()
        output,_,_,_,_ = model(input)
        losses_m.update(loss_fn(output, target).item(), input.size(0))

        # Keep predictions on the CPU so GPU memory does not grow with the
        # size of the validation set.
        all_probs.append(torch.sigmoid(output).float().cpu())
        all_targets.append(target.float().cpu())
        pbar.update(batch_idx + 1)

    pbar.on_iter_end()

    # F-scores are not additive across batches: averaging per-batch values
    # (the previous behaviour) scores every batch without positives as 0 and
    # depends on the batch size. Compute once over all predictions instead.
    if all_probs:
        probs = torch.cat(all_probs)
        targets = torch.cat(all_targets)
        metric = float(pfbeta(targets, probs))
        best_f1, _ = optimal_f1(targets, probs)
        metric_thresh = float(best_f1)
    else:
        metric, metric_thresh = 0.0, 0.0

    return OrderedDict([("loss", losses_m.avg), ("metric", metric),("metric_thresh", metric_thresh)])


### Run!

In [None]:
def training_loop(fold):
    """Train one cross-validation fold and checkpoint the model after every epoch.

    Rows with ``train.fold != fold`` are used for training, the rest for
    validation. A checkpoint is written after each epoch under
    ``MODEL_FOLDER/fold_<fold>/`` (later cells load the last epoch's file).

    Args:
        fold: index of the validation fold.
    """
    with IPyExperimentsPytorch(exp_enable=False, cl_set_seed=42, cl_compact=True):
        print()
        print("*" * 100)
        print(f"Training fold {fold}")
        print("*" * 100)

        torch.backends.cudnn.benchmark = True

        dataset_train = RsnaDataset(train.query("fold!=@fold").reset_index(drop=True), augs=TRAIN_AUG, mode="train")
        dataset_valid = RsnaDataset(train.query("fold==@fold").reset_index(drop=True), augs=VALID_AUG, mode="valid")

        print(f"TRAIN: {len(dataset_train)} | VALID: {len(dataset_valid)}")

        # The training loader previously had neither shuffle nor a sampler, so
        # every epoch saw the rows in the same CSV order. worker_init_fn
        # matches the loader used in the smoke-test cell.
        loader_train = torch.utils.data.DataLoader(
            dataset_train,
            CFG.BS,
            shuffle=True,
            num_workers=8,
            drop_last=True,
            pin_memory=True,
            worker_init_fn=worker_init_fn,
        )
        loader_valid = torch.utils.data.DataLoader(dataset_valid, CFG.BS * 2, num_workers=8, shuffle=False)

        model = Net()
        model.cuda()
        # CFG.WD is part of the run name (KERNEL_TYPE) but was never handed to
        # the optimizer; pass it so the name reflects the actual setting.
        optimizer = create_optimizer_v2(model, "lookahead_RAdam", lr=CFG.LR, weight_decay=CFG.WD)

        # OneCycleLR is stepped once per batch, hence steps = batches * epochs.
        num_train_steps = len(loader_train) * CFG.EP
        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=CFG.LR, total_steps=num_train_steps,verbose=False)

        train_loss_fn = CustomAuxLoss(alpha=0.8) #nn.BCEWithLogitsLoss()
        valid_loss_fn = nn.BCEWithLogitsLoss()

        grad_scaler = torch.cuda.amp.GradScaler()

        print(f"Scheduled epochs: {CFG.EP}")

        mbar = master_bar(list(range(CFG.EP)))
        # Best = lowest validation loss seen so far.
        best_epoch, best_metric = 0, float("inf")
        metric_names = ["epoch", "train_loss", "valid_loss", "metric","metric_thresh", "time"]
        mbar.write([f"{l:.6f}" if isinstance(l, float) else str(l) for l in metric_names], table=True)

        path = Path(f'{MODEL_FOLDER}/fold_{fold}')
        os.makedirs(path,exist_ok=True)

        for epoch in range(CFG.EP):
            start_time = time.time()
            mbar.update(epoch)

            train_metrics = train_one_epoch(
                model, loader_train, train_loss_fn, optimizer,
                lr_scheduler=lr_scheduler, mixup_fn=None, grad_scaler=grad_scaler, mbar=mbar)

            valid_metrics = validate(model, loader_valid, valid_loss_fn, mbar=mbar)

            elapsed = format_time(time.time() - start_time)
            epoch_log = [epoch,train_metrics["loss"], valid_metrics["loss"], valid_metrics["metric"],
                         valid_metrics["metric_thresh"], elapsed]
            mbar.write([f"{l:.6f}" if isinstance(l, float) else str(l) for l in epoch_log], table=True)

            # Track the genuinely best epoch; the old `if 1:` overwrote it
            # every epoch, so the final report was always the last epoch.
            if valid_metrics["loss"] < best_metric:
                best_epoch, best_metric = epoch, valid_metrics["loss"]

            # Checkpoint every epoch, as before.
            dirpath = path / (KERNEL_TYPE + f"_Epoch_{epoch}_fold_{fold}.pth")
            torch.save(model.state_dict(), dirpath)

        mbar.on_iter_end()
        print("*** Best metric: {0} (epoch {1})".format(best_metric, best_epoch))

In [None]:
if __name__ == "__main__":
    # Iterate over the configured number of folds rather than a hard-coded
    # list, so changing CFG.n_splits cannot leave folds untrained.
    for fold_idx in range(CFG.n_splits):
        training_loop(fold_idx)
        # Release cached GPU memory and Python garbage between folds.
        torch.cuda.empty_cache()
        gc.collect()

In [None]:
# Repeat the GPU cache release and garbage collection five times before inference.
for i in range(5):
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
def gen_oof(fold):
    """Generate out-of-fold cancer probabilities for one fold.

    Loads the checkpoint saved after the last epoch (``CFG.EP - 1``) for
    ``fold`` and predicts on that fold's validation rows.

    Args:
        fold: index of the fold to predict.

    Returns:
        ``(preds, ix)``: a 1-D numpy array of sigmoid probabilities and the
        index of the corresponding rows in ``train``.
    """
    torch.backends.cudnn.benchmark = True

    valid_df = train.query("fold==@fold")
    ix = valid_df.index
    dataset_valid = RsnaDataset(valid_df.reset_index(drop=True), augs=VALID_AUG, mode="valid")
    print(f"VALID: {len(dataset_valid)}")

    loader_valid = torch.utils.data.DataLoader(dataset_valid, CFG.BS * 2, num_workers=8, shuffle=False)
    model = Net()
    # Load onto the CPU first so restoring does not depend on the device the
    # checkpoint was saved from; the model is moved to the GPU afterwards.
    checkpoint = f'{MODEL_FOLDER}/fold_{fold}/{KERNEL_TYPE}_Epoch_{CFG.EP-1}_fold_{fold}.pth'
    model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
    model.eval()
    model.cuda()

    preds = []
    with torch.no_grad():
        for input,label,_,_,_,_,patient_id in tqdm(loader_valid, dynamic_ncols=True, desc="OOF Generation"):
            with torch.cuda.amp.autocast():
                logits = model(input.cuda())[0]
            # Apply the sigmoid in float32 so stored probabilities are not
            # quantised to half precision by autocast.
            preds.append(torch.sigmoid(logits.float()).cpu().numpy())

    # Clean up once at the end; doing it per batch only slowed inference.
    torch.cuda.empty_cache()
    gc.collect()
    return np.concatenate(preds, axis=0),ix

In [None]:
# One OOF probability per training row, filled fold by fold.
oof = np.zeros((len(train)))
# Iterate over the configured number of folds rather than a hard-coded list.
for k in tqdm(range(CFG.n_splits)):
    oof_fold,ix = gen_oof(k)
    print(oof_fold.min(),oof_fold.max())
    # ix holds row labels of `train`; they are used as positions here, which
    # presumes `train` has a default 0..n-1 index - TODO confirm.
    oof[ix] += oof_fold

In [None]:
def optimal_f1_numpy(oof,fold):
    """Find the best F1 and threshold over the OOF predictions of given folds.

    Args:
        oof: 1-D array of predictions aligned row-for-row with ``train``.
        fold: iterable of fold ids to include.

    Returns:
        ``(best_score, best_threshold)`` over 100 thresholds in [0, 1].
    """
    # One positional boolean mask selects both labels and predictions. The old
    # code indexed `oof` with row labels, which is only correct when `train`
    # has a default 0..n-1 index.
    mask = train['fold'].isin(fold).values
    labels = train.loc[mask, 'cancer'].values
    oof = np.asarray(oof)[mask]

    thres = np.linspace(0, 1, 100)
    f1s = [pfbeta(labels, oof > thr,clip=False) for thr in thres]
    idx = np.argmax(f1s)
    return f1s[idx], thres[idx]

In [None]:
# Best F1 and its threshold over the OOF predictions of folds 0-3.
scr, thresh = optimal_f1_numpy(oof,[0,1,2,3])

In [None]:
# Display the resulting score and threshold as the cell output.
scr,thresh

### Fin 