# Introduction 

**This is a basic CNN Model training notebook**

It is based on: 
- Thumbnail images
- Basic data transformation (using Albumentation):
    - resizing images to 512x512
    - normalizing pixel values
- CNN Architecture


**Todos:**

- Learn about Dataset & DataLoader
- add augmentations (albumentation)
- gem pooling

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
#!pip install focal_loss_torch
# from focal_loss.focal_loss import FocalLoss

In [3]:


import os
import gc
import cv2
import datetime
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt
from skimage import io


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.cuda import amp
import torchvision

import optuna
from optuna.trial import TrialState

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [4]:
from cancer_utils_thumbnails import UBCDataset, data_transforms, GeM, EfficientNetB0,EarlyStopping, predict_val_dataset
from cancer_utils_thumbnails import fetch_scheduler, get_optimizer, get_dataloaders, get_class_weights




In [5]:
CONFIG = {
    "is_submission": False,
    "crop_vertical": True,
    "weighted_loss": True,
    "datetime_now": datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 
    "n_fold": 5,
    "fold": 1,
    "seed": 42,
    "img_size": 512,
    'center_crop_size': 1024,
    "model_name": "tf_efficientnet_b0_ns",
    "num_classes": 5,
    "train_batch_size": 16,
    "valid_batch_size": 16,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "num_epochs": 30,
    "early_stopping": True,
    "patience": 7,
    "optimizer": 'Adam',
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 30,
    "momentum": 0.9,
    "weight_decay": 1e-4,
}

## 1. Data Preparation

In [6]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TRAIN_DIR = '/kaggle/input/UBC-OCEAN/train_thumbnails'
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'
ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'
TMA_TRAIN_DIR = '/kaggle/input/UBC-OCEAN/train_images'

def get_train_file_path(df_train_row):
    if df_train_row.is_tma == False:
        return f"{TRAIN_DIR}/{df_train_row.image_id}_thumbnail.png"
    else:
        return f"{TMA_TRAIN_DIR}/{df_train_row.image_id}.png"


def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"

In [7]:
train_images = sorted(glob.glob(f"{TRAIN_DIR}/*.png"))
df_train = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
print(df_train.shape)
df_train['file_path'] = df_train.apply(lambda row: get_train_file_path(row), axis=1)
# only consider WSI / Thumbnail images
#df_train = df_train[ 
#    df_train["file_path"].isin(train_images) ].reset_index(drop=True)
print(df_train.shape)

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])

# save encoder
with open("label_encoder_"+ CONFIG["datetime_now"] +".pkl", "wb") as fp:
    joblib.dump(encoder, fp)
    
# use stratified K Fold for crossvalidation 
skf = StratifiedKFold(n_splits=CONFIG['n_fold'], shuffle=True, random_state=CONFIG["seed"])

for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=df_train.target_label)):
    df_train.loc[val_ , "kfold"] = int(fold)
df_train.head()

(538, 5)
(538, 6)


Unnamed: 0,image_id,label,image_width,image_height,is_tma,file_path,target_label,kfold
0,4,HGSC,23785,20008,False,/kaggle/input/UBC-OCEAN/train_thumbnails/4_thu...,2,3.0
1,66,LGSC,48871,48195,False,/kaggle/input/UBC-OCEAN/train_thumbnails/66_th...,3,2.0
2,91,HGSC,3388,3388,True,/kaggle/input/UBC-OCEAN/train_images/91.png,2,4.0
3,281,LGSC,42309,15545,False,/kaggle/input/UBC-OCEAN/train_thumbnails/281_t...,3,2.0
4,286,EC,37204,30020,False,/kaggle/input/UBC-OCEAN/train_thumbnails/286_t...,1,2.0


In [8]:
df_test = pd.read_csv("/kaggle/input/UBC-OCEAN/test.csv")
df_test['file_path'] = df_test['image_id'].apply(get_test_file_path)
df_test["target_label"] = 0 
df_test

Unnamed: 0,image_id,image_width,image_height,file_path,target_label
0,41,28469,16987,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,0


In [9]:
data_transforms = {
    "train": A.Compose([
        A.RandomResizedCrop(CONFIG['img_size'], CONFIG['img_size'], scale=(0.8, 1.0)),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.125, scale_limit=0.2, rotate_limit=15, p=0.5),
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
        A.CoarseDropout(p=0.2),
        A.Cutout(p=0.2),
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        ToTensorV2()], p=1.)
}



## 2. Model Creation

## 3. Training

In [10]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler=None):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    model.train()
    train_loss = 0.0
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(images)
        
        # crossentropy loss
        loss = criterion(outputs, labels)
        # Focal Loss
        #criterion = FocalLoss(gamma=0.7)
        #m = torch.nn.Softmax(dim=-1)
        #loss = criterion(m(outputs), labels)
        
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        # Update learning rate using the scheduler
        if scheduler:
            scheduler.step()
        
        # Log the training loss to TensorBoard
        writer.add_scalar('loss/train_batch', loss.item(), epoch * len(train_loader) + step)
    
    train_loss /= len(train_loader.dataset)
    # Log the average training loss for the epoch to TensorBoard
    writer.add_scalar('loss/train_epoch', train_loss, epoch)
    # gc.collect()
    return train_loss

def validate_one_epoch(model, valid_loader, criterion, device, writer, epoch):
    model.eval()
    valid_loss = 0.0
    valid_acc = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        bar_val = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, data in bar_val:
            images = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)
            outputs = model(images)
            
            # crossentropy loss
            loss = criterion(outputs, labels)
            # Focal Loss
            #criterion = FocalLoss(gamma=0.7)
            #m = torch.nn.Softmax(dim=-1)
            #loss = criterion(m(outputs), labels)
        
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(model.softmax(outputs), 1)
            acc = torch.sum( predicted == labels )
            valid_acc  += acc.item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
            writer.add_scalar('loss/valid_batch', loss.item(), epoch * len(valid_loader) + step)
            writer.add_scalar('acc/valid_batch', acc.item(), epoch * len(valid_loader) + step)
    valid_loss /= len(valid_loader.dataset)
    valid_acc /= len(valid_loader.dataset)
    bal_acc = balanced_accuracy_score(all_labels, all_preds)
    # At the end of your validation loop:
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Logging to TensorBoard
    writer.add_scalar('loss/val_epoch', valid_loss, epoch)
    writer.add_scalar('acc/val_epoch', valid_acc, epoch)
    writer.add_scalar('balanced_acc/val_epoch', bal_acc, epoch)
    writer.add_scalar('F1/macro', macro_f1, epoch)
    writer.add_scalar('F1/micro', micro_f1, epoch)
    writer.add_scalar('F1/weighted', weighted_f1, epoch)
    # in order to put multiple lines within one graph
    #writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
    #                        'xcosx':i*np.cos(i/r),
    #                        'tanx': np.tan(i/r)}, i)
    return valid_loss, valid_acc, bal_acc, weighted_f1

def train_model(model, train_loader, valid_loader, optimizer, criterion, device, num_epochs, scheduler, fold):
    model_name = "epochs" + str(CONFIG["num_epochs"]) + "_bs"+str(CONFIG["train_batch_size"] )+ "_opt" +CONFIG["optimizer"]+ "_sched" + CONFIG["scheduler"] + "_lr"+str(CONFIG["learning_rate"])+ "_wd" + str(CONFIG["weight_decay"]) + "_fold" + str(fold)
    print(f"Training model: {model_name}")
    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/fit/' + model_name)
    early_stopping = EarlyStopping(patience=CONFIG["patience"], verbose=True, path=model_name)
    #if scheduler_type:
    #    print(f"Define {scheduler_type} scheduler")
    #    scheduler = get_lr_scheduler(optimizer, scheduler_type, num_epochs=num_epochs)
    
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler)
        valid_loss, valid_acc, bal_acc, weighted_f1 = validate_one_epoch(model, valid_loader, criterion, device, writer, epoch)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f}, Validation loss: {valid_loss:.4f}, Validation acc: {valid_acc:.4f}, Balanced acc: {bal_acc:.4f}, Weighted F1-Score: {weighted_f1:.4f}")
        
        # Call early stopping
        if CONFIG["early_stopping"]:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            
    writer.close()
    return train_loss, valid_loss, valid_acc, model_name
    # Load the last checkpoint with the best model
    #model.load_state_dict(torch.load('best_model_checkpoint.pth'))

### Training N-Fold Models

In [11]:
if CONFIG["weighted_loss"]:
    class_weights = get_class_weights(df_train).to(CONFIG['device'], dtype=torch.float)
    print(f"Class weights: {class_weights}")
else:
    class_weights=None
criterion = nn.CrossEntropyLoss(weight=class_weights)

Class weights: tensor([0.1538, 0.1228, 0.0686, 0.3239, 0.3310])


In [12]:
for fold in range(0,5):
    print(f"Fold: {str(fold)}")
    train_loader, valid_loader, df_train_fold, df_valid_fold = get_dataloaders(df_train.copy(), fold, CONFIG)

    checkpoint_path='//kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth'
    model = EfficientNetB0(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=checkpoint_path)
    model.to(CONFIG['device']);
    
    optimizer = get_optimizer(CONFIG["optimizer"], model, CONFIG)
    scheduler = fetch_scheduler(optimizer, CONFIG)
    
    _, _, _, model_name = train_model(model, train_loader, valid_loader, optimizer, criterion, CONFIG["device"], 1, scheduler, fold)
    model.load_state_dict(torch.load(model_name))
    
    df_validate = predict_val_dataset(model, CONFIG, df_valid_fold, encoder, TRAIN_DIR, val_size=1)
    df_validate

Fold: 0


  model = create_fn(


Training model: epochs30_bs16_optAdam_schedCosineAnnealingLR_lr0.0001_wd0.0001_fold0


100%|██████████| 27/27 [06:51<00:00, 15.22s/it]
100%|██████████| 7/7 [00:43<00:00,  6.24s/it]

Epoch 1/1 - Train loss: 1.6505, Validation loss: 1.6068, Validation acc: 0.2870, Balanced acc: 0.2987, Weighted F1-Score: 0.3159
Validation loss decreased (inf --> 1.606819). Saving model ...



100%|██████████| 7/7 [00:38<00:00,  5.47s/it]
  model = create_fn(


Validation Accuracy: 0.28703703703703703
Balanced Accuracy: 0.29866666666666664
Macro F1-Score: 0.25579710144927537
Micro F1-Score: 0.28703703703703703
Weighted F1-Score: 0.31592190016103067
Confusion Matrix: [[ 4  2  2  4  8]
 [ 7  4  5  2  7]
 [11  0 16  6 12]
 [ 0  1  1  2  5]
 [ 4  0  0  0  5]]
Fold: 1
Training model: epochs30_bs16_optAdam_schedCosineAnnealingLR_lr0.0001_wd0.0001_fold1


100%|██████████| 27/27 [06:58<00:00, 15.49s/it]
100%|██████████| 7/7 [00:36<00:00,  5.23s/it]

Epoch 1/1 - Train loss: 1.7187, Validation loss: 1.5000, Validation acc: 0.3704, Balanced acc: 0.3460, Weighted F1-Score: 0.3494
Validation loss decreased (inf --> 1.499975). Saving model ...



100%|██████████| 7/7 [00:43<00:00,  6.18s/it]
  model = create_fn(


Validation Accuracy: 0.37037037037037035
Balanced Accuracy: 0.346
Macro F1-Score: 0.31023366749667536
Micro F1-Score: 0.37037037037037035
Weighted F1-Score: 0.3493638028681982
Confusion Matrix: [[13  0  3  2  2]
 [ 9  2 13  1  0]
 [10  5 20  8  2]
 [ 4  0  2  3  0]
 [ 5  1  1  0  2]]
Fold: 2
Training model: epochs30_bs16_optAdam_schedCosineAnnealingLR_lr0.0001_wd0.0001_fold2


100%|██████████| 27/27 [06:51<00:00, 15.23s/it]
100%|██████████| 7/7 [00:37<00:00,  5.42s/it]

Epoch 1/1 - Train loss: 1.6607, Validation loss: 1.5732, Validation acc: 0.2315, Balanced acc: 0.2561, Weighted F1-Score: 0.2146
Validation loss decreased (inf --> 1.573236). Saving model ...



100%|██████████| 7/7 [00:35<00:00,  5.09s/it]
  model = create_fn(


Validation Accuracy: 0.23148148148148148
Balanced Accuracy: 0.2561052631578947
Macro F1-Score: 0.19483868594342052
Micro F1-Score: 0.23148148148148148
Weighted F1-Score: 0.21463050163432756
Confusion Matrix: [[ 4  4  8  0  3]
 [ 3  3 11  1  7]
 [ 6 16 11  0 11]
 [ 2  3  2  0  3]
 [ 0  1  2  0  7]]
Fold: 3
Training model: epochs30_bs16_optAdam_schedCosineAnnealingLR_lr0.0001_wd0.0001_fold3


100%|██████████| 27/27 [07:17<00:00, 16.20s/it]
100%|██████████| 7/7 [00:43<00:00,  6.23s/it]

Epoch 1/1 - Train loss: 1.6781, Validation loss: 1.5936, Validation acc: 0.2897, Balanced acc: 0.2688, Weighted F1-Score: 0.3070
Validation loss decreased (inf --> 1.593593). Saving model ...



100%|██████████| 7/7 [00:40<00:00,  5.72s/it]
  model = create_fn(


Validation Accuracy: 0.2897196261682243
Balanced Accuracy: 0.2688383838383839
Macro F1-Score: 0.26290406868234417
Micro F1-Score: 0.2897196261682243
Weighted F1-Score: 0.3069788210833307
Confusion Matrix: [[ 5  5  5  4  1]
 [ 1  5  8  6  4]
 [ 4  9 16 13  2]
 [ 0  1  5  3  1]
 [ 0  3  2  2  2]]
Fold: 4
Training model: epochs30_bs16_optAdam_schedCosineAnnealingLR_lr0.0001_wd0.0001_fold4


100%|██████████| 27/27 [08:18<00:00, 18.45s/it]
100%|██████████| 7/7 [00:46<00:00,  6.60s/it]

Epoch 1/1 - Train loss: 1.6434, Validation loss: 1.5115, Validation acc: 0.4766, Balanced acc: 0.3914, Weighted F1-Score: 0.4655
Validation loss decreased (inf --> 1.511496). Saving model ...



100%|██████████| 7/7 [00:42<00:00,  6.06s/it]

Validation Accuracy: 0.4766355140186916
Balanced Accuracy: 0.39135353535353534
Macro F1-Score: 0.37112750263435196
Micro F1-Score: 0.4766355140186916
Weighted F1-Score: 0.46551265694472216
Confusion Matrix: [[12  4  4  0  0]
 [ 6 17  2  0  0]
 [12  8 20  2  2]
 [ 5  3  1  0  0]
 [ 3  1  2  1  2]]





In [13]:
# model.load_state_dict(torch.load('/kaggle/working/best_model_checkpoint' + CONFIG["datetime_now"] + '.pth'))
#df_validate = predict_val_dataset(model, CONFIG, df_valid_fold, TRAIN_DIR, val_size=1)
#df_validate

### Optuna study

In [14]:
# Create a study.
"""
pruner = optuna.pruners.MedianPruner(
    n_startup_trials=3,  # Number of trials to run before starting to prune
    n_warmup_steps=100,   # Pruning is disabled for the first 30 steps
    interval_steps=5    # Check for pruning every 10 steps
)
optuna.logging.set_verbosity(optuna.logging.INFO)
study = optuna.create_study(study_name="tutorial", direction="minimize")
study.optimize(objective, n_trials=20)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

optuna.visualization.plot_param_importances(study)
optuna.visualization.plot_optimization_history(study)
"""

'\npruner = optuna.pruners.MedianPruner(\n    n_startup_trials=3,  # Number of trials to run before starting to prune\n    n_warmup_steps=100,   # Pruning is disabled for the first 30 steps\n    interval_steps=5    # Check for pruning every 10 steps\n)\noptuna.logging.set_verbosity(optuna.logging.INFO)\nstudy = optuna.create_study(study_name="tutorial", direction="minimize")\nstudy.optimize(objective, n_trials=20)\n\npruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])\ncomplete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n\nprint("Study statistics: ")\nprint("  Number of finished trials: ", len(study.trials))\nprint("  Number of pruned trials: ", len(pruned_trials))\nprint("  Number of complete trials: ", len(complete_trials))\n\nprint("Best trial:")\ntrial = study.best_trial\n\nprint("  Value: ", trial.value)\n\nprint("  Params: ")\nfor key, value in trial.params.items():\n    print("    {}: {}".format(key, value))\n\noptuna.visuali