# Introduction 

**This Notebook is used to train a CNN network on Cancer Subtype Classification**

Model: 
- Efficientnet

Data: 
- Tiled Dataset 
- Only Cancerous Tiles 
    - Based on Binary Classifier (cancer vs. non-cancer)
    - Remove all non-cancer tiles
- WSI & TMA

**Todos:**
- Scaling? 
    - TMA vs WSI 


In [1]:
# !pip install --quiet torch_optimizer
# import torch_optimizer as torch_optimizer


In [2]:
!pip install --quiet mlflow dagshub
import dagshub
import mlflow.pytorch 
from mlflow import MlflowClient


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.3.1 requires dacite>=1.8, but you have dacite 1.6.0 which is incompatible.
ydata-profiling 4.3.1 requires scipy<1.11,>=1.4.1, but you have scipy 1.11.2 which is incompatible.[0m[31m
[0m



In [3]:
import os
import gc
import cv2
import datetime
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt
from skimage import io


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.cuda import amp
import torchvision

import optuna
from optuna.trial import TrialState

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


from PIL import Image
from joblib import Parallel, delayed
from tqdm.auto import tqdm

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score
from torch.utils.data.sampler import WeightedRandomSampler

# For Image Models
import timm

from getpass import getpass

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [4]:
from ubc_utils_models import UBCModel, get_optimizer, fetch_scheduler, EarlyStopping
from ubc_utils_infrastructure import get_train_file_path, get_test_file_path, print_logged_info, get_or_create_experiment_id, create_img_dataframe_from_directory, create_img_dataframe_from_folder
from ubc_utils_datasets import get_class_weights

In [5]:
os.environ["MLFLOW_TRACKING_USERNAME"]="Niggl0n"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "7a3590e8c5558d4598dacc7810befa70a4baac9e"
os.environ['MLFLOW_TRACKING_PROJECTNAME'] = "UBC_Cancer_Classification"
#dagshub.auth.add_app_token("7a3590e8c5558d4598dacc7810befa70a4baac9e")
mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME'] + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')

In [6]:
mlflow_experiment_id = get_or_create_experiment_id("UBC_Subtype_Classification_Cancer_Tiles")
mlflow_experiment_id


'7'

In [7]:
CONFIG = {
    "weighted_loss": True,
    "datetime_now": datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 
    "n_fold":5, 
    "test_fold": 0,
    "seed": 42,
    "img_size": 512,
    "model_name": "tf_efficientnet_b0_ns",   # "tf_efficientnet_b0_ns", # "tf_efficientnetv2_s_in21ft1k"
    "checkpoint_path": "/kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth",
    "num_classes": 5,
    "train_batch_size": 16,
    "valid_batch_size": 16,
    "n_tiles": 10,
    "n_tiles_test": 10,
    "top_n_tiles": 30,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "num_epochs": 15,
    "early_stopping": True,
    "patience": 6,
    "optimizer": 'adam',
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 10,
    "momentum": 0.9,
    "weight_decay": 1e-4,
}

## 1. Data Preparation

# Todos: 
- tma image detection during data set creation
- 

In [8]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TRAIN_DIR = '/kaggle/input/tiles-of-cancer-2048px-scale-0-25/'
TMA_DIR = "/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4096px_scale0125_v3"


In [9]:
df_orig = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
df_orig = df_orig.rename(columns={"label":"subtype"})
df_cancer_tiles = pd.read_csv("/kaggle/input/ubc-label-tiles-full-dataset/tiles_025x_full_pseudo_labels.csv", index_col="Unnamed: 0")
display(df_orig.sample(5))
display(df_cancer_tiles.sample(5))

Unnamed: 0,image_id,subtype,image_width,image_height,is_tma
162,18981,HGSC,29086,35820,False
259,31300,LGSC,22848,19232,False
297,36063,CC,83724,35889,False
49,5307,HGSC,71975,27683,False
44,5015,HGSC,48645,44254,False


Unnamed: 0,image_path,prediction,probability
73875,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,0.001221
106881,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,0.378849
104805,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,0.867636
55806,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,0.232869
92128,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,0.341924


In [10]:
df_cancer_tiles["image_id"] = df_cancer_tiles["image_path"].map(lambda x: int(x.split('/')[-2]))
df_cancer_tiles = pd.merge(df_cancer_tiles, df_orig, on="image_id", how="left")
df_cancer_tiles.shape, df_cancer_tiles.image_id.nunique()

((123714, 8), 538)

In [11]:
df_cancer_tiles["is_tma"] = df_cancer_tiles["is_tma"].astype(bool)
df_cancer_tiles = df_cancer_tiles[df_cancer_tiles["is_tma"]==False]
df_all_tiles = df_cancer_tiles.copy()
print(df_cancer_tiles.image_id.nunique())
df_cancer_tiles = df_cancer_tiles[df_cancer_tiles["probability"]<0.5]
print(df_cancer_tiles.image_id.nunique())

513
513


In [12]:
N = CONFIG["top_n_tiles"]
result_df = df_cancer_tiles.groupby('image_id').apply(lambda x: x.nsmallest(N, 'probability')).reset_index(drop=True)
result_df.image_id.value_counts()[:2]

image_id
4        30
40129    30
Name: count, dtype: int64

In [13]:
result_df.image_id.value_counts()[-30:]

image_id
21910    30
21445    30
20205    30
21432    30
21373    30
65533    30
21260    30
20882    30
20312    30
20316    30
20329    30
20463    30
20858    30
54825    29
9697     27
21232    27
14312    26
281      26
34508    26
13364    26
29888    24
20670    24
32192    22
63836    21
54506    20
63298    18
47984    11
57100     8
9154      7
51215     5
Name: count, dtype: int64

In [14]:
result_df.shape, df_cancer_tiles.shape

((15227, 8), (87506, 8))

In [15]:
result_df.head(2)

Unnamed: 0,image_path,prediction,probability,image_id,subtype,image_width,image_height,is_tma
0,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,1e-05,4,HGSC,23785,20008,False
1,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,1.6e-05,4,HGSC,23785,20008,False


In [16]:
df_tma = create_img_dataframe_from_directory(TMA_DIR)
df_tma["image_id"] = df_tma["image_id"].astype(int)
df_tma = pd.merge(df_tma, df_orig, on="image_id", how="left")
df_tma.head(2)

Unnamed: 0,image_path,image_id,subtype,image_width,image_height,is_tma
0,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,29084,LGSC,3388,3388,True
1,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,17637,HGSC,2964,2964,True


In [17]:
df_masks = create_img_dataframe_from_folder("/kaggle/input/ubc-ovarian-cancer-competition-supplemental-masks/")
df_masks.head(2)

Unnamed: 0,image_path,image_id
0,/kaggle/input/ubc-ovarian-cancer-competition-s...,39146
1,/kaggle/input/ubc-ovarian-cancer-competition-s...,19030


In [18]:
cols = ['image_path', 'image_id', 'subtype', 'image_width', 'image_height', 'is_tma']
df_train = pd.concat([df_tma[cols], result_df[cols]], axis=0, ignore_index=True)
df_train = df_train.rename(columns={"subtype":"label"})
print(df_train.shape, df_train.image_id.nunique())

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])

# save encoder
with open("label_encoder.pkl", "wb") as fp:
    joblib.dump(encoder, fp)
    
# use stratified K Fold for crossvalidation 
skf = StratifiedKFold(n_splits=CONFIG['n_fold'], shuffle=True, random_state=CONFIG["seed"])

for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=df_train.target_label)):
    df_train.loc[val_ , "kfold"] = int(fold)
display(df_train.head())

# assert that images for which we have masks are not part of test set (avoid information leakage)
df_train.loc[df_train["image_id"].isin(df_masks["image_id"]), "kfold"] = CONFIG["n_fold"] + 1
display(df_train["kfold"].value_counts())
# separate train and test dataset
df_test = df_train[df_train["kfold"]==CONFIG["test_fold"]].reset_index(drop=True)
df_train = df_train[df_train["kfold"]!=CONFIG["test_fold"]].reset_index(drop=True)
print(f"Shape df_train: {df_train.shape}, Shape df_test: {df_test.shape} ")
display(df_train.label.value_counts())

(15252, 6) 538


Unnamed: 0,image_path,image_id,label,image_width,image_height,is_tma,target_label,kfold
0,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,29084,LGSC,3388,3388,True,3,0.0
1,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,17637,HGSC,2964,2964,True,2,4.0
2,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,48734,EC,3388,3388,True,1,0.0
3,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,36302,CC,3388,3388,True,0,0.0
4,/kaggle/input/ubc-tma-0125x-v3/UBC_TMA_tiles_4...,8280,HGSC,2964,2964,True,2,2.0


kfold
6.0    4557
2.0    2165
1.0    2152
3.0    2136
4.0    2126
0.0    2116
Name: count, dtype: int64

Shape df_train: (13136, 8), Shape df_test: (2116, 8) 


label
HGSC    5462
EC      3061
CC      2471
LGSC    1086
MC      1056
Name: count, dtype: int64

In [19]:
class CancerTilesDataset(Dataset):
    """
    @staticmethod
    def get_img_dir(data_row):
        # based on if is_tma or not we select the respective image path
        if data_row.is_tma == True:
            return glob.glob(os.path.join("/kaggle/input/ubc-tma-tiles-05-512/UBC_TMA_tiles_1024px_scale05_v7", str(data_row.image_id), "*.png"))
        else:
            return glob.glob(os.path.join("/kaggle/input/tiles-of-cancer-2048px-scale-0-25", str(data_row.image_id), "*.png")) 
    """
    
    @staticmethod
    def get_n_random_samples(df_data, N):
        sampled_df = df_data.groupby('image_id').apply(lambda x: x.sample(min(len(x), N), replace=False)).reset_index(drop=True)    
        return sampled_df

    def __init__(
        self,
        df_data,
        path_img_dir: str =  '',
        transforms = None,
        mode: str = 'train',
        labels_lut = None,
        #white_thr: int = 225,
        thr_max_bg: float = 0.2,  # depricated
        train_val_split: float = 0.90,
        n_tiles: int = 1,
        tma_weight: float = 1.0,
    ):
        assert os.path.isdir(path_img_dir)
        #self.path_img_dir = path_img_dir
        self.transforms = transforms
        self.mode = mode
        #self.white_thr = white_thr
        #self.thr_max_bg = thr_max_bg
        self.train_val_split = train_val_split
        self.n_tiles = n_tiles
        self.tma_weight = tma_weight
        
        self.n_tiles_per_image_id = n_tiles
        self.data = df_data
        self.labels_unique = sorted(self.data["label"].unique())
        self.labels_lut = labels_lut or {lb: i for i, lb in enumerate(self.labels_unique)}

        self.data.is_tma = self.data.is_tma.astype(bool)
        self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)

        # split dataset
        assert 0.0 <= self.train_val_split <= 1.0
        frac = int(self.train_val_split * len(self.data))
        self.data = self.data[:frac] if mode in ["train", "test"] else self.data[frac:]
        self.labels =  np.array(self.data.target_label.values.tolist())
        self.img_paths =  self.data.image_path.values.tolist()

        # set higher sample weights for tma samples
        # self.sample_weights =  np.array([self.tma_weight if is_tma == True else 1 for is_tma in self.data["is_tma"]]) 

        
    def __getitem__(self, idx: int) -> tuple:
        """
        nth_iteration = idx//len(self.data)
        if self.mode=="train":
            random.seed()
        else:
            random.seed(CONFIG["seed"]+nth_iteration)
        random.shuffle(self.img_dirs[idx])
        """
        img_path = self.img_paths[idx]
        assert os.path.isfile(img_path), f"missing: {img_path}"
        tile = cv2.imread(img_path)
        tile = cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)

        if self.transforms:
            tile = self.transforms(image=tile)["image"]
        return {
            "image": tile,
            "label": torch.tensor(self.labels[idx], dtype=torch.long),
            "image_path": img_path,
               }
    
    def __len__(self) -> int:
        return len(self.img_paths)
    
    def get_sample_weights(self):
        return torch.from_numpy(self.sample_weights).double()
    
    def on_epoch_start(self):
        self.data = self.get_n_random_samples(self.data, self.n_tiles_per_image_id)
        self.labels =  np.array(self.data.target_label.values.tolist())
        self.img_paths =  self.data.image_path.values.tolist()
        # set higher sample weights for tma samples
        self.sample_weights =  np.array([self.tma_weight if is_tma == True else 1 for is_tma in self.data["is_tma"]]) 
        


In [20]:
img_color_mean=[0.8661704276539922, 0.7663107094675368, 0.8574260897185548]
img_color_std=[0.08670629753900036, 0.11646580094195522, 0.07164169171856792]

data_transforms = {
    "train": A.Compose([
        A.Resize(512, 512),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf([
        A.GaussNoise(var_limit=[10, 50]),
        A.GaussianBlur(),
        A.MotionBlur(),
        ], p=0.4),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(max_holes=5, max_width=int(512* 0.1), max_height=int(512* 0.1),
        mask_fill_value=0, p=0.5),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.)
}



df_cancer_tiles## 3. Training

In [21]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler=None):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    model.train()
    train_loss = 0.0
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        writer.add_scalar('loss/train_batch', loss.item(), epoch * len(train_loader) + step)
    
    if scheduler:
        scheduler.step()
    train_loss /= len(train_loader.dataset)
    writer.add_scalar('loss/train_epoch', train_loss, epoch)
    return train_loss

def validate_one_epoch(model, valid_loader, criterion, device, writer, epoch):
    model.eval()
    valid_loss = 0.0
    valid_acc = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        bar_val = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, data in bar_val:
            images = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)
            outputs = model(images)
            loss = criterion(outputs, labels)
        
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(model.softmax(outputs), 1)
            acc = torch.sum( predicted == labels )
            valid_acc  += acc.item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
            writer.add_scalar('loss/valid_batch', loss.item(), epoch * len(valid_loader) + step)
            writer.add_scalar('acc/valid_batch', acc.item(), epoch * len(valid_loader) + step)
    valid_loss /= len(valid_loader.dataset)
    valid_acc /= len(valid_loader.dataset)
    bal_acc = balanced_accuracy_score(all_labels, all_preds)
    # At the end of your validation loop:
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Logging to TensorBoard
    writer.add_scalar('loss/val_epoch', valid_loss, epoch)
    writer.add_scalar('acc/val_epoch', valid_acc, epoch)
    writer.add_scalar('balanced_acc/val_epoch', bal_acc, epoch)
    writer.add_scalar('F1/macro', macro_f1, epoch)
    writer.add_scalar('F1/micro', micro_f1, epoch)
    writer.add_scalar('F1/weighted', weighted_f1, epoch)
    return valid_loss, valid_acc, bal_acc, weighted_f1

def train_model(model, train_dataset, valid_dataset, optimizer, criterion, device, num_epochs, scheduler, save_model_path=None):
    model_name = "model_epochs" + str(CONFIG["num_epochs"]) + "_bs"+str(CONFIG["train_batch_size"] )+ "_opt" +CONFIG["optimizer"]+ "_sched" + CONFIG["scheduler"] + "_lr"+str(CONFIG["learning_rate"])+ "_wd" + str(CONFIG["weight_decay"])
    print(f"Training model: {model_name}")
    datetime_now =  datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not save_model_path:
        save_model_path = 'best_model_checkpoint' + datetime_now + '.pth'
    print(f"Path for saving model: {save_model_path}")

    writer = SummaryWriter('logs/fit/' + model_name)
    early_stopping = EarlyStopping(patience=CONFIG["patience"], verbose=True, path=save_model_path)
    
    for epoch in range(num_epochs):
        # resample from images
        train_dataset.on_epoch_start()
        sample_weights = train_dataset.get_sample_weights()
        sampler = WeightedRandomSampler(sample_weights, len(sample_weights))
        train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], num_workers=2, sampler=sampler, shuffle=False, pin_memory=True)
    
        # resample from images
        valid_dataset.on_epoch_start()
        sample_weights = valid_dataset.get_sample_weights()
        sampler = WeightedRandomSampler(sample_weights, len(sample_weights))
        valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['train_batch_size'], num_workers=2, sampler=sampler, shuffle=False, pin_memory=True)
        
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler)
        valid_loss, valid_acc, bal_acc, weighted_f1 = validate_one_epoch(model, valid_loader, criterion, device, writer, epoch)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f}, Validation loss: {valid_loss:.4f}, Validation acc: {valid_acc:.4f}, Balanced acc: {bal_acc:.4f}, Weighted F1-Score: {weighted_f1:.4f}")
        # Call early stopping
        if CONFIG["early_stopping"]:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break

    writer.close()
    mlflow.log_metrics({
        'epoch': epoch,
        'train_loss': train_loss,
        'valid_loss': valid_loss,
        'valid_acc': valid_acc,
        'balanced_acc': bal_acc,
        'weighted_f1': weighted_f1
    }, step=epoch)      
    return train_loss, valid_loss, valid_acc, save_model_path



In [22]:
def test_on_holdout(model, CONFIG, df_test, TRAIN_DIR=None, val_size=1.0, n_tiles=1):

    model.eval()
    test_dataset = CancerTilesDataset(df_test, TRAIN_DIR, transforms=data_transforms["valid"], mode="test", train_val_split=1.0, n_tiles=n_tiles)
    test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    print(f"Test-Dataset Size: {len(test_dataset)}")

    preds_list = []
    labels_list = []
    image_path_list = []

    test_acc = 0.0

    with torch.no_grad():
        bar = tqdm(enumerate(test_loader), total=len(test_loader))
        for step, data in bar: 
            # print(step)
            images = data['image'].to(CONFIG["device"], dtype=torch.float)
            labels = data['label'].to(CONFIG["device"], dtype=torch.long)
            img_paths = data['image_path']
            image_path_list.extend(img_paths)

            batch_size = images.size(0)
            outputs = model(images)
            _, predicted = torch.max(model.softmax(outputs), 1)
            preds_list.append(predicted.detach().cpu().numpy() )
            labels_list.append(labels.detach().cpu().numpy() )
            acc = torch.sum(predicted == labels )
            test_acc  += acc.item()
    labels_list = np.concatenate(labels_list).flatten()
    preds_list = np.concatenate(preds_list).flatten()
    pred_labels = encoder.inverse_transform( preds_list )
    df_result = pd.DataFrame({
        "image_path": image_path_list,
        "label":labels_list,
        "predicted_label":pred_labels,
        "prediction": preds_list,
    })
    
    # calculate & log metrics
    test_acc /= len(test_loader.dataset)
    bal_acc = balanced_accuracy_score(labels_list, preds_list)
    conf_matrix = confusion_matrix(labels_list, preds_list)
    macro_f1 = f1_score(labels_list, preds_list, average='macro')
    print(f"Test Accuracy: {test_acc}")
    print(f"Balanced Accuracy: {bal_acc}")
    print(f"Confusion Matrix: {conf_matrix}")
    mlflow.log_metrics({
        'test_acc': test_acc,
        'test_balanced_acc': bal_acc,
        'test_f1_score': macro_f1,
    })
    return df_result

In [23]:

if CONFIG["weighted_loss"]:
    class_weights = get_class_weights(df_train).to(CONFIG['device'], dtype=torch.float)
    print(f"Class weights: {class_weights}")
else:
    class_weights=None
criterion = nn.CrossEntropyLoss(weight=class_weights)

Class weights: tensor([0.1455, 0.1174, 0.0658, 0.3310, 0.3404], device='cuda:0')


In [24]:
from ubc_utils_datasets import get_dataloaders
def get_dataloaders(df, TRAIN_DIR, CONFIG, data_transforms, n_tiles=1, train_val_split=0.9,thr_max_bg=0.1, apply_sampler=True, tma_weight=1, sample_fac=1):
    # df_train = df[df["kfold"]!=fold].reset_index(drop=True)
    train_dataset = CancerTilesDataset(df, TRAIN_DIR, transforms=data_transforms["train"], mode="train", n_tiles=n_tiles, train_val_split=train_val_split, thr_max_bg=thr_max_bg, tma_weight=tma_weight)
    if apply_sampler:
        samples_weights = train_dataset.get_sample_weights()
        train_sampler = WeightedRandomSampler(samples_weights, len(samples_weights)*sample_fac)
    else:
        train_sampler = None
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], num_workers=2, sampler=train_sampler, shuffle=False, pin_memory=True)
    
    valid_dataset = CancerTilesDataset(df, TRAIN_DIR, transforms=data_transforms["valid"], mode="valid", n_tiles=n_tiles, train_val_split=train_val_split, thr_max_bg=thr_max_bg, tma_weight=tma_weight)
    if apply_sampler:
        samples_weights = valid_dataset.get_sample_weights()
        valid_sampler = WeightedRandomSampler(samples_weights, len(samples_weights)*sample_fac)
    else:
        valid_sampler=None
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], num_workers=2, sampler=valid_sampler, shuffle=False, pin_memory=True)
    print(f"Len Train Dataset: {len(train_dataset)}, Len Validation Dataset: {len(valid_dataset)}" )
    return train_loader, valid_loader, df

In [25]:
CONFIG["num_epochs"] = 20
CONFIG["learning_rate"] = 1e-5
CONFIG["n_tiles"] = 10
#CONFIG["n_tiles_test"] = 20
CONFIG["tma_weight"] = 1


# df_train__ = df_train.head(10)

print(f"Shape df_train: {df_train.shape}, Shape df_test: {df_test.shape}")
with mlflow.start_run(experiment_id=mlflow_experiment_id) as run:
    # train_loader, valid_loader, df_train_fold = get_dataloaders(df_train.copy(), TRAIN_DIR, CONFIG, data_transforms, tma_weight=CONFIG["tma_weight"])
    train_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["train"], mode="train", n_tiles=CONFIG["n_tiles"], train_val_split=0.9, tma_weight=CONFIG["tma_weight"])
    valid_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["valid"], mode="valid", n_tiles=CONFIG["n_tiles"], train_val_split=0.9, tma_weight=CONFIG["tma_weight"])

    model = UBCModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=CONFIG["checkpoint_path"])
    # model.load_state_dict(torch.load(CONFIG["checkpoint_path"]))
    model.to(CONFIG['device']);

    optimizer = get_optimizer(CONFIG["optimizer"], model, CONFIG)
    scheduler = fetch_scheduler(optimizer, CONFIG)
    _, _, _, save_model_path = train_model(model, train_dataset, valid_dataset, optimizer, criterion, CONFIG["device"], CONFIG["num_epochs"], scheduler)
    model.load_state_dict(torch.load(save_model_path))
    
    print("Validate on Holdout Set:")
    df_test_result = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])
    df_test_file_path = "df_test_results.csv"
    df_test_result.to_csv(df_test_file_path, index=False)
    mlflow.log_params(CONFIG)
    mlflow.pytorch.log_model(model, "model")
    mlflow.log_params({"model_path": save_model_path})
    mlflow.log_artifact(df_test_file_path)
    print_logged_info(mlflow.get_run(run_id=run.info.run_id))



Shape df_train: (13136, 8), Shape df_test: (2116, 8)
Training model: model_epochs20_bs16_optadam_schedCosineAnnealingLR_lr0.0001_wd1e-05
Path for saving model: best_model_checkpoint2023-12-15_09-38-24.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 1/20 - Train loss: 1.0857, Validation loss: 0.6833, Validation acc: 0.7367, Balanced acc: 0.7461, Weighted F1-Score: 0.7350
Validation loss decreased (inf --> 0.683346). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 2/20 - Train loss: 0.7113, Validation loss: 0.5818, Validation acc: 0.7595, Balanced acc: 0.7977, Weighted F1-Score: 0.7611
Validation loss decreased (0.683346 --> 0.581750). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 3/20 - Train loss: 0.6067, Validation loss: 0.4842, Validation acc: 0.7527, Balanced acc: 0.8221, Weighted F1-Score: 0.7512
Validation loss decreased (0.581750 --> 0.484203). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 4/20 - Train loss: 0.4982, Validation loss: 0.4710, Validation acc: 0.7907, Balanced acc: 0.8187, Weighted F1-Score: 0.7908
Validation loss decreased (0.484203 --> 0.471032). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 5/20 - Train loss: 0.4742, Validation loss: 0.4343, Validation acc: 0.7823, Balanced acc: 0.8491, Weighted F1-Score: 0.7826
Validation loss decreased (0.471032 --> 0.434306). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 6/20 - Train loss: 0.4062, Validation loss: 0.3564, Validation acc: 0.8212, Balanced acc: 0.8724, Weighted F1-Score: 0.8197
Validation loss decreased (0.434306 --> 0.356397). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 7/20 - Train loss: 0.4000, Validation loss: 0.3660, Validation acc: 0.8341, Balanced acc: 0.8710, Weighted F1-Score: 0.8331
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 8/20 - Train loss: 0.3672, Validation loss: 0.3229, Validation acc: 0.8470, Balanced acc: 0.8814, Weighted F1-Score: 0.8482
Validation loss decreased (0.356397 --> 0.322927). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 9/20 - Train loss: 0.3501, Validation loss: 0.3357, Validation acc: 0.8432, Balanced acc: 0.8721, Weighted F1-Score: 0.8435
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 10/20 - Train loss: 0.3346, Validation loss: 0.3204, Validation acc: 0.8417, Balanced acc: 0.8903, Weighted F1-Score: 0.8414
Validation loss decreased (0.322927 --> 0.320420). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 11/20 - Train loss: 0.3487, Validation loss: 0.3799, Validation acc: 0.8303, Balanced acc: 0.8627, Weighted F1-Score: 0.8292
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 12/20 - Train loss: 0.3454, Validation loss: 0.3634, Validation acc: 0.8409, Balanced acc: 0.8774, Weighted F1-Score: 0.8405
EarlyStopping counter: 2 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 13/20 - Train loss: 0.3482, Validation loss: 0.3151, Validation acc: 0.8562, Balanced acc: 0.8957, Weighted F1-Score: 0.8568
Validation loss decreased (0.320420 --> 0.315100). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 14/20 - Train loss: 0.3347, Validation loss: 0.3177, Validation acc: 0.8387, Balanced acc: 0.8888, Weighted F1-Score: 0.8370
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 15/20 - Train loss: 0.3321, Validation loss: 0.3658, Validation acc: 0.8105, Balanced acc: 0.8695, Weighted F1-Score: 0.8096
EarlyStopping counter: 2 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 16/20 - Train loss: 0.3311, Validation loss: 0.3748, Validation acc: 0.8425, Balanced acc: 0.8703, Weighted F1-Score: 0.8413
EarlyStopping counter: 3 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 17/20 - Train loss: 0.3349, Validation loss: 0.3058, Validation acc: 0.8463, Balanced acc: 0.8896, Weighted F1-Score: 0.8461
Validation loss decreased (0.315100 --> 0.305778). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 18/20 - Train loss: 0.3291, Validation loss: 0.2749, Validation acc: 0.8729, Balanced acc: 0.9072, Weighted F1-Score: 0.8716
Validation loss decreased (0.305778 --> 0.274904). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 19/20 - Train loss: 0.3190, Validation loss: 0.3170, Validation acc: 0.8691, Balanced acc: 0.8944, Weighted F1-Score: 0.8688
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/321 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Epoch 20/20 - Train loss: 0.2825, Validation loss: 0.3700, Validation acc: 0.8425, Balanced acc: 0.8844, Weighted F1-Score: 0.8430
EarlyStopping counter: 2 out of 6
Validate on Holdout Set:
Test-Dataset Size: 2116


  0%|          | 0/133 [00:00<?, ?it/s]

Test Accuracy: 0.8596408317580341
Balanced Accuracy: 0.8671904210053153
Confusion Matrix: [[336   3   5   3   7]
 [ 18 389  50  10  10]
 [ 39  57 796  33  14]
 [  7   9  16 143   0]
 [  6   6   3   1 155]]
run_id: 10eb4fe1bcca4374808600daacb9784c
artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/data', 'model/python_env.yaml', 'model/requirements.txt']
params: {'model_path': 'best_model_checkpoint2023-12-15_09-38-24.pth', 'weighted_loss': 'True', 'datetime_now': '2023-12-15_09-38-15', 'n_fold': '5', 'test_fold': '0', 'seed': '42', 'img_size': '512', 'model_name': 'tf_efficientnet_b0_ns', 'checkpoint_path': '/kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth', 'num_classes': '5', 'train_batch_size': '16', 'valid_batch_size': '16', 'n_tiles': '10', 'n_tiles_test': '10', 'top_n_tiles': '30', 'device': 'cuda:0', 'num_epochs': '20', 'early_stopping': 'True', 'patience': '6', 'optimizer': 'adam', 'scheduler': 'CosineAnnealingLR', 'min_lr': '1e-06', 'T_m

In [26]:
df_test_result.head()

Unnamed: 0,image_path,label,predicted_label,prediction
0,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,EC,1
1,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,HGSC,2
2,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,EC,1
3,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,3,LGSC,3
4,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,HGSC,2


In [27]:
"""
model = UBCModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=None)
model.load_state_dict(torch.load("/kaggle/input/effnet-version-28/best_model_checkpoint2023-11-21_15-47-39.pth"))
model.to(CONFIG['device']);
df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])
"""

'\nmodel = UBCModel(CONFIG[\'model_name\'], CONFIG[\'num_classes\'], pretrained=False , checkpoint_path=None)\nmodel.load_state_dict(torch.load("/kaggle/input/effnet-version-28/best_model_checkpoint2023-11-21_15-47-39.pth"))\nmodel.to(CONFIG[\'device\']);\ndf_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])\n'