# Introduction 

**This is a basic CNN Model training notebook**

It is based on: 
- Thumbnail images
- Basic data transformation (using Albumentation):
    - resizing images to 512x512
    - normalizing pixel values
- CNN Architecture


**Todos:**

- Learn about Dataset & DataLoader
- add augmentations (albumentation)
- gem pooling

In [1]:
# !pip install --quiet torch_optimizer

In [2]:
!pip install --quiet mlflow dagshub

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.3.1 requires dacite>=1.8, but you have dacite 1.6.0 which is incompatible.
ydata-profiling 4.3.1 requires scipy<1.11,>=1.4.1, but you have scipy 1.11.2 which is incompatible.[0m[31m
[0m

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input/tiles-of-cancer-2048px-scale-0-25'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:


import os
import gc
import cv2
import datetime
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt
from skimage import io


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.cuda import amp
import torchvision

import optuna
from optuna.trial import TrialState

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


from PIL import Image
from joblib import Parallel, delayed
from tqdm.auto import tqdm

import plotly.express as px

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score, mean_absolute_error,mean_squared_error

# For Image Models
import timm

import dagshub
from getpass import getpass
import mlflow.pytorch 
from mlflow import MlflowClient

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



In [5]:
os.environ["MLFLOW_TRACKING_USERNAME"]="Niggl0n"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "7a3590e8c5558d4598dacc7810befa70a4baac9e"
os.environ['MLFLOW_TRACKING_PROJECTNAME'] = "UBC_Cancer_Classification"
dagshub.auth.add_app_token("7a3590e8c5558d4598dacc7810befa70a4baac9e")
mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME'] + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')
mlflow.set_experiment(experiment_name="UBC_OOS_Outlier_Enc_SVM")

<Experiment: artifact_location='mlflow-artifacts:/5a5b33e1ffb34569bbed108e47ed4be3', creation_time=1700649468372, experiment_id='4', last_update_time=1700649468372, lifecycle_stage='active', name='UBC_OOS_Outlier_Enc_SVM', tags={}>

In [6]:
def get_or_create_experiment_id(name):
    exp = mlflow.get_experiment_by_name(name)
    if exp is None:
        exp_id = mlflow.create_experiment(name)
        return exp_id
    return exp.experiment_id

mlflow_experiment_id = get_or_create_experiment_id("UBC_OOS_Outlier_Enc_SVM")
mlflow_experiment_id

'4'

In [7]:
CONFIG = {
    "anomaly_class": "HGSC",
    "is_submission": False,
    "datetime_now": datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 
    "n_fold":5, 
    "seed": 42,
    "model_name": "tf_efficientnet_b0_ns",   # "tf_efficientnet_b0_ns", # "tf_efficientnetv2_s_in21ft1k"
    "checkpoint_path": "/kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth",
    "img_size": 256,
    "train_batch_size": 32,
    "valid_batch_size": 32,
    "n_tiles": 10,
    "n_tiles_test": 10,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "num_epochs": 6,
    "early_stopping": True,
    "patience": 3,
    "optimizer": 'adam',
    "scheduler": 'CosineAnnealingLR',
}

## 1. Data Preparation

In [8]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TRAIN_DIR = '/kaggle/input/tiles-of-cancer-2048px-scale-0-25/'
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'

# ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'
# TMA_TRAIN_DIR = '/kaggle/input/UBC-OCEAN/train_images'

def get_train_file_path(df_train_row):
    return f"{TRAIN_DIR}/{df_train_row.image_id}_thumbnail.png"

def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"



## Create Train and Holdout Set

In [9]:
train_images = sorted(glob.glob(f"{TRAIN_DIR}/*.png"))
df_train = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
print(df_train.shape)
# df_train['file_path'] = df_train.apply(lambda row: get_train_file_path(row), axis=1)
# only consider WSI / Thumbnail images
#df_train = df_train[ 
#    df_train["file_path"].isin(train_images) ].reset_index(drop=True)
print(df_train.shape)

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])
with open("label_encoder_"+ CONFIG["datetime_now"] +".pkl", "wb") as fp:
    joblib.dump(encoder, fp)
    
# use stratified K Fold for crossvalidation 
skf = StratifiedKFold(n_splits=CONFIG['n_fold'], shuffle=True, random_state=CONFIG["seed"])

for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=df_train.label)):
    df_train.loc[val_ , "kfold"] = int(fold)

df_anomaly = df_train[df_train["label"]==CONFIG["anomaly_class"]].reset_index(drop=True)
df_train = df_train[df_train["label"]!=CONFIG["anomaly_class"]].reset_index(drop=True)
df_holdout = df_train[df_train["kfold"].isin([3,4])].reset_index(drop=True)
df_train = df_train[df_train["kfold"].isin([0,1,2])].reset_index(drop=True)

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])
with open("label_encoder_"+ CONFIG["datetime_now"] +".pkl", "wb") as fp:
    joblib.dump(encoder, fp)
df_holdout['target_label'] = encoder.transform(df_holdout['label'])


print(f"Shape Training Set: {df_train.shape}, Shape Holdout Set: {df_holdout.shape}, Shape Anomaly Set: {df_anomaly.shape}")

df_train.head()

(538, 5)
(538, 5)
Shape Training Set: (190, 7), Shape Holdout Set: (126, 7), Shape Anomaly Set: (222, 7)


Unnamed: 0,image_id,label,image_width,image_height,is_tma,target_label,kfold
0,66,LGSC,48871,48195,False,2,2.0
1,281,LGSC,42309,15545,False,2,2.0
2,286,EC,37204,30020,False,1,2.0
3,1660,CC,83340,20447,False,0,1.0
4,1943,CC,73730,34949,False,0,0.0


## Create Pytorch Dataset

In [10]:
class CancerTilesDataset(Dataset):
    def __init__(
        self,
        df_data,
        path_img_dir: str =  '',
        transforms = None,
        mode: str = 'train',
        labels_lut = None,
        white_thr: int = 225,
        thr_max_bg: float = 0.2,
        split: float = 0.90,
        n_tiles: int = 1
    ):
        assert os.path.isdir(path_img_dir)
        self.path_img_dir = path_img_dir
        self.transforms = transforms
        self.mode = mode
        self.white_thr = white_thr
        self.thr_max_bg = thr_max_bg
        self.split = split
        self.n_tiles = n_tiles

        self.data = df_data
        self.labels_unique = sorted(self.data["label"].unique())
        self.labels_lut = labels_lut or {lb: i for i, lb in enumerate(self.labels_unique)}
        # shuffle data
        self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)

        # split dataset
        assert 0.0 <= self.split <= 1.0
        frac = int(self.split * len(self.data))
        self.data = self.data[:frac] if mode in ["train", "test"] else self.data[frac:]
        self.img_dirs = [glob.glob(os.path.join(path_img_dir, str(idx), "*.png")) for idx in self.data["image_id"]] 
        self.img_dirs = self.img_dirs * self.n_tiles
        self.img_paths = []
        #print(f"missing: {sum([not os.path.isfile(os.path.join(self.path_img_dir, im))
        #                       for im in self.img_names])}")
        # self.labels = list(self.data['label'])
        self.labels =  np.array(self.data.target_label.values.tolist() * self.n_tiles)

    def __getitem__(self, idx: int) -> tuple:
        nth_iteration = idx//len(self.data)
        if self.mode=="train":
            random.seed()
        else:
            random.seed(CONFIG["seed"]+nth_iteration)
        random.shuffle(self.img_dirs[idx])
        for img_path in self.img_dirs[idx]:
            assert os.path.isfile(img_path), f"missing: {img_path}"
            tile = cv2.imread(img_path)
            tile = cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)
        
            # tile = np.array(Image.open(img_path))[..., :3]
            black_bg = np.sum(tile, axis=2) == 0
            tile[black_bg, :] = 255
            mask_bg = np.mean(tile, axis=2) > self.white_thr
            if np.sum(mask_bg) < (np.prod(mask_bg.shape) * self.thr_max_bg):
                #self.img_paths.append(img_path)
                #print(f"Idx: {idx}, Path: {img_path}, len img_pths: {len(self.img_paths)}, nunique img_paths: {len(set(self.img_paths))}")
                break

        # augmentation
        if self.transforms:
            tile = self.transforms(image=tile)["image"]
        #print(f"img dim: {img.shape}")
        return {
            "image": tile,
            "label": torch.tensor(self.labels[idx], dtype=torch.long)
               }
    def __len__(self) -> int:
        return len(self.img_dirs)

In [11]:
img_color_mean=[0.8661704276539922, 0.7663107094675368, 0.8574260897185548]
img_color_std=[0.08670629753900036, 0.11646580094195522, 0.07164169171856792]

data_transforms = {
    "train": A.Compose([
        A.Resize(512, 512),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf([
        A.GaussNoise(var_limit=[10, 50]),
        A.GaussianBlur(),
        A.MotionBlur(),
        ], p=0.4),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(max_holes=1, max_width=int(512* 0.3), max_height=int(512* 0.3),
        mask_fill_value=0, p=0.5),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.)
}

## 2. Model Creation

In [12]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'


class UBCOutlierModel(nn.Module):

    def __init__(self, model_name, num_classes, pretrained=False, checkpoint_path=None):
        '''
        Fine tune for EfficientNetB0
        Args
            n_classes : int - Number of classification categories.
            learnable_modules : tuple - Names of the modules to fine-tune.
        Return
            
        '''
        super(UBCOutlierModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        """
        Forward function for the fine-tuned model
        Args
            x: 
        Return
            result
        """
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output


## 3. Training

In [13]:
class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0, path='checkpoint.pth', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decreases.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model to path {self.path}')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


In [14]:
def convert_dict_to_tensor(dict_):
    """Converts the values of a dict into a PyTorch tensor."""

    # Create a new PyTorch tensor
    tensor = torch.empty(len(dict_))

    # Iterate over the dict and for each key-value pair, convert the value to a PyTorch tensor and add it to the new tensor
    for i, (key, value) in enumerate(dict_.items()):
        tensor[i] = value

    # Return the new tensor
    return tensor

def get_class_weights(df_train):
    label_counts = df_train.target_label.value_counts().sort_index().to_dict()
    ratios_dict = {}
    for key,val in label_counts.items():
        ratios_dict[key] = val / df_train.shape[0]
    ratios_dict
    weights = {}
    sum_weights = 0
    for key, val in ratios_dict.items():
        weights[key] = 1 / val
        sum_weights +=  1 / val
    for key, val in weights.items():
        weights[key] = val / sum_weights
    weight_tensor = convert_dict_to_tensor(weights)
    return weight_tensor

def get_dataloaders(df, n_tiles=1):
    # df_train = df[df["kfold"]!=fold].reset_index(drop=True)
    train_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["train"], mode="train", n_tiles=n_tiles)
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    valid_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["valid"], mode="valid", n_tiles=n_tiles)
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    print(f"Len Train Dataset: {len(train_dataset)}, Len Validation Dataset: {len(valid_dataset)}" )
    return train_loader, valid_loader, df_train

def print_logged_info(r):
    tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
    artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
    print(f"run_id: {r.info.run_id}")
    print(f"artifacts: {artifacts}")
    print(f"params: {r.data.params}")
    print(f"metrics: {r.data.metrics}")
    print(f"tags: {tags}")


In [15]:
def fetch_scheduler(optimizer):
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=CONFIG['T_max'], 
                                                   eta_min=CONFIG['min_lr'], verbose=False)
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        CONFIG['T_0'] = 20
        CONFIG['T_mult'] = 2
        CONFIG['min_lr'] = 1e-6
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CONFIG['T_0'], T_mult=CONFIG['T_mult'],
                                                             eta_min=CONFIG['min_lr'], verbose=False)
    elif CONFIG['scheduler'] == 'ReduceLROnPlateau':
        scheduler =  ReduceLROnPlateau(optimizer, mode='min', factor=kwargs.get('factor', 0.1), patience=kwargs.get('patience', 5), verbose=False)
    elif CONFIG['scheduler'] == 'LambdaLR':
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)
    elif CONFIG['scheduler'] == None:
        return None
        
    return scheduler

def get_optimizer(optimizer_name, model):
    if optimizer_name.lower() == "adam":
        CONFIG['learning_rate'] = 1e-4
        CONFIG['weight_decay'] = 1e-5
        CONFIG['betas'] = (0.9, 0.999)
        CONFIG['eps'] = 1e-8
        optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'], betas=CONFIG['betas'], eps=CONFIG['eps'],  weight_decay=CONFIG['weight_decay'])
    elif optimizer_name.lower() == "sgd":
        CONFIG['learning_rate'] = 1e-3
        CONFIG['weight_decay'] = 1e-3
        CONFIG['momentum'] = 1e-3
        optimizer = optim.SGD(model.parameters(), lr=CONFIG['learning_rate'], momentum=CONFIG['momentum'], weight_decay=CONFIG['weight_decay'])
    elif optimizer_name.lower() == "radam":
        CONFIG['learning_rate'] = 1e-4
        CONFIG['weight_decay'] = 0
        CONFIG['betas'] = (0.9, 0.999)
        CONFIG['eps'] = 1e-8
        optimizer = torch_optimizer.RAdam(
            model.parameters(),
            lr= CONFIG['learning_rate'],
            betas=CONFIG['betas'],
            eps=CONFIG['eps'],
            weight_decay=CONFIG['weight_decay'],
        )
    elif optimizer_name.lower() == "rmsprop":
        CONFIG['learning_rate'] = 0.256
        CONFIG['alpha'] = 0.9
        CONFIG['momentum'] = 0.9
        CONFIG['weight_decay'] = 1e-5
        optimizer = optim.RMSprop(model.parameters(), lr=CONFIG['learning_rate'], alpha=CONFIG['learning_rate'], 
                                  momentum=CONFIG['learning_rate'], weight_decay=CONFIG['learning_rate'])
    else:
        raise ValueError("Invalid Optimizer given!")
    return optimizer

In [16]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler=None):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    model.train()
    train_loss = 0.0
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Focal Loss
        #criterion = FocalLoss(gamma=0.7)
        #m = torch.nn.Softmax(dim=-1)
        #loss = criterion(m(outputs), labels)
        
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        # Update learning rate using the scheduler
        if scheduler:
            scheduler.step()
        
        # Log the training loss to TensorBoard
        writer.add_scalar('loss/train_batch', loss.item(), epoch * len(train_loader) + step)
    
    train_loss /= len(train_loader.dataset)
    # Log the average training loss for the epoch to TensorBoard
    writer.add_scalar('loss/train_epoch', train_loss, epoch)
    # gc.collect()
    return train_loss

def validate_one_epoch(model, valid_loader, criterion, device, writer, epoch):
    model.eval()
    valid_loss = 0.0
    valid_acc = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        bar_val = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, data in bar_val:
            images = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)
            outputs = model(images)

            loss = criterion(outputs, labels)
            # Focal Loss
            #criterion = FocalLoss(gamma=0.7)
            #m = torch.nn.Softmax(dim=-1)
            #loss = criterion(m(outputs), labels)
        
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(model.softmax(outputs), 1)
            acc = torch.sum( predicted == labels )
            valid_acc  += acc.item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
            writer.add_scalar('loss/valid_batch', loss.item(), epoch * len(valid_loader) + step)
            writer.add_scalar('acc/valid_batch', acc.item(), epoch * len(valid_loader) + step)
    valid_loss /= len(valid_loader.dataset)
    valid_acc /= len(valid_loader.dataset)
    bal_acc = balanced_accuracy_score(all_labels, all_preds)
    # At the end of your validation loop:
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Logging to TensorBoard
    writer.add_scalar('loss/val_epoch', valid_loss, epoch)
    writer.add_scalar('acc/val_epoch', valid_acc, epoch)
    writer.add_scalar('balanced_acc/val_epoch', bal_acc, epoch)
    writer.add_scalar('F1/macro', macro_f1, epoch)
    writer.add_scalar('F1/micro', micro_f1, epoch)
    writer.add_scalar('F1/weighted', weighted_f1, epoch)
    # in order to put multiple lines within one graph
    #writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
    #                        'xcosx':i*np.cos(i/r),
    #                        'tanx': np.tan(i/r)}, i)
    return valid_loss, valid_acc, bal_acc, weighted_f1

def train_model(model, train_loader, valid_loader, optimizer, criterion, device, num_epochs, scheduler, save_model_path=None):
    model_name = "model_epochs" + str(CONFIG["num_epochs"]) + "_bs"+str(CONFIG["train_batch_size"] )+ "_opt" +CONFIG["optimizer"]+ "_sched" + CONFIG["scheduler"] + "_lr"+str(CONFIG["learning_rate"])+ "_wd" + str(CONFIG["weight_decay"])
    print(f"Training model: {model_name}")
    datetime_now =  datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not save_model_path:
        save_model_path = 'best_model_checkpoint' + datetime_now + '.pth'
    print(f"Path for saving model: {save_model_path}")
    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/fit/' + model_name)
    early_stopping = EarlyStopping(patience=CONFIG["patience"], verbose=True, path=save_model_path)
    #if scheduler_type:
    #    print(f"Define {scheduler_type} scheduler")
    #    scheduler = get_lr_scheduler(optimizer, scheduler_type, num_epochs=num_epochs)
    
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler)
        valid_loss, valid_acc, bal_acc, weighted_f1 = validate_one_epoch(model, valid_loader, criterion, device, writer, epoch)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f}, Validation loss: {valid_loss:.4f}, Validation acc: {valid_acc:.4f}, Balanced acc: {bal_acc:.4f}, Weighted F1-Score: {weighted_f1:.4f}")
        # Call early stopping
        if CONFIG["early_stopping"]:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
        writer.close()

        try:
            mlflow.log_metrics({
                'epoch': epoch,
                'train_loss': train_loss,
                'valid_loss': valid_loss,
                'valid_acc': valid_acc,
                'balanced_acc': bal_acc,
                'weighted_f1': weighted_f1
            }, step=epoch)
        except: 
            pass
            
    return train_loss, valid_loss, valid_acc, save_model_path

In [17]:
def calc_mae_per_image(images, rec):
    abs_diff = torch.abs(torch.from_numpy(images) - torch.from_numpy(rec))
    # Sum differences per image
    sum_diff_per_image = torch.sum(abs_diff.view(images.shape[0], -1), dim=1)

    # Calculate mean absolute error per image
    num_pixels_per_image = 3 * CONFIG["img_size"] * CONFIG["img_size"]
    mae_per_image = sum_diff_per_image / num_pixels_per_image
    return mae_per_image



In [18]:
def test_on_holdout(model, CONFIG, df_test, TRAIN_DIR=None, val_size=1.0, n_tiles=1):
    if not CONFIG["is_submission"]:
        model.eval()
        test_dataset = CancerTilesDataset(df_test, TRAIN_DIR, transforms=data_transforms["valid"], mode="test", split=1.0, n_tiles=n_tiles)
        test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'], 
                                  num_workers=2, shuffle=False, pin_memory=True)
        print(f"Test-Dataset Size: {len(test_dataset)}")

        preds = []
        labels_list = []
        test_acc = 0.0

        with torch.no_grad():
            bar = tqdm(enumerate(test_loader), total=len(test_loader))
            for step, data in bar: 
                # print(step)
                images = data['image'].to(CONFIG["device"], dtype=torch.float)
                labels = data['label'].to(CONFIG["device"], dtype=torch.long)

                batch_size = images.size(0)
                outputs = model(images)
                _, predicted = torch.max(model.softmax(outputs), 1)
                preds.append(predicted.detach().cpu().numpy() )
                labels_list.append(labels.detach().cpu().numpy() )
                acc = torch.sum(predicted == labels )
                test_acc  += acc.item()
        test_acc /= len(test_loader.dataset)
        preds = np.concatenate(preds).flatten()
        labels_list = np.concatenate(labels_list).flatten()
        pred_labels = encoder.inverse_transform( preds )
        
        # Calculate Balanced Accuracy
        bal_acc = balanced_accuracy_score(labels_list, preds)
        # Calculate Confusion Matrix
        conf_matrix = confusion_matrix(labels_list, preds)
        macro_f1 = f1_score(labels_list, preds, average='macro')

    
        print(f"Test Accuracy: {test_acc}")
        print(f"Balanced Accuracy: {bal_acc}")
        print(f"Confusion Matrix: {conf_matrix}")
        
        # add to validation dataframe
        num_samples = len(df_test)
        for i in range(0,n_tiles):
            df_test[f"label_tile_{str(i)}"] = labels_list[i*num_samples:(i+1)*num_samples]
            df_test[f"pred_tile_{str(i)}"] = preds[i*num_samples:(i+1)*num_samples]
            df_test[f"pred_label_tile_{str(i)}"] = pred_labels[i*num_samples:(i+1)*num_samples]
            #df_test["pred"] = preds
            #df_test["pred_labels"] = pred_labels
        try: 
            mlflow.log_metrics({
                'test_acc': test_acc,
                'test_balanced_acc': bal_acc,
                'test_f1_score': macro_f1,
            })
        except: 
            pass
        return df_test
    else:
        print("Skip validation on training set due to submission!")
        return None

In [19]:
CONFIG["weighted_loss"] = True
if CONFIG["weighted_loss"]:
    class_weights = get_class_weights(df_train).to(CONFIG['device'], dtype=torch.float)
    print(f"Class weights: {class_weights}")
else:
    class_weights=None
criterion = nn.CrossEntropyLoss(weight=class_weights)

Class weights: tensor([0.1666, 0.1311, 0.3511, 0.3511], device='cuda:0')


In [20]:
df_test = df_holdout.copy()
CONFIG['num_classes'] = df_train["label"].nunique() 
CONFIG['T_max'] = CONFIG['num_epochs']
CONFIG['min_lr'] = 1e-6
print(f"Shape df_train: {df_train.shape}, Shape df_test: {df_test.shape}")
with mlflow.start_run(experiment_id=mlflow_experiment_id) as run:
    train_loader, valid_loader, df_train_fold = get_dataloaders(df_train.copy(), n_tiles=CONFIG["n_tiles"])

    model = UBCOutlierModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=CONFIG["checkpoint_path"])
    # model.load_state_dict(torch.load(CONFIG["checkpoint_path"]))
    model.to(CONFIG['device']);

    optimizer = get_optimizer(CONFIG["optimizer"], model)
    scheduler = fetch_scheduler(optimizer)

    _, _, _, save_model_path = train_model(model, train_loader, valid_loader, optimizer, criterion, CONFIG["device"], CONFIG["num_epochs"], scheduler)
    model.load_state_dict(torch.load(save_model_path))

    
    print("Validate on Holdout Set:")
    df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])
    df_test_file_path = "df_test_results.csv"
    df_test.to_csv(df_test_file_path, index=False)
    try: 
        mlflow.log_params(CONFIG)
        mlflow.pytorch.log_model(model, "model")
        mlflow.log_params(save_model_path)
        mlflow.log_artifact(df_test_file_path)
        print_logged_info(mlflow.get_run(run_id=run.info.run_id))
    except:
        pass



Shape df_train: (190, 7), Shape df_test: (126, 7)
Len Train Dataset: 1710, Len Validation Dataset: 190


  model = create_fn(


Training model: model_epochs6_bs32_optadam_schedCosineAnnealingLR_lr0.0001_wd1e-05
Path for saving model: best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/6 - Train loss: 1.2944, Validation loss: 1.2737, Validation acc: 0.4211, Balanced acc: 0.4156, Weighted F1-Score: 0.4317
Validation loss decreased (inf --> 1.273675). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/6 - Train loss: 1.0351, Validation loss: 1.1419, Validation acc: 0.4474, Balanced acc: 0.4594, Weighted F1-Score: 0.4613
Validation loss decreased (1.273675 --> 1.141859). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/6 - Train loss: 0.8565, Validation loss: 1.0570, Validation acc: 0.4895, Balanced acc: 0.5323, Weighted F1-Score: 0.4836
Validation loss decreased (1.141859 --> 1.057028). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/6 - Train loss: 0.7467, Validation loss: 1.0301, Validation acc: 0.4842, Balanced acc: 0.5229, Weighted F1-Score: 0.4889
Validation loss decreased (1.057028 --> 1.030102). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/6 - Train loss: 0.6302, Validation loss: 1.0112, Validation acc: 0.4789, Balanced acc: 0.5229, Weighted F1-Score: 0.4694
Validation loss decreased (1.030102 --> 1.011182). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/6 - Train loss: 0.5687, Validation loss: 1.0010, Validation acc: 0.4737, Balanced acc: 0.5281, Weighted F1-Score: 0.4615
Validation loss decreased (1.011182 --> 1.001003). Saving model to path best_model_checkpoint2023-11-22_13-18-28.pth
Validate on Holdout Set:
Test-Dataset Size: 1260


  0%|          | 0/40 [00:00<?, ?it/s]

Test Accuracy: 0.5642857142857143
Balanced Accuracy: 0.6242239527389903
Confusion Matrix: [[244  54  72  30]
 [ 27 187 242  34]
 [  9   4 172   5]
 [  6  38  28 108]]




# 2 .Outlier Detection 

In [21]:
# Manipulate Encoder to returned pooled features 
class UBCOutlierModel(nn.Module):

    def __init__(self, model_name, num_classes, pretrained=False, checkpoint_path=None):
        '''
        Fine tune for EfficientNetB0
        Args
            n_classes : int - Number of classification categories.
            learnable_modules : tuple - Names of the modules to fine-tune.
        Return
            
        '''
        super(UBCOutlierModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        """
        Forward function for the fine-tuned model
        Args
            x: 
        Return
            result
        """
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return pooled_features

In [22]:
def extract_features(data_loader, model, device):
    model.eval()
    features = []
    with torch.no_grad():
        for batch in data_loader:
            images = batch['image'].to(device)  # Assuming the data loader returns a dictionary
            batch_features = model(images)
            features.append(batch_features.cpu().numpy())

    return np.vstack(features)

In [23]:
"""encoder = UBCOutlierModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=CONFIG["checkpoint_path"])
encoder.load_state_dict(torch.load(save_model_path))
encoder.to(CONFIG['device']);"""

'encoder = UBCOutlierModel(CONFIG[\'model_name\'], CONFIG[\'num_classes\'], pretrained=False , checkpoint_path=CONFIG["checkpoint_path"])\nencoder.load_state_dict(torch.load(save_model_path))\nencoder.to(CONFIG[\'device\']);'

In [24]:
"""from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV


# Extract features from the training dataset
svm_train_loader, svm_valid_loader, _ = get_dataloaders(df_holdout.copy(), n_tiles=CONFIG["n_tiles"])
svm_test_dataset = CancerTilesDataset(df_anomaly.copy(), TRAIN_DIR, transforms=data_transforms["valid"], mode="test", split=1.0, n_tiles=CONFIG["n_tiles"])
svm_test_loader = DataLoader(svm_test_dataset, batch_size=CONFIG['valid_batch_size'], 
                          num_workers=2, shuffle=False, pin_memory=True)
        
train_features = extract_features(svm_train_loader, encoder, CONFIG["device"])
valid_features = extract_features(svm_valid_loader, encoder, CONFIG["device"])
test_features = extract_features(svm_test_loader, encoder, CONFIG["device"])

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
valid_features_scaled = scaler.transform(valid_features)
test_features_scaled = scaler.transform(test_features)
print(train_features_scaled.shape, valid_features_scaled.shape, test_features_scaled.shape)

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
pca = PCA(n_components=0.95)
train_features_reduced = pca.fit_transform(train_features_scaled)
valid_features_reduced = pca.transform(valid_features_scaled)
test_features_reduced = pca.transform(test_features_scaled)"""

'from sklearn.svm import OneClassSVM\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Extract features from the training dataset\nsvm_train_loader, svm_valid_loader, _ = get_dataloaders(df_holdout.copy(), n_tiles=CONFIG["n_tiles"])\nsvm_test_dataset = CancerTilesDataset(df_anomaly.copy(), TRAIN_DIR, transforms=data_transforms["valid"], mode="test", split=1.0, n_tiles=CONFIG["n_tiles"])\nsvm_test_loader = DataLoader(svm_test_dataset, batch_size=CONFIG[\'valid_batch_size\'], \n                          num_workers=2, shuffle=False, pin_memory=True)\n        \ntrain_features = extract_features(svm_train_loader, encoder, CONFIG["device"])\nvalid_features = extract_features(svm_valid_loader, encoder, CONFIG["device"])\ntest_features = extract_features(svm_test_loader, encoder, CONFIG["device"])\n\nscaler = StandardScaler()\ntrain_features_scaled = scaler.fit_transform(train_features)\nvalid_features_scaled = scaler.transform(valid_fe

In [25]:
# Define the parameter grid
"""param_grid = {
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13)),
    'nu': np.linspace(0.01, 0.5, 10)  # Assuming a smaller nu as we expect fewer outliers
}

# Create OneClassSVM object
svm = OneClassSVM()

# Grid search
grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1)
grid_search.fit(train_features_scaled)

# Best parameters
best_params = grid_search.best_params_
print("Best parameters:", best_params)

# Train the SVM with the best parameters
optimized_svm = OneClassSVM(**best_params)
optimized_svm.fit(train_features_scaled)"""



"""# Train the one-class SVM
clf = OneClassSVM(nu=0.1, kernel="rbf", gamma="auto")
clf.fit(train_features_reduced)
y_pred_train = clf.predict(train_features_reduced)
y_pred_valid = clf.predict(valid_features_reduced)
y_pred_outliers = clf.predict(test_features_reduced)
from numpy import quantile, where, random

train_scores = clf.score_samples(train_features_reduced)
thresh = quantile(train_scores, 0.03)

print(thresh)

n_error_train = y_pred_train[y_pred_train == -1].size
n_error_valid = y_pred_valid[y_pred_valid == -1].size
n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size
print(n_error_train/len(y_pred_train), n_error_valid, n_error_outliers)
print(n_error_train/len(y_pred_train), n_error_valid/len(y_pred_valid), n_error_outliers/len(y_pred_outliers))"""

'# Train the one-class SVM\nclf = OneClassSVM(nu=0.1, kernel="rbf", gamma="auto")\nclf.fit(train_features_reduced)\ny_pred_train = clf.predict(train_features_reduced)\ny_pred_valid = clf.predict(valid_features_reduced)\ny_pred_outliers = clf.predict(test_features_reduced)\nfrom numpy import quantile, where, random\n\ntrain_scores = clf.score_samples(train_features_reduced)\nthresh = quantile(train_scores, 0.03)\n\nprint(thresh)\n\nn_error_train = y_pred_train[y_pred_train == -1].size\nn_error_valid = y_pred_valid[y_pred_valid == -1].size\nn_error_outliers = y_pred_outliers[y_pred_outliers == 1].size\nprint(n_error_train/len(y_pred_train), n_error_valid, n_error_outliers)\nprint(n_error_train/len(y_pred_train), n_error_valid/len(y_pred_valid), n_error_outliers/len(y_pred_outliers))'