# Introduction 

**This is a basic CNN Model training notebook**

It is based on: 
- Thumbnail images
- Basic data transformation (using Albumentation):
    - resizing images to 512x512
    - normalizing pixel values
- CNN Architecture


**Todos:**

- Learn about Dataset & DataLoader
- add augmentations (albumentation)
- gem pooling

In [1]:
!pip install --quiet torch_optimizer

In [2]:
!pip install --quiet mlflow dagshub

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.3.1 requires dacite>=1.8, but you have dacite 1.6.0 which is incompatible.
ydata-profiling 4.3.1 requires scipy<1.11,>=1.4.1, but you have scipy 1.11.2 which is incompatible.[0m[31m
[0m

In [3]:


import os
import gc
import cv2
import datetime
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt
from skimage import io


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.cuda import amp
import torchvision
import torch_optimizer as torch_optimizer

import optuna
from optuna.trial import TrialState

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


from PIL import Image
from joblib import Parallel, delayed
from tqdm.auto import tqdm

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score
from torch.utils.data.sampler import WeightedRandomSampler

# For Image Models
import timm

import dagshub
from getpass import getpass
import mlflow.pytorch 
from mlflow import MlflowClient

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



In [4]:
from cancer_utils_tiles import get_class_weights,UBCModel, get_optimizer, fetch_scheduler, EarlyStopping, print_logged_info, get_or_create_experiment_id


In [5]:
os.environ["MLFLOW_TRACKING_USERNAME"]="Niggl0n"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "7a3590e8c5558d4598dacc7810befa70a4baac9e"
os.environ['MLFLOW_TRACKING_PROJECTNAME'] = "UBC_Cancer_Classification"
#dagshub.auth.add_app_token("7a3590e8c5558d4598dacc7810befa70a4baac9e")
mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME'] + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')

In [6]:
CONFIG = {
    "is_submission": False,
    "weighted_loss": True,
    "datetime_now": datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 
    "n_fold":5, 
    "test_fold": 0,
    "seed": 42,
    "img_size": 512,
    "model_name": "tf_efficientnet_b0_ns",   # "tf_efficientnet_b0_ns", # "tf_efficientnetv2_s_in21ft1k"
    "checkpoint_path": "/kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth",
    "num_classes": 5,
    "train_batch_size": 8,
    "valid_batch_size": 8,
    "n_tiles": 10,
    "n_tiles_test": 10,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "num_epochs": 15,
    "early_stopping": True,
    "patience": 6,
    "optimizer": 'adam',
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 10,
    "momentum": 0.9,
    "weight_decay": 1e-4,
}

## 1. Data Preparation

In [7]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TRAIN_DIR = '/kaggle/input/tiles-of-cancer-2048px-scale-0-25/'
TMA_DIR = "/kaggle/input/ubc-tma-tiles-512-05scale/UBC_TMA_tiles_1024p_scale05"
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'

# ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'
# TMA_TRAIN_DIR = '/kaggle/input/UBC-OCEAN/train_images'

def get_train_file_path(df_train_row):
    return f"{TRAIN_DIR}{df_train_row.image_id}.png"

def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"



In [8]:
train_images = sorted(glob.glob(f"{TRAIN_DIR}/*.png"))
df_train = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
print(df_train.shape)
df_train['file_path'] = df_train.apply(lambda row: get_train_file_path(row), axis=1)
# only consider WSI / Thumbnail images
#df_train = df_train[ 
#    df_train["file_path"].isin(train_images) ].reset_index(drop=True)
print(df_train.shape)

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])

# save encoder
with open("label_encoder_"+ CONFIG["datetime_now"] +".pkl", "wb") as fp:
    joblib.dump(encoder, fp)
    
# use stratified K Fold for crossvalidation 
skf = StratifiedKFold(n_splits=CONFIG['n_fold'], shuffle=True, random_state=CONFIG["seed"])

for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=df_train.target_label)):
    df_train.loc[val_ , "kfold"] = int(fold)
display(df_train.head())

# separate train and test dataset
df_test = df_train[df_train["kfold"]==CONFIG["test_fold"]].reset_index(drop=True)
df_train = df_train[df_train["kfold"]!=CONFIG["test_fold"]].reset_index(drop=True)
print(f"Shape df_train: {df_train.shape}, Shape df_test: {df_test.shape} ")

(538, 5)
(538, 6)


Unnamed: 0,image_id,label,image_width,image_height,is_tma,file_path,target_label,kfold
0,4,HGSC,23785,20008,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,3.0
1,66,LGSC,48871,48195,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,3,2.0
2,91,HGSC,3388,3388,True,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,4.0
3,281,LGSC,42309,15545,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,3,2.0
4,286,EC,37204,30020,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,2.0


Shape df_train: (430, 8), Shape df_test: (108, 8) 


In [9]:
class CancerTilesDataset(Dataset):
    @staticmethod
    def get_img_dir(data_row):
        # based on if is_tma or not we select the respective image path
        if data_row.is_tma == True:
            return glob.glob(os.path.join("/kaggle/input/ubc-tma-tiles-512-05scale/UBC_TMA_tiles_1024p_scale05", str(data_row.image_id), "*.png"))
        else:
            return glob.glob(os.path.join("/kaggle/input/tiles-of-cancer-2048px-scale-0-25", str(data_row.image_id), "*.png")) 

    def __init__(
        self,
        df_data,
        path_img_dir: str =  '',
        transforms = None,
        mode: str = 'train',
        labels_lut = None,
        white_thr: int = 225,
        thr_max_bg: float = 0.2,
        train_val_split: float = 0.90,
        n_tiles: int = 1,
        tma_weight: float = 1.0,
    ):
        assert os.path.isdir(path_img_dir)
        self.path_img_dir = path_img_dir
        self.transforms = transforms
        self.mode = mode
        self.white_thr = white_thr
        self.thr_max_bg = thr_max_bg
        self.train_val_split = train_val_split
        self.n_tiles = n_tiles
        self.tma_weight = tma_weight

        self.data = df_data
        self.labels_unique = sorted(self.data["label"].unique())
        self.labels_lut = labels_lut or {lb: i for i, lb in enumerate(self.labels_unique)}

        self.data.is_tma = self.data.is_tma.astype(bool)
        self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)

        # split dataset
        assert 0.0 <= self.train_val_split <= 1.0
        frac = int(self.train_val_split * len(self.data))
        self.data = self.data[:frac] if mode in ["train", "test"] else self.data[frac:]
        self.img_dirs = [CancerTilesDataset.get_img_dir(row) for i, row in self.data.iterrows()] 
        self.img_dirs = self.img_dirs * self.n_tiles
        self.img_paths = []
        #print(f"missing: {sum([not os.path.isfile(os.path.join(self.path_img_dir, im))
        #                       for im in self.img_names])}")
        # self.labels = list(self.data['label'])
        self.labels =  np.array(self.data.target_label.values.tolist() * self.n_tiles)
        
        # set sample weights 
        self.sample_weights = [self.tma_weight if is_tma == True else 1 for is_tma in self.data["is_tma"]] 
        self.sample_weights =  np.array(self.sample_weights * self.n_tiles)
        
    def __getitem__(self, idx: int) -> tuple:
        nth_iteration = idx//len(self.data)
        if self.mode=="train":
            random.seed()
        else:
            random.seed(CONFIG["seed"]+nth_iteration)
        random.shuffle(self.img_dirs[idx])
        for img_path in self.img_dirs[idx]:
            assert os.path.isfile(img_path), f"missing: {img_path}"
            tile = cv2.imread(img_path)
            tile = cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)
        
            # tile = np.array(Image.open(img_path))[..., :3]
            black_bg = np.sum(tile, axis=2) == 0
            tile[black_bg, :] = 255
            mask_bg = np.mean(tile, axis=2) > self.white_thr
            if np.sum(mask_bg) < (np.prod(mask_bg.shape) * self.thr_max_bg):
                #self.img_paths.append(img_path)
                #print(f"Idx: {idx}, Path: {img_path}, len img_pths: {len(self.img_paths)}, nunique img_paths: {len(set(self.img_paths))}")
                break

        # augmentation
        if self.transforms:
            tile = self.transforms(image=tile)["image"]
        #print(f"img dim: {img.shape}")
        return {
            "image": tile,
            "label": torch.tensor(self.labels[idx], dtype=torch.long),
               }
    def __len__(self) -> int:
        return len(self.img_dirs)
    
    def get_sample_weights(self):
        return torch.from_numpy(self.sample_weights).double()


In [10]:
img_color_mean=[0.8661704276539922, 0.7663107094675368, 0.8574260897185548]
img_color_std=[0.08670629753900036, 0.11646580094195522, 0.07164169171856792]

data_transforms = {
    "train": A.Compose([
        A.Resize(512, 512),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf([
        A.GaussNoise(var_limit=[10, 50]),
        A.GaussianBlur(),
        A.MotionBlur(),
        ], p=0.4),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(max_holes=5, max_width=int(512* 0.1), max_height=int(512* 0.1),
        mask_fill_value=0, p=0.5),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(img_color_mean, img_color_std), 
        ToTensorV2()], p=1.)
}



## 3. Training

In [11]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler=None):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    model.train()
    train_loss = 0.0
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Focal Loss
        #criterion = FocalLoss(gamma=0.7)
        #m = torch.nn.Softmax(dim=-1)
        #loss = criterion(m(outputs), labels)
        
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        writer.add_scalar('loss/train_batch', loss.item(), epoch * len(train_loader) + step)
    
    # Update learning rate using the scheduler
    if scheduler:
        scheduler.step()
    train_loss /= len(train_loader.dataset)
    # Log the average training loss for the epoch to TensorBoard
    writer.add_scalar('loss/train_epoch', train_loss, epoch)
    # gc.collect()
    return train_loss

def validate_one_epoch(model, valid_loader, criterion, device, writer, epoch):
    model.eval()
    valid_loss = 0.0
    valid_acc = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        bar_val = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, data in bar_val:
            images = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Focal Loss
            #criterion = FocalLoss(gamma=0.7)
            #m = torch.nn.Softmax(dim=-1)
            #loss = criterion(m(outputs), labels)
        
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(model.softmax(outputs), 1)
            acc = torch.sum( predicted == labels )
            valid_acc  += acc.item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
            writer.add_scalar('loss/valid_batch', loss.item(), epoch * len(valid_loader) + step)
            writer.add_scalar('acc/valid_batch', acc.item(), epoch * len(valid_loader) + step)
    valid_loss /= len(valid_loader.dataset)
    valid_acc /= len(valid_loader.dataset)
    bal_acc = balanced_accuracy_score(all_labels, all_preds)
    # At the end of your validation loop:
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Logging to TensorBoard
    writer.add_scalar('loss/val_epoch', valid_loss, epoch)
    writer.add_scalar('acc/val_epoch', valid_acc, epoch)
    writer.add_scalar('balanced_acc/val_epoch', bal_acc, epoch)
    writer.add_scalar('F1/macro', macro_f1, epoch)
    writer.add_scalar('F1/micro', micro_f1, epoch)
    writer.add_scalar('F1/weighted', weighted_f1, epoch)
    # in order to put multiple lines within one graph
    #writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
    #                        'xcosx':i*np.cos(i/r),
    #                        'tanx': np.tan(i/r)}, i)
    return valid_loss, valid_acc, bal_acc, weighted_f1

def train_model(model, train_loader, valid_loader, optimizer, criterion, device, num_epochs, scheduler, save_model_path=None):
    model_name = "model_epochs" + str(CONFIG["num_epochs"]) + "_bs"+str(CONFIG["train_batch_size"] )+ "_opt" +CONFIG["optimizer"]+ "_sched" + CONFIG["scheduler"] + "_lr"+str(CONFIG["learning_rate"])+ "_wd" + str(CONFIG["weight_decay"])
    print(f"Training model: {model_name}")
    datetime_now =  datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not save_model_path:
        save_model_path = 'best_model_checkpoint' + datetime_now + '.pth'
    print(f"Path for saving model: {save_model_path}")
    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/fit/' + model_name)
    early_stopping = EarlyStopping(patience=CONFIG["patience"], verbose=True, path=save_model_path)
    #if scheduler_type:
    #    print(f"Define {scheduler_type} scheduler")
    #    scheduler = get_lr_scheduler(optimizer, scheduler_type, num_epochs=num_epochs)
    
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler)
        valid_loss, valid_acc, bal_acc, weighted_f1 = validate_one_epoch(model, valid_loader, criterion, device, writer, epoch)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f}, Validation loss: {valid_loss:.4f}, Validation acc: {valid_acc:.4f}, Balanced acc: {bal_acc:.4f}, Weighted F1-Score: {weighted_f1:.4f}")
        # Call early stopping
        if CONFIG["early_stopping"]:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
        writer.close()

        try:
            mlflow.log_metrics({
                'epoch': epoch,
                'train_loss': train_loss,
                'valid_loss': valid_loss,
                'valid_acc': valid_acc,
                'balanced_acc': bal_acc,
                'weighted_f1': weighted_f1
            }, step=epoch)
        except: 
            pass
            
    return train_loss, valid_loss, valid_acc, save_model_path
    # Load the last checkpoint with the best model
    #model.load_state_dict(torch.load('best_model_checkpoint.pth'))



In [12]:
def test_on_holdout(model, CONFIG, df_test, TRAIN_DIR=None, val_size=1.0, n_tiles=1):
    if not CONFIG["is_submission"]:
        model.eval()
        test_dataset = CancerTilesDataset(df_test, TRAIN_DIR, transforms=data_transforms["valid"], mode="test", train_val_split=1.0, n_tiles=n_tiles)
        test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'], 
                                  num_workers=2, shuffle=False, pin_memory=True)
        print(f"Test-Dataset Size: {len(test_dataset)}")

        preds = []
        labels_list = []
        test_acc = 0.0

        with torch.no_grad():
            bar = tqdm(enumerate(test_loader), total=len(test_loader))
            for step, data in bar: 
                # print(step)
                images = data['image'].to(CONFIG["device"], dtype=torch.float)
                labels = data['label'].to(CONFIG["device"], dtype=torch.long)

                batch_size = images.size(0)
                outputs = model(images)
                _, predicted = torch.max(model.softmax(outputs), 1)
                preds.append(predicted.detach().cpu().numpy() )
                labels_list.append(labels.detach().cpu().numpy() )
                acc = torch.sum(predicted == labels )
                test_acc  += acc.item()
        test_acc /= len(test_loader.dataset)
        preds = np.concatenate(preds).flatten()
        labels_list = np.concatenate(labels_list).flatten()
        pred_labels = encoder.inverse_transform( preds )
        
        # Calculate Balanced Accuracy
        bal_acc = balanced_accuracy_score(labels_list, preds)
        # Calculate Confusion Matrix
        conf_matrix = confusion_matrix(labels_list, preds)
        macro_f1 = f1_score(labels_list, preds, average='macro')

    
        print(f"Test Accuracy: {test_acc}")
        print(f"Balanced Accuracy: {bal_acc}")
        print(f"Confusion Matrix: {conf_matrix}")
        
        # add to validation dataframe
        num_samples = len(df_test)
        for i in range(0,n_tiles):
            df_test[f"label_tile_{str(i)}"] = labels_list[i*num_samples:(i+1)*num_samples]
            df_test[f"pred_tile_{str(i)}"] = preds[i*num_samples:(i+1)*num_samples]
            df_test[f"pred_label_tile_{str(i)}"] = pred_labels[i*num_samples:(i+1)*num_samples]
            #df_test["pred"] = preds
            #df_test["pred_labels"] = pred_labels
        mlflow.log_metrics({
            'test_acc': test_acc,
            'test_balanced_acc': bal_acc,
            'test_f1_score': macro_f1,
        })
        return df_test
    else:
        print("Skip validation on training set due to submission!")
        return None

In [13]:

if CONFIG["weighted_loss"]:
    class_weights = get_class_weights(df_train).to(CONFIG['device'], dtype=torch.float)
    print(f"Class weights: {class_weights}")
else:
    class_weights=None
criterion = nn.CrossEntropyLoss(weight=class_weights)

Class weights: tensor([0.1548, 0.1236, 0.0691, 0.3219, 0.3306], device='cuda:0')


In [14]:
def get_dataloaders(df, TRAIN_DIR, CONFIG, data_transforms, n_tiles=1, train_val_split=0.9, apply_sampler=True, tma_weight=1, sample_fac=1):
    # df_train = df[df["kfold"]!=fold].reset_index(drop=True)
    train_dataset = CancerTilesDataset(df, TRAIN_DIR, transforms=data_transforms["train"], mode="train", n_tiles=n_tiles, train_val_split=train_val_split, tma_weight=tma_weight)
    if apply_sampler:
        samples_weights = train_dataset.get_sample_weights()
        train_sampler = WeightedRandomSampler(samples_weights, len(samples_weights)*sample_fac)
    else:
        train_sampler = None
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], num_workers=2, sampler=train_sampler, shuffle=False, pin_memory=True)
    
    valid_dataset = CancerTilesDataset(df, TRAIN_DIR, transforms=data_transforms["valid"], mode="valid", n_tiles=n_tiles, train_val_split=train_val_split, tma_weight=tma_weight)
    if apply_sampler:
        samples_weights = valid_dataset.get_sample_weights()
        valid_sampler = WeightedRandomSampler(samples_weights, len(samples_weights)*sample_fac)
    else:
        valid_sampler=None
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], num_workers=2, sampler=valid_sampler, shuffle=False, pin_memory=True)
    print(f"Len Train Dataset: {len(train_dataset)}, Len Validation Dataset: {len(valid_dataset)}" )
    return train_loader, valid_loader, df

In [15]:
CONFIG["num_epochs"] = 20
CONFIG["learning_rate"] = 1e-4
CONFIG["n_tiles"] = 4
CONFIG["n_tiles_test"] = 4
CONFIG["tma_weight"] = 3

mlflow_experiment_id = get_or_create_experiment_id(os.environ['MLFLOW_TRACKING_PROJECTNAME'])
mlflow_experiment_id

print(f"Shape df_train: {df_train.shape}, Shape df_test: {df_test.shape}")
with mlflow.start_run(experiment_id=mlflow_experiment_id) as run:
    train_loader, valid_loader, df_train_fold = get_dataloaders(df_train.copy(), TRAIN_DIR, CONFIG, data_transforms, n_tiles=CONFIG["n_tiles"], tma_weight=CONFIG["tma_weight"])

    model = UBCModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=CONFIG["checkpoint_path"])
    # model.load_state_dict(torch.load(CONFIG["checkpoint_path"]))
    model.to(CONFIG['device']);

    optimizer = get_optimizer(CONFIG["optimizer"], model)
    scheduler = fetch_scheduler(optimizer, CONFIG)
    _, _, _, save_model_path = train_model(model, train_loader, valid_loader, optimizer, criterion, CONFIG["device"], CONFIG["num_epochs"], scheduler)
    model.load_state_dict(torch.load(save_model_path))
    
    print("Validate on Holdout Set:")
    df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])
    df_test_file_path = "df_test_results.csv"
    df_test.to_csv(df_test_file_path, index=False)
    mlflow.log_params(CONFIG)
    mlflow.pytorch.log_model(model, "model")
    mlflow.log_params({"model_path": save_model_path})
    mlflow.log_artifact(df_test_file_path)
    print_logged_info(mlflow.get_run(run_id=run.info.run_id))



Shape df_train: (430, 8), Shape df_test: (108, 8)
Len Train Dataset: 1548, Len Validation Dataset: 172


  model = create_fn(


Training model: model_epochs20_bs8_optadam_schedCosineAnnealingLR_lr0.0001_wd0.0001
Path for saving model: best_model_checkpoint2023-12-05_10-22-34.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 1/20 - Train loss: 1.4336, Validation loss: 1.0133, Validation acc: 0.6163, Balanced acc: 0.6596, Weighted F1-Score: 0.6182
Validation loss decreased (inf --> 1.013250). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 2/20 - Train loss: 1.1020, Validation loss: 0.8246, Validation acc: 0.6802, Balanced acc: 0.7194, Weighted F1-Score: 0.6737
Validation loss decreased (1.013250 --> 0.824601). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 3/20 - Train loss: 0.9420, Validation loss: 0.6725, Validation acc: 0.6860, Balanced acc: 0.7394, Weighted F1-Score: 0.6814
Validation loss decreased (0.824601 --> 0.672523). Saving model ...
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 4/20 - Train loss: 0.8457, Validation loss: 0.7179, Validation acc: 0.6570, Balanced acc: 0.6791, Weighted F1-Score: 0.6474
EarlyStopping counter: 1 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 5/20 - Train loss: 0.7446, Validation loss: 0.8300, Validation acc: 0.6453, Balanced acc: 0.6754, Weighted F1-Score: 0.6513
EarlyStopping counter: 2 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 6/20 - Train loss: 0.6933, Validation loss: 0.8457, Validation acc: 0.6744, Balanced acc: 0.6799, Weighted F1-Score: 0.6748
EarlyStopping counter: 3 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 7/20 - Train loss: 0.6811, Validation loss: 0.7291, Validation acc: 0.7093, Balanced acc: 0.7280, Weighted F1-Score: 0.7071
EarlyStopping counter: 4 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 8/20 - Train loss: 0.6493, Validation loss: 0.7646, Validation acc: 0.6221, Balanced acc: 0.6686, Weighted F1-Score: 0.6282
EarlyStopping counter: 5 out of 6
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch 9/20 - Train loss: 0.6116, Validation loss: 0.8803, Validation acc: 0.7035, Balanced acc: 0.7269, Weighted F1-Score: 0.7046
EarlyStopping counter: 6 out of 6
Early stopping
Validate on Holdout Set:
Test-Dataset Size: 432


  0%|          | 0/54 [00:00<?, ?it/s]

Test Accuracy: 0.5925925925925926
Balanced Accuracy: 0.6327777777777779
Confusion Matrix: [[62  8  4  2  4]
 [ 7 45 12 12 24]
 [15 36 99 22  8]
 [ 6  2  7 19  2]
 [ 0  2  1  2 31]]




run_id: 056709bed7df41a4b415add97ebf86bc
artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/data', 'model/python_env.yaml', 'model/requirements.txt']
params: {'model_path': 'best_model_checkpoint2023-12-05_10-22-34.pth', 'is_submission': 'False', 'weighted_loss': 'True', 'datetime_now': '2023-12-05_10-22-00', 'n_fold': '5', 'test_fold': '0', 'seed': '42', 'img_size': '512', 'model_name': 'tf_efficientnet_b0_ns', 'checkpoint_path': '/kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth', 'num_classes': '5', 'train_batch_size': '8', 'valid_batch_size': '8', 'n_tiles': '4', 'n_tiles_test': '4', 'device': 'cuda:0', 'num_epochs': '20', 'early_stopping': 'True', 'patience': '6', 'optimizer': 'adam', 'scheduler': 'CosineAnnealingLR', 'min_lr': '1e-06', 'T_max': '10', 'momentum': '0.9', 'weight_decay': '0.0001', 'learning_rate': '0.0001', 'tma_weight': '3'}
metrics: {'balanced_acc': 0.668607843137255, 'epoch': 7.0, 'train_loss': 0.649328457639199, 'valid_acc'

In [16]:
"""
model = UBCModel(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=None)
model.load_state_dict(torch.load("/kaggle/input/effnet-version-28/best_model_checkpoint2023-11-21_15-47-39.pth"))
model.to(CONFIG['device']);
df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])
"""

'\nmodel = UBCModel(CONFIG[\'model_name\'], CONFIG[\'num_classes\'], pretrained=False , checkpoint_path=None)\nmodel.load_state_dict(torch.load("/kaggle/input/effnet-version-28/best_model_checkpoint2023-11-21_15-47-39.pth"))\nmodel.to(CONFIG[\'device\']);\ndf_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1, n_tiles=CONFIG["n_tiles_test"])\n'