# Introduction 

**This is a basic CNN Model training notebook**

It is based on: 
- Thumbnail images
- Basic data transformation (using Albumentation):
    - resizing images to 512x512
    - normalizing pixel values
- CNN Architecture


**Todos:**

- Learn about Dataset & DataLoader
- add augmentations (albumentation)
- gem pooling

In [1]:
!pip install --quiet torch_optimizer

In [2]:
!pip install --quiet mlflow dagshub

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.3.1 requires dacite>=1.8, but you have dacite 1.6.0 which is incompatible.
ydata-profiling 4.3.1 requires scipy<1.11,>=1.4.1, but you have scipy 1.11.2 which is incompatible.[0m[31m
[0m

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input/tiles-of-cancer-2048px-scale-0-25'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:


import os
import gc
import cv2
import datetime
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt
from skimage import io


# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.cuda import amp
import torchvision
import torch_optimizer as torch_optimizer

import optuna
from optuna.trial import TrialState

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


from PIL import Image
from joblib import Parallel, delayed
from tqdm.auto import tqdm

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score

# For Image Models
import timm

import dagshub
from getpass import getpass
import mlflow.pytorch 
from mlflow import MlflowClient

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



In [5]:
os.environ["MLFLOW_TRACKING_USERNAME"]="Niggl0n"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "7a3590e8c5558d4598dacc7810befa70a4baac9e"
os.environ['MLFLOW_TRACKING_PROJECTNAME'] = "UBC_Cancer_Classification"
#dagshub.auth.add_app_token("7a3590e8c5558d4598dacc7810befa70a4baac9e")
mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME'] + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')

In [6]:
def get_or_create_experiment_id(name):
    exp = mlflow.get_experiment_by_name(name)
    if exp is None:
        exp_id = mlflow.create_experiment(name)
        return exp_id
    return exp.experiment_id

mlflow_experiment_id = get_or_create_experiment_id(os.environ['MLFLOW_TRACKING_PROJECTNAME'])
mlflow_experiment_id

'1'

In [7]:
CONFIG = {
    "is_submission": False,
    "weighted_loss": True,
    "datetime_now": datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 
    "n_fold":5, 
    "test_fold": 0,
    "seed": 42,
    "img_size": 512,
    "model_name": "tf_efficientnet_b0_ns",
    "num_classes": 5,
    "train_batch_size": 8,
    "valid_batch_size": 8,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "num_epochs": 50,
    "early_stopping": True,
    "patience": 10,
    "optimizer": 'adam',
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 30,
    "momentum": 0.9,
    "weight_decay": 1e-4,
}

## 1. Data Preparation

In [8]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TRAIN_DIR = '/kaggle/input/tiles-of-cancer-2048px-scale-0-25/'
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'

# ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'
# TMA_TRAIN_DIR = '/kaggle/input/UBC-OCEAN/train_images'

def get_train_file_path(df_train_row):
    return f"{TRAIN_DIR}/{df_train_row.image_id}_thumbnail.png"

def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"



In [9]:
train_images = sorted(glob.glob(f"{TRAIN_DIR}/*.png"))
df_train = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
print(df_train.shape)
df_train['file_path'] = df_train.apply(lambda row: get_train_file_path(row), axis=1)
# only consider WSI / Thumbnail images
#df_train = df_train[ 
#    df_train["file_path"].isin(train_images) ].reset_index(drop=True)
print(df_train.shape)

# encode to numericalt target
encoder = LabelEncoder()
df_train['target_label'] = encoder.fit_transform(df_train['label'])

# save encoder
with open("label_encoder_"+ CONFIG["datetime_now"] +".pkl", "wb") as fp:
    joblib.dump(encoder, fp)
    
# use stratified K Fold for crossvalidation 
skf = StratifiedKFold(n_splits=CONFIG['n_fold'], shuffle=True, random_state=CONFIG["seed"])

for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=df_train.target_label)):
    df_train.loc[val_ , "kfold"] = int(fold)
df_train.head()

(538, 5)
(538, 6)


Unnamed: 0,image_id,label,image_width,image_height,is_tma,file_path,target_label,kfold
0,4,HGSC,23785,20008,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,3.0
1,66,LGSC,48871,48195,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,3,2.0
2,91,HGSC,3388,3388,True,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,4.0
3,281,LGSC,42309,15545,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,3,2.0
4,286,EC,37204,30020,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,2.0


In [10]:
df_test = pd.read_csv("/kaggle/input/UBC-OCEAN/test.csv")
df_test['file_path'] = df_test['image_id'].apply(get_test_file_path)
df_test["target_label"] = 0 
df_test

Unnamed: 0,image_id,image_width,image_height,file_path,target_label
0,41,28469,16987,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,0


In [11]:
class CancerTilesDataset(Dataset):
    def __init__(
        self,
        df_data,
        path_img_dir: str =  '',
        transforms = None,
        mode: str = 'train',
        labels_lut = None,
        white_thr: int = 225,
        thr_max_bg: float = 0.2,
        split: float = 0.90
    ):
        assert os.path.isdir(path_img_dir)
        self.path_img_dir = path_img_dir
        self.transforms = transforms
        self.mode = mode
        self.white_thr = white_thr
        self.thr_max_bg = thr_max_bg
        self.split = split

        self.data = df_data
        self.labels_unique = sorted(self.data["label"].unique())
        self.labels_lut = labels_lut or {lb: i for i, lb in enumerate(self.labels_unique)}
        # shuffle data
        self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)

        # split dataset
        assert 0.0 <= self.split <= 1.0
        frac = int(self.split * len(self.data))
        self.data = self.data[:frac] if mode in ["train", "test"] else self.data[frac:]
        self.img_dirs = [glob.glob(os.path.join(path_img_dir, str(idx), "*.png")) for idx in self.data["image_id"]]
        #print(f"missing: {sum([not os.path.isfile(os.path.join(self.path_img_dir, im))
        #                       for im in self.img_names])}")
        # self.labels = list(self.data['label'])
        self.labels =  self.data.target_label.values

    def __getitem__(self, idx: int) -> tuple:
        random.shuffle(self.img_dirs[idx])
        for img_path in self.img_dirs[idx]:
            assert os.path.isfile(img_path), f"missing: {img_path}"
            tile = cv2.imread(img_path)
            tile = cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)
        
            # tile = np.array(Image.open(img_path))[..., :3]
            black_bg = np.sum(tile, axis=2) == 0
            tile[black_bg, :] = 255
            mask_bg = np.mean(tile, axis=2) > self.white_thr
            if np.sum(mask_bg) < (np.prod(mask_bg.shape) * self.thr_max_bg):
                break

        # augmentation
        if self.transforms:
            tile = self.transforms(image=tile)["image"]
        #print(f"img dim: {img.shape}")
        return {
            "image": tile,
            "label": torch.tensor(self.labels[idx], dtype=torch.long)
               }

    def __len__(self) -> int:
        return len(self.data)

In [12]:
data_transforms = {
    "train": A.Compose([
        A.RandomResizedCrop(CONFIG['img_size'], CONFIG['img_size'], scale=(0.8, 1.0)),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.25),
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.25),
        A.CoarseDropout(p=0.2),
        #A.Cutout(p=0.2),
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        ToTensorV2()], p=1.)
}

## 2. Model Creation

In [13]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'


class EfficientNetB5(nn.Module):
    '''
    EfficientNet B0 fine-tune.
    '''
    def __init__(self, model_name, num_classes, pretrained=False, checkpoint_path=None):
        '''
        Fine tune for EfficientNetB0
        Args
            n_classes : int - Number of classification categories.
            learnable_modules : tuple - Names of the modules to fine-tune.
        Return
            
        '''
        super(EfficientNetB5, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        """
        Forward function for the fine-tuned model
        Args
            x: 
        Return
            result
        """
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output


## 3. Training

In [14]:
class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0, path='checkpoint.pth', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decreases.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model to path {self.path}')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


In [15]:
def fetch_scheduler(optimizer):
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=CONFIG['T_max'], 
                                                   eta_min=CONFIG['min_lr'], verbose=False)
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=CONFIG['T_0'], 
                                                             eta_min=CONFIG['min_lr'], verbose=False)
    elif CONFIG['scheduler'] == 'ReduceLROnPlateau':
        scheduler =  ReduceLROnPlateau(optimizer, mode='min', factor=kwargs.get('factor', 0.1), patience=kwargs.get('patience', 5), verbose=False)
    elif CONFIG['scheduler'] == None:
        return None
        
    return scheduler

def get_optimizer(optimizer_name, model):
    if optimizer_name.lower() == "adam":
        CONFIG['learning_rate'] = 1e-4
        CONFIG['weight_decay'] = 1e-3
        optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'],  weight_decay=CONFIG['weight_decay'])
    elif optimizer_name.lower() == "sgd":
        CONFIG['learning_rate'] = 1e-3
        CONFIG['weight_decay'] = 1e-3
        CONFIG['momentum'] = 1e-3
        optimizer = optim.SGD(model.parameters(), lr=CONFIG['learning_rate'], momentum=CONFIG['momentum'], weight_decay=CONFIG['weight_decay'])
    elif optimizer_name.lower() == "radam":
        CONFIG['learning_rate'] = 1e-3
        CONFIG['weight_decay'] = 0
        CONFIG['betas'] = (0.9, 0.999)
        CONFIG['eps'] = 1e-8
        optimizer = torch_optimizer.RAdam(
            model.parameters(),
            lr= CONFIG['learning_rate'],
            betas=CONFIG['betas'],
            eps=CONFIG['eps'],
            weight_decay=CONFIG['weight_decay'],
        )
    else:
        raise ValueError("Invalid Optimizer given!")
    return optimizer
    

In [16]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler=None):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    model.train()
    train_loss = 0.0
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(images)
        
        # crossentropy loss
        loss = criterion(outputs, labels)
        # Focal Loss
        #criterion = FocalLoss(gamma=0.7)
        #m = torch.nn.Softmax(dim=-1)
        #loss = criterion(m(outputs), labels)
        
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        # Update learning rate using the scheduler
        if scheduler:
            scheduler.step()
        
        # Log the training loss to TensorBoard
        writer.add_scalar('loss/train_batch', loss.item(), epoch * len(train_loader) + step)
    
    train_loss /= len(train_loader.dataset)
    # Log the average training loss for the epoch to TensorBoard
    writer.add_scalar('loss/train_epoch', train_loss, epoch)
    # gc.collect()
    return train_loss

def validate_one_epoch(model, valid_loader, criterion, device, writer, epoch):
    model.eval()
    valid_loss = 0.0
    valid_acc = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        bar_val = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, data in bar_val:
            images = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)
            outputs = model(images)
            
            # crossentropy loss
            loss = criterion(outputs, labels)
            # Focal Loss
            #criterion = FocalLoss(gamma=0.7)
            #m = torch.nn.Softmax(dim=-1)
            #loss = criterion(m(outputs), labels)
        
            valid_loss += loss.item() * images.size(0)
            _, predicted = torch.max(model.softmax(outputs), 1)
            acc = torch.sum( predicted == labels )
            valid_acc  += acc.item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
            writer.add_scalar('loss/valid_batch', loss.item(), epoch * len(valid_loader) + step)
            writer.add_scalar('acc/valid_batch', acc.item(), epoch * len(valid_loader) + step)
    valid_loss /= len(valid_loader.dataset)
    valid_acc /= len(valid_loader.dataset)
    bal_acc = balanced_accuracy_score(all_labels, all_preds)
    # At the end of your validation loop:
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')

    # Logging to TensorBoard
    writer.add_scalar('loss/val_epoch', valid_loss, epoch)
    writer.add_scalar('acc/val_epoch', valid_acc, epoch)
    writer.add_scalar('balanced_acc/val_epoch', bal_acc, epoch)
    writer.add_scalar('F1/macro', macro_f1, epoch)
    writer.add_scalar('F1/micro', micro_f1, epoch)
    writer.add_scalar('F1/weighted', weighted_f1, epoch)
    # in order to put multiple lines within one graph
    #writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
    #                        'xcosx':i*np.cos(i/r),
    #                        'tanx': np.tan(i/r)}, i)
    return valid_loss, valid_acc, bal_acc, weighted_f1

def train_model(model, train_loader, valid_loader, optimizer, criterion, device, num_epochs, scheduler, save_model_path=None):
    model_name = "model_epochs" + str(CONFIG["num_epochs"]) + "_bs"+str(CONFIG["train_batch_size"] )+ "_opt" +CONFIG["optimizer"]+ "_sched" + CONFIG["scheduler"] + "_lr"+str(CONFIG["learning_rate"])+ "_wd" + str(CONFIG["weight_decay"])
    print(f"Training model: {model_name}")
    datetime_now =  datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not save_model_path:
        save_model_path = 'best_model_checkpoint' + datetime_now + '.pth'
    print(f"Path for saving model: {save_model_path}")
    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/fit/' + model_name)
    early_stopping = EarlyStopping(patience=CONFIG["patience"], verbose=True, path=save_model_path)
    #if scheduler_type:
    #    print(f"Define {scheduler_type} scheduler")
    #    scheduler = get_lr_scheduler(optimizer, scheduler_type, num_epochs=num_epochs)
    
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, writer, epoch, scheduler)
        valid_loss, valid_acc, bal_acc, weighted_f1 = validate_one_epoch(model, valid_loader, criterion, device, writer, epoch)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f}, Validation loss: {valid_loss:.4f}, Validation acc: {valid_acc:.4f}, Balanced acc: {bal_acc:.4f}, Weighted F1-Score: {weighted_f1:.4f}")
        
        # Log metrics for each epoch
        mlflow.log_metrics({
            'epoch': epoch,
            'train_loss': train_loss,
            'valid_loss': valid_loss,
            'valid_acc': valid_acc,
            'balanced_acc': bal_acc,
            'weighted_f1': weighted_f1
        }, step=epoch)
    
        # Call early stopping
        if CONFIG["early_stopping"]:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            
    writer.close()
    return train_loss, valid_loss, valid_acc, save_model_path
    # Load the last checkpoint with the best model
    #model.load_state_dict(torch.load('best_model_checkpoint.pth'))



In [17]:
def test_on_holdout(model, CONFIG, df_test, TRAIN_DIR=None, val_size=1.0):
    if not CONFIG["is_submission"]:
        test_dataset = CancerTilesDataset(df_test, TRAIN_DIR, transforms=data_transforms["valid"], mode="test", split=1.0)
        test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'], 
                                  num_workers=2, shuffle=False, pin_memory=True)

        preds = []
        labels_list = []
        test_acc = 0.0

        with torch.no_grad():
            bar = tqdm(enumerate(test_loader), total=len(test_loader))
            for step, data in bar: 
                # print(step)
                images = data['image'].to(CONFIG["device"], dtype=torch.float)
                labels = data['label'].to(CONFIG["device"], dtype=torch.long)

                batch_size = images.size(0)
                outputs = model(images)
                _, predicted = torch.max(model.softmax(outputs), 1)
                preds.append(predicted.detach().cpu().numpy() )
                labels_list.append(labels.detach().cpu().numpy() )
                acc = torch.sum(predicted == labels )
                test_acc  += acc.item()
        test_acc /= len(test_loader.dataset)
        preds = np.concatenate(preds).flatten()
        labels_list = np.concatenate(labels_list).flatten()
        pred_labels = encoder.inverse_transform( preds )
        
        # Calculate Balanced Accuracy
        bal_acc = balanced_accuracy_score(labels_list, preds)
        # Calculate Confusion Matrix
        conf_matrix = confusion_matrix(labels_list, preds)
        macro_f1 = f1_score(labels_list, preds, average='macro')

    
        print(f"Test Accuracy: {test_acc}")
        print(f"Balanced Accuracy: {bal_acc}")
        print(f"Confusion Matrix: {conf_matrix}")
        
        # add to validation dataframe
        df_test["pred"] = preds
        df_test["pred_labels"] = pred_labels
        
        mlflow.log_metrics({
            'test_acc': test_acc,
            'test_balanced_acc': bal_acc,
            'test_f1_score': macro_f1,
        })
        return df_test
    else:
        print("Skip validation on training set due to submission!")
        return None

In [18]:
def convert_dict_to_tensor(dict_):
    """Converts the values of a dict into a PyTorch tensor."""

    # Create a new PyTorch tensor
    tensor = torch.empty(len(dict_))

    # Iterate over the dict and for each key-value pair, convert the value to a PyTorch tensor and add it to the new tensor
    for i, (key, value) in enumerate(dict_.items()):
        tensor[i] = value

    # Return the new tensor
    return tensor

def get_class_weights(df_train):
    label_counts = df_train.target_label.value_counts().sort_index().to_dict()
    ratios_dict = {}
    for key,val in label_counts.items():
        ratios_dict[key] = val / df_train.shape[0]
    ratios_dict
    weights = {}
    sum_weights = 0
    for key, val in ratios_dict.items():
        weights[key] = 1 / val
        sum_weights +=  1 / val
    for key, val in weights.items():
        weights[key] = val / sum_weights
    weight_tensor = convert_dict_to_tensor(weights)
    return weight_tensor



### Training N-Fold Models

In [19]:
if CONFIG["weighted_loss"]:
    class_weights = get_class_weights(df_train).to(CONFIG['device'], dtype=torch.float)
    print(f"Class weights: {class_weights}")
else:
    class_weights=None
criterion = nn.CrossEntropyLoss(weight=class_weights)


Class weights: tensor([0.1538, 0.1228, 0.0686, 0.3239, 0.3310], device='cuda:0')


In [20]:
def get_dataloaders(df):
    # df_train = df[df["kfold"]!=fold].reset_index(drop=True)
    train_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["train"], mode="train")
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    valid_dataset = CancerTilesDataset(df_train, TRAIN_DIR, transforms=data_transforms["valid"], mode="valid")
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    print(f"Len Train Dataset: {len(train_dataset)}, Len Validation Dataset: {len(valid_dataset)}" )
    return train_loader, valid_loader, df_train


In [21]:
def print_logged_info(r):
    tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
    artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
    print(f"run_id: {r.info.run_id}")
    print(f"artifacts: {artifacts}")
    print(f"params: {r.data.params}")
    print(f"metrics: {r.data.metrics}")
    print(f"tags: {tags}")


In [22]:
# separate train and test dataset
df_test = df_train[df_train["kfold"]==CONFIG["test_fold"]].reset_index(drop=True)
df_train = df_train[df_train["kfold"]!=CONFIG["test_fold"]].reset_index(drop=True)



with mlflow.start_run(experiment_id=mlflow_experiment_id) as run:
    train_loader, valid_loader, df_train_fold = get_dataloaders(df_train.copy())

    checkpoint_path='//kaggle/input/tf-efficientnet-b0-aa-827b6e33-pth/tf_efficientnet_b0_aa-827b6e33.pth'
    model = EfficientNetB5(CONFIG['model_name'], CONFIG['num_classes'], pretrained=False , checkpoint_path=checkpoint_path)
    model.to(CONFIG['device']);

    optimizer = get_optimizer(CONFIG["optimizer"], model)
    scheduler = fetch_scheduler(optimizer)

    _, _, _, save_model_path = train_model(model, train_loader, valid_loader, optimizer, criterion, CONFIG["device"], CONFIG["num_epochs"], scheduler)
    model.load_state_dict(torch.load(save_model_path))

    print("Validate on Holdout Set:")
    df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1)
    df_test_file_path = "df_test_results.csv"
    df_test.to_csv(df_test_file_path, index=False)

    mlflow.log_params(CONFIG)
    mlflow.pytorch.log_model(model, "model")
    mlflow.log_artifact(save_model_path)
    mlflow.log_artifact(df_test_file_path)

print_logged_info(mlflow.get_run(run_id=run.info.run_id))

Len Train Dataset: 387, Len Validation Dataset: 43


  model = create_fn(


Training model: model_epochs50_bs8_optadam_schedCosineAnnealingLR_lr0.0001_wd0.001
Path for saving model: best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/50 - Train loss: 1.7240, Validation loss: 1.5662, Validation acc: 0.3953, Balanced acc: 0.2511, Weighted F1-Score: 0.3562
Validation loss decreased (inf --> 1.566175). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/50 - Train loss: 1.5146, Validation loss: 1.4381, Validation acc: 0.3023, Balanced acc: 0.2808, Weighted F1-Score: 0.2725
Validation loss decreased (1.566175 --> 1.438089). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/50 - Train loss: 1.4173, Validation loss: 1.3969, Validation acc: 0.4186, Balanced acc: 0.3403, Weighted F1-Score: 0.4014
Validation loss decreased (1.438089 --> 1.396941). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/50 - Train loss: 1.3672, Validation loss: 1.0875, Validation acc: 0.5814, Balanced acc: 0.5746, Weighted F1-Score: 0.5787
Validation loss decreased (1.396941 --> 1.087461). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/50 - Train loss: 1.3448, Validation loss: 1.1370, Validation acc: 0.5814, Balanced acc: 0.5960, Weighted F1-Score: 0.5658
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/50 - Train loss: 1.2050, Validation loss: 1.1477, Validation acc: 0.6279, Balanced acc: 0.5929, Weighted F1-Score: 0.6250
EarlyStopping counter: 2 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/50 - Train loss: 1.2010, Validation loss: 1.1084, Validation acc: 0.5349, Balanced acc: 0.5221, Weighted F1-Score: 0.5266
EarlyStopping counter: 3 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/50 - Train loss: 1.1367, Validation loss: 1.0511, Validation acc: 0.6047, Balanced acc: 0.5603, Weighted F1-Score: 0.5922
Validation loss decreased (1.087461 --> 1.051108). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/50 - Train loss: 1.1152, Validation loss: 1.0587, Validation acc: 0.5581, Balanced acc: 0.5286, Weighted F1-Score: 0.5431
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/50 - Train loss: 1.0594, Validation loss: 0.9769, Validation acc: 0.6047, Balanced acc: 0.5768, Weighted F1-Score: 0.6112
Validation loss decreased (1.051108 --> 0.976902). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 11/50 - Train loss: 1.0263, Validation loss: 0.9745, Validation acc: 0.6977, Balanced acc: 0.7554, Weighted F1-Score: 0.7080
Validation loss decreased (0.976902 --> 0.974500). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 12/50 - Train loss: 0.9791, Validation loss: 0.9583, Validation acc: 0.6512, Balanced acc: 0.6594, Weighted F1-Score: 0.6431
Validation loss decreased (0.974500 --> 0.958319). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 13/50 - Train loss: 0.9694, Validation loss: 0.8928, Validation acc: 0.5814, Balanced acc: 0.5917, Weighted F1-Score: 0.5947
Validation loss decreased (0.958319 --> 0.892842). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 14/50 - Train loss: 1.0062, Validation loss: 1.0044, Validation acc: 0.6279, Balanced acc: 0.6054, Weighted F1-Score: 0.6235
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 15/50 - Train loss: 0.8422, Validation loss: 0.8132, Validation acc: 0.6744, Balanced acc: 0.6794, Weighted F1-Score: 0.6669
Validation loss decreased (0.892842 --> 0.813174). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 16/50 - Train loss: 0.8554, Validation loss: 0.7996, Validation acc: 0.6744, Balanced acc: 0.7094, Weighted F1-Score: 0.6733
Validation loss decreased (0.813174 --> 0.799613). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 17/50 - Train loss: 0.8729, Validation loss: 0.9911, Validation acc: 0.5581, Balanced acc: 0.5810, Weighted F1-Score: 0.5308
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 18/50 - Train loss: 0.8309, Validation loss: 0.9509, Validation acc: 0.6279, Balanced acc: 0.6143, Weighted F1-Score: 0.6158
EarlyStopping counter: 2 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 19/50 - Train loss: 0.8554, Validation loss: 0.7593, Validation acc: 0.6512, Balanced acc: 0.7246, Weighted F1-Score: 0.6310
Validation loss decreased (0.799613 --> 0.759252). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 20/50 - Train loss: 0.8518, Validation loss: 0.9616, Validation acc: 0.6744, Balanced acc: 0.6102, Weighted F1-Score: 0.6734
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 21/50 - Train loss: 0.7696, Validation loss: 0.8870, Validation acc: 0.6279, Balanced acc: 0.6308, Weighted F1-Score: 0.6234
EarlyStopping counter: 2 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 22/50 - Train loss: 0.7560, Validation loss: 1.0373, Validation acc: 0.6047, Balanced acc: 0.5768, Weighted F1-Score: 0.6263
EarlyStopping counter: 3 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 23/50 - Train loss: 0.7927, Validation loss: 0.7636, Validation acc: 0.6744, Balanced acc: 0.6579, Weighted F1-Score: 0.6734
EarlyStopping counter: 4 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 24/50 - Train loss: 0.7958, Validation loss: 0.8420, Validation acc: 0.6047, Balanced acc: 0.6810, Weighted F1-Score: 0.5814
EarlyStopping counter: 5 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 25/50 - Train loss: 0.7368, Validation loss: 0.7800, Validation acc: 0.6744, Balanced acc: 0.7229, Weighted F1-Score: 0.6709
EarlyStopping counter: 6 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 26/50 - Train loss: 0.7224, Validation loss: 0.7486, Validation acc: 0.7209, Balanced acc: 0.7665, Weighted F1-Score: 0.7285
Validation loss decreased (0.759252 --> 0.748604). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 27/50 - Train loss: 0.7858, Validation loss: 0.7230, Validation acc: 0.7442, Balanced acc: 0.7865, Weighted F1-Score: 0.7385
Validation loss decreased (0.748604 --> 0.722971). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 28/50 - Train loss: 0.7370, Validation loss: 0.6924, Validation acc: 0.7209, Balanced acc: 0.7540, Weighted F1-Score: 0.7181
Validation loss decreased (0.722971 --> 0.692381). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 29/50 - Train loss: 0.6938, Validation loss: 0.7718, Validation acc: 0.6512, Balanced acc: 0.6817, Weighted F1-Score: 0.6355
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 30/50 - Train loss: 0.7518, Validation loss: 0.7758, Validation acc: 0.7442, Balanced acc: 0.7651, Weighted F1-Score: 0.7387
EarlyStopping counter: 2 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 31/50 - Train loss: 0.6953, Validation loss: 0.5629, Validation acc: 0.8140, Balanced acc: 0.8110, Weighted F1-Score: 0.8174
Validation loss decreased (0.692381 --> 0.562872). Saving model to path best_model_checkpoint2023-11-14_19-30-15.pth
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 32/50 - Train loss: 0.6899, Validation loss: 0.9786, Validation acc: 0.6047, Balanced acc: 0.6071, Weighted F1-Score: 0.5938
EarlyStopping counter: 1 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 33/50 - Train loss: 0.6551, Validation loss: 0.8169, Validation acc: 0.7209, Balanced acc: 0.6976, Weighted F1-Score: 0.7220
EarlyStopping counter: 2 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 34/50 - Train loss: 0.6162, Validation loss: 0.5713, Validation acc: 0.7674, Balanced acc: 0.7802, Weighted F1-Score: 0.7573
EarlyStopping counter: 3 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 35/50 - Train loss: 0.6404, Validation loss: 0.7926, Validation acc: 0.6512, Balanced acc: 0.7246, Weighted F1-Score: 0.6372
EarlyStopping counter: 4 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 36/50 - Train loss: 0.5909, Validation loss: 0.8963, Validation acc: 0.6977, Balanced acc: 0.6516, Weighted F1-Score: 0.6964
EarlyStopping counter: 5 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 37/50 - Train loss: 0.6461, Validation loss: 0.8027, Validation acc: 0.7674, Balanced acc: 0.7238, Weighted F1-Score: 0.7558
EarlyStopping counter: 6 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 38/50 - Train loss: 0.6480, Validation loss: 1.0555, Validation acc: 0.6047, Balanced acc: 0.6197, Weighted F1-Score: 0.5899
EarlyStopping counter: 7 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 39/50 - Train loss: 0.6278, Validation loss: 0.7086, Validation acc: 0.6977, Balanced acc: 0.7294, Weighted F1-Score: 0.6821
EarlyStopping counter: 8 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 40/50 - Train loss: 0.5880, Validation loss: 0.8409, Validation acc: 0.6512, Balanced acc: 0.6643, Weighted F1-Score: 0.6512
EarlyStopping counter: 9 out of 10
[INFO] Using GPU: Tesla P100-PCIE-16GB



  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 41/50 - Train loss: 0.6038, Validation loss: 0.9719, Validation acc: 0.6047, Balanced acc: 0.5768, Weighted F1-Score: 0.6058
EarlyStopping counter: 10 out of 10
Early stopping
Validate on Holdout Set:


  0%|          | 0/14 [00:00<?, ?it/s]

Test Accuracy: 0.6574074074074074
Balanced Accuracy: 0.6422222222222221
Confusion Matrix: [[14  1  3  0  2]
 [ 0 10  6  5  4]
 [ 2  2 35  4  2]
 [ 1  0  2  6  0]
 [ 0  1  1  1  6]]




run_id: 8f94a2414efd46ffbcc8f8ccaf3e37f2
artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/data', 'model/python_env.yaml', 'model/requirements.txt']
params: {'is_submission': 'False', 'weighted_loss': 'True', 'datetime_now': '2023-11-14_19-30-04', 'n_fold': '5', 'test_fold': '0', 'seed': '42', 'img_size': '512', 'model_name': 'tf_efficientnet_b0_ns', 'num_classes': '5', 'train_batch_size': '8', 'valid_batch_size': '8', 'device': 'cuda:0', 'num_epochs': '50', 'early_stopping': 'True', 'patience': '10', 'optimizer': 'adam', 'scheduler': 'CosineAnnealingLR', 'min_lr': '1e-06', 'T_max': '30', 'momentum': '0.9', 'weight_decay': '0.001', 'learning_rate': '0.0001'}
metrics: {'epoch': 40.0, 'train_loss': 0.603825959894392, 'valid_loss': 0.971877879874651, 'valid_acc': 0.604651162790698, 'balanced_acc': 0.576825396825397, 'weighted_f1': 0.605839231547017, 'test_acc': 0.657407407407407, 'test_balanced_acc': 0.642222222222222, 'test_f1_score': 0.606437193045889}
tags: {}


In [23]:
# model.load_state_dict(torch.load('/kaggle/working/best_model_checkpoint' + CONFIG["datetime_now"] + '.pth'))
df_test = test_on_holdout(model, CONFIG, df_test, TRAIN_DIR, val_size=1)
df_test

  0%|          | 0/14 [00:00<?, ?it/s]

Test Accuracy: 0.6481481481481481
Balanced Accuracy: 0.6448888888888888
Confusion Matrix: [[14  1  3  0  2]
 [ 0  7 12  3  3]
 [ 0  4 36  4  1]
 [ 1  0  3  5  0]
 [ 0  0  1  0  8]]


Unnamed: 0,image_id,label,image_width,image_height,is_tma,file_path,target_label,kfold,pred,pred_labels
0,431,HGSC,39991,40943,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,0.0,2,HGSC
1,1101,HGSC,26306,18403,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,0.0,0,CC
2,1943,CC,73730,34949,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,0,0.0,2,HGSC
3,2666,EC,53270,44031,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,0.0,2,HGSC
4,2706,HGSC,71289,22569,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,0.0,3,LGSC
...,...,...,...,...,...,...,...,...,...,...
103,63367,EC,62905,24783,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,0.0,3,LGSC
104,63429,EC,67783,29066,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,0.0,3,LGSC
105,63836,EC,17416,21934,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,1,0.0,0,CC
106,63941,HGSC,47123,36600,False,/kaggle/input/tiles-of-cancer-2048px-scale-0-2...,2,0.0,2,HGSC
