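# Evaluate Model

Run a trained classifier checkpoint over the images listed in the input CSV and save the predictions to `./results/test_results.csv`.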

In [1]:
import itertools
import os
import shutil
import subprocess
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from typing import List

from typing_extensions import override

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import lightning as pl
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor, EarlyStopping, RichProgressBar
from lightning.pytorch.loggers import WandbLogger

import timm

from tqdm import tqdm

import monai as mn
from transforms.Transform4ClassifierBase import Transform4ClassifierBase
from models.ClassifierBase import Classifier

# Fixed seed passed to Lightning's seeding helper so repeated runs are repeatable.
SEED = 5566
pl.seed_everything(SEED)
# 'medium' trades some float32 matmul precision for speed.
torch.set_float32_matmul_precision('medium')

Seed set to 5566


In [2]:
def get_data_dict_part(df_part):
    """Build the list of sample dicts for one chunk of the input table.

    Important! Modify this function: the three constants below are placeholders
    that must be filled in before this cell can run.

    Args:
        df_part: Table chunk that is indexed positionally via ``.iloc``; each
            row must provide the column named by ``IMG_PATH_COLUMN_NAME``.

    Returns:
        A list with one dict per row. Both ``'img'`` and ``"paths"`` hold the
        same string: ``BASE_PATH``, a ``/`` separator, and the row's image path.
    """
    
    BASE_PATH = # edit: root directory prepended to every image path
    LABEL_COLUMN_NAME = # edit: label column name (not read by the code below)
    IMG_PATH_COLUMN_NAME = # edit: column holding each image's path, appended to BASE_PATH
    
    data_dict = list()
    for i in tqdm(range(len(df_part)), desc="Processing part"):
        row = df_part.iloc[i]

        # 'img' and "paths" start out identical; presumably the transforms turn
        # 'img' into image data while "paths" stays a string -- TODO confirm.
        data_dict.append({
            'img':f'{BASE_PATH}/'+row[f"{IMG_PATH_COLUMN_NAME}"],
            "paths": f'{BASE_PATH}/'+row[f"{IMG_PATH_COLUMN_NAME}"]
        })
    
    return data_dict

def get_data_dict(df, num_cores=2):
    """Build the sample dicts for every row of ``df`` using worker processes.

    The rows are divided into ``num_cores`` consecutive chunks, each chunk is
    converted by ``get_data_dict_part`` in its own process, and the per-chunk
    lists are concatenated in the original row order.

    Args:
        df: Input DataFrame with one row per sample.
        num_cores: Number of chunks and of worker processes.

    Returns:
        A single flat list of sample dicts, one per row of ``df``.
    """
    # Split row positions instead of the DataFrame itself: np.array_split on a
    # DataFrame goes through the deprecated DataFrame.swapaxes and emits a
    # warning. Splitting positions yields the same chunk sizes.
    position_chunks = np.array_split(np.arange(len(df)), num_cores)
    parts = [df.iloc[positions] for positions in position_chunks]

    with ProcessPoolExecutor(num_cores) as executor:
        # Materialize inside the context so worker errors surface here.
        data_dicts = list(executor.map(get_data_dict_part, parts))

    return list(itertools.chain.from_iterable(data_dicts))

### Set parameters

In [3]:
# IMPORTANT BEFORE PROCEEDING --> DO YOU WANT TO DELETE CACHE???
# When True, the next cell removes MONAI_CACHE_DIR before the dataset is built.
DELETE_CACHE = False

INPUT = './Test.csv' # edit: CSV file read into the evaluation DataFrame

# timm model identifier passed to the Classifier constructor.
TIMM_MODEL = "hf-hub:timm/convnext_base.fb_in22k_ft_in1k"

PROJECT =  # edit: project name (not referenced elsewhere in this notebook)
TEST_NAME =  # edit: run name; used to build the cache directory below
MONAI_CACHE_DIR = f'./cache/{TEST_NAME}/eval' # edit: directory for MONAI's persistent dataset cache
IMG_SIZE = 256 # edit: size argument given to Transform4ClassifierBase
BATCH_SIZE = 64 # edit: batch size for the DataLoader and the Classifier
PRECISION = 'bf16-mixed' 
WEIGHT_PATH = './weights/xxx.ckpt' # edit: checkpoint file whose 'state_dict' is loaded into the model

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous; it is a
# debugging aid and slows inference -- confirm it is still wanted here.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# NOTE(review): this is set after torch was imported; it only takes effect if CUDA
# has not been initialized yet -- confirm the expected GPU is the one being used.
os.environ["CUDA_VISIBLE_DEVICES"] = '0' # edit: GPU index(es) visible to this process

In [4]:
# Optionally wipe MONAI's persistent cache so samples are re-processed from scratch.
if DELETE_CACHE:
    if os.path.exists(MONAI_CACHE_DIR):
        # shutil.rmtree raises if the removal fails, so the success message
        # below is only printed when the directory was actually deleted
        # (the previous `rm -rf` call ignored the exit status).
        shutil.rmtree(MONAI_CACHE_DIR)
        print(f"MONAI's {MONAI_CACHE_DIR} cache directory removed successfully!")
    else:
        print(f"MONAI's {MONAI_CACHE_DIR} cache directory does not exist!")

MONAI's /home/fli40/Data/Frank/cache/Laterality/eval cache directory removed successfully!


### Read input file

In [5]:
# Load the table of samples to evaluate from the CSV configured as INPUT.
df = pd.read_csv(INPUT) 

In [None]:
# Show the loaded DataFrame as the cell output for a quick visual check.
df

In [7]:
# Build the list of per-sample dicts ('img' / "paths") for evaluation

eval_dict = get_data_dict(df)

'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.
Processing part: 100%|██████████| 126469/126469 [00:02<00:00, 42592.57it/s]
Processing part: 100%|██████████| 126469/126469 [00:02<00:00, 42944.82it/s]


### Model setup

In [8]:
# Transforms: the `predict` pipeline of the project's transform class,
# built for the configured image size.
eval_transforms = Transform4ClassifierBase(IMG_SIZE).predict

# Dataset: MONAI PersistentDataset over the evaluation samples, using
# MONAI_CACHE_DIR as its cache directory.
eval_ds = mn.data.PersistentDataset(
    data=eval_dict,
    transform=eval_transforms,
    cache_dir=f"{MONAI_CACHE_DIR}",
)

# Data loader: keep the sample order (no shuffling) and keep the final
# partial batch so every sample is loaded exactly once.
eval_dl = DataLoader(
    eval_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    drop_last=False,
    persistent_workers=True,
)

# Model: project Classifier wrapping the configured timm backbone, without EMA.
model = Classifier(
    TIMM_MODEL=TIMM_MODEL,
    BATCH_SIZE=BATCH_SIZE,
    use_ema=False,
)

In [9]:
# Load checkpoint

# map_location='cpu' makes loading independent of the device the checkpoint was
# saved from (which may not exist on this machine); load_state_dict then copies
# the tensors into the model's own parameters.
# NOTE(security): torch.load unpickles the file and weights_only is left at its
# default here, so only load checkpoints from a trusted source. Consider
# weights_only=True once the checkpoint is confirmed to load that way.
ckpt = torch.load(WEIGHT_PATH, map_location='cpu')['state_dict']
model.load_state_dict(ckpt)

You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


<All keys matched successfully>

In [None]:
# SPOT CHECK: show three randomly picked samples after the evaluation transforms,
# printing each sample's path above its image.
test_ds = mn.data.Dataset(data=eval_dict, transform=eval_transforms)

for _ in range(3):
    random_i = np.random.randint(0, len(test_ds))
    for data_ in test_ds[random_i:random_i + 1]:
        print(f"{data_['paths']}")

        # Drop singleton dimensions, then rotate by 90 degrees and flip
        # vertically before displaying in grayscale.
        img = np.squeeze(np.array(data_['img']))
        img = np.flipud(np.rot90(img))
        plt.imshow(img, cmap='gray')
        plt.show()

### Define Callbacks

In [11]:
# Rich-based progress bar callback handed to the Trainer below.
progress_bar = RichProgressBar()

### Evaluation

In [12]:
# instantiate trainer

# Pass PRECISION explicitly: it is defined in the parameters cell but was never
# handed to the Trainer, so the configured precision was silently ignored.
trainer = pl.Trainer(callbacks=[progress_bar], precision=PRECISION, inference_mode=True)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [None]:
# evaluate the model: run the Trainer's predict loop over the evaluation loader
# and keep the returned per-batch outputs for the export cell below

predictions = trainer.predict(model, dataloaders=eval_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]


Output()

Divide by zero (a_min == a_max)


In [None]:
# Concatenate the per-batch prediction outputs and write them to a CSV file.
# assumes each element of `predictions` is a pandas object -- TODO confirm
# against the Classifier's predict step.
RESULTS_PATH = './results/test_results.csv'

# Create the output directory first; to_csv raises OSError if it is missing.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)

df_prediction = pd.concat(predictions)
df_prediction.to_csv(RESULTS_PATH, index=False)