In [55]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [72]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import cv2
import pickle
import pytorch_lightning as pl
import torchvision.transforms as T

from PIL import Image
from pathlib import Path
from pytorch_lightning.callbacks import TQDMProgressBar
from tqdm.notebook import tqdm
from typing import List, Union, Callable

from lib.datamodule import ImageNetModule, CatStudyModule
from lib.model import ImageNetModel

# Evaluate a cat classifier

This notebook evaluates a classifier trained with `07_train_model_catstudy.ipynb`.

In [73]:
# Checkpoint to evaluate; point this at the training run you want to score.
checkpoint = 'checkpoints/ImageNetModel_Pilot_epoch14_val_acc0.73.ckpt'

# `load_from_checkpoint` is a classmethod, so it is called on the class itself.
# Calling it on `ImageNetModel()` first builds a model that is thrown away
# immediately, and newer Lightning versions refuse instance calls altogether.
model = ImageNetModel.load_from_checkpoint(checkpoint)



In [75]:
# Root folder of the study images and the three per-split subfolders below it.
img_dir = Path('image_files')
excluded_dir, experimental_dir, recommender_dir = (
    img_dir / split_name
    for split_name in ('Excluded', 'Experimental_study_set', 'Recommender_test_set')
)

In [76]:
# Data module rooted at `img_dir`; the active split is selected later through
# `dm.set_split(...)`.
dm = CatStudyModule(
    img_dir=img_dir
)


# Single-device trainer used only for `test`/`predict`, hence no logger.
# `accelerator='auto'` still selects the GPU when one is available, but lets
# the notebook run on machines without CUDA instead of failing at construction.
trainer = pl.Trainer(
    fast_dev_run=False,
    devices=1,
    accelerator='auto',
    logger=False)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [77]:
# Maps the integer class index to its human-readable label; the position in
# the tuple is the class index.
IDX_TO_LABEL = dict(enumerate(('Domestic cat', 'Wild cat', 'Big cat')))

In [84]:
# Aggregate loss/accuracy on the 'Excluded' split.
dm.set_split('Excluded')
excluded_loader = dm.predict_dataloader()

trainer.test(model, dataloaders=excluded_loader)

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/acc            0.4333333373069763
        test/loss           1.8983571529388428
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test/loss': 1.8983571529388428, 'test/acc': 0.4333333373069763}]

In [85]:
# Per-image predictions for the 'Excluded' split, exported to CSV.
dm.set_split('Excluded')

# collect predictions (one entry per batch); the unpacking below reads element
# 0 of each entry as the scores and element 1 as the predicted class indices
preds = trainer.predict(model, dataloaders=dm.predict_dataloader())

# combine predictions across batches
y_hats = torch.cat([p[1] for p in preds])
y_probs = torch.cat([p[0] for p in preds])

# NOTE(review): predictions are matched to `samples` purely by position, which
# assumes the predict dataloader walks the dataset in order — confirm.
val_dataset = dm.excluded_dataset
val_res = []
for idx, (yhat, score) in enumerate(zip(y_hats, y_probs)):
    path, ytrue = val_dataset.samples[idx]
    # The image itself is not needed for the table, so it is no longer loaded.
    val_res.append([
        Path(path).name,  # full file name, whatever its length
        IDX_TO_LABEL[ytrue],
        IDX_TO_LABEL[yhat.item()],
        np.round(score.item(), decimals=4),
    ])


val_df = pd.DataFrame(val_res, columns=['Filename', 'True', 'Pred', 'Score'])
val_df.to_csv('CatStudyFollowUp_Excluded_Dataset.csv', index=False)
val_df

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Predicting: 0it [00:00, ?it/s]

Unnamed: 0,Filename,True,Pred,Score
0,102.jpg,Domestic cat,Wild cat,0.8826
1,108.jpg,Domestic cat,Wild cat,0.8279
2,112.jpg,Domestic cat,Domestic cat,1.0
3,117.jpg,Domestic cat,Domestic cat,1.0
4,119.jpg,Domestic cat,Big cat,0.9605
5,202.jpg,Wild cat,Wild cat,1.0
6,204.jpg,Wild cat,Wild cat,0.9201
7,213.jpg,Wild cat,Wild cat,1.0
8,215.jpg,Wild cat,Big cat,1.0
9,217.jpg,Wild cat,Big cat,0.9998


In [86]:
# Aggregate loss/accuracy on the 'Recommender_test_set' split.
dm.set_split('Recommender_test_set')
recommender_loader = dm.predict_dataloader()

trainer.test(model, dataloaders=recommender_loader)

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/acc            0.7333333492279053
        test/loss            1.058653473854065
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test/loss': 1.058653473854065, 'test/acc': 0.7333333492279053}]

In [87]:
# Per-image predictions for the 'Recommender_test_set' split, exported to CSV.
dm.set_split('Recommender_test_set')

# collect predictions (one entry per batch); the unpacking below reads element
# 0 of each entry as the scores and element 1 as the predicted class indices
preds = trainer.predict(model, dataloaders=dm.predict_dataloader())

# combine predictions across batches
y_hats = torch.cat([p[1] for p in preds])
y_probs = torch.cat([p[0] for p in preds])

# NOTE(review): predictions are matched to `samples` purely by position, which
# assumes the predict dataloader walks the dataset in order — confirm.
val_dataset = dm.validation_dataset
val_res = []
for idx, (yhat, score) in enumerate(zip(y_hats, y_probs)):
    path, ytrue = val_dataset.samples[idx]
    # The image itself is not needed for the table, so it is no longer loaded.
    val_res.append([
        Path(path).name,  # full file name, whatever its length
        IDX_TO_LABEL[ytrue],
        IDX_TO_LABEL[yhat.item()],
        np.round(score.item(), decimals=4),
    ])


val_df = pd.DataFrame(val_res, columns=['Filename', 'True', 'Pred', 'Score'])
val_df.to_csv('CatStudyFollowUp_Validation_Dataset.csv', index=False)
val_df

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Predicting: 0it [00:00, ?it/s]

Unnamed: 0,Filename,True,Pred,Score
0,101.jpg,Domestic cat,Domestic cat,1.0
1,104.jpg,Domestic cat,Big cat,0.9996
2,106.jpg,Domestic cat,Domestic cat,1.0
3,109.jpg,Domestic cat,Wild cat,1.0
4,110.jpg,Domestic cat,Wild cat,0.9991
5,111.jpg,Domestic cat,Wild cat,1.0
6,113.jpg,Domestic cat,Domestic cat,1.0
7,118.jpg,Domestic cat,Wild cat,1.0
8,120.jpg,Domestic cat,Domestic cat,1.0
9,205.jpg,Wild cat,Wild cat,0.8195


In [88]:
# Aggregate loss/accuracy on the 'Experimental_study_set' split.
dm.set_split('Experimental_study_set')
experimental_loader = dm.predict_dataloader()

trainer.test(model, dataloaders=experimental_loader)

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test/acc            0.7333333492279053
        test/loss           1.8325761556625366
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test/loss': 1.8325761556625366, 'test/acc': 0.7333333492279053}]

In [89]:
# Per-image predictions for the 'Experimental_study_set' split, exported to CSV.
dm.set_split('Experimental_study_set')

# collect predictions (one entry per batch); the unpacking below reads element
# 0 of each entry as the scores and element 1 as the predicted class indices
preds = trainer.predict(model, dataloaders=dm.predict_dataloader())

# combine predictions across batches
y_hats = torch.cat([p[1] for p in preds])
y_probs = torch.cat([p[0] for p in preds])

# NOTE(review): predictions are matched to `samples` purely by position, which
# assumes the predict dataloader walks the dataset in order — confirm.
val_dataset = dm.experimental_dataset
val_res = []
for idx, (yhat, score) in enumerate(zip(y_hats, y_probs)):
    path, ytrue = val_dataset.samples[idx]
    # The image itself is not needed for the table, so it is no longer loaded.
    val_res.append([
        Path(path).name,  # full file name, whatever its length
        IDX_TO_LABEL[ytrue],
        IDX_TO_LABEL[yhat.item()],
        np.round(score.item(), decimals=4),
    ])


val_df = pd.DataFrame(val_res, columns=['Filename', 'True', 'Pred', 'Score'])
val_df.to_csv('CatStudyFollowUp_Experimental_Dataset.csv', index=False)
val_df

You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 2g.10gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]


Predicting: 0it [00:00, ?it/s]

Unnamed: 0,Filename,True,Pred,Score
0,103.jpg,Domestic cat,Domestic cat,0.9922
1,105.jpg,Domestic cat,Domestic cat,1.0
2,107.jpg,Domestic cat,Big cat,0.9997
3,114.jpg,Domestic cat,Domestic cat,1.0
4,115.jpg,Domestic cat,Domestic cat,1.0
5,116.jpg,Domestic cat,Domestic cat,0.7583
6,121.jpg,Domestic cat,Domestic cat,1.0
7,201.jpg,Wild cat,Big cat,1.0
8,203.jpg,Wild cat,Wild cat,0.9363
9,208.jpg,Wild cat,Wild cat,1.0


In [90]:
# Reload the per-split prediction tables written by the cells above.
df_excluded, df_experimental, df_validation = map(pd.read_csv, (
    'CatStudyFollowUp_Excluded_Dataset.csv',
    'CatStudyFollowUp_Experimental_Dataset.csv',
    'CatStudyFollowUp_Validation_Dataset.csv',
))

# `df_study` stacks the experimental and validation tables; `df_comb` stacks
# all three, excluded rows first.
df_study = pd.concat([df_experimental, df_validation])
df_comb = pd.concat([df_excluded, df_experimental, df_validation])

In [91]:
# Overall accuracy: share of rows in the combined table whose predicted label
# equals the true label.
comb_correct = df_comb['True'].eq(df_comb['Pred'])
sum(comb_correct) / len(comb_correct)

0.6333333333333333

In [92]:
# Recommender test set accuracy: share of rows whose predicted label equals
# the true label.
validation_correct = df_validation['True'].eq(df_validation['Pred'])
sum(validation_correct) / len(validation_correct)

0.7333333333333333

In [93]:
# Experimental study set accuracy: share of rows whose predicted label equals
# the true label.
experimental_correct = df_experimental['True'].eq(df_experimental['Pred'])
sum(experimental_correct) / len(experimental_correct)

0.7333333333333333

In [94]:
# Accuracy over the recommender and experimental sets together: share of rows
# whose predicted label equals the true label.
study_correct = df_study['True'].eq(df_study['Pred'])
sum(study_correct) / len(study_correct)

0.7333333333333333