In [1]:
%reload_ext autoreload
%autoreload 2
## Process-level setup. The CUDA environment variables are assigned here,
## ahead of the `import torch` further down in this cell.
import os, sys
# Enumerate GPUs in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"  # only GPU 0 is exposed to this kernel
# Extend the module search path with the competition input directory.
sys.path.append("./prediction_models/input/prostate-cancer-grade-assessment/")
## Silence every Python warning for the rest of the session.
## NOTE(review): this also hides deprecation warnings from numpy/torch/sklearn.
import warnings
warnings.filterwarnings("ignore")

## general package
import random
from tqdm import tqdm_notebook as tqdm
import numpy as np
import pandas as pd
import torch
from torch.utils.data import *
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from torch.utils.data import dataloader
# device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu") 
# print(device)

## customized package
from input.inputPipeline_stiching import PandaPatchDataset, crossValDataloader, data_transform
from model.resnext_ssl_stiching import Model_Infer as Model

In [2]:
# Directory handed to PandaPatchDataset as its second positional argument in the cells below.
data_dir = './input/panda-36x256x256-tiles-data-spine-loc/train'
# csv_file = './input/panda-36x256x256-tiles-data-spine/4_fold_whole_train.csv'
# SAMPLE = './input/prostate-cancer-grade-assessment/sample_submission.csv'

In [25]:
def _load_models(weight_paths):
    """Build one inference-ready ``Model`` per checkpoint path.

    Args:
        weight_paths: iterable of paths to checkpoints whose content is a
            state dict accepted by ``Model.load_state_dict``.

    Returns:
        list of models in the same order as ``weight_paths``; each one is
        cast to float32, switched to eval mode and moved to the GPU.
    """
    loaded = []
    for path in weight_paths:
        # Deserialize onto the CPU first so loading does not depend on the GPU
        # index the checkpoint was saved from; the model is moved to the
        # visible GPU right after.
        state_dict = torch.load(path, map_location='cpu')
        model = Model()
        model.load_state_dict(state_dict)
        model.float()
        model.eval()
        model.cuda()
        loaded.append(model)
    return loaded


# Checkpoints with index 0 and 3 for the "_pre_rad" run (-> models_r) and the
# "_bin" run (-> models_k).
weights_r = [f'./train/weights/Resnext50_36patch_overlook_cos_spine_loc_pre_rad/Resnext50_36patch_overlook_cos_spine_loc_pre_rad_{i}_best.pth.tar' for i in [0,3]]
weights_k = [f'./train/weights/Resnext50_medreso_36patch_adam_cosine_bin/Resnext50_medreso_36patch_adam_cosine_bin_{i}_best.pth.tar' for i in [0,3]]

# NOTE: the lists are positional, so models_r[1] / models_k[1] hold checkpoint 3,
# not checkpoint 1. Code that indexes them by fold number is only aligned for fold 0.
models_r = _load_models(weights_r)
models_k = _load_models(weights_k)

In [19]:
# Inference settings shared by the cells below.
# sz: third positional argument of PandaPatchDataset.
sz = 256
# bs: batch size given to crossValDataloader and DataLoader.
bs = 1
# folds: not referenced by any cell visible in this notebook.
folds = 4
# tsfm: `transform` argument of PandaPatchDataset (None here).
tsfm = None

In [20]:
# Accumulators filled once per batch: image names, predicted grades, true grades.
names, preds, grades = [], [], []

# Radboud slides are evaluated first.
csv_file = './input/panda-36x256x256-tiles-data-spine/radboud_4_fold_train.csv'
dataset = PandaPatchDataset(csv_file, data_dir, sz, transform=tsfm, N=36, rand=False)
crossValData = crossValDataloader(csv_file, dataset, bs)

# Inference only, so the whole sweep runs with gradient tracking disabled.
with torch.no_grad():
    for fold in [0]:
        # Only the second loader returned for this fold is used.
        _, dataloader0 = crossValData(fold)
        model = models_r[fold]
        for idx, data in enumerate(tqdm(dataloader0)):
            img = data['img'].cuda()
            name, provider, grade = data['name'], data['datacenter'], data['isup_grade']
            logits = model(img)['out']
            # Predicted grade: rounded sum of the sigmoid outputs along dim 1.
            pred_grade = logits.sigmoid().sum(1).round()
            preds.append(pred_grade.cpu().numpy().astype(int))
            names.append(name)
            # True grade: sum of the target tensor along dim 1.
            grades.append(grade.sum(1).cpu().numpy().astype(int))

# The dataset and loader objects are not needed past this point.
del dataset, crossValData, dataloader0

# ## next do karolinska
# csv_file = './input/panda-36x256x256-tiles-data-spine/karolinska_4_fold_train.csv'
# dataset = PandaPatchDataset(csv_file, data_dir, sz, transform=tsfm, N = 36, rand = False)
# crossValData = crossValDataloader(csv_file, dataset, bs)

# for fold in [0,1,2,3]:
#     ## Model inference
#     with torch.no_grad():
#         _, dataloader0 = crossValData(fold)
#         model = models_k[fold]
#         for idx, data in enumerate(tqdm(dataloader0)):
#             img, name, provider, grade = data['img'], data['name'], data['datacenter'], data['isup_grade']
#             img = img.cuda()
#             logits = model(img)['out']
#             preds.append(logits.sigmoid().sum(1).round().cpu().numpy().astype(int))
#             names.append(name)
#             grades.append(grade.sum(1).cpu().numpy().astype(int))
# #             if idx > 200:
# #                 break

HBox(children=(FloatProgress(value=0.0, max=1267.0), HTML(value='')))




In [21]:
# Sanity check: both lists are appended to once per batch, so the lengths should match.
len(grades),len(preds)

(1267, 1267)

In [22]:
# Collapse the per-batch entries of each accumulator into one flat array.
names, grades, preds = (np.concatenate(batches) for batches in (names, grades, preds))

In [23]:
# Quadratic-weighted Cohen's kappa between true and predicted grades.
kappa = cohen_kappa_score(grades, preds, weights='quadratic')
cm = confusion_matrix(grades, preds)
# Row-normalise the confusion matrix. keepdims=True keeps the row totals as a
# column vector so every row is divided by its own total; a flat vector would
# broadcast along the columns instead. The builtin float replaces the np.float
# alias, which was removed in NumPy 1.24.
row_totals = cm.sum(axis=1, keepdims=True).astype(float)
# Rows without any true sample stay at zero instead of turning into NaN.
ncm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals > 0)
print("The average kappa score is {}.".format(kappa))

The average kappa score is 0.8655008358879217.


### Whole dataset (for reference only)

In [38]:
# csv_file = './input/panda-36x256x256-tiles-data-spine/4_fold_train.csv'
# Dataset and sequential (unshuffled) loader built from the full training CSV.
csv_file = './input/prostate-cancer-grade-assessment/train.csv'
dataset = PandaPatchDataset(
    csv_file,
    data_dir,
    sz,
    transform=tsfm,
    N=36,
    rand=False,
)
dataloader = DataLoader(dataset, batch_size=bs, shuffle=False, num_workers=4)

In [39]:
names = []   # image names, one entry per batch
grades = []  # ground-truth grades, one array per batch

# Ensemble member i of either provider writes into preds[f'{i}'], so both
# provider-specific model lists must have the same length.
n_members = len(models_k)
if len(models_r) != n_members:
    raise ValueError(
        "models_r and models_k must hold the same number of models, got {} and {}".format(
            len(models_r), n_members))
preds = {f'{i}': [] for i in range(n_members)}

# Number of batches to evaluate. 101 reproduces the previous hard-coded
# `idx >= 100` break; set to None to run over the whole loader.
max_batches = 101

with torch.no_grad():
    for idx, data in enumerate(tqdm(dataloader)):
        img, name, provider, grade = data['img'], data['name'], data['datacenter'], data['isup_grade']
        img = img.cuda()
        # Only the first sample's provider is inspected, so the whole batch is
        # routed to one ensemble; this is exact for a batch size of 1.
        ensemble = models_r if provider[0] == "radboud" else models_k
        for i, model in enumerate(ensemble):
            preds[f'{i}'].append(model(img)['out'])
        names.append(name)
        grades.append(grade.sum(1).cpu().numpy().astype(int))

        if max_batches is not None and idx + 1 >= max_batches:
            break

names = np.concatenate(names)
grades = np.concatenate(grades)
# Average the members' sigmoid outputs, then turn them into a grade by summing
# along dim 1 and rounding.
predictions = sum(torch.cat(preds[f'{i}']).sigmoid().cpu() for i in range(n_members))
predictions = predictions / n_members
predictions = predictions.sum(1).round().numpy().astype(int)

HBox(children=(FloatProgress(value=0.0, max=10616.0), HTML(value='')))




In [40]:
# Quadratic-weighted Cohen's kappa between true grades and ensemble predictions.
kappa = cohen_kappa_score(grades, predictions, weights='quadratic')
cm = confusion_matrix(grades, predictions)
# Row-normalise the confusion matrix. keepdims=True keeps the row totals as a
# column vector so every row is divided by its own total; a flat vector would
# broadcast along the columns instead. The builtin float replaces the np.float
# alias, which was removed in NumPy 1.24.
row_totals = cm.sum(axis=1, keepdims=True).astype(float)
# Rows without any true sample stay at zero instead of turning into NaN.
ncm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals > 0)
print("The average kappa score is {}.".format(kappa))

The average kappa score is 0.9714361764069804.


In [47]:
# grades[80:100]
# Display the first 20 predicted grades.
predictions[0:20]

array([0, 0, 4, 4, 0, 0, 1, 2, 1, 0, 0, 1, 1, 3, 2, 3, 0, 2, 0, 4])

### Record a confusion dictionary with file names

In [None]:
# Move the validation predictions and labels to the CPU and convert them to NumPy arrays.
# NOTE(review): val_preds_p and val_label_p are not defined in any cell visible in this
# notebook, so this cell raises NameError on a fresh kernel. Confirm where they come from.
val_preds_p, val_label_p = val_preds_p.cpu().numpy(), val_label_p.cpu().numpy()

In [None]:
# Map "<label>_<prediction>" -> list of misclassified image names, with one key
# for every off-diagonal cell of the 6x6 confusion matrix.
cfm_name = {"{}_{}".format(i, j): [] for i in range(6) for j in range(6) if i != j}
for idx, img_name in enumerate(names):
    label = val_label_p[idx]
    # Named `pred` rather than `preds` so the notebook-level `preds` container
    # used by the metric cells is not overwritten with a scalar.
    pred = val_preds_p[idx]
    if label != pred:
        cfm_name["{}_{}".format(int(label), int(pred))].append(img_name)

In [None]:
# Persist the confusion-name dictionary. The context manager closes the file
# even if pickling raises.
with open("Resnext50_medreso_36patch_adam_cosine_bin_0_cfm_name.pkl", "wb") as f:
    pickle.dump(cfm_name, f)

In [None]:
# Draw the normalised confusion matrix `ncm` as an annotated heatmap.
sn.set(font_scale=1.4)  # label font scale
class_ids = range(6)
df_cm = pd.DataFrame(ncm, index=class_ids, columns=class_ids)
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # annotation font size
plt.show()