In [None]:
## Purpose: Test the final model on the test cohort and evaluate its performance

In [None]:
## Import all required libraries
import os 
import numpy as np
import pandas as pd
import tensorflow as tf
import ast
import matplotlib.pyplot as plt
import scikitplot as skplt
from tensorflow import keras
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.utils import resample
from numpy import asarray
from numpy import savetxt, loadtxt

In [None]:
## Set current working directory
## Relative paths used further down (e.g. TestSetDir) are resolved against this directory.
os.chdir("/media/data/Projects/LargeDuctVsSmallDuct")

In [None]:
## Project directories: figures and saved models live below SSDDir,
## the test tiles are addressed with a relative path.
SSDDir = '/home/thomas/Projects/LargeDuctVsSmallDuct'
FiguresDir = f'{SSDDir}/Figures/InternalTest/'
model_dir = f'{SSDDir}/saved_models/CrossValidation/'
TestSetDir = 'Tiles_internal/Sets/Test'

In [None]:
## Class labels: 'small' is treated as the positive class, 'large' as the negative class
PosCategory, NegCategory = 'small', 'large'

In [None]:
## Ungroup the tables for each fold
def ungroup_data_table(DataTable):
    """Expand a grouped table (one row per patient) into one row per tile.

    Parameters
    ----------
    DataTable : pandas.DataFrame
        Must contain the columns 'PatientNo', 'Category' and 'Tilenames'.
        Each 'Tilenames' entry is the string representation of a Python list
        of tile names (parsed with ``ast.literal_eval``); an already parsed
        list/tuple is accepted as well.

    Returns
    -------
    pandas.DataFrame
        Columns 'PatientNo', 'Category' and 'Tilenames' with one row per tile,
        in the row order of ``DataTable`` and the tile order of each list.
    """
    PatientNo_new = []
    Category_new = []
    Tilenames_flatten = []

    # Walk the three columns positionally so the result does not depend on
    # the index labels of DataTable (e.g. after filtering or resampling).
    rows = zip(DataTable['PatientNo'], DataTable['Category'], DataTable['Tilenames'])
    for patient_no, category, tile_entry in rows:
        tiles = ast.literal_eval(tile_entry) if isinstance(tile_entry, str) else list(tile_entry)
        # Repeat the patient metadata once per parsed tile. Counting ', '
        # separators in the raw string is wrong for empty lists and for tile
        # names that themselves contain ', '.
        PatientNo_new.extend([patient_no] * len(tiles))
        Category_new.extend([category] * len(tiles))
        Tilenames_flatten.extend(tiles)

    Ungrouped_DataTable = pd.DataFrame({'PatientNo': PatientNo_new, 'Category': Category_new, 'Tilenames': Tilenames_flatten, })
    return Ungrouped_DataTable

In [None]:
## Ungroup the tables for each fold ver. 2
def ungroup_data_table2(DataTable):
    """Expand a grouped table with per-patient prediction lists into one row per tile.

    Parameters
    ----------
    DataTable : pandas.DataFrame
        Must contain the columns 'PatientNo', 'Category', 'Tilenames' and
        'Predictions'. Each 'Tilenames' entry is the string representation of
        a Python list of tile names (an already parsed list/tuple is accepted
        as well); each 'Predictions' entry is a sequence holding one
        prediction per tile of that row.

    Returns
    -------
    pandas.DataFrame
        Columns 'PatientNo', 'Category', 'Tilenames' and 'Predictions' with
        one row per tile, in the row order of ``DataTable``.

    Raises
    ------
    ValueError
        If a row holds a different number of predictions than tiles.
    """
    PatientNo_new = []
    Category_new = []
    Tilenames_flatten = []
    Predictions_new = []

    # Walk the columns positionally so the result does not depend on the
    # index labels of DataTable.
    rows = zip(DataTable['PatientNo'], DataTable['Category'],
               DataTable['Tilenames'], DataTable['Predictions'])
    for patient_no, category, tile_entry, tile_predictions in rows:
        tiles = ast.literal_eval(tile_entry) if isinstance(tile_entry, str) else list(tile_entry)
        tile_predictions = list(tile_predictions)
        # A per-row check catches misaligned rows even when the totals over
        # all rows happen to match.
        if len(tile_predictions) != len(tiles):
            raise ValueError(
                'Patient ' + str(patient_no) + ' has ' + str(len(tiles)) + ' tiles but '
                + str(len(tile_predictions)) + ' predictions.'
            )
        PatientNo_new.extend([patient_no] * len(tiles))
        Category_new.extend([category] * len(tiles))
        Tilenames_flatten.extend(tiles)
        Predictions_new.extend(tile_predictions)

    Ungrouped_DataTable = pd.DataFrame({'PatientNo': PatientNo_new, 'Category': Category_new, 'Tilenames': Tilenames_flatten, 'Predictions': Predictions_new })
    return Ungrouped_DataTable

In [None]:
## Read TestSet table
## TestTable is the grouped table read from CSV; TestTableTileLevel is its
## expansion to one row per tile via ungroup_data_table.
TestTable = pd.read_csv('/media/data/Projects/LargeDuctVsSmallDuct/Tables/Int_HD_master_TestTable_grouped.csv')
TestTableTileLevel = ungroup_data_table(TestTable)

In [None]:
## Input image size (height and width in pixels) used as target size for the tiles
img_height = img_width = 300
IMAGE_SIZE = [img_height, img_width]

In [None]:
## Define target variable
## Category column of the grouped test table.
## NOTE(review): `y` is not referenced again in the visible cells — confirm it is still needed.
y = TestTable['Category']

In [None]:
## Create instances of ImageDataGenerator
## Only the EfficientNet preprocess_input function is applied; no augmentation arguments are set.
idg_test = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
## Instantiate ImageDataGenerator
## Tiles are looked up by the 'Tilenames' column inside TestSetDir and labelled by 'Category'.
## shuffle = False keeps the generator in table order; the predictions are later written
## back into a copy of TestTableTileLevel row by row, so the order must not change.
test_data_generator = idg_test.flow_from_dataframe(TestTableTileLevel, directory = TestSetDir,
                                                   x_col = "Tilenames", y_col = "Category",
                                                   batch_size = 64,
                                                   target_size = (img_height, img_width),
                                                   class_mode = 'binary', shuffle = False)

In [None]:
## Load final model
## model_dir already ends with '/', so the resulting path contains a double slash.
MyModel = keras.models.load_model(model_dir+'/tuned_model_2.h5')

In [None]:
# Evaluate the loaded model on the test generator and report accuracy and loss
## evaluate() runs before metrics_names is read — presumably the metric names are only
## populated once the model has been run, so keep this statement order (TODO confirm).
results = MyModel.evaluate(test_data_generator)
## Map metric name -> value; the prints below expect the keys 'accuracy' and 'loss'
results = dict(zip(MyModel.metrics_names,results))
    
print('Die Accuracy beträgt ' +str(results['accuracy'])+'.')
print('Der Loss beträgt '+str(results['loss'])+'.')
        
## NOTE(review): MyModel is used again for predict() in a later cell — confirm that
## clearing the Keras session at this point is intended.
tf.keras.backend.clear_session()

In [None]:
## Evaluate probabilities on test set
## One model output per tile; presumably in the row order of TestTableTileLevel because the
## generator was built with shuffle = False (assumes the generator dropped no tiles — TODO confirm).
predictions = MyModel.predict(test_data_generator)

In [None]:
## Read tile and patient thresholds
## Thresholds[0] is applied to tile-level predictions, Thresholds[1] to patient-level mean predictions.
Thresholds=loadtxt('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/Thresholds_CV.csv', dtype=float, delimiter=',').astype(float)

In [None]:
## Tile-level class call: a prediction above the tile threshold counts as the
## positive category, everything else as the negative category
predicted_class = [
    PosCategory if tile_prediction > Thresholds[0] else NegCategory
    for tile_prediction in predictions
]

In [None]:
## Tile-level table: a copy of the tile table plus raw prediction and thresholded class
PredTableTileLevel = TestTableTileLevel.assign(Predictions=predictions, PredictedClass=predicted_class)

## Patient-level table: collect each patient's tile predictions in a list and average them
patient_prediction_groups = PredTableTileLevel.groupby(['PatientNo', 'Category'])['Predictions']
PredTablePatientLevel = patient_prediction_groups.agg(list).reset_index()
PredTablePatientLevel['Predictions_mean'] = PredTablePatientLevel['Predictions'].apply(np.mean)

In [None]:
## Patient-level class call: a mean prediction above the patient threshold counts
## as the positive category, everything else as the negative category
predictionsmean = PredTablePatientLevel['Predictions_mean']
predicted_class2 = [
    PosCategory if mean_prediction > Thresholds[1] else NegCategory
    for mean_prediction in predictionsmean
]
PredTablePatientLevel['PredictedClass'] = predicted_class2

In [None]:
## Copy TestTable with predictions
## Attach each patient's list of tile predictions by key. PredTablePatientLevel comes out of a
## groupby and is therefore ordered by (PatientNo, Category); assigning its column directly
## would pair rows by index position and mix up patients whenever TestTable is not stored in
## exactly that order. how='left' keeps the row order of TestTable.
TestTable_merg = TestTable.merge(
    PredTablePatientLevel[['PatientNo', 'Category', 'Predictions']],
    on=['PatientNo', 'Category'],
    how='left',
    validate='many_to_one',
)
TestTable_merg = TestTable_merg.drop('TileCount', axis=1)

In [None]:
## Bootstrapping on patient Level
## Patients (rows of TestTable_merg) are resampled with replacement. Every resample is
## evaluated on tile level (suffix 1 / no suffix) and on patient level (suffix 2).
n_iterations = 100
bootstrap_seed = 42   # fixed seed so that the confidence intervals are reproducible
alpha = 0.95
mean_fpr = np.linspace(0, 1, 100)   # common x grid for the interpolated ROC and PR curves

stats = list()
stats2 = list()
accuracy_list1 = []
sensitivity_list1 = []
specificity_list1 = []
ppv_list1 = []
npv_list1 = []
accuracy_list2 = []
sensitivity_list2 = []
specificity_list2 = []
ppv_list2 = []
npv_list2 = []
f1score_list1 = []
f1score_list2 = []

stats_prc = []
tprs_prc = []

stats2_prc = []
tprs2_prc = []

tprs = []
tprs2 = []


def _interpolated_curves(y_true, y_score):
    """Return (ROC AUC, TPR on mean_fpr, PR AUC, precision on mean_fpr) for one sample."""
    fpr, tpr = roc_curve(y_true, y_score, pos_label=PosCategory)[:2]
    roc_tpr = np.interp(mean_fpr, fpr, tpr)
    roc_tpr[0] = 0.0
    precision, recall = precision_recall_curve(y_true, y_score, pos_label=PosCategory)[:2]
    # precision_recall_curve returns recall in decreasing order; np.interp needs increasing x
    prc_precision = np.interp(mean_fpr, np.flip(recall), np.flip(precision))
    prc_precision[0] = 1.0
    return auc(fpr, tpr), roc_tpr, auc(recall, precision), prc_precision


def _threshold_metrics(y_true, y_score, threshold):
    """Return (accuracy, sensitivity, specificity, PPV, NPV) in percent and macro F1 (0-1 scale).

    Scores above `threshold` are called PosCategory, all others NegCategory.
    Raises ZeroDivisionError if one of the denominators is empty in the sample.
    """
    truth = pd.Series(list(y_true))
    called = pd.Series([PosCategory if score > threshold else NegCategory for score in y_score])
    n_pos = int((truth == PosCategory).sum())
    n_neg = int((truth == NegCategory).sum())
    true_pos = int(((truth == PosCategory) & (called == PosCategory)).sum())
    true_neg = int(((truth == NegCategory) & (called == NegCategory)).sum())
    called_pos = int((called == PosCategory).sum())
    called_neg = int((called == NegCategory).sum())
    return (
        ((true_neg + true_pos) / (n_neg + n_pos)) * 100,
        (true_pos / n_pos) * 100,
        (true_neg / n_neg) * 100,
        (true_pos / called_pos) * 100,
        (true_neg / called_neg) * 100,
        f1_score(truth, called, average='macro'),
    )


def _percentile_bounds(values, level, axis=None):
    """Return the lower and upper percentile of `values` for a two-sided interval of `level`."""
    lower_pct = ((1.0 - level) / 2.0) * 100
    upper_pct = (level + ((1.0 - level) / 2.0)) * 100
    return np.percentile(values, lower_pct, axis=axis), np.percentile(values, upper_pct, axis=axis)


def _clipped_auc_bounds(values, level):
    """Percentile interval of AUC values, clipped to the valid range [0, 1]."""
    low, high = _percentile_bounds(values, level)
    return max(0.0, low), min(1.0, high)


bootstrap_rng = np.random.RandomState(bootstrap_seed)
tile_metric_lists = (accuracy_list1, sensitivity_list1, specificity_list1, ppv_list1, npv_list1, f1score_list1)
patient_metric_lists = (accuracy_list2, sensitivity_list2, specificity_list2, ppv_list2, npv_list2, f1score_list2)

for iteration in range(n_iterations):
    print(iteration)
    boot_subset_patient = resample(TestTable_merg, random_state=bootstrap_rng).reset_index(drop=True)
    boot_subset = ungroup_data_table2(boot_subset_patient)

    ## Tile level
    roc_auc, interp_tpr, prc_auc, interp_tpr_prc = _interpolated_curves(
        boot_subset['Category'], boot_subset['Predictions'])
    stats.append(roc_auc)
    tprs.append(interp_tpr)
    stats_prc.append(prc_auc)
    tprs_prc.append(interp_tpr_prc)

    tile_metrics = _threshold_metrics(boot_subset['Category'], boot_subset['Predictions'], Thresholds[0])
    for metric_list, metric_value in zip(tile_metric_lists, tile_metrics):
        metric_list.append(metric_value)

    ## Patient level
    ## Work on the resampled patient rows directly. Re-grouping the tile table by PatientNo
    ## would merge patients that were drawn more than once and drop their multiplicity.
    patient_mean_predictions = boot_subset_patient['Predictions'].apply(np.mean)
    roc_auc2, interp_tpr2, prc_auc2, interp_tpr2_prc = _interpolated_curves(
        boot_subset_patient['Category'], patient_mean_predictions)
    stats2.append(roc_auc2)
    tprs2.append(interp_tpr2)
    stats2_prc.append(prc_auc2)
    tprs2_prc.append(interp_tpr2_prc)

    patient_metrics = _threshold_metrics(boot_subset_patient['Category'], patient_mean_predictions, Thresholds[1])
    for metric_list, metric_value in zip(patient_metric_lists, patient_metrics):
        metric_list.append(metric_value)

## Calculate confidence interval ROC
lower, upper = _clipped_auc_bounds(stats, alpha)
lower2, upper2 = _clipped_auc_bounds(stats2, alpha)

## Calculate confidence boundaries ROC
roc_lower, roc_upper = _percentile_bounds(tprs, alpha, axis=0)
roc_lower2, roc_upper2 = _percentile_bounds(tprs2, alpha, axis=0)

## Calculate confidence interval PRC
lower_prc, upper_prc = _clipped_auc_bounds(stats_prc, alpha)
lower2_prc, upper2_prc = _clipped_auc_bounds(stats2_prc, alpha)

## Calculate confidence boundaries PRC
roc_lower_prc, roc_upper_prc = _percentile_bounds(tprs_prc, alpha, axis=0)
roc_lower2_prc, roc_upper2_prc = _percentile_bounds(tprs2_prc, alpha, axis=0)

In [None]:
## ROC curve on tile level with the bootstrapped 95% confidence band
fpr2, tpr2, thresholds2 = roc_curve(PredTableTileLevel['Category'], PredTableTileLevel['Predictions'], pos_label=PosCategory)
auc_tile = auc(fpr2, tpr2)
plt.figure(2)
plt.plot([0, 1], [0, 1], 'k--', linewidth=1.0, label='No Skill')
plt.plot(fpr2, tpr2, linewidth=1.0, label=f'AUC = {round(auc_tile, 3):.3f}', zorder=3)
plt.fill_between(
    mean_fpr, roc_lower, roc_upper, color='moccasin',
    label=f'95% CI {round(lower, 3):.3f}-{round(upper, 3):.3f}', zorder=1,
)
plt.xlabel('False positive rate', fontsize=12, fontweight='bold')
plt.ylabel('True positive rate', fontsize=12, fontweight='bold')
for tick_group in ('major', 'minor'):
    plt.tick_params(axis='both', which=tick_group, labelsize=10)
plt.xlim(-0.02, 1.02)
plt.ylim(-0.02, 1.02)
roc_axes = plt.gca()
roc_axes.set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
## Draw the left and top spine above the confidence band
for spine_name in ('left', 'top'):
    roc_axes.spines[spine_name].set_zorder(2)
plt.savefig(FiguresDir + 'ROC_InternalTest_TileLV.png', dpi=1200, bbox_inches='tight')
plt.show()
print(f'Die AUC auf Tile-Level beträgt {round(auc_tile, 2)!s}.')

In [None]:
## ROC curve on patient level with the bootstrapped 95% confidence band
fpr, tpr, thresholds = roc_curve(PredTablePatientLevel['Category'], PredTablePatientLevel['Predictions_mean'], pos_label=PosCategory)
auc_patient = auc(fpr, tpr)
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--', linewidth=1.0, label='No Skill')
plt.plot(fpr, tpr, linewidth=1.0, label=f'AUC = {round(auc_patient, 3):.3f}', zorder=3)
plt.fill_between(
    mean_fpr, roc_lower2, roc_upper2, color='moccasin',
    label=f'95% CI {round(lower2, 3):.3f}-{round(upper2, 3):.3f}', zorder=1,
)
plt.xlabel('False positive rate', fontsize=12, fontweight='bold')
plt.ylabel('True positive rate', fontsize=12, fontweight='bold')
for tick_group in ('major', 'minor'):
    plt.tick_params(axis='both', which=tick_group, labelsize=10)
plt.xlim(-0.02, 1.02)
plt.ylim(-0.02, 1.02)
roc_axes = plt.gca()
roc_axes.set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
## Draw the left and top spine above the confidence band
for spine_name in ('left', 'top'):
    roc_axes.spines[spine_name].set_zorder(2)
plt.savefig(FiguresDir + 'ROC_InternalTest_PatientLV.png', dpi=1200, bbox_inches='tight')
plt.show()
print(f'Die AUC auf Patient-Level beträgt {round(auc_patient, 2)!s}.')

In [None]:
## Plot PRC on tile level
precision, recall, thresholds = precision_recall_curve(PredTableTileLevel['Category'], PredTableTileLevel['Predictions'], pos_label=PosCategory)
## Kept in its own variable so the ROC AUC stored in auc_patient is not overwritten
prc_auc_tile = auc(recall, precision)
plt.figure(2)
## No-skill baseline of a PR curve: the share of positive tiles
no_skill = len(PredTableTileLevel[PredTableTileLevel['Category']==PosCategory]) / len(PredTableTileLevel)
## The 'k--' format string already sets the dashed line style; passing linestyle as well is redundant
plt.plot([0,1], [no_skill,no_skill], 'k--', linewidth=1.0, label='No Skill')
plt.plot(recall, precision, linewidth = 1.0, label='AUC = ' + str(format((round(prc_auc_tile,3)),'.3f')),zorder=3)
plt.fill_between(mean_fpr, roc_lower_prc, roc_upper_prc, color='moccasin',
                 label='95% CI ' + str(format((round((lower_prc),3)),'.3f')) + '-' + str(format((round((upper_prc),3)),'.3f')), zorder=1)
plt.xlabel('Recall', fontsize=12, fontweight='bold')
plt.ylabel('Precision', fontsize=12, fontweight='bold')
plt.tick_params(axis='both', which='major', labelsize=10)
plt.tick_params(axis='both', which='minor', labelsize=10)
plt.xlim(-0.02,1.02)
plt.ylim(-0.02,1.02)
plt.gca().set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
## Draw the spines above the confidence band
plt.gca().spines['left'].set_zorder(2)
plt.gca().spines['top'].set_zorder(2)
plt.gca().spines['right'].set_zorder(2)
plt.savefig(FiguresDir+'PRC_InternalTest_TileLV.png', dpi=1200, bbox_inches='tight')
plt.show()
print('Die PRC-AUC auf Tile-Level beträgt '+str(round(prc_auc_tile,2))+'.')

In [None]:
## Plot PRC on patient level
precision, recall, thresholds = precision_recall_curve(PredTablePatientLevel['Category'], PredTablePatientLevel['Predictions_mean'], pos_label=PosCategory)
## Kept in its own variable so the ROC AUC stored in auc_patient is not overwritten
prc_auc_patient = auc(recall, precision)
plt.figure(1)
## No-skill baseline of a PR curve: the share of positive patients
no_skill = len(PredTablePatientLevel[PredTablePatientLevel['Category']==PosCategory]) / len(PredTablePatientLevel)
## The 'k--' format string already sets the dashed line style; passing linestyle as well is redundant
plt.plot([0,1], [no_skill,no_skill], 'k--', linewidth=1.0, label='No Skill')
plt.plot(recall, precision, linewidth = 1.0, label='AUC = ' + str(format((round(prc_auc_patient,3)),'.3f')),zorder=3)
plt.fill_between(mean_fpr, roc_lower2_prc, roc_upper2_prc, color='moccasin',
                 label='95% CI ' + str(format((round((lower2_prc),3)),'.3f')) + '-' + str(format((round((upper2_prc),3)),'.3f')), zorder=1)
plt.xlabel('Recall', fontsize=12, fontweight='bold')
plt.ylabel('Precision', fontsize=12, fontweight='bold')
plt.tick_params(axis='both', which='major', labelsize=10)
plt.tick_params(axis='both', which='minor', labelsize=10)
plt.xlim(-0.02,1.02)
plt.ylim(-0.02,1.02)
plt.gca().set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
## Draw the spines above the confidence band
plt.gca().spines['left'].set_zorder(2)
plt.gca().spines['top'].set_zorder(2)
plt.gca().spines['right'].set_zorder(2)
plt.savefig(FiguresDir+'PRC_InternalTest_PatientLV.png', dpi=1200, bbox_inches='tight')
plt.show()
print('Die PRC-AUC auf Patient-Level beträgt '+str(round(prc_auc_patient,2))+'.')

In [None]:
# Confusion matrix on patient level with absolute counts
duct_tick_labels = ['large duct', 'small duct']
skplt.metrics.plot_confusion_matrix(
    PredTablePatientLevel['Category'],
    PredTablePatientLevel['PredictedClass'],
    normalize=False,
    title=' ',
    figsize=(4, 3),
)
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions and only swap in readable class names
plt.xticks(plt.xticks()[0], duct_tick_labels)
plt.yticks(plt.yticks()[0], duct_tick_labels)
plt.savefig(FiguresDir + 'CoMa_InternalTest_PatientLV_abs.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix on patient level, normalized
duct_tick_labels = ['large duct', 'small duct']
skplt.metrics.plot_confusion_matrix(
    PredTablePatientLevel['Category'],
    PredTablePatientLevel['PredictedClass'],
    normalize=True,
    title=' ',
    figsize=(4, 3),
)
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions and only swap in readable class names
plt.xticks(plt.xticks()[0], duct_tick_labels)
plt.yticks(plt.yticks()[0], duct_tick_labels)
plt.savefig(FiguresDir + 'CoMa_InternalTest_PatientLV_rel.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix on tile level with absolute counts
duct_tick_labels = ['large duct', 'small duct']
skplt.metrics.plot_confusion_matrix(
    PredTableTileLevel['Category'],
    PredTableTileLevel['PredictedClass'],
    normalize=False,
    title=' ',
    figsize=(4, 3),
)
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions and only swap in readable class names
plt.xticks(plt.xticks()[0], duct_tick_labels)
plt.yticks(plt.yticks()[0], duct_tick_labels)
plt.savefig(FiguresDir + 'CoMa_InternalTest_TileLV_abs.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix on tile level, normalized
duct_tick_labels = ['large duct', 'small duct']
skplt.metrics.plot_confusion_matrix(
    PredTableTileLevel['Category'],
    PredTableTileLevel['PredictedClass'],
    normalize=True,
    title=' ',
    figsize=(4, 3),
)
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions and only swap in readable class names
plt.xticks(plt.xticks()[0], duct_tick_labels)
plt.yticks(plt.yticks()[0], duct_tick_labels)
plt.savefig(FiguresDir + 'CoMa_InternalTest_TileLV_rel.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
## Save prediction tables
## Both tables are written as CSV without the index column.
## NOTE(review): the absolute path repeats the value of SSDDir — consider building it from SSDDir.
PredTablePatientLevel.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/PredTablePatientLevel.csv', index=False)
PredTableTileLevel.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/PredTableTileLevel.csv', index=False)

In [None]:
##Compute metrics on tile level. Small duct is disease.
## Result table: row 1 = value on the full test set, rows 2 and 3 = percentile
## bounds taken from the tile-level bootstrap lists.
names = ['Original','Lower bound (95%CI)', 'Upper bound (95%CI)']
Metrics_TileLevel_Test = pd.DataFrame(columns=['Name', 'Accuracy','Sensitivity','Specificity','PPV','NPV', 'F1'])
Metrics_TileLevel_Test['Name'] = names

## Boolean masks over the tile table: ground truth vs. thresholded prediction
truth_small = PredTableTileLevel['Category'] == 'small'
truth_large = PredTableTileLevel['Category'] == 'large'
called_small = PredTableTileLevel['PredictedClass'] == 'small'
called_large = PredTableTileLevel['PredictedClass'] == 'large'

small_TileNo = len(PredTableTileLevel[truth_small])
small_correct = len(PredTableTileLevel[truth_small & called_small])
small_allPositive = len(PredTableTileLevel[called_small])
small_allNegative = len(PredTableTileLevel[called_large])
small_correctneg = len(PredTableTileLevel[truth_large & called_large])

large_TileNo = len(PredTableTileLevel[truth_large])
large_correct = len(PredTableTileLevel[truth_large & called_large])

## Point estimates in percent; F1 (macro average) stays on the 0-1 scale
accuracy = [np.round(((small_correct+large_correct)/(small_TileNo+large_TileNo))*100, 3)]
sensitivity = [np.round((small_correct/small_TileNo)*100, 3)]
specificity = [np.round((large_correct/large_TileNo)*100, 3)]
ppv = [np.round((small_correct/small_allPositive)*100, 3)]
npv = [np.round((small_correctneg/small_allNegative)*100, 3)]
f1s = [np.round(f1_score(PredTableTileLevel['Category'], PredTableTileLevel['PredictedClass'], average = 'macro'), 3)]

alpha = 0.95
p = ((1.0-alpha)/2.0) * 100
p1 = (alpha+((1.0-alpha)/2.0)) * 100

tile_bootstrap_pairs = [
    (accuracy, accuracy_list1),
    (sensitivity, sensitivity_list1),
    (specificity, specificity_list1),
    (ppv, ppv_list1),
    (npv, npv_list1),
    (f1s, f1score_list1),
]

## Lower bounds, clipped at 0
for column_values, bootstrap_values in tile_bootstrap_pairs:
    column_values.append(np.round(max(0.0, np.percentile(bootstrap_values, p)), 3))

## Upper bounds, clipped at 100 (the F1 values are not multiplied by 100, so the clip does not affect them)
for column_values, bootstrap_values in tile_bootstrap_pairs:
    column_values.append(np.round(min(100.0, np.percentile(bootstrap_values, p1)), 3))

for column_name, column_values in zip(['Accuracy', 'Sensitivity', 'Specificity', 'PPV', 'NPV', 'F1'],
                                      [accuracy, sensitivity, specificity, ppv, npv, f1s]):
    Metrics_TileLevel_Test[column_name] = column_values

Metrics_TileLevel_Test

In [None]:
## Save tile metrics
## Written as CSV without the index column.
## NOTE(review): the absolute path repeats the value of SSDDir — consider building it from SSDDir.
Metrics_TileLevel_Test.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/Metrics_TileLevel_InternalTest.csv', index=False)

In [None]:
## Collect the tile-level bootstrap results (one row per resample) and save them as CSV
Bootstrap_internal_TileLevel = pd.DataFrame({
    'Accuracy': accuracy_list1,
    'Sensitivity': sensitivity_list1,
    'Specificity': specificity_list1,
    'PPV': ppv_list1,
    'NPV': npv_list1,
    'AUC': stats,
    'F1': f1score_list1,
})

Bootstrap_internal_TileLevel.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/Bootstrap_internal_TileLevel.csv', index=False)

In [None]:
##Compute metrics on patient level. Small duct is disease.
## Result table: row 1 = value on the full test set, rows 2 and 3 = percentile
## bounds taken from the patient-level bootstrap lists.
names = ['Original','Lower bound (95%CI)', 'Upper bound (95%CI)']
Metrics_PatientLevel_Test = pd.DataFrame(columns=['Name', 'Accuracy','Sensitivity','Specificity','PPV','NPV','F1'])
Metrics_PatientLevel_Test['Name'] = names

## Boolean masks over the patient table: ground truth vs. thresholded prediction
truth_small = PredTablePatientLevel['Category'] == 'small'
truth_large = PredTablePatientLevel['Category'] == 'large'
called_small = PredTablePatientLevel['PredictedClass'] == 'small'
called_large = PredTablePatientLevel['PredictedClass'] == 'large'

small_PatientNo = len(PredTablePatientLevel[truth_small])
small_correct = len(PredTablePatientLevel[truth_small & called_small])
small_allPositive = len(PredTablePatientLevel[called_small])
small_allNegative = len(PredTablePatientLevel[called_large])
small_correctneg = len(PredTablePatientLevel[truth_large & called_large])

large_PatientNo = len(PredTablePatientLevel[truth_large])
large_correct = len(PredTablePatientLevel[truth_large & called_large])

## Point estimates in percent; F1 (macro average) stays on the 0-1 scale
accuracy = [np.round(((small_correct+large_correct)/(small_PatientNo+large_PatientNo))*100, 3)]
sensitivity = [np.round((small_correct/small_PatientNo)*100, 3)]
specificity = [np.round((large_correct/large_PatientNo)*100, 3)]
ppv = [np.round((small_correct/small_allPositive)*100, 3)]
npv = [np.round((small_correctneg/small_allNegative)*100, 3)]
f1s = [np.round(f1_score(PredTablePatientLevel['Category'], PredTablePatientLevel['PredictedClass'], average = 'macro'), 3)]

alpha = 0.95
p = ((1.0-alpha)/2.0) * 100
p1 = (alpha+((1.0-alpha)/2.0)) * 100

patient_bootstrap_pairs = [
    (accuracy, accuracy_list2),
    (sensitivity, sensitivity_list2),
    (specificity, specificity_list2),
    (ppv, ppv_list2),
    (npv, npv_list2),
    (f1s, f1score_list2),
]

## Lower bounds, clipped at 0
for column_values, bootstrap_values in patient_bootstrap_pairs:
    column_values.append(np.round(max(0.0, np.percentile(bootstrap_values, p)), 3))

## Upper bounds, clipped at 100 (the F1 values are not multiplied by 100, so the clip does not affect them)
for column_values, bootstrap_values in patient_bootstrap_pairs:
    column_values.append(np.round(min(100.0, np.percentile(bootstrap_values, p1)), 3))

for column_name, column_values in zip(['Accuracy', 'Sensitivity', 'Specificity', 'PPV', 'NPV', 'F1'],
                                      [accuracy, sensitivity, specificity, ppv, npv, f1s]):
    Metrics_PatientLevel_Test[column_name] = column_values


Metrics_PatientLevel_Test

In [None]:
## Save patient metrics
## Written as CSV without the index column.
## NOTE(review): the absolute path repeats the value of SSDDir — consider building it from SSDDir.
Metrics_PatientLevel_Test.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/Metrics_PatientLevel_InternalTest.csv', index=False)

In [None]:
## Save lists as Pandas Dataframe
## One row per bootstrap resample, patient-level metrics only.
Bootstrap_internal_PatientLevel = pd.DataFrame(columns=['Accuracy', 'Sensitivity','Specificity','PPV','NPV', 'AUC', 'F1'])
Bootstrap_internal_PatientLevel['Accuracy']=accuracy_list2
Bootstrap_internal_PatientLevel['Sensitivity']=sensitivity_list2
Bootstrap_internal_PatientLevel['Specificity']=specificity_list2
Bootstrap_internal_PatientLevel['PPV']=ppv_list2
Bootstrap_internal_PatientLevel['NPV']=npv_list2
Bootstrap_internal_PatientLevel['AUC']=stats2
## The patient-level F1 values belong in the patient-level table. Writing them into
## Bootstrap_internal_TileLevel left this F1 column empty in the saved CSV and
## overwrote the tile-level values in memory.
Bootstrap_internal_PatientLevel['F1']=f1score_list2

Bootstrap_internal_PatientLevel.to_csv('/home/thomas/Projects/LargeDuctVsSmallDuct/Tables/Bootstrap_internal_PatientLevel.csv', index=False)