In [None]:
'''
Purpose:
To perform stratified K-Fold-Cross-Validation

Prerequisite:
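Download the most recent ImageNet-pretrained (noisy-student) EfficientNet-B3 weights for transfer learning, unpack them and convert the checkpoint to a Keras .h5 file without the top layers: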
!wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b3.tar.gz -P /home/thomas/Projects/ICCAvsMETS/weights
!tar -xf /home/thomas/Projects/ICCAvsMETS/weights/noisy_student_efficientnet-b3.tar.gz -C /home/thomas/Projects/ICCAvsMETS/weights
!python /home/thomas/Projects/VENV/efficientnet_weight_update_util.py --model b3 --notop --ckpt /home/thomas/Projects/ICCAvsMETS/weights/noisy-student-efficientnet-b3/model.ckpt --o /home/thomas/Projects/ICCAvsMETS/weights/noisy-student-efficientnet-b3/efficientnetb3_notop.h5
'''

In [None]:
## Import all required libraries
import numpy as np
import os 
import pandas as pd
import tensorflow as tf
import ast
import random
import matplotlib.pyplot as plt
import scipy
import scipy.stats as st
import scikitplot as skplt
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import layers
from sklearn.model_selection import StratifiedKFold
from numpy import asarray
from numpy import savetxt, loadtxt
from sklearn.metrics import auc
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_curve, auc

In [None]:
## Set current working directory (source for image tiles)
# The relative tile folders defined further down (e.g. 'Tiles/Sets/Train') are
# resolved against this directory, so it has to be set before any generator is created.
os.chdir("/media/data/Projects/ICCAVSMETS")

In [None]:
## Set project directory (model-relevant and processed data)
SSDDir = '/home/thomas/Projects/ICCAvsMETS'
# Output folders for figures and model checkpoints. The trailing slash matters:
# file names are appended to these by plain string concatenation.
FiguresDir = f'{SSDDir}/Figures/CrossValidation/'
model_dir = f'{SSDDir}/saved_models/CrossValidation/'

In [None]:
## Define folders of source image tiles (relative to the working directory).
## Folders should have the following hierarchy: */Category/Material/**.jpg
Sets = 'Tiles/Sets'
NormalizedTiles = 'Tiles/Normalized'
TrainingSetDir = f'{Sets}/Train'
TestSetDir = f'{Sets}/Test'

In [None]:
## Define lists (three independent, initially empty containers)
PatientNo, Category, Tilename = [], [], []

In [None]:
## Get FrozenModel name for each of the k iterations
def get_frozen_model_name(k):
    """Return the checkpoint file name of the frozen-base model for fold ``k``."""
    return f'frozen_model_{k}.h5'

In [None]:
## Get tuned Model name for each of the k iterations
def get_tuned_model_name(k):
    """Return the checkpoint file name of the fine-tuned model for fold ``k``."""
    return f'tuned_model_{k}.h5'

In [None]:
## Ungroup the tables for each fold
def ungroup_data_table(DataTable):
    """Expand a grouped table (one row per patient) into one row per tile.

    Parameters
    ----------
    DataTable : pandas.DataFrame
        Must provide the columns 'PatientNo', 'Category' and 'Tilenames'.
        Each 'Tilenames' cell is either the string representation of a Python
        list of tile file names (as read back from CSV) or an actual
        list/tuple of names.

    Returns
    -------
    pandas.DataFrame
        Columns 'PatientNo', 'Category', 'Tilenames' with one row per tile,
        in the original row and tile order. Rows with an empty tile list
        contribute no output rows.
    """
    patient_numbers = []
    categories = []
    tilenames = []

    # Iterate positionally so the result does not depend on the index labels.
    for patient_no, category, tiles in zip(DataTable['PatientNo'],
                                           DataTable['Category'],
                                           DataTable['Tilenames']):
        # Parse the list once and use its real length; counting ', ' in the
        # string miscounts empty lists and names that contain ', '.
        tile_list = ast.literal_eval(tiles) if isinstance(tiles, str) else list(tiles)
        tilenames.extend(tile_list)
        patient_numbers.extend([patient_no] * len(tile_list))
        categories.extend([category] * len(tile_list))

    return pd.DataFrame({'PatientNo': patient_numbers,
                         'Category': categories,
                         'Tilenames': tilenames})

In [None]:
## Read TrainingSet Table
# The columns 'PatientNo', 'Category' and 'Tilenames' of this table are used further down.
TrainTable = pd.read_csv(f'{SSDDir}/Tables/TrainTable.csv')

In [None]:
## Fixed Parameters
# With exactly two categories a single output unit is used, so the class
# count collapses from 2 to 1; any other count is kept as is.
category_count = TrainTable['Category'].nunique()
num_classes = 1 if category_count == 2 else category_count
# Number of rows of the (grouped) training table
num_patients = TrainTable.shape[0]

In [None]:
## Variable Parameters
num_epochs = 100                      # maximum number of epochs per fit call
img_height = img_width = 300          # square input tiles
IMAGE_SIZE = [img_height, img_width]

In [None]:
## Define target variable and create the StratifiedKFold instance
# Stratification is done on the category of each row of TrainTable;
# the fixed random_state makes the shuffled 4-fold split repeatable.
y = TrainTable['Category']
skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=7)

In [None]:
## Create instances of ImageDataGenerators for train and validation set
# Validation images are only preprocessed; training images are additionally
# augmented with random horizontal and vertical flips.
idg_valid = ImageDataGenerator(preprocessing_function=preprocess_input)

idg_train = ImageDataGenerator(horizontal_flip=True,
                               vertical_flip=True,
                               preprocessing_function=preprocess_input)

In [None]:
## Destination of precalibrated weights
# Derived from SSDDir (same value as the previously hard-coded absolute path)
# so that moving the project only requires changing SSDDir.
weights_B3 = SSDDir + '/weights/noisy-student-efficientnet-b3/efficientnetb3_notop.h5'

In [None]:
## Performing cross-validation via K-Fold-Splitting, Transfer learning and Fine Tuning
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []
best_epochs_transfer = []
best_epochs_tuning = []
top_dropout_rate = 0.2
fold_var = 1

probabilities_tiles = []
probabilities_patient = []

# Folder for the per-fold prediction tables (same location TrainTable is read from)
tables_dir = SSDDir + '/Tables/'

# Make sure the checkpoint folder exists before the first checkpoint is written
os.makedirs(model_dir, exist_ok=True)


def _build_frozen_model(dropout_rate):
    """Build EfficientNetB3 (frozen, pretrained weights) with a new classification head.

    Returns the tuple ``(base, model)``: ``base`` is the EfficientNetB3 backbone
    (needed later to unfreeze it) and ``model`` is the full Keras model.
    """
    inputs = tf.keras.Input(shape=(img_height, img_width, 3))
    base = EfficientNetB3(include_top=False, weights=weights_B3,
                          input_tensor=inputs, input_shape=(img_height, img_width, 3))

    # Freeze the pretrained weights
    base.trainable = False

    # Rebuild top on the output of the backbone's last activation layer
    x = layers.GlobalAveragePooling2D(name="avg_pool")(base.get_layer('top_activation').output)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate, name="top_dropout")(x)
    x = layers.Dense(num_classes, activation="sigmoid", name="pred")(x)

    return base, tf.keras.Model(inputs=inputs, outputs=x)


def _train_stage(model, learning_rate, checkpoint_path, train_generator, valid_generator):
    """Compile and fit ``model``, then restore the weights with the lowest val_loss.

    Training stops early after 5 epochs without val_loss improvement; the best
    model is written to ``checkpoint_path`` and reloaded into ``model``.
    Returns the 1-based epoch number that reached the lowest val_loss.
    """
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])

    stage_callbacks = [
        EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5),
        ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1,
                        save_best_only=True, mode='min'),
    ]

    # Integer division: a trailing partial batch is not part of an epoch /
    # of the monitored validation pass.
    stage_history = model.fit(train_generator,
                              steps_per_epoch=train_generator.n // train_generator.batch_size,
                              epochs=num_epochs,
                              validation_data=valid_generator,
                              validation_steps=valid_generator.n // valid_generator.batch_size,
                              callbacks=stage_callbacks)

    model.load_weights(checkpoint_path)
    return np.argmin(stage_history.history['val_loss']) + 1


## Loop over each fold of K-Fold-Splitting
for train_index, val_index in skf.split(np.zeros(num_patients), y):
    # Split on patient level, then expand to one row per tile
    training_data = ungroup_data_table(TrainTable.iloc[train_index].reset_index(drop=True))
    validation_data = ungroup_data_table(TrainTable.iloc[val_index].reset_index(drop=True))

    train_data_generator = idg_train.flow_from_dataframe(training_data, directory=TrainingSetDir,
                                                         x_col="Tilenames", y_col="Category",
                                                         batch_size=32,
                                                         target_size=(img_height, img_width),
                                                         class_mode="binary", shuffle=True)
    valid_data_generator = idg_valid.flow_from_dataframe(validation_data, directory=TrainingSetDir,
                                                         x_col="Tilenames", y_col="Category",
                                                         target_size=(img_height, img_width),
                                                         class_mode="binary", shuffle=False)

    # Fail before training (not after) if the generator does not cover every
    # validation row: predictions are written back to validation_data row by row.
    if valid_data_generator.n != len(validation_data):
        raise ValueError('Validation generator yields %d images but the validation table has %d rows '
                         '(fold %d); check for missing or invalid tile files.'
                         % (valid_data_generator.n, len(validation_data), fold_var))

    # STAGE 1 - transfer learning: train only the new head on the frozen base
    base, MyModel = _build_frozen_model(top_dropout_rate)
    best_epochs_transfer.append(
        _train_stage(MyModel, 1e-2, model_dir + get_frozen_model_name(fold_var),
                     train_data_generator, valid_data_generator))

    # STAGE 2 - fine tuning: unfreeze the base model, but keep every
    # BatchNormalization layer of the whole model non-trainable
    base.trainable = True
    for layer in MyModel.layers:
        if isinstance(layer, layers.BatchNormalization):
            layer.trainable = False
    best_epochs_tuning.append(
        _train_stage(MyModel, 1e-4, model_dir + get_tuned_model_name(fold_var),
                     train_data_generator, valid_data_generator))

    # EVALUATE FINAL PERFORMANCE (best fine-tuned weights are already loaded)
    results = MyModel.evaluate(valid_data_generator)
    results = dict(zip(MyModel.metrics_names, results))

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])

    ## Save Dataframe for Tile Prediction
    # The generator is not shuffled, so predictions follow the row order of
    # validation_data; flatten the (N, 1) output to one value per tile.
    predictions = MyModel.predict(valid_data_generator).ravel()
    PredTableTileLevel = validation_data.copy()
    PredTableTileLevel['Predictions'] = predictions
    probabilities_tiles.append(PredTableTileLevel)
    PredTableTileLevel.to_csv(tables_dir + 'PredTableTileLevel_cv_' + str(fold_var) + '.csv', index=False)

    ## Save Dataframe for Patient Prediction (mean of the tile predictions per patient)
    PredTablePatientLevel = PredTableTileLevel.groupby(['PatientNo', 'Category'])['Predictions'].agg(list).reset_index()
    PredTablePatientLevel['Predictions_mean'] = PredTablePatientLevel['Predictions'].apply(np.mean)
    probabilities_patient.append(PredTablePatientLevel)
    PredTablePatientLevel.to_csv(tables_dir + 'PredTablePatientLevel_cv_' + str(fold_var) + '.csv', index=False)

    # Release the Keras graph state before the next fold builds a new model
    tf.keras.backend.clear_session()

    fold_var += 1

In [None]:
## Load all dataframes
# Re-read the per-fold prediction tables written by the cross-validation loop,
# so the evaluation cells below can work from the CSV files.
tables_dir = SSDDir + '/Tables/'

probabilities_patient = [pd.read_csv(tables_dir + 'PredTablePatientLevel_cv_' + str(fold) + '.csv')
                         for fold in range(1, 5)]
probabilities_tiles = [pd.read_csv(tables_dir + 'PredTableTileLevel_cv_' + str(fold) + '.csv')
                       for fold in range(1, 5)]

# The numbered names refer to the very same objects as the list entries, so a
# column added through the lists is also visible through these names.
(PredTablePatientLevel1, PredTablePatientLevel2,
 PredTablePatientLevel3, PredTablePatientLevel4) = probabilities_patient
(PredTableTileLevel1, PredTableTileLevel2,
 PredTableTileLevel3, PredTableTileLevel4) = probabilities_tiles

In [None]:
## Plot ROC-Curves Tiles Level (Validation-Set)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid so the fold curves can be averaged
colors = ['red', 'green', 'yellow', 'purple']

# One ROC curve per fold with 'KolonMet' as positive class. After the loop,
# fpr, tpr and thresholds hold the values of the last fold.
for i, dataframe in enumerate(probabilities_tiles, start=1):
    fpr, tpr, thresholds = roc_curve(dataframe['Category'], dataframe['Predictions'], pos_label='KolonMet')
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0  # interpolated curve starts at (0, 0)
    tprs.append(interp_tpr)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    # Cycle through the colors so more folds than colors do not raise an IndexError
    plt.plot(fpr, tpr, lw=1.0, label='ROC fold %d (AUC = %0.3f)' % (i, roc_auc),
             color=colors[(i - 1) % len(colors)], zorder=3)

# Mean ROC over all folds
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # mean curve ends at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.3f)' % (mean_auc),
         lw=1, zorder=2)

# Band of +/- one standard deviation around the mean curve, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)

# Percentile interval of the fold AUCs (computed but not drawn in the figure)
alpha = 0.95
p = ((1.0-alpha)/2.0) * 100
lower = max(0.0, np.percentile(aucs, p))
p = (alpha+((1.0-alpha)/2.0)) * 100
upper = min(1.0, np.percentile(aucs, p))

# Raw string: '\p' is not a valid escape sequence in a normal string literal
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='moccasin',
                 label=r'$\pm$ 1 std. dev.', zorder=1)
# Chance diagonal; color is given once instead of via both format string and keyword
plt.plot([0, 1], [0, 1], linestyle='--', linewidth=1.0, color='black')
plt.xlabel('False positive rate', fontsize=12, fontweight='bold')
plt.ylabel('True positive rate', fontsize=12, fontweight='bold')
plt.tick_params(axis='both', which='major', labelsize=10)
plt.tick_params(axis='both', which='minor', labelsize=10)
plt.xlim(0,1)
plt.ylim(0,1)
plt.gca().set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
plt.gca().spines['left'].set_zorder(2)
plt.gca().spines['top'].set_zorder(2)
plt.savefig(FiguresDir+'ROC_CV_TileLV.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
## Determine optimal threshold via Youden statistics
# Youden's J = TPR - FPR; the threshold with the largest J is selected.
# The ROC of the last fold's tile table is recomputed here explicitly, so the
# result no longer depends on which plotting cell was executed last.
# NOTE(review): only the last fold determines the threshold - confirm that this
# is intended rather than pooling all folds.
fpr_tile, tpr_tile, thresholds_tile = roc_curve(probabilities_tiles[-1]['Category'],
                                                probabilities_tiles[-1]['Predictions'],
                                                pos_label='KolonMet')
optimal_idx = np.argmax(tpr_tile - fpr_tile)
optimal_threshold_TileLevel = thresholds_tile[optimal_idx]

In [None]:
## Plot ROC-Curves Patient Level
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid so the fold curves can be averaged
colors = ['red', 'green', 'yellow', 'purple']

# One ROC curve per fold, based on the mean tile prediction per patient, with
# 'KolonMet' as positive class. After the loop, fpr, tpr and thresholds hold
# the values of the last fold.
for i, dataframe in enumerate(probabilities_patient, start=1):
    fpr, tpr, thresholds = roc_curve(dataframe['Category'], dataframe['Predictions_mean'], pos_label='KolonMet')
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0  # interpolated curve starts at (0, 0)
    tprs.append(interp_tpr)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    # Cycle through the colors so more folds than colors do not raise an IndexError
    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (AUC = %0.3f)' % (i, roc_auc),
             color=colors[(i - 1) % len(colors)], zorder=3)

# Chance diagonal
plt.plot([0,1],[0,1],linestyle = '--',lw = 1,color = 'black')

# Mean ROC over all folds
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # mean curve ends at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.3f)' % (mean_auc),
         lw=1, zorder=2)

# Band of +/- one standard deviation around the mean curve, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)

# Percentile interval of the fold AUCs (computed but not drawn in the figure)
alpha = 0.95
p = ((1.0-alpha)/2.0) * 100
lower = max(0.0, np.percentile(aucs, p))
p = (alpha+((1.0-alpha)/2.0)) * 100
upper = min(1.0, np.percentile(aucs, p))

# Raw string: '\p' is not a valid escape sequence in a normal string literal
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='moccasin',
                 label=r'$\pm$ 1 std. dev.', zorder=1)

plt.xlabel('False positive rate', fontsize=12, fontweight='bold')
plt.ylabel('True positive rate', fontsize=12, fontweight='bold')
plt.tick_params(axis='both', which='major', labelsize=10)
plt.tick_params(axis='both', which='minor', labelsize=10)
plt.xlim(0,1)
plt.ylim(0,1)
plt.gca().set_aspect('equal', adjustable='box')
leg = plt.legend(loc='lower right', fontsize=8)
leg.get_frame().set_linewidth(0.0)
plt.gca().spines['left'].set_zorder(2)
plt.gca().spines['top'].set_zorder(2)
plt.savefig(FiguresDir+'ROC_CV_PatientLV.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
## Determine optimal threshold on patient level
# Youden's J = TPR - FPR; the threshold with the largest J is selected.
# The ROC of the last fold's patient table is recomputed here explicitly, so
# the result no longer depends on which plotting cell was executed last.
# NOTE(review): only the last fold determines the threshold - confirm that this
# is intended rather than pooling all folds.
fpr_patient, tpr_patient, thresholds_patient = roc_curve(probabilities_patient[-1]['Category'],
                                                         probabilities_patient[-1]['Predictions_mean'],
                                                         pos_label='KolonMet')
optimal_idx = np.argmax(tpr_patient - fpr_patient)
optimal_threshold_PatientLevel = thresholds_patient[optimal_idx]

In [None]:
## Save Thresholds
# Written as two values, tile-level threshold first and patient-level second.
# The target path is derived from SSDDir instead of repeating the absolute path.
Thresholds_CV = np.asarray([optimal_threshold_TileLevel, optimal_threshold_PatientLevel])
savetxt(SSDDir+'/Tables/Thresholds_CV.csv', Thresholds_CV, delimiter=',')

In [None]:
## Define positive and negative category
PosCategory, NegCategory = 'KolonMet', 'ICCA'

In [None]:
## Find predicted class on tile level and store it in each fold table
# A prediction strictly above 0.5 is labelled as the positive category,
# everything else as the negative category.
for dataframe in probabilities_tiles:
    dataframe['PredictedClass'] = [PosCategory if probability > 0.5 else NegCategory
                                   for probability in dataframe['Predictions']]
predicted_class = []

In [None]:
## Find predicted class on patient level and store it in each fold table
# A mean prediction strictly above 0.5 is labelled as the positive category,
# everything else as the negative category.
for dataframe in probabilities_patient:
    dataframe['PredictedClass'] = [PosCategory if probability > 0.5 else NegCategory
                                   for probability in dataframe['Predictions_mean']]
predicted_class = []

In [None]:
# Generate confusion matrix on patient level (absolute counts), using fold 4 as best run
skplt.metrics.plot_confusion_matrix(PredTablePatientLevel4['Category'],
                                    PredTablePatientLevel4['PredictedClass'],
                                    normalize=False, figsize=(4, 3), title=' ')
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions, replace the tick texts (x axis first, then y axis)
for tick_function in (plt.xticks, plt.yticks):
    locs, labels = tick_function()
    tick_function(locs, ['iCCA', 'CRM'])
plt.savefig(FiguresDir+'CoMa_Test_CV_PatientLV_abs.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Generate confusion matrix on patient level (normalized), using fold 4 as best run
skplt.metrics.plot_confusion_matrix(PredTablePatientLevel4['Category'],
                                    PredTablePatientLevel4['PredictedClass'],
                                    normalize=True, figsize=(4, 3), title=' ')
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions, replace the tick texts (x axis first, then y axis)
for tick_function in (plt.xticks, plt.yticks):
    locs, labels = tick_function()
    tick_function(locs, ['iCCA', 'CRM'])
plt.savefig(FiguresDir+'CoMa_Test_CV_PatientLV_rel.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Generate confusion matrix on tile level (absolute counts), using fold 4 as best run
skplt.metrics.plot_confusion_matrix(PredTableTileLevel4['Category'],
                                    PredTableTileLevel4['PredictedClass'],
                                    normalize=False, figsize=(4, 3), title=' ')
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions, replace the tick texts (x axis first, then y axis)
for tick_function in (plt.xticks, plt.yticks):
    locs, labels = tick_function()
    tick_function(locs, ['iCCA', 'CRM'])
plt.savefig(FiguresDir+'CoMa_Test_CV_TileLV_abs.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
# Generate confusion matrix on tile level (normalized), using fold 4 as best run
skplt.metrics.plot_confusion_matrix(PredTableTileLevel4['Category'],
                                    PredTableTileLevel4['PredictedClass'],
                                    normalize=True, figsize=(4, 3), title=' ')
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('Ground Truth', fontweight='bold')
# Keep the tick positions, replace the tick texts (x axis first, then y axis)
for tick_function in (plt.xticks, plt.yticks):
    locs, labels = tick_function()
    tick_function(locs, ['iCCA', 'CRM'])
plt.savefig(FiguresDir+'CoMa_Test_CV_TileLV_rel.png', dpi=1200, bbox_inches='tight')
plt.show()

In [None]:
## Compute metrics on tile level. Arbitrarily, colorectal metastasis ('KolonMet') is defined as disease.
# Result: one row per fold plus a 'Mean' and an 'SD' row; all values in percent.
metric_columns = ['Accuracy', 'Sensitivity', 'Specificity', 'PPV', 'NPV']
fold_rows = []

for dataframe in probabilities_tiles:
    truth_met = dataframe['Category'] == 'KolonMet'
    truth_icca = dataframe['Category'] == 'ICCA'
    pred_met = dataframe['PredictedClass'] == 'KolonMet'
    pred_icca = dataframe['PredictedClass'] == 'ICCA'

    true_pos = (truth_met & pred_met).sum()    # KolonMet tiles predicted as KolonMet
    true_neg = (truth_icca & pred_icca).sum()  # ICCA tiles predicted as ICCA

    # Same order as metric_columns
    numerators = np.array([true_pos + true_neg, true_pos, true_neg, true_pos, true_neg], dtype=float)
    denominators = np.array([truth_met.sum() + truth_icca.sum(),  # accuracy: all tiles
                             truth_met.sum(),                     # sensitivity: all true KolonMet
                             truth_icca.sum(),                    # specificity: all true ICCA
                             pred_met.sum(),                      # PPV: all predicted KolonMet
                             pred_icca.sum()],                    # NPV: all predicted ICCA
                            dtype=float)
    # A metric whose denominator is zero is undefined: store NaN instead of
    # aborting the whole cell with a ZeroDivisionError.
    with np.errstate(divide='ignore', invalid='ignore'):
        fold_rows.append(np.round(numerators / denominators * 100, 2))

fold_metrics = np.array(fold_rows, dtype=float).reshape(-1, len(metric_columns))
# Mean and (population) standard deviation over the rounded fold values;
# a NaN in any fold propagates into both.
mean_row = np.round(np.mean(fold_metrics, axis=0), 3)
sd_row = np.round(np.std(fold_metrics, axis=0), 3)

# Row names follow the actual number of folds
names = ['Fold %d' % fold for fold in range(1, len(fold_rows) + 1)] + ['Mean', 'SD']
Metrics_TileLevel_CV = pd.DataFrame(np.vstack([fold_metrics, mean_row, sd_row]), columns=metric_columns)
Metrics_TileLevel_CV.insert(0, 'Name', names)

In [None]:
## Save tile metrics to csv
# The target path is derived from SSDDir instead of repeating the absolute path.
Metrics_TileLevel_CV.to_csv(SSDDir+'/Tables/Metrics_TileLevel_CV.csv', index=False)

In [None]:
## Compute metrics on patient level. Arbitrarily, colorectal metastasis ('KolonMet') is defined as disease.
# Result: one row per fold plus a 'Mean' and an 'SD' row; all values in percent.
metric_columns = ['Accuracy', 'Sensitivity', 'Specificity', 'PPV', 'NPV']
fold_rows = []

for dataframe in probabilities_patient:
    truth_met = dataframe['Category'] == 'KolonMet'
    truth_icca = dataframe['Category'] == 'ICCA'
    pred_met = dataframe['PredictedClass'] == 'KolonMet'
    pred_icca = dataframe['PredictedClass'] == 'ICCA'

    true_pos = (truth_met & pred_met).sum()    # KolonMet patients predicted as KolonMet
    true_neg = (truth_icca & pred_icca).sum()  # ICCA patients predicted as ICCA

    # Same order as metric_columns
    numerators = np.array([true_pos + true_neg, true_pos, true_neg, true_pos, true_neg], dtype=float)
    denominators = np.array([truth_met.sum() + truth_icca.sum(),  # accuracy: all patients
                             truth_met.sum(),                     # sensitivity: all true KolonMet
                             truth_icca.sum(),                    # specificity: all true ICCA
                             pred_met.sum(),                      # PPV: all predicted KolonMet
                             pred_icca.sum()],                    # NPV: all predicted ICCA
                            dtype=float)
    # A metric whose denominator is zero is undefined: store NaN instead of
    # aborting the whole cell with a ZeroDivisionError.
    with np.errstate(divide='ignore', invalid='ignore'):
        fold_rows.append(np.round(numerators / denominators * 100, 2))

fold_metrics = np.array(fold_rows, dtype=float).reshape(-1, len(metric_columns))
# Mean and (population) standard deviation over the rounded fold values;
# a NaN in any fold propagates into both.
mean_row = np.round(np.mean(fold_metrics, axis=0), 3)
sd_row = np.round(np.std(fold_metrics, axis=0), 3)

# Row names follow the actual number of folds
names = ['Fold %d' % fold for fold in range(1, len(fold_rows) + 1)] + ['Mean', 'SD']
Metrics_PatientLevel_CV = pd.DataFrame(np.vstack([fold_metrics, mean_row, sd_row]), columns=metric_columns)
Metrics_PatientLevel_CV.insert(0, 'Name', names)

In [None]:
# Save patient level metrics to csv
# The target path is derived from SSDDir instead of repeating the absolute path.
Metrics_PatientLevel_CV.to_csv(SSDDir+'/Tables/Metrics_PatientLevel_CV.csv', index=False)

In [None]:
## Print validation accuracies and loss for each run
# Mean and standard deviation over the per-fold values collected in the training loop
AVG_VALIDATION_ACCURACY, SDV_acc = np.mean(VALIDATION_ACCURACY), np.std(VALIDATION_ACCURACY)
AVG_VALIDATION_LOSS, SDV_loss = np.mean(VALIDATION_LOSS), np.std(VALIDATION_LOSS)

# All accuracies first, then all losses
for caption, fold_values in (('Validation Accuracy: ', VALIDATION_ACCURACY),
                             ('Validation Loss: ', VALIDATION_LOSS)):
    for item in fold_values:
        print(caption + str(item))

print('Average Validation Accuracy: ' + ' +/- '.join([str(AVG_VALIDATION_ACCURACY), str(SDV_acc)]))
print('Average Validation Loss: ' + ' +/- '.join([str(AVG_VALIDATION_LOSS), str(SDV_loss)]))