In [1]:
import os
import json
import numpy as np

from imgclas.data_utils import load_image, load_class_names
from imgclas import paths, plot_utils

from imgclas import test_utils
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix

import warnings
warnings.filterwarnings("ignore") # To ignore UndefinedMetricWarning: [Recall/Precision/F-Score] is ill-defined and being set to 0.0 in labels with no [true/predicted] samples.

# ---- User parameters ----
# Timestamps (run names) of the trained models to analyse, one per fold.
# Runs from an earlier experiment, kept for reference:
#   '2022-03-22_TortSp1_50ep_4Batch', '2022-03-23_TortSp2_50ep_4Batch',
#   '2022-03-31_TortSp3_16ep_stop_8Batch', '2022-03-31_TortSp4_25ep_stop_8Batch',
#   '2022-04-01_TortSp5_22ep_stop_8Batch'
timestamp = [
    '2022-04-19_Fold1SpAnd45Balanced_18ep_stop_16Batch',
    '2022-04-19_Fold2SpAnd45Balanced_17ep_stop_16Batch',
    '2022-04-19_Fold3SpAn45Balanced_12ep_stop_16Batch',
    '2022-04-20_Fold4SpAnd45Balanced_35ep_stop15_16Batch',
    '2022-04-20_Fold5SpAnd45Balanced_21ep_stop15_16Batch',
]

# Dataset split whose stored predictions are read
SPLIT_NAME = 'test'
# Model file the predictions were made with
MODEL_NAME = 'final_model.h5'
# Number of top-ranked classes saved per prediction
TOP_K = 2

# File-name prefix (presumably the patient ID -- verify against the dataset
# naming scheme) of every image whose top-1 prediction differs from its true
# label, accumulated over all models listed in `timestamp`.
wrong_predictions = []

for TIMESTAMP in timestamp:

    # Point the imgclas path helpers at the current run
    paths.timestamp = TIMESTAMP

    # Load class names
    class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())

    # Read the predictions stored for this model / split / top-K combination
    pred_filename = '{}+{}+top{}.json'.format(MODEL_NAME, SPLIT_NAME, TOP_K)
    pred_path = os.path.join(paths.get_predictions_dir(), pred_filename)
    with open(pred_path) as pred_file:
        pred_dict = json.load(pred_file)

    for idx, true_label in enumerate(pred_dict['true_lab']):
        # Correct top-1 prediction: nothing to record
        if pred_dict['pred_lab'][idx][0] == true_label:
            continue
        # Keep only the part of the file name before the first underscore
        image_name = pred_dict['filenames'][idx].split('/')[-1]
        wrong_predictions.append(image_name.split('_')[0])

Loading class names...
Loading class names...
Loading class names...
Loading class names...
Loading class names...


In [5]:
# Header line followed by the list of file-name prefixes collected for every
# misclassified image (one entry per wrong prediction, pooled over all models).
print("Lista de pacientes predichos erróneamente por los modelos: ", wrong_predictions, sep="\n")

Lista de pacientes predichos erróneamente por los modelos: 
['5990', '8165', '4161', '7255', '7253', '8261', '8028', '10435', '10548', '8305', '11337', '5567', '9475', '1647', '2349', '10654', 'P5433', '1805', '3814', '8851', '10712', '7844', '9378', '10240', '2622', '8631', '7122', '4850', '1639', '6241']


In [3]:
# Total number of wrong top-1 predictions collected in wrong_predictions
len(wrong_predictions)

30

In [17]:
import os
import json
import numpy as np

from imgclas.data_utils import load_image, load_class_names
from imgclas import paths, plot_utils

from imgclas import test_utils
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve
from sklearn.metrics import confusion_matrix

import warnings
warnings.filterwarnings("ignore") # To ignore UndefinedMetricWarning: [Recall/Precision/F-Score] is ill-defined and being set to 0.0 in labels with no [true/predicted] samples.

# ---- User parameters ----
# Timestamps (run names) of the trained models to evaluate, one per fold.
# Runs from an earlier experiment, kept for reference:
#   '2022-03-22_TortSp1_50ep_4Batch', '2022-03-23_TortSp2_50ep_4Batch',
#   '2022-03-31_TortSp3_16ep_stop_8Batch', '2022-03-31_TortSp4_25ep_stop_8Batch',
#   '2022-04-01_TortSp5_22ep_stop_8Batch'
timestamp = [
    '2022-04-19_Fold1SpAnd45Balanced_18ep_stop_16Batch',
    '2022-04-19_Fold2SpAnd45Balanced_17ep_stop_16Batch',
    '2022-04-19_Fold3SpAn45Balanced_12ep_stop_16Batch',
    '2022-04-20_Fold4SpAnd45Balanced_35ep_stop15_16Batch',
    '2022-04-20_Fold5SpAnd45Balanced_21ep_stop15_16Batch',
]

# Dataset split whose stored predictions are read
SPLIT_NAME = 'test'
# Model file the predictions were made with
MODEL_NAME = 'final_model.h5'
# Number of top-ranked classes saved per prediction
TOP_K = 2

# Per-model metrics: every list receives one value per entry of `timestamp`.
train_accs = []
val_accs = []
accs = []
sens = []
specs = []
aucs = []
recalls = []
precisions = []
f1_scores = []

for TIMESTAMP in timestamp:

    # Point the imgclas path helpers at the current run
    paths.timestamp = TIMESTAMP

    # Load training statistics and keep the last recorded train / validation accuracy
    stats_path = os.path.join(paths.get_stats_dir(), 'stats.json')
    with open(stats_path) as f:
        stats = json.load(f)
    train_accs.append(stats['acc'][-1])
    val_accs.append(stats['val_acc'][-1])

    # Load class names
    class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())

    # Load back the predictions
    pred_path = os.path.join(paths.get_predictions_dir(), '{}+{}+top{}.json'.format(MODEL_NAME, SPLIT_NAME, TOP_K))
    with open(pred_path) as f:
        pred_dict = json.load(f)

    true_lab, pred_lab = np.array(pred_dict['true_lab']), np.array(pred_dict['pred_lab'])

    # Top-1 accuracy on the chosen split
    top1 = test_utils.topK_accuracy(true_lab, pred_lab, K=1)
    accs.append(top1)

    # Hard decision: the top-1 predicted label of every sample
    y_pred = pred_lab[:, 0]
    # Confidence of the top-1 prediction (not a class-1 score, so it is not
    # used for the threshold-based metrics below)
    probs = np.array([item[0] for item in pred_dict['pred_prob']])

    # Probability assigned to class 1 for every sample: it is the second stored
    # probability when the top-1 label is 0 and the first one otherwise.
    # Assumes a binary problem with both class probabilities stored
    # (TOP_K >= 2) -- TODO confirm if the setup changes.
    scores = [
        sample_probs[1] if sample_labels[0] == 0 else sample_probs[0]
        for sample_labels, sample_probs in zip(pred_dict['pred_lab'], pred_dict['pred_prob'])
    ]

    # AUC from the class-1 probabilities (computing it from the hard labels
    # would ignore the probabilities altogether)
    auc = roc_auc_score(true_lab, scores)
    aucs.append(auc)

    # Precision-recall curve: ground-truth labels against class-1 scores.
    # Stored under separate names so the curve is not clobbered by the scalar
    # precision / recall computed below.
    pr_precision, pr_recall, thresholds = precision_recall_curve(true_lab, scores)

    # Support-weighted F1 score, precision and recall of the hard decisions
    f1 = f1_score(true_lab, y_pred, average='weighted')
    f1_scores.append(f1)
    precision = precision_score(true_lab, y_pred, average='weighted')
    precisions.append(precision)
    recall = recall_score(true_lab, y_pred, average='weighted')
    recalls.append(recall)

    # Sensitivity and specificity from the standard 2x2 confusion matrix
    TN, FP, FN, TP = confusion_matrix(true_lab, y_pred, labels=[0, 1]).ravel()
    sensitivity = TP / (TP + FN)
    specificity = TN / (TN + FP)
    sens.append(sensitivity)
    specs.append(specificity)

Loading class names...
Loading class names...
Loading class names...
Loading class names...
Loading class names...


In [18]:
# Mean and standard deviation (np.std with its default arguments) of every
# metric over the evaluated models, printed one metric per line.
report = [
    ('Train accuracies: (%.3f +- %.3f)', train_accs),
    ('Val accuracies: (%.3f +- %.3f)', val_accs),
    ('Test accuracies: (%.3f +- %.3f)', accs),
    ('Sensitivities: (%.3f +- %.3f)', sens),
    ('Specificities: (%.3f +- %.3f)', specs),
    ('Precisions: (%.3f +- %.3f)', precisions),
    ('Recalls: (%.3f +- %.3f)', recalls),
    ('F1 Scores: (%.3f +- %.3f)', f1_scores),
    ('AUCs: (%.3f +- %.3f)', aucs),
]
for template, values in report:
    print(template % (np.mean(values), np.std(values)))

Train accuracies: (0.965 +- 0.012)
Val accuracies: (0.844 +- 0.076)
Test accuracies: (0.875 +- 0.062)
Sensitivities: (0.866 +- 0.103)
Specificities: (0.883 +- 0.103)
Precisions: (0.885 +- 0.059)
Recalls: (0.875 +- 0.062)
F1 Scores: (0.874 +- 0.062)
AUCs: (0.959 +- 0.028)
