# Analysis of an experiment

This notebook computes confusion matrices and ROC curves from the logged test predictions.

In [None]:
from sklearn import metrics
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import numpy as np
import matplotlib as mpl
# Set the default figure resolution to 300 DPI for every figure created below.
mpl.rcParams['figure.dpi'] = 300

In [None]:
# Name of the model under analysis. It is used to build the logs directory
# name and the file names of the saved figures.
# NOTE(review): the value is a literal '{model_name}' placeholder — presumably
# substituted by a templating step or edited by hand before running; confirm.
model_name = '{model_name}'

In [None]:
# Directory that holds one sub-folder per experiment run.
# Edit this line to point at your own logs directory.
path = f'logs-{model_name}'

In [None]:
# Output directory for the saved figures: the 'images' folder two levels
# above the current working directory.
images_path = os.path.join('..', '..', 'images')

In [None]:
# Accumulators keyed by eco type (taken from each experiment's dataset path).
# While experiments are read, each inner dict is filled with:
#   'cm'        -> list of confusion matrices, one per experiment
#   'tpr_rates' -> list of TPR curves resampled onto `mean_fpr`, one per experiment
#   'mean_fpr'  -> the shared FPR grid the TPR curves are resampled onto
#   'roc_auc'   -> list of ROC AUC scores, one per experiment
# An eco type with no experiments keeps an empty dict.
results = {
    'Head': {},
    'Femur': {},
    'Abdomen': {},
}
# Common FPR grid, so ROC curves from different experiments can later be
# averaged point by point.
mean_fpr = np.linspace(0, 1, 100)
# Sorted so the accumulation order does not depend on the file system.
for exp in sorted(os.listdir(path)):
    exp_dir = os.path.join(path, exp)
    # Only directories are experiment runs; skip stray files (e.g. .DS_Store)
    # that would otherwise crash the CSV read below.
    if not os.path.isdir(exp_dir):
        continue

    df = pd.read_csv(os.path.join(exp_dir, 'test_prediction.csv'))
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(os.path.join(exp_dir, 'config.json'), encoding='utf-8') as file:
        config = json.load(file)
    # The second '/'-separated component of the dataset path selects the bucket.
    # An unknown eco type raises KeyError on the lookup below.
    eco_type = config['dataset'].split('/')[1]
    eco_results = results[eco_type]

    cm = metrics.confusion_matrix(df.y_test, df.y_pred)
    eco_results.setdefault('cm', []).append(cm)

    roc_score = metrics.roc_auc_score(df.y_test, df.y_proba)
    fpr_proba, tpr_proba, threshold_proba = metrics.roc_curve(df.y_test, df.y_proba)
    # Resample this experiment's ROC curve onto the common grid and pin its
    # starting point to the origin.
    interp_tpr = np.interp(mean_fpr, fpr_proba, tpr_proba)
    interp_tpr[0] = 0.0
    eco_results.setdefault('tpr_rates', []).append(interp_tpr)
    eco_results['mean_fpr'] = mean_fpr
    eco_results.setdefault('roc_auc', []).append(roc_score)

In [None]:
# Draw and save one averaged confusion-matrix heatmap per eco type.
# The seaborn theme is applied once, before any figure is created, so every
# heatmap (including the first) is built under the same settings.
sns.set(font_scale=2.5)
class_names = ['Vaginal Delivery', 'Cesarean Delivery']
for eco_type, values in results.items():
    # An eco type with no experiments has no 'cm' entry; skip it instead of
    # failing with a KeyError.
    if not values.get('cm'):
        print(f'No experiments found for {eco_type}; skipping confusion matrix.')
        continue

    # Element-wise mean of the per-experiment confusion matrices.
    average_cm = np.array(values['cm']).mean(axis=0)
    group_counts = [f'{value:0.0f}' for value in average_cm.flatten()]
    # Row-normalise: each cell is divided by the total of its (true-label) row.
    percentages_cm = average_cm / average_cm.sum(axis=1, keepdims=True)
    group_percentages = [f'{value:.2%}' for value in percentages_cm.flatten()]
    # Cell annotation: averaged count on the first line, row share below it.
    labels = [f'{v1}\n({v2})' for v1, v2 in zip(group_counts, group_percentages)]
    labels = np.asarray(labels).reshape(average_cm.shape)

    plt.figure(figsize=(15, 10))
    sns.heatmap(
        average_cm,
        annot=labels,
        fmt='',  # annotations are already formatted strings
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel('Predicted Label', fontdict=dict(size=25))
    plt.ylabel('True Label', fontdict=dict(size=25))
    plt.savefig(
        os.path.join(images_path, f'average_cm_{model_name}_{eco_type}.png'),
        transparent=True,
    )
    plt.show()

In [None]:
# Plot the mean ROC curve of every eco type on a single figure and save it.
# The theme is configured before the figure is created so the whole figure is
# built under the same settings.
sns.set(font_scale=2)
sns.set_style("whitegrid")
plt.figure(figsize=(15, 15))
for eco_type, values in results.items():
    # An eco type with no experiments has no curves to average; skip it
    # instead of failing with a KeyError.
    if not values.get('tpr_rates'):
        print(f'No experiments found for {eco_type}; skipping ROC curve.')
        continue

    # Point-wise mean of the resampled TPR curves; the end point is pinned to 1.
    mean_tpr = np.mean(np.array(values['tpr_rates']), axis=0)
    mean_tpr[-1] = 1.0
    # The backslash is escaped: '\p' is not a valid escape sequence and newer
    # Python versions warn about it. The rendered label is unchanged.
    label = (
        f'Mean ROC (AUC={np.mean(values["roc_auc"]).round(3)} $\\pm$ '
        f'{np.std(values["roc_auc"]).round(3)}) - {eco_type.strip("_")}'
    )
    plt.plot(values['mean_fpr'], mean_tpr, label=label, lw=3)

# Diagonal reference line.
plt.plot([0, 1], [0, 1], linewidth=2, linestyle='dashed', color='g', label='Random Classifier')
plt.legend(fontsize=14)

plt.xlabel('False Positive Rate', fontdict=dict(size=25))
plt.ylabel('True Positive Rate', fontdict=dict(size=25))
plt.savefig(os.path.join(images_path, f'roc_curves_image_classifiers_{model_name}.png'))