In [6]:
from sklearn.metrics import confusion_matrix
from sklearn import metrics

def result_performance(y_true,y_pred,prediction):
    """Compute balanced accuracy and area under the precision-recall curve.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Hard (thresholded) predicted labels, used for the confusion matrix.
    prediction : array-like
        Continuous scores, used for the precision-recall curve.

    Returns
    -------
    list
        ``[balanced_accuracy, auprc]``, each rounded to 3 decimals.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2, i.e. ``y_true`` and ``y_pred``
        together do not contain exactly two distinct labels.
    """
    CM = confusion_matrix(y_true, y_pred)
    # The per-class rates below are only defined for a binary problem. A small
    # subgroup holding a single class yields a 1x1 matrix, so fail with a
    # message that names the cause instead of an opaque indexing error.
    if CM.shape != (2, 2):
        raise ValueError(
            "result_performance expects exactly two classes across y_true and "
            "y_pred; got a confusion matrix of shape {}".format(CM.shape)
        )

    # Row-major layout is [[TN, FP], [FN, TP]]: rows are true labels, columns
    # are predicted labels, and the larger label is taken as the positive class.
    TN, FP, FN, TP = CM.ravel()

    # NOTE: if one class has no true samples the matching denominator is 0 and
    # the rate (hence the balanced accuracy) is NaN, as with the original code.
    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    balanced_accuracy = (TPR + TNR) / 2

    precisions, recalls, _ = metrics.precision_recall_curve(y_true, prediction)
    auprc = metrics.auc(recalls, precisions)

    k = 3  # number of decimals kept in the reported metrics
    return [round(balanced_accuracy, k), round(auprc, k)]


### change parameters here

In [7]:
from os import listdir
from os.path import isfile, join
import fnmatch
import pandas as pd


# *************** change only model name and group in the following lines ********************
# Directory holding the per-run prediction CSVs to evaluate.
model_path = r"csv_files/dp_age90_4/test/"
# Subgroup to evaluate. Supported values:
# "black", "asian", "white", "hispanic",
# "age30", "age3040", "age4050", "age8090", "age90"
model_choice = "age90"

# Take the matching csv files in that directory. os.listdir returns names in
# arbitrary order, so sort them to keep the run order (and therefore the row
# order of the metrics table and any tie-breaking) reproducible.
files = sorted(fnmatch.filter(listdir(model_path), '*temp_test_isotonic*.csv'))
print(files)

['temp_test_isotonic_0.csv', 'temp_test_isotonic_1.csv', 'temp_test_isotonic_2.csv']


In [8]:
import heapq

# Boolean row masks for every supported subgroup, keyed by `model_choice`.
# `na=False` makes rows with a missing ETHNICITY count as "no match" instead of
# producing NaN entries that cannot be used as a boolean mask.
SUBGROUP_FILTERS = {
    'black': lambda d: d['ETHNICITY'].str.contains("BLACK", na=False),
    'asian': lambda d: d['ETHNICITY'].str.contains("ASIAN", na=False),
    'white': lambda d: d['ETHNICITY'].str.contains("WHITE", na=False),
    'hispanic': lambda d: d['ETHNICITY'].str.contains("HISPANIC", na=False),
    'age90': lambda d: d['AGE'] >= 90,
    'age30': lambda d: d['AGE'] < 30,
    'age4050': lambda d: (d['AGE'] >= 40) & (d['AGE'] < 50),
    'age3040': lambda d: (d['AGE'] >= 30) & (d['AGE'] < 40),
    'age8090': lambda d: (d['AGE'] >= 80) & (d['AGE'] < 90),
}

# An unknown subgroup used to print "ERROR!!!!" and then silently evaluate the
# whole, unfiltered cohort; stop instead so wrong numbers are never reported.
if model_choice not in SUBGROUP_FILTERS:
    raise ValueError(
        "Unknown model_choice {!r}; expected one of {}".format(
            model_choice, sorted(SUBGROUP_FILTERS)
        )
    )
if not files:
    raise FileNotFoundError("No prediction CSV files found in " + model_path)

subgroup_mask = SUBGROUP_FILTERS[model_choice]

# One [balanced accuracy, AUPRC] row per prediction file, in `files` order.
full_metrics_list = []
for f in files:
    df = pd.read_csv(join(model_path, f))
    df = df[subgroup_mask(df)]

    y_pred = df["y_pred"].values
    y_true = df["y_true"].values
    prediction = df["calibrated_prediction"].values

    full_metrics_list.append(result_performance(y_true,y_pred, prediction))

df_full_metrics = pd.DataFrame(full_metrics_list, columns = ['bal_acc', 'AUC_PR_C1'])
df_full_metrics['Files'] = files
print(df_full_metrics)

bal_acc = df_full_metrics["bal_acc"].values
AUC_PR_C1 = list(df_full_metrics["AUC_PR_C1"].values)

# Positions of the (up to) three runs with the highest balanced accuracy,
# best first. nlargest returns fewer entries when fewer files exist.
list_index = heapq.nlargest(3, range(len(bal_acc)), key=bal_acc.__getitem__)
print(list_index)
print(heapq.nlargest(3, bal_acc))

# Among those candidates only, pick the run with the highest AUPRC. Selecting
# by position (rather than looking the winning AUPRC value up in the full list)
# guarantees the chosen file is one of the candidates even when another run has
# the same rounded AUPRC. Ties go to the candidate with the higher balanced
# accuracy, because max() keeps the first maximum it sees.
selected_file = files[max(list_index, key=AUC_PR_C1.__getitem__)]

print(selected_file)

    
    

   bal_acc  AUC_PR_C1                     Files
0    0.670      0.461  temp_test_isotonic_0.csv
1    0.647      0.460  temp_test_isotonic_1.csv
2    0.666      0.381  temp_test_isotonic_2.csv
[0, 2, 1]
[0.67, 0.666, 0.647]
temp_test_isotonic_0.csv
