In [6]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pathlib import PurePath
from json import load

In [1]:
SMALL_SIZE = 12
MEDIUM_SIZE = 18
BIGGER_SIZE = 26
CHONK_SIZE = 32
font = {'family' : 'DIN Condensed',
        'weight' : 'bold',
        'size'   : SMALL_SIZE}
plt.rc('font', **font)
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=MEDIUM_SIZE, facecolor="xkcd:white")
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=CHONK_SIZE, facecolor="xkcd:white", edgecolor="xkcd:black") #  powder blue

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
def calc_model_performance(tpr, fpr, thresholds):
    performance_dict = {}
    
    # calculating auc
    performance_dict["AUC"] = np.trapz(y = tpr, x = fpr)
    
    # calculating eer
    tnr = 1 - np.array(tpr)
    scores = np.array((tnr, fpr)).T
    diffs = np.absolute(scores[:, 0] - scores[:, 1])
    min_index = np.argmin(diffs)
    lowest_threshold = thresholds[min_index]
    eer = (tnr[min_index] + fpr[min_index]) / 2
    performance_dict["EER"] = eer
    performance_dict["Threshold"] = lowest_threshold

    return performance_dict

In [4]:

def plot_ROC_curve(tpr, fpr, thresholds, performance, model_name, output_folder):
    fig, ax = plt.subplots()
    ax.fill_between(fpr, tpr)
    sns.scatterplot(x = fpr, y = tpr, ax = ax)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")

    v, h = .1, .1
    ax.set_xlim(0-h, 1+h)
    ax.set_ylim(0-v, 1+v)

    # Loop through the data points 
    for i, threshold in enumerate (thresholds):
        plt.text(fpr[i], tpr[i], threshold)

    vals = [i for i in np.arange(0, 1, 0.01)]
    xp = [i for i in np.arange(1, 0, -0.01)]
    sns.lineplot(x = vals, y = vals, ax = ax, color = "red")
    sns.lineplot(x = vals, y = xp, ax = ax, color = "green")
    fig.suptitle(f"ROC Curve: {model_name}")
    fig.set_size_inches(10, 7)
    start = .2
    gap = .2
    height = 1.05
    plt.text(start, height, f"EER: {round(performance['EER'] * 100, 1)}%")
    plt.text(start + gap, height, f"Threshold: {round(performance['Threshold'], 3)}")
    plt.text(start + 2*gap + 0.05, height, f"AUC: {round(performance['AUC'], 3)}")

    plt.savefig(f"{output_folder}{model_name}", dpi = 400)

    pass

In [10]:
read_path = PurePath("/Users/joshuaelms/Desktop/github_repos/nsf-reu2022/data/simulation_results/tpr_fpr_Manhattan.json")
with open(read_path, "r") as f:
    data = load(f)

In [17]:
t_start, t_stop, t_step = 0, 10, 1
thresholds = [round(i, 2) for i in np.arange(t_start, t_stop, t_step)]
aggregate_data = {str(threshold): {"tpr": [], "fpr": []} for threshold in thresholds}
for user in data:
    for threshold in thresholds:
        threshold = str(threshold)
        tpr, fpr = data[user][threshold]["tpr"], data[user][threshold]["fpr"]
        if tpr != None:
            aggregate_data[threshold]["tpr"].append(tpr)

        if fpr != None: 
            aggregate_data[threshold]["fpr"].append(fpr)

In [22]:
aggregate_data

{'0': {'tpr': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0],
  'fpr': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0

In [29]:
for threshold in thresholds: 
    tpr_arr = np.array([result[threshold]["tpr"] for result in aggregate_data.values()]).mean(axis = 0)
    fpr_arr = np.array([result[threshold]["fpr"] for result in aggregate_data]).mean(axis = 0)
    aggregate_data[threshold]["tpr"] = tpr_arr
    aggregate_data[threshold]["fpr"] = fpr_arr

TypeError: string indices must be integers

In [15]:
aggregate_data

{'0': {'tpr': [], 'fpr': []},
 '1': {'tpr': [0.11392405063291139,
   0.06329113924050633,
   0.08163265306122448,
   0.25316455696202533,
   0.06060606060606061,
   0.10091743119266056,
   0.09803921568627451,
   0.07352941176470588,
   0.0425531914893617,
   0.125,
   0.29411764705882354,
   0.125,
   0.13333333333333333,
   0.15384615384615385,
   0.16,
   0.14492753623188406,
   0.11363636363636363,
   0.05263157894736842,
   0.2159090909090909,
   0.10666666666666667,
   0.14634146341463414,
   0.1780821917808219,
   0.0967741935483871,
   0.0784313725490196,
   0.12121212121212122,
   0.1346153846153846,
   0.056910569105691054,
   0.14814814814814814,
   0.10714285714285714,
   0.15555555555555556,
   0.07142857142857142,
   0.06153846153846154,
   0.13559322033898305,
   0.056910569105691054,
   0.034482758620689655,
   0.25,
   0.0410958904109589,
   0.1509433962264151,
   0.21641791044776118,
   0.07377049180327869,
   0.1388888888888889,
   0.2631578947368421,
   0.1222222222