In [2]:
import os 
# Move two directory levels up from the notebook's starting directory so that the
# relative paths used later in this notebook (e.g. "broward/...") are resolved from
# there; presumably this is also what lets the `utils.*` imports below resolve.
# NOTE(review): the target is relative, so re-running this cell without restarting
# the kernel moves up two more levels each time.
os.chdir('../../')
print("Current working directory is now: ", os.getcwd())

import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt 

from utils.load_settings import load_settings
from utils.latex_helpers import df_to_latex
from utils.fairness_functions import reshape_general_violent_cond_auc_summaries

%matplotlib inline
# load parameters
settings = load_settings()

Current working directory is now:  C:\Users\Caroline Wang\OneDrive\Duke\Criminal Recidivism\psa-analysis


## AUC Fairness 

In this notebook, we check each model's AUC conditioned on the sensitive attributes race and sex. 

In [4]:
# Read in the conditional-AUC data for the prediction problems of interest.
# Result: auc_data maps "<problem>-<time_period>-<model_name>" to the DataFrame
# read from the matching CSV under broward/race-auc/<time_period>/<problem>/.
MODELS_OF_INTEREST = ('ebm', 'riskslim_cs', 'compas', 'arnold')


def parse_model_name(filename):
    """Extract the model name from an AUC results filename.

    The filename is split on "_":
      * 3 parts -> the first part is the model name;
      * 4 parts -> the first two parts joined by "_" (e.g. "riskslim_cs").

    Returns None for any other shape, so that unrecognised files are skipped
    instead of being stored under the model name left over from a previously
    processed file (or raising NameError when no name has been set yet).
    """
    parts = filename.split("_")
    if len(parts) == 3:
        return parts[0]
    if len(parts) == 4:
        return parts[0] + "_" + parts[1]
    return None


auc_data = {}
for time_period in ["six-month", "two-year"]:
    for problem in ['general', 'violent']:
        problem_path = "broward/race-auc/" + time_period + "/" + problem
        # Combinations without a results directory are skipped silently.
        if not os.path.isdir(problem_path):
            continue
        for filename in os.listdir(problem_path):
            # Ignore sub-directories; only plain files hold results.
            if not os.path.isfile(os.path.join(problem_path, filename)):
                continue
            model_name = parse_model_name(filename)
            # A None model name (unrecognised filename) is never in the tuple.
            if model_name in MODELS_OF_INTEREST:
                auc_data["-".join((problem, time_period, model_name))] = pd.read_csv(problem_path + "/" + filename)

auc_data.keys()

dict_keys(['general-six-month-arnold', 'general-six-month-compas', 'general-six-month-ebm', 'general-six-month-riskslim_cs', 'violent-six-month-arnold', 'violent-six-month-compas', 'violent-six-month-ebm', 'violent-six-month-riskslim_cs', 'general-two-year-arnold', 'general-two-year-compas', 'general-two-year-ebm', 'general-two-year-riskslim_cs', 'violent-two-year-arnold', 'violent-two-year-compas', 'violent-two-year-ebm', 'violent-two-year-riskslim_cs'])

## AUC of Arnold NCA / NVCA, COMPAS, EBM and RiskSLIM (two-year problems)

In [5]:
# Preview the first rows of the two-year conditional-AUC frames for the general
# and the violent prediction problems.
# NOTE(review): this cell was originally annotated as showing the Arnold NCA /
# Arnold NVCA frames ('general-two-year-arnold' / 'violent-two-year-arnold'),
# but it inspects the COMPAS keys -- confirm which model was intended.
# Only the last expression of a cell is rendered, so display() is used to show
# both frames instead of silently discarding the first one.
display(auc_data['general-two-year-compas'].head())
display(auc_data['violent-two-year-compas'].head())

Unnamed: 0,Attribute,Attribute Value,AUC,fold_num
0,race,African-American,0.562706,0
1,race,Hispanic,0.47619,0
2,race,Caucasian,0.619231,0
3,race,Other,0.657895,0
4,sex,male,0.583992,0


In [8]:
# Table labels per model key: [label for the general problem, label for the violent problem].
models = {'arnold': ['Arnold NCA', 'Arnold NVCA Raw'], 
          'compas': ['COMPAS General', "COMPAS Viol."], 
          'ebm': ['EBM', 'EBM'],
          'riskslim_cs': ['RiskSLIM (con.)', 'RiskSLIM (con.)']}

# Build one reshaped summary per model from its two-year general / violent AUC frames.
res_dfs = []
for model_name, model_table_labels in models.items():
    res_df = reshape_general_violent_cond_auc_summaries(general_auc=auc_data['general-two-year-' + model_name],
                                                        general_model_name=model_table_labels[0],
                                                        violent_auc=auc_data['violent-two-year-' + model_name],
                                                        violent_model_name=model_table_labels[1]
                                                        )
    res_dfs.append(res_df)

# Numeric columns of the final table, in display order; all are rounded to 3 decimals.
value_columns = ["Afr-Am.", "Cauc.", "Hisp.", "Other Race", "race_range",
                 "Female", "Male", "sex_range"]

# Stack the per-model summaries, shorten the attribute-value column names, fix the
# column order and round. Doing this in a single chain yields a fresh frame, so no
# column is assigned onto a selection of another frame (which can raise
# SettingWithCopyWarning), and rounding is vectorized instead of per-element.
# The original row index of each per-model summary is kept as-is.
auc_summary_table = (pd.concat(res_dfs, axis=0)
                        .rename(columns={"African-American": "Afr-Am.",
                                        "Other": "Other Race",
                                        "Hispanic": "Hisp.",
                                        "Caucasian": "Cauc.",
                                        "female": "Female", 
                                        "male": "Male"})
                        .loc[:, ["Model", "Label"] + value_columns]
                        .round(dict.fromkeys(value_columns, 3)))

auc_summary_table

Unnamed: 0,Model,Label,Afr-Am.,Cauc.,Hisp.,Other Race,race_range,Female,Male,sex_range
0,Arnold NCA,general\_two\_year,0.585,0.605,0.614,0.645,0.06,0.652,0.594,0.057
1,Arnold NVCA Raw,violent\_two\_year,0.597,0.651,0.732,0.674,0.136,0.681,0.622,0.059
0,COMPAS General,general\_two\_year,0.623,0.644,0.531,0.567,0.113,0.682,0.624,0.059
1,COMPAS Viol.,violent\_two\_year,0.638,0.636,0.616,0.618,0.022,0.608,0.638,0.03
0,EBM,general\_two\_year,0.66,0.658,0.616,0.617,0.044,0.702,0.651,0.051
1,EBM,violent\_two\_year,0.653,0.707,0.706,0.667,0.054,0.726,0.667,0.059
0,RiskSLIM (con.),general\_two\_year,0.626,0.613,0.605,0.551,0.075,0.643,0.618,0.025
1,RiskSLIM (con.),violent\_two\_year,0.62,0.709,0.673,0.593,0.116,0.753,0.641,0.112


In [9]:
# Emit the summary table as LaTeX source using the project helper df_to_latex
# (imported from utils.latex_helpers), for pasting into a write-up.
df_to_latex(auc_summary_table)

\begin{array}{l | l | r | r | r | r | r | r | r | r} \hline
    \verb|     Model     | & \verb|      Label       | & \verb|Afr-Am.| & \verb|Cauc.| & \verb|Hisp.| & \verb|Other Race| & \verb|race_range| & \verb|Female| & \verb|Male | & \verb|sex_range| \\ \hline
    \hline
    \verb|Arnold NCA     | & general\_two\_year &   0.585 & 0.605 & 0.614 &      0.645 &      0.060 &  0.652 & 0.594 &     0.057 \\ \hline
    \verb|Arnold NVCA Raw| & violent\_two\_year &   0.597 & 0.651 & 0.732 &      0.674 &      0.136 &  0.681 & 0.622 &     0.059 \\ \hline
    \verb|COMPAS General | & general\_two\_year &   0.623 & 0.644 & 0.531 &      0.567 &      0.113 &  0.682 & 0.624 &     0.059 \\ \hline
    \verb|COMPAS Viol.   | & violent\_two\_year &   0.638 & 0.636 & 0.616 &      0.618 &      0.022 &  0.608 & 0.638 &     0.030 \\ \hline
    EBM             & general\_two\_year &   0.660 & 0.658 & 0.616 &      0.617 &      0.044 &  0.702 & 0.651 &     0.051 \\ \hline
    EBM             & violent\_two\_yea

In [10]:
## write result
# Persist the summary table as CSV. The output directory is created first
# (no-op if it already exists) so the write does not fail with OSError when
# the nested log directory is missing, e.g. on a fresh checkout.
output_path = "broward/logs/fairness_results/auc_visualizations/auc_summary_general_violence.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
auc_summary_table.to_csv(output_path)