En este notebook saco **correlaciones**

## Import libraries and data

In [4]:
from IPython.display import clear_output


# import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd

# import shap

# pd.set_option('display.max_columns', None)

In [None]:
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier

In [3]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, mean_squared_error

## Auxiliar functions

In [4]:
dic_days_mont = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}

def leap_year(year):
    return year%4 == 0

def top_days(month_tmp, year_tmp):
    top = dic_days_mont[month_tmp]
    if month_tmp == 2 and leap_year(year_tmp):
        top += 1
    return top

def get_days_distance(year_in, month_in, day_in, year_out, month_out, day_out, it):
    days = 0
    day_tmp = day_in
    month_tmp = month_in
    year_tmp = year_in
    while day_tmp!=day_out or month_tmp!=month_out or year_tmp!=year_out:
        days += 1
        if day_tmp < top_days(month_tmp, year_tmp):
            day_tmp += 1
        else:
            day_tmp = 1
            if month_tmp == 12:
                month_tmp = 1
                year_tmp += 1
            else:
                month_tmp +=1
    # We count the last day in jail
    days += 1
    return days

def count_days(date_1, date_2, it):
    days = 0
    year_in = int(date_1[0])
    month_in = int(date_1[1])
    day_in = int(date_1[2])
    year_out = int(date_2[0])
    month_out = int(date_2[1])
    day_out = int(date_2[2])
    days += get_days_distance(year_in, month_in, day_in, year_out, month_out, day_out, it)    
    return days

# Bias in COMPAS

In this section, we will study if the COMPAS model is biased by comparing the output scores with the real rate of recividism. In other words, given two individuals with the same features except race, we will try to analyze if the model overpredicts a higher score for a given race. 

COMPAS works by evaluating a range of factors including age, sex, personality traits, measures of social isolation, prior criminal history, family criminality, geography, and employment status. Northpointe gets some of this information from criminal records, and the rest from a questionnaire that asks defendants to respond to queries like, “How many of your friends/acquaintances are taking drugs illegally?” and to agree or disagree with statements like, “A hungry person has a right to steal.”

COMPAS returns a score from 0 to 10 indicating the risk of recividism. In order to compare more easily, the decimal score will be transformed to a binary label indicating High risk (5-10) or Low risk (1-4)

In [5]:
url = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'
df = pd.read_csv(url)
df['high_risk'] = (df['decile_score'] >= 5).astype(int)

In [6]:
df_backup = df.copy()

In [7]:
df.head()

Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_jail_in,c_jail_out,c_case_number,c_offense_date,c_arrest_date,c_days_from_compas,c_charge_degree,c_charge_desc,is_recid,r_case_number,r_charge_degree,r_days_from_arrest,r_offense_date,r_charge_desc,r_jail_in,r_jail_out,violent_recid,is_violent_recid,vr_case_number,vr_charge_degree,vr_offense_date,vr_charge_desc,type_of_assessment,decile_score.1,score_text,screening_date,v_type_of_assessment,v_decile_score,v_score_text,v_screening_date,in_custody,out_custody,priors_count.1,start,end,event,two_year_recid,high_risk
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,0,1,0,0,0,-1.0,2013-08-13 06:03:42,2013-08-14 05:41:20,13011352CF10A,2013-08-13,,1.0,F,Aggravated Assault w/Firearm,0,,,,,,,,,0,,,,,Risk of Recidivism,1,Low,2013-08-14,Risk of Violence,1,Low,2013-08-14,2014-07-07,2014-07-14,0,0,327,0,0,0
1,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,0,3,0,0,0,-1.0,2013-01-26 03:45:27,2013-02-05 05:36:53,13001275CF10A,2013-01-26,,1.0,F,Felony Battery w/Prior Convict,1,13009779CF10A,(F3),,2013-07-05,Felony Battery (Dom Strang),,,,1,13009779CF10A,(F3),2013-07-05,Felony Battery (Dom Strang),Risk of Recidivism,3,Low,2013-01-27,Risk of Violence,1,Low,2013-01-27,2013-01-26,2013-02-05,0,9,159,1,1,0
2,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,0,4,0,1,4,-1.0,2013-04-13 04:58:34,2013-04-14 07:02:04,13005330CF10A,2013-04-13,,1.0,F,Possession of Cocaine,1,13011511MM10A,(M1),0.0,2013-06-16,Driving Under The Influence,2013-06-16,2013-06-16,,0,,,,,Risk of Recidivism,4,Low,2013-04-14,Risk of Violence,3,Low,2013-04-14,2013-06-16,2013-06-16,4,0,63,0,1,0
3,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,0,8,1,0,1,,,,13000570CF10A,2013-01-12,,1.0,F,Possession of Cannabis,0,,,,,,,,,0,,,,,Risk of Recidivism,8,High,2013-01-13,Risk of Violence,6,Medium,2013-01-13,,,1,0,1174,0,0,1
4,6,bouthy pierrelouis,bouthy,pierrelouis,2013-03-26,Male,1973-01-22,43,25 - 45,Other,0,1,0,0,2,,,,12014130CF10A,,2013-01-09,76.0,F,arrest case no charge,0,,,,,,,,,0,,,,,Risk of Recidivism,1,Low,2013-03-26,Risk of Violence,1,Low,2013-03-26,,,2,0,1102,0,0,0


## Experiments

As we don't have the input features needed to replicate the COMPAS model, we will train a classifier to predict the COMPAS score given the gender, race, age, priors_count, and crime factor. We will evaluate the model by using different fairness metrics, and study how different methods of data rebalancing can affect these metrics.


SMOTE/Undersample/Oversample -> Train -> Evaluate different metrics

### Metrics to evaluate:

Castelnovo, A., Crupi, R., Greco, G., & Regoli, D. (2021). The zoo of Fairness metrics in Machine Learning. arXiv preprint arXiv:2106.00467.


(INDEPENDENCE)

- **Demographic parity**: Positive prediction ratio between two races.
- **Demographic parity conditioned on priors?**

(SEPARATION)

- **Predictive equality** -> FPR
- **Equality of opportunity** -> FNR

(SUFFICIENCY)

- **Predictive parity** -> Precision

In [8]:
def eval_fairness(y_pred, y_true, black_mask, white_mask):
    y_pred_black = y_pred[black_mask]
    y_true_black = y_true[black_mask]
    y_pred_white = y_pred[white_mask]
    y_true_white = y_true[white_mask]
    # False Positive Rates FPR = FP / (FP + TN)
    fpr_black = np.sum((y_pred_black == 1) * (y_true_black == 0)) / np.sum(y_true_black == 0)
    fpr_white = np.sum((y_pred_white == 1) * (y_true_white == 0)) / np.sum(y_true_white == 0)
    # True positive rates TPR = TP / (TP + FN)
    tpr_black = np.sum((y_pred_black == 1)*(y_true_black == 1)) / np.sum(y_true_black == 1)
    tpr_white = np.sum((y_pred_white == 1)*(y_true_white == 1)) / np.sum(y_true_white == 1)
    # Precision
    precision_white = precision_score(y_true_white, y_pred_white)
    precision_black = precision_score(y_true_black, y_pred_black)

    data = {}
    data['TPR_w'] = tpr_white
    data['FPR_w'] = fpr_white
    data['Precision_white'] = precision_white
    data['Accuracy_white'] = np.mean(y_pred_white == y_true_white)
    data['TPR_b'] = tpr_black
    data['FPR_b'] = fpr_black
    data['Precision_black'] = precision_black
    data['Accuracy_black'] = np.mean(y_pred_black == y_true_black)
    data['Eq. Oportunity'] = abs(tpr_white-tpr_black)
    data['Pred. Equality'] = abs(fpr_white-fpr_black)
    data['Eq. odds'] = abs(tpr_white-tpr_black) + abs(fpr_white-fpr_black)
    data['Accuracy'] = np.mean(y_pred == y_true)
    data['Precision'] = precision_score(y_pred, y_true)
    # cm_tmp = confusion_matrix(y_true_black, y_pred_black)
    # print(f"cm black FPR: {cm_tmp[1,0]/(cm_tmp[1,0]+cm_tmp[0,0])}")
    # print(f"FPR black: {fpr_black}")
    # print(f"same?? {(cm_tmp[1,0]/(cm_tmp[1,0]+cm_tmp[0,0])) == fpr_black}")

    return data 

### SMOTE/Oversampling/Undersampling

In [9]:
def eval_resampler(df, sampler=None, resample_test=False):

    # Prepare the data
    df_temp = df[(df['race'] == 'African-American') | (df['race'] == 'Caucasian')]
    cols = ['age', 'sex', 'race', 'priors_count', 'score_text']
    X, recid = df_temp[cols], df_temp['two_year_recid']
    X['score_text'] = [0 if y_i == 'Low' else 1 for y_i in X['score_text']]
    X = pd.get_dummies(X, drop_first=True)
    X_train, X_test, recid_train, recid_test = train_test_split(X, recid.values, test_size=0.2, random_state=42)

    ##############################
    # RESAMPLE THE TRAINING SET  #
    ##############################

    # Build target variable combining both the race and whether it has recivided or not
    #   - '00': Black, Non-recividist
    #   - '01': Black, Recividist
    #   - '10': White, Non-recividist
    #   - '11': White, Recividist
    if sampler:
        # get the race value
        y_race = X_train['race_Caucasian'].values
        # build the target variable
        y_sampler = np.array([str(a) + str(b) for a, b in zip(y_race, recid_train)])

        print("TRAINING SET:")
        print("Before Sampling: \n\tBlack, Non-recidivist: {}\n\tBlack, Recidivist: {}\
            \n\tWhite, Non-recidivist: {}\n\tWhite, Recidivist: {}".format(np.sum(y_sampler == '00'), \
            np.sum(y_sampler == '01'), np.sum(y_sampler == '10'), np.sum(y_sampler == '11')))

        # Sample the dataset according to the race and the recividism rates
        X_train, y_sampler = sampler.fit_resample(X_train, y_sampler)

        print("After Sampling: \n\tBlack, Non-recidivist: {}\n\tBlack, Recidivist: {}\
            \n\tWhite, Non-recidivist: {}\n\tWhite, Recidivist: {}".format(np.sum(y_sampler == '00'), \
            np.sum(y_sampler == '01'), np.sum(y_sampler == '10'), np.sum(y_sampler == '11')))

        # Undo the label, i.e. get the race and the real recividism rate
        race, recid_train = np.array([int(y_i[0]) for y_i in y_sampler]), np.array([int(y_i[1]) for y_i in y_sampler])
        X_train['race_Caucasian'] = race 
        
    X_train, y_train = X_train.drop(columns='score_text'), X_train['score_text']

    ####################################
    # RESAMPLE THE TEST SET (OPTIONAL) #
    ####################################

    if resample_test and sampler:
    # get the race value
        y_race = X_test['race_Caucasian'].values
        # build the target variable
        y_sampler = np.array([str(a) + str(b) for a, b in zip(y_race, recid_test)])

        print("TEST SET:")
        print("Before Sampling: \n\tBlack, Non-recidivist: {}\n\tBlack, Recidivist: {}\
            \n\tWhite, Non-recidivist: {}\n\tWhite, Recidivist: {}".format(np.sum(y_sampler == '00'), \
            np.sum(y_sampler == '01'), np.sum(y_sampler == '10'), np.sum(y_sampler == '11')))

        # Sample the dataset according to the race and the recividism rates
        X_test, y_sampler = sampler.fit_resample(X_test, y_sampler)

        print("After Sampling: \n\tBlack, Non-recidivist: {}\n\tBlack, Recidivist: {}\
            \n\tWhite, Non-recidivist: {}\n\tWhite, Recidivist: {}".format(np.sum(y_sampler == '00'), \
            np.sum(y_sampler == '01'), np.sum(y_sampler == '10'), np.sum(y_sampler == '11')))

        # Undo the label, i.e. get the race and the real recividism rate
        race, recid_test = np.array([int(y_i[0]) for y_i in y_sampler]), np.array([int(y_i[1]) for y_i in y_sampler])
        X_test['race_Caucasian'] = race 

    X_test, y_test = X_test.drop(columns='score_text'), X_test['score_text']

    # Train the model

    clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    clf.fit(X_train, y_train)

    # Predict
    y_pred = clf.predict(X_test)

    black_mask = X_test['race_Caucasian'] == 0
    white_mask = X_test['race_Caucasian'] == 1

    # Evaluate fairness metrics
    data = eval_fairness(y_pred, recid_test, black_mask, white_mask)
    return data

In [10]:
data = []
index= []

index.append("Original Training - Original Test")
data.append(eval_resampler(df))
# index.append("SMOTE Training - Original Test")
# data.append(eval_resampler(df, sampler=SMOTE(random_state=42)))
# index.append("SMOTE Training - SMOTE Test")
# data.append(eval_resampler(df, sampler=SMOTE(random_state=42), resample_test=True))
# index.append("Oversampling Training - Original Test")
# data.append(eval_resampler(df, sampler=RandomOverSampler(random_state=42)))
# index.append("Oversampling Training - Oversampling Test")
# data.append(eval_resampler(df, sampler=RandomOverSampler(random_state=42), resample_test=True))
# index.append("Undersampling Training - Original Test")
# data.append(eval_resampler(df, sampler=RandomUnderSampler(random_state=42)))
# index.append("Undersampling Training - Undersampling Test")
# data.append(eval_resampler(df, sampler=RandomUnderSampler(random_state=42), resample_test=True))


clear_output(wait=True)

pd.DataFrame(data, index=index)#.style.highlight_max(color='red')

Unnamed: 0,TPR_w,FPR_w,Precision_white,Accuracy_white,TPR_b,FPR_b,Precision_black,Accuracy_black,Eq. Oportunity,Pred. Equality,Eq. odds,Accuracy,Precision
Original Training - Original Test,0.381443,0.168317,0.592,0.655936,0.716146,0.409742,0.657895,0.656207,0.334703,0.241425,0.576128,0.656098,0.603806


### Training a different classifier for each race

In [11]:
df_temp = df[(df['race'] == 'African-American') | (df['race'] == 'Caucasian')]
cols = ['age', 'sex', 'race', 'priors_count', 'score_text']
X, recid = df_temp[cols], df_temp['two_year_recid']
X['score_text'] = [0 if y_i == 'Low' else 1 for y_i in X['score_text']]
X = pd.get_dummies(X, drop_first=True)
X_train, X_test, recid_train, recid_test = train_test_split(X, recid.values, test_size=0.2, random_state=42)

# Train a classifier for each race
X_train_black, recid_train_black = X_train[X_train['race_Caucasian'] == 0], recid_train[X_train['race_Caucasian'] == 0]
X_train_white, recid_train_white = X_train[X_train['race_Caucasian'] == 1], recid_train[X_train['race_Caucasian'] == 1]
# Get score text in order to train
X_train_black, y_train_black = X_train_black.drop(columns='score_text'), X_train_black['score_text']
X_train_white, y_train_white = X_train_white.drop(columns='score_text'), X_train_white['score_text']

clf_black = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
clf_white = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

# Fit the models
clf_black.fit(X_train_black, y_train_black)
clf_white.fit(X_train_white, y_train_white)

# Make predictions
X_test_black, recid_test_black = X_test[X_test['race_Caucasian'] == 0], recid_test[X_test['race_Caucasian'] == 0]
X_test_white, recid_test_white = X_test[X_test['race_Caucasian'] == 1], recid_test[X_test['race_Caucasian'] == 1]
# Get score text in order to train
X_test_black, y_test_black = X_test_black.drop(columns='score_text'), X_test_black['score_text']
X_test_white, y_test_white = X_test_white.drop(columns='score_text'), X_test_white['score_text']

y_pred_black = clf_black.predict(X_test_black)
y_pred_white = clf_white.predict(X_test_white)
y_pred = np.concatenate((y_pred_black, y_pred_white))
recid_test = np.concatenate((recid_test_black, recid_test_white))
black_mask = np.array([True]*len(y_pred_black) + [False]*len(y_pred_white))
white_mask = np.array([False]*len(y_pred_black) + [True]*len(y_pred_white))

index.append("Split by race")
data.append(eval_fairness(y_pred, recid_test, black_mask, white_mask))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['score_text'] = [0 if y_i == 'Low' else 1 for y_i in X['score_text']]
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



### Removing race attribute

In [12]:
# Train without the race variable
df_temp = df[(df['race'] == 'African-American') | (df['race'] == 'Caucasian')]
cols = ['age', 'sex', 'race', 'priors_count', 'score_text']
X, recid = df_temp[cols], df_temp['two_year_recid']
X['score_text'] = [0 if y_i == 'Low' else 1 for y_i in X['score_text']]
X = pd.get_dummies(X, drop_first=True)
X_train, X_test, recid_train, recid_test = train_test_split(X, recid.values, test_size=0.2, random_state=42)

# drop the race
X_train, y_train = X_train.drop(columns=['race_Caucasian', 'score_text']), X_train['score_text']
# Train the model without race
clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test.drop(columns=['race_Caucasian', 'score_text']))
black_mask = X_test['race_Caucasian'] == 0
white_mask = X_test['race_Caucasian'] == 1 

index.append("Remove race attribute")
data.append(eval_fairness(y_pred, recid_test, black_mask, white_mask))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['score_text'] = [0 if y_i == 'Low' else 1 for y_i in X['score_text']]
Parameters: { "use_label_encoder" } are not used.



In [13]:
# pd.set_option('precision', 3)
# display(pd.DataFrame(data, index=index))
# pd.reset_option('precision')

# New experiment: feature relevance

In [14]:
df_aux = df_backup.copy()

In [15]:
df_aux["race"].unique()

array(['Other', 'African-American', 'Caucasian', 'Hispanic',
       'Native American', 'Asian'], dtype=object)

In [16]:
list_drop = ["id", "name", "dob", "days_b_screening_arrest", "c_case_number", "c_offense_date", "c_arrest_date", "r_case_number", "r_charge_degree", "c_charge_desc",
      "r_days_from_arrest", "r_offense_date", "r_charge_desc","violent_recid", "vr_case_number", "vr_charge_degree", "vr_offense_date", "vr_charge_desc", "first", "last",
      "type_of_assessment", "start", "end", "event", "v_type_of_assessment", "v_screening_date", "compas_screening_date", "screening_date", "age_cat", "priors_count.1",
      "v_decile_score", "v_score_text", "high_risk", "decile_score.1", "two_year_recid"]
df_aux = df_aux.drop(list_drop, axis=1)

In [17]:
df_aux["days_custody"] = 0
df_aux["days_c_jail"] = 0
df_aux["days_r_jail"] = 0
for i in range(len(df_aux)):
    # print(f"it {i+1}/{len(df)}")
    if pd.notna(df_aux.loc[i,"in_custody"]) and pd.notna(df_aux.loc[i,"out_custody"]):
        df_aux.loc[i, "days_custody"] = count_days(df_aux.loc[i,"in_custody"].split("-"), df_aux.loc[i,"out_custody"].split("-"), i+1)

    if pd.notna(df_aux.loc[i,"c_jail_in"]) and pd.notna(df_aux.loc[i,"c_jail_out"]):
        df_aux.loc[i, "days_c_jail"] = count_days(df_aux.loc[i,"c_jail_in"].split(" ")[0].split("-"), df_aux.loc[i,"c_jail_out"].split(" ")[0].split("-"), i+1)

    if pd.notna(df_aux.loc[i,"r_jail_in"]) and pd.notna(df_aux.loc[i,"r_jail_out"]):
        df_aux.loc[i, "days_r_jail"] = count_days(df_aux.loc[i,"r_jail_in"].split(" ")[0].split("-"), df_aux.loc[i,"r_jail_out"].split(" ")[0].split("-"), i+1)

list_drop = ['in_custody', 'out_custody', 'c_jail_in', 'c_jail_out', 'r_jail_in', 'r_jail_out']
df_aux = df_aux.drop(list_drop, axis=1)

In [18]:
# drop na
print(len(df_aux))
df_aux.dropna(inplace=True)
# Reset index
df_aux = df_aux.reset_index()
df_aux = df_aux.drop(['index'], axis=1)
print(len(df_aux))

7214
7192


In [19]:
df_aux['score_text'] = [0 if y_i == 'Low' else 1 for y_i in df_aux['score_text']]
df_aux.loc[df_aux["sex"] == "Male", "sex"] = 0
df_aux.loc[df_aux["sex"] == "Female", "sex"] = 1
df_aux["sex"] = df_aux["sex"].astype(int)
df_aux.loc[df_aux["c_charge_degree"] == "M", "c_charge_degree"] = 0
df_aux.loc[df_aux["c_charge_degree"] == "F", "c_charge_degree"] = 1
df_aux["c_charge_degree"] = df_aux["c_charge_degree"].astype(int)

In [20]:
df_aux["race"].value_counts()

race
African-American    3687
Caucasian           2449
Hispanic             631
Other                376
Asian                 32
Native American       17
Name: count, dtype: int64

In [21]:
df_aux_copy = df_aux.copy()

In [22]:
# Obtener los valores únicos en el orden en que aparecen
unique_races = df_aux["race"].unique()
# Crear un diccionario {string → int}
mapping = {race: i for i, race in enumerate(unique_races)}
# Sustituir en el DataFrame
df_aux["race"] = df_aux["race"].map(mapping).astype(int)

In [23]:
df_aux.head()

Unnamed: 0,sex,age,race,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,c_days_from_compas,c_charge_degree,is_recid,is_violent_recid,score_text,days_custody,days_c_jail,days_r_jail
0,0,69,0,0,1,0,0,0,1.0,1,0,0,0,8,2,0
1,0,34,1,0,3,0,0,0,1.0,1,1,1,0,11,11,0
2,0,24,1,0,4,0,1,4,1.0,1,1,0,0,1,2,1
3,0,23,1,0,8,1,0,1,1.0,1,0,0,1,0,0,0
4,0,43,0,0,1,0,0,2,76.0,1,0,0,0,0,0,0


In [24]:
rs = np.random.RandomState(0)
corr = df_aux.corr()
corr.style.background_gradient(cmap="coolwarm").format("{:.2f}") # corr.style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,sex,age,race,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,c_days_from_compas,c_charge_degree,is_recid,is_violent_recid,score_text,days_custody,days_c_jail,days_r_jail
sex,1.0,0.0,0.02,-0.06,-0.06,-0.05,-0.06,-0.12,0.0,-0.05,-0.1,-0.06,-0.04,-0.04,-0.06,-0.06
age,0.0,1.0,0.12,-0.07,-0.39,-0.12,-0.16,0.14,0.09,-0.09,-0.2,-0.1,-0.3,-0.01,0.01,-0.05
race,0.02,0.12,1.0,-0.05,-0.17,-0.07,-0.03,-0.12,-0.02,-0.07,-0.09,-0.04,-0.14,-0.04,-0.02,-0.05
juv_fel_count,-0.06,-0.07,-0.05,1.0,0.18,0.09,0.05,0.18,0.0,0.05,0.09,0.06,0.13,0.06,0.06,0.11
decile_score,-0.06,-0.39,-0.17,0.18,1.0,0.22,0.18,0.44,0.03,0.18,0.35,0.17,0.87,0.16,0.21,0.19
juv_misd_count,-0.05,-0.12,-0.07,0.09,0.22,1.0,0.27,0.24,0.01,0.03,0.11,0.09,0.17,0.03,0.04,0.03
juv_other_count,-0.06,-0.16,-0.03,0.05,0.18,0.27,1.0,0.1,0.0,0.03,0.11,0.05,0.15,0.1,0.04,0.02
priors_count,-0.12,0.14,-0.12,0.18,0.44,0.24,0.1,1.0,0.01,0.14,0.28,0.09,0.37,0.13,0.2,0.14
c_days_from_compas,0.0,0.09,-0.02,0.0,0.03,0.01,0.0,0.01,1.0,-0.05,-0.07,-0.01,0.03,0.05,0.02,-0.01
c_charge_degree,-0.05,-0.09,-0.07,0.05,0.18,0.03,0.03,0.14,-0.05,1.0,0.1,-0.01,0.15,0.08,0.12,0.09


## Concat

In [25]:
df_aux = df_aux_copy.copy()

In [26]:
df_sf = df_aux.copy()

In [27]:
df_sf["race"] = [0 if y_i == 'Caucasian' else 1 for y_i in df_sf["race"]]

In [28]:
df_sf

Unnamed: 0,sex,age,race,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,c_days_from_compas,c_charge_degree,is_recid,is_violent_recid,score_text,days_custody,days_c_jail,days_r_jail
0,0,69,1,0,1,0,0,0,1.0,1,0,0,0,8,2,0
1,0,34,1,0,3,0,0,0,1.0,1,1,1,0,11,11,0
2,0,24,1,0,4,0,1,4,1.0,1,1,0,0,1,2,1
3,0,23,1,0,8,1,0,1,1.0,1,0,0,1,0,0,0
4,0,43,1,0,1,0,0,2,76.0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7187,0,23,1,0,7,0,0,0,1.0,1,0,0,1,3,3,0
7188,0,23,1,0,3,0,0,0,1.0,1,0,0,0,3,3,0
7189,0,57,1,0,1,0,0,0,1.0,1,0,0,0,2,2,0
7190,1,33,1,0,2,0,0,3,1.0,0,0,0,0,2,2,0


In [29]:
def eval_fairness_rates(FPR_a, FPR_b, TPR_a, TPR_b):
    data = {}
    data['Eq.Oportunity'] = abs(TPR_a-TPR_b)
    data['Pred.Equality'] = abs(FPR_a-FPR_b)
    data['Eq.odds'] = data['Eq.Oportunity'] + data['Pred.Equality']
    return data 

def get_cm_metrics(df_bin):
    df_cm_metrics = pd.DataFrame()
    races = list(df_bin["race"].unique())
    for i in range(len(races)):
        race_tmp = races[i]
        # print(f"The df with id {i} is for the race {race_tmp}")
        df_tmp = df_bin.loc[df_bin["race"]==i,]
        # split data
        x_data_tmp = df_tmp.drop(["is_recid"], axis=1)
        y_data_tmp = df_tmp["is_recid"]
        x_train_tmp, x_test_tmp, y_train_tmp, y_test_tmp = train_test_split(x_data_tmp, y_data_tmp, test_size=0.5, random_state=42)
        # model
        m_tmp = xgb.XGBClassifier().fit(x_train_tmp, y_train_tmp)
        m_tmp.score(x_train_tmp,y_train_tmp)
        # pred and confusion matrix
        y_pred_tmp = np.array(m_tmp.predict(x_test_tmp))
        cm_tmp = confusion_matrix(y_pred_tmp, y_test_tmp)
        # Rates
        FPR_tmp = np.sum((y_pred_tmp == 1) * (y_test_tmp == 0)) / np.sum(y_test_tmp == 0)
        TPR_tmp = np.sum((y_pred_tmp == 1) * (y_test_tmp == 1)) / np.sum(y_test_tmp == 1)
        # Add row to df
        new_row_tmp = {'race': race_tmp, 'TP':cm_tmp[1,1], 'TN':cm_tmp[0,0], 'FP':cm_tmp[1,0], 'FN':cm_tmp[0,1], 
        'FPR': FPR_tmp, 'FP/(FP+TN)': cm_tmp[1,0]/(cm_tmp[1,0]+cm_tmp[0,0]),
        'TPR': TPR_tmp, 'TP/(TP+FN)': cm_tmp[1,1]/(cm_tmp[1,1]+cm_tmp[0,1])}
        # df_cm_metrics = df_cm_metrics.append(new_row_tmp, ignore_index=True)
        df_cm_metrics = pd.concat(
            [df_cm_metrics, pd.DataFrame([new_row_tmp])],
            ignore_index=True
        )
    return df_cm_metrics

In [30]:
df_cm_metrics = get_cm_metrics(df_sf)

In [31]:
df_cm_metrics

Unnamed: 0,race,TP,TN,FP,FN,FPR,FP/(FP+TN),TPR,TP/(TP+FN)
0,1,407,646,56,116,0.079772,0.079772,0.778203,0.778203
1,0,993,1024,116,239,0.101754,0.101754,0.806006,0.806006


In [32]:
df_fairness = pd.DataFrame()
data_fairness = {}
FPR_caucasian = list(df_cm_metrics.loc[df_cm_metrics["race"]==0,"FPR"])[0]
TPR_caucasian = list(df_cm_metrics.loc[df_cm_metrics["race"]==0,"TPR"])[0]
FPR_no_caucasian = list(df_cm_metrics.loc[df_cm_metrics["race"]==1,"FPR"])[0]
TPR_no_caucasian = list(df_cm_metrics.loc[df_cm_metrics["race"]==1,"TPR"])[0]
data_fairness = eval_fairness_rates(FPR_caucasian, FPR_no_caucasian, TPR_caucasian, TPR_no_caucasian)
row_cocos = {'race_comb': "caucasian_no-caucasian", 'Eq.Oportunity': data_fairness['Eq.Oportunity'], 'Pred.Equality':data_fairness['Pred.Equality'], 'Eq.odds':data_fairness['Eq.odds']}
# df_fairness = df_fairness.append(row_cocos, ignore_index=True)
df_fairness = pd.concat(
    [df_fairness, pd.DataFrame([row_cocos])],
    ignore_index=True
)

In [33]:
df_fairness # quitar columnas para aumentar la desigualdad

Unnamed: 0,race_comb,Eq.Oportunity,Pred.Equality,Eq.odds
0,caucasian_no-caucasian,0.027804,0.021982,0.049786


In [34]:
# como son números tan pequeños reflejar porcentualmente los cambios

## Disociate

In [35]:
# White -> Caucasian
df_white = df_aux[df_aux['race'] == 'Caucasian'].drop(['race'], axis=1)
# Black -> !Caucasian
df_black = df_aux[df_aux['race'] != 'Caucasian'].drop(['race'], axis=1)

In [36]:
df_black.head()

Unnamed: 0,sex,age,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,c_days_from_compas,c_charge_degree,is_recid,is_violent_recid,score_text,days_custody,days_c_jail,days_r_jail
0,0,69,0,1,0,0,0,1.0,1,0,0,0,8,2,0
1,0,34,0,3,0,0,0,1.0,1,1,1,0,11,11,0
2,0,24,0,4,0,1,4,1.0,1,1,0,0,1,2,1
3,0,23,0,8,1,0,1,1.0,1,0,0,1,0,0,0
4,0,43,0,1,0,0,2,76.0,1,0,0,0,0,0,0


In [37]:
df_white.head()

Unnamed: 0,sex,age,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,c_days_from_compas,c_charge_degree,is_recid,is_violent_recid,score_text,days_custody,days_c_jail,days_r_jail
6,0,41,0,6,0,0,14,1.0,1,1,0,1,19,7,19
8,1,39,0,1,0,0,0,1.0,0,0,0,0,4,4,0
9,0,21,0,3,0,0,1,308.0,1,1,1,0,2,2,2
10,0,27,0,4,0,0,0,1.0,1,0,0,0,2,2,0
12,1,37,0,1,0,0,0,0.0,0,0,0,0,2,2,0


In [38]:
print(f"There are {len(df_white)} Caucasian rows")
print(f"There are {len(df_black)} no Caucasian rows")

There are 2449 Caucasian rows
There are 4743 no Caucasian rows
