In [1]:
import os 
os.chdir('../../')
print("Current working directory is now: ", os.getcwd())

import numpy as np 
import pandas as pd 
from broward.models.advanced_models.six_month import stumps

Current working directory is now:  C:\Users\Caroline Wang\OneDrive\Duke\Criminal Recidivism\psa-analysis


## Fairness for Two Year General Recidivism 
Base rate 45.5%, the highest out of all Broward problems. 

In [2]:
# load train & test data
train_stumps = pd.read_csv("broward/data/broward_train_stumps.csv")
test_stumps = pd.read_csv("broward/data/broward_test_stumps.csv")

X_train_stumps, Y_train_stumps = train_stumps.loc[:,:'five_year>=1'], train_stumps['recid_two_year'].values
X_test_stumps, Y_test_stumps = test_stumps.loc[:,:'five_year>=1'], test_stumps['recid_two_year'].values
cols = X_train_stumps.columns[3:]

Y_train_stumps[Y_train_stumps == -1] = 0
Y_test_stumps[Y_test_stumps == -1] = 0

In [3]:
from sklearn.linear_model import LogisticRegression

def stump_preds(X_train, Y_train, X_test, Y_test, c, columns, seed):
    ## remove unused feature in modeling
    # preds df is caroline's addition
    preds_df = X_test
    X_train = X_train.drop(['person_id', 'screening_date', 'race'], axis=1)
    X_test = X_test.drop(['person_id', 'screening_date', 'race'], axis=1)
    
    ## estimator
    lasso = LogisticRegression(class_weight = 'balanced', solver='liblinear', 
                               random_state=seed, penalty='l1', C = c).fit(X_train, Y_train)
    coefs = lasso.coef_[lasso.coef_ != 0]
    features = columns[lasso.coef_[0] != 0].tolist()
    intercept = round(lasso.intercept_[0],3)
     
    ## dictionary
    lasso_dict_rounding = {}
    for i in range(len(features)):
        lasso_dict_rounding.update({features[i]: round(round(coefs[i], 3)*100, 1)})
    
    ## prediction on test set
    prob = 0
    for k in features:
        test_values = X_test[k]*(lasso_dict_rounding[k]/100)
        prob += test_values
    
    holdout_prob = np.exp(prob)/(1+np.exp(prob))
    preds_df['prediction'] = holdout_prob 
    preds_df['label'] = Y_test
#     test_auc = roc_auc_score(Y_test, holdout_prob)
    
    return {'coefs': coefs, 
            'features': features, 
            'intercept': intercept, 
            'dictionary': lasso_dict_rounding, 
#             'test_auc': test_auc,
            'preds_df': preds_df[['person_id',  'screening_date', 'race', 'prediction', 'label']]}

In [30]:
# based on nested CV procedure, best c value was .05
best_stumps_summary = stump_preds(X_train_stumps, Y_train_stumps, 
                                  X_test_stumps, Y_test_stumps, 
                                  c=0.05, columns=cols, seed=816)

# the above gives predictions on test set; I need predictions on train set 
two_year_recid_stumps = best_stumps_summary['preds_df'].rename(columns={"race": "group"})
two_year_recid_stumps = two_year_recid_stumps[(two_year_recid_stumps['group']=="Caucasian") | (two_year_recid_stumps['group']=="African-American")]
two_year_recid_stumps.drop(labels = ["person_id","screening_date"], inplace=True, axis=1)
two_year_recid_stumps['group'] = two_year_recid_stumps['group'].map({'Caucasian': 0, 'African-American': 1})

In [31]:
stumps.stump_table(best_stumps_summary['coefs'], 
                   best_stumps_summary['features'], 
                   best_stumps_summary['intercept'], 
                   best_stumps_summary['dictionary'])

+-----------------------------------+----------------+
| Features                          | Score          |
| p_current_age>=23                 | -23.0          |
| p_current_age>=37                 | -74.2          |
| p_age_first_offense<=20           | 10.9           |
| p_age_first_offense<=29           | -19.6          |
| p_charge>=8                       | -52.9          |
| p_prison>=1                       | 24.9           |
| p_probation>=5                    | 46.6           |
| p_felassault_arrest>=1            | -8.3           |
| p_weapons_arrest>=1               | -2.7           |
| prior_conviction_M>=1             | -1.2           |
| prior_conviction_M>=7             | 29.6           |
| violent_conviction>=5             | -13.5          |
| p_arrest>=30                      | 5.3            |
| p_drug>=2                         | 103.4          |
| p_drug>=3                         | 26.3           |
| p_dui>=1                          | -20.6          |
| p_steali

#### Enforce Equalized Odds (Hardt, Srebo)

In [5]:
from fairness_enforcers.equalized_odds.eq_odds import run_eq_odds
from fairness_enforcers.equalized_odds.calib_eq_odds import run_calib_eq_odds

Questions: Why are the originals different between the original groups when computed for equalized odds vs for calibrated equalized odds?

In [18]:
eq_group_0_test_model, eq_group_1_test_model, eq_odds_group_0_test_model, eq_odds_group_1_test_model = run_eq_odds(test_and_val_data=two_year_recid_stumps)


Original group 0 model:
Accuracy: 0.700
AUC: 0.694
FPR: 0.140
FNR: 0.160
FP cost: 0.441
FN cost: 0.516
Base rate: 0.400
Avg. score: 0.458

Original group 1 model:
Accuracy: 0.617
AUC: 0.614
FPR: 0.262
FNR: 0.121
FP cost: 0.493
FN cost: 0.467
Base rate: 0.495
Avg. score: 0.513

Equalized odds group 0 model:
Accuracy: 0.700
AUC: 0.694
FPR: 0.140
FNR: 0.160
FP cost: 0.441
FN cost: 0.516
Base rate: 0.400
Avg. score: 0.458

Equalized odds group 1 model:
Accuracy: 0.579
AUC: 0.640
FPR: 0.159
FNR: 0.262
FP cost: 0.454
FN cost: 0.505
Base rate: 0.495
Avg. score: 0.474



  group_0_val_model = Model(group_0_val_data['prediction'].as_matrix(), group_0_val_data['label'].as_matrix())
  group_1_val_model = Model(group_1_val_data['prediction'].as_matrix(), group_1_val_data['label'].as_matrix())
  group_0_test_model = Model(group_0_test_data['prediction'].as_matrix(), group_0_test_data['label'].as_matrix())
  group_1_test_model = Model(group_1_test_data['prediction'].as_matrix(), group_1_test_data['label'].as_matrix())


#### Enforce Calibrated Equalized Odds

In [7]:
calib_group_1_test_summary, calib_group_1_test_summary, calib_eq_odds_group_0_test_summary, calib_eq_odds_group_1_test_summary = run_calib_eq_odds(test_and_val_data=two_year_recid_stumps,
                                                                                                                                                   cost_constraint='fpr')

"To measure false-negative or false-positive discrimination, it is enough to check the difference in error rates between groups. To measure calibration, we can compare the average model score with the population's base rate. A necessary (but not sufficient) condition for calibration is that the average model score should match the base rate." 

I thnk they assume that the models are calibrated? 

## Fairness for Six Month Drug Recidivism 
Base rate 4%, the lowest out of all Broward problems. 

In [32]:
# load train & test data
X_train_stumps, Y_train_stumps = train_stumps.loc[:,:'five_year>=1'], train_stumps['recid_drug6'].values
X_test_stumps, Y_test_stumps = test_stumps.loc[:,:'five_year>=1'], test_stumps['recid_drug6'].values
cols = X_train_stumps.columns[3:]

Y_train_stumps[Y_train_stumps == -1] = 0
Y_test_stumps[Y_test_stumps == -1] = 0

In [33]:
# based on nested CV procedure, best c value was .05
best_stumps_summary = stump_preds(X_train_stumps, Y_train_stumps, 
                                  X_test_stumps, Y_test_stumps, 
                                  c=0.05, columns=cols, seed=816)

stumps.stump_table(best_stumps_summary['coefs'], 
                   best_stumps_summary['features'], 
                   best_stumps_summary['intercept'], 
                   best_stumps_summary['dictionary'])


+-----------------------------------+----------------+
| Features                          | Score          |
| p_current_age>=23                 | -23.0          |
| p_current_age>=37                 | -74.2          |
| p_age_first_offense<=20           | 10.9           |
| p_age_first_offense<=29           | -19.6          |
| p_charge>=8                       | -52.9          |
| p_prison>=1                       | 24.9           |
| p_probation>=5                    | 46.6           |
| p_felassault_arrest>=1            | -8.3           |
| p_weapons_arrest>=1               | -2.7           |
| prior_conviction_M>=1             | -1.2           |
| prior_conviction_M>=7             | 29.6           |
| violent_conviction>=5             | -13.5          |
| p_arrest>=30                      | 5.3            |
| p_drug>=2                         | 103.4          |
| p_drug>=3                         | 26.3           |
| p_dui>=1                          | -20.6          |
| p_steali

In [29]:
recid_drug6_stumps = best_stumps_summary['preds_df'].rename(columns={"race": "group"})
recid_drug6_stumps = recid_drug6_stumps[(recid_drug6_stumps['group']=="Caucasian") | (recid_drug6_stumps['group']=="African-American")]
recid_drug6_stumps.drop(labels = ["person_id","screening_date"], inplace=True, axis=1)
recid_drug6_stumps['group'] = recid_drug6_stumps['group'].map({'Caucasian': 0, 'African-American': 1})

eq_group_0_test_model, eq_group_1_test_model, eq_odds_group_0_test_model, eq_odds_group_1_test_model = run_eq_odds(test_and_val_data=two_year_recid_stumps)
calib_group_1_test_summary, calib_group_1_test_summary, calib_eq_odds_group_0_test_summary, calib_eq_odds_group_1_test_summary = run_calib_eq_odds(test_and_val_data=two_year_recid_stumps,
                                                                                                                                                   cost_constraint='fpr')

Original group 0 model:
Accuracy: 0.667
AUC: 0.681
FPR: 0.158
FNR: 0.175
FP cost: 0.445
FN cost: 0.500
Base rate: 0.421
Avg. score: 0.468

Original group 1 model:
Accuracy: 0.640
AUC: 0.678
FPR: 0.280
FNR: 0.080
FP cost: 0.491
FN cost: 0.448
Base rate: 0.440
Avg. score: 0.518

Equalized odds group 0 model:
Accuracy: 0.667
AUC: 0.681
FPR: 0.158
FNR: 0.175
FP cost: 0.445
FN cost: 0.500
Base rate: 0.421
Avg. score: 0.468

Equalized odds group 1 model:
Accuracy: 0.660
AUC: 0.738
FPR: 0.130
FNR: 0.210
FP cost: 0.454
FN cost: 0.477
Base rate: 0.440
Avg. score: 0.484

Cost constraint is fpr
Original group 0 model:
Accuracy: 0.750
AUC: 0.711
FPR: 0.125
FNR: 0.125
FP cost: 0.444
FN cost: 0.500
Base rate: 0.393
Avg. score: 0.466

Original group 1 model:
Accuracy: 0.614
AUC: 0.624
FPR: 0.267
FNR: 0.119
FP cost: 0.496
FN cost: 0.460
Base rate: 0.475
Avg. score: 0.517

Equalized odds group 0 model:
Accuracy: 0.750
AUC: 0.711
FPR: 0.125
FNR: 0.125
FP cost: 0.444
FN cost: 0.500
Base rate: 0.393
Avg. 

  group_0_val_model = Model(group_0_val_data['prediction'].as_matrix(), group_0_val_data['label'].as_matrix())
  group_1_val_model = Model(group_1_val_data['prediction'].as_matrix(), group_1_val_data['label'].as_matrix())
  group_0_test_model = Model(group_0_test_data['prediction'].as_matrix(), group_0_test_data['label'].as_matrix())
  group_1_test_model = Model(group_1_test_data['prediction'].as_matrix(), group_1_test_data['label'].as_matrix())
  group_0_val_model = Model(group_0_val_data['prediction'].as_matrix(), group_0_val_data['label'].as_matrix())
  group_1_val_model = Model(group_1_val_data['prediction'].as_matrix(), group_1_val_data['label'].as_matrix())
  group_0_test_model = Model(group_0_test_data['prediction'].as_matrix(), group_0_test_data['label'].as_matrix())
  group_1_test_model = Model(group_1_test_data['prediction'].as_matrix(), group_1_test_data['label'].as_matrix())
