# Bias Detection Engine Demo: Open Sentencing

In [1]:
import sys
# Put the parent directory on the import path; presumably this is what lets
# the `server` package imported below resolve when the notebook runs from its
# own folder — confirm against the repository layout.
sys.path.append('..')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# The star import supplies the engine helpers called throughout this notebook.
from server.routes.engine import *  # bias_table, bias_grid, consistency_table, filter_bias, mean_difference

The specified config.json file /Users/shoffman/Projects/External/embrace-call-for-code/bias-detection-engine/notebooks/server/config/mappings.json does not exist


In [2]:
def _pop_columns(frame, names):
    """Remove the named columns from `frame` in place and return them as one DataFrame."""
    return pd.concat([frame.pop(name) for name in names], axis=1)


# Load the simulated cases and index every row by case ID plus the
# demographic attributes.
df = pd.read_csv('../data/simulated_data_v0.4.csv', index_col=0)
df = df.set_index(['Case ID', 'Race', 'Gender', 'Citizenship'])

# Columns whose name starts with 'FP' move out of df into fp ...
fp = _pop_columns(df, [name for name in df.columns if name.startswith('FP')])
# ... then the last five columns still left in df move into sent.
sent = _pop_columns(df, df.columns[-5:])

# What remains in df is displayed as the cell output.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Alleged Crime,Alleged Crime Category,Alleged Crime Reported,Investigation,Arrest,Charges filed,Diversion,Court initial appearance,Preliminary hearing,Bail or detention hearing,Grand jury,Information stage,Arraignment,Plea bargain,Trial,Convicted,Sentencing
Case ID,Race,Gender,Citizenship,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10000,Other,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Unsolved or not arrested OUT,,,,,,,,,,,,,
10001,Hispanic,Male,U.S. Citizen,Drug trafficking,Felony,Investigation,Unsolved or not arrested OUT,,,,,,,,,,,,,
10002,Black,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Unsolved or not arrested OUT,,,,,,,,,,,,,
10003,White,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Arrest,Charges filed,Court initial appearance,,Charges dropped or dismissed OUT,,,,,,,,,
10004,White,Male,U.S. Citizen,Drug trafficking,Felony,Investigation,Arrest,Released OUT,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70388,Other,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Arrest,Charges filed,Court initial appearance,,Move to preliminary hearing,Bail or detention hearing,Move to collect information,,Arraignment,Plea bargain,Convicted,,Sentencing,Prison and alternatives
70389,Hispanic,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Unsolved or not arrested OUT,,,,,,,,,,,,,
70390,White,Female,U.S. Citizen,Drug trafficking,Felony,Investigation,Arrest,Charges filed,Court initial appearance,,Charges dropped or dismissed OUT,,,,,,,,,
70391,White,Male,U.S. Citizen,Drug trafficking,Felony,Investigation,Arrest,Charges filed,Court initial appearance,,Charges dropped or dismissed OUT,,,,,,,,,


<img src="https://www.bjs.gov/content/downloadchart.cfm" alt="Chart from the Bureau of Justice Statistics (bjs.gov)">

In [3]:
# Row mask selecting the drug-trafficking cases.
subset = df['Alleged Crime'].eq('Drug trafficking')

# Keep those rows, and only the columns from 'Alleged Crime Reported' onward.
df = df.loc[subset].loc[:, 'Alleged Crime Reported':]

# Apply the same row filter to the fact-pattern and sentencing frames.
fp, sent = fp.loc[subset], sent.loc[subset]

In [4]:
# Bias table grouped by race. The rendered output below lists each group's
# proportion of defendants and a disparate-impact value per stage transition.
# NOTE(review): the third argument appears to supply the label used in the
# "proportion of defendants" column — confirm against server.routes.engine.
bias_table(df, 'Race', 'defendants')

Unnamed: 0_level_0,proportion of defendants,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact,disparate impact
Unnamed: 0_level_1,Unnamed: 1_level_1,Arrest -> Released OUT,Bail or detention hearing -> Move to grand jury,Arraignment -> Charges dismissed OUT,Arraignment -> Trial,Arraignment -> Reduction of charge with trial,Sentencing -> Probation and alternatives,Sentencing -> Prison and alternatives,Sentencing -> Prison only,Sentencing -> Probations only,Sentencing -> Fine only
Race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Black,24.9%,0.609,0.572,0.756,1.023,0.487,0.436,1.269,1.288,0.435,0.685
Hispanic,24.9%,0.983,0.486,0.847,1.188,0.934,0.99,1.057,1.039,1.125,0.7
Other,24.9%,0.985,1.97,0.803,1.089,0.978,1.138,1.025,1.053,0.995,0.709
White,25.4%,1.526,1.413,1.857,0.697,2.002,1.803,0.634,0.608,1.817,2.507


Values closer to 1 are better. A disparate impact below 0.8 implies bias against the transition shown (red); a value above 1.25 implies bias for the transition (blue).

**TODO:** Compare "life" and parole sentences separately. For now, the next cell excludes rows whose estimated or given sentence is "life".

In [5]:
# Cases whose sentencing outcome mentions prison (case-insensitive; missing
# outcomes count as non-matches).
subset = df['Sentencing'].str.contains('Prison', case=False, na=False)

# 'life' cannot be cast to an integer, so rows with a 'life' estimated or
# given sentence are excluded before the cast.
not_life = sent['Estimated Sentence'].ne('life') & sent['Given Sentence'].ne('life')
sent = sent.loc[subset & not_life].astype(int)

# Compare estimated and given sentences, grouped by race.
mean_difference(sent['Estimated Sentence'], sent['Given Sentence'], 'Race')

Unnamed: 0_level_0,Sentencing bias
Race,Unnamed: 1_level_1
Black,9.22
Hispanic,6.21
Other,9.42
White,-5.41


## Individual Fairness -- Fact Patterns

In [6]:
# Pass the fact-pattern columns (fp) and the stage outcomes (df) to the
# engine; the rendered output below has one consistency value per transition.
# NOTE(review): presumably this scores how alike the outcomes are for cases
# with similar fact patterns — confirm against server.routes.engine.
consistency_table(fp, df)

Unnamed: 0_level_0,consistency,consistency,consistency,consistency,consistency,consistency,consistency,consistency,consistency,consistency,consistency
Unnamed: 0_level_1,Investigation -> Unsolved or not arrested OUT,Investigation -> Arrest,Arrest -> Charges filed,Arrest -> Released OUT,Charges filed -> Court initial appearance,Charges filed -> Diversion,Diversion -> Released OUT,Diversion -> Court initial appearance,Sentencing -> Prison and alternatives,Sentencing -> Prison only,Sentencing -> Fine only
All Defendants,0.559,0.559,0.606,0.606,0.707,0.707,0.744,0.744,0.628,0.576,0.797
