In [5]:
from pdpbox import pdp
import pickle
from sklearn.model_selection import train_test_split
import pandas as pd
from scipy.stats import chi2_contingency

In [2]:
# Read the cleaned records and keep only rows whose 'birth date' value is below 2020.
raw_data = pd.read_csv("../../data_clean.csv")
valid_birth = raw_data['birth date'] < 2020
data = raw_data.loc[valid_birth].copy()

# Derived columns: interview date minus birth date / minus year of entry.
interview_date = data['parole board interview date']
data['age'] = interview_date - data['birth date']
data['jail duration'] = interview_date - data['year of entry']

# Group the listed interview types under 'OTHERS', then fold two labels into
# their counterparts ('SUPP MERIT' -> 'MERIT TIME', 'PV REAPP' -> 'REAPPEAR').
others_parole_type = ['PIE', 'SP CONSDR', 'ECPDO', 'MEDICAL', 'RESCISSION', 'DEPORT']
interview_type = data['parole board interview type']
interview_type = interview_type.replace(others_parole_type, 'OTHERS')
interview_type = interview_type.replace('SUPP MERIT', 'MERIT TIME')
interview_type = interview_type.replace('PV REAPP', 'REAPPEAR')
data['parole board interview type'] = interview_type

# Rows missing either of these two fields are removed.
required_columns = ['crime 1 - class', 'parole eligibility date']
data = data.dropna(subset=required_columns)

# One-hot encode in two passes: the demographic columns drop their first level,
# the crime-class and interview-type columns keep every level.
demographic_columns = ["sex", "race / ethnicity"]
full_level_columns = [
    "crime 1 - class", "crime 2 - class",
    "crime 3 - class", "crime 4 - class",
    "parole board interview type",
]
df_one_hot = pd.get_dummies(data, columns=demographic_columns, drop_first=True)
df_one_hot = pd.get_dummies(df_one_hot, columns=full_level_columns)

# These raw columns are removed before the split.
df_one_hot = df_one_hot.drop(columns=['release date', 'birth date', 'year of entry'])

# Stratified 70/30 split on the target column 'y' with a fixed seed.
target = df_one_hot['y']
features = df_one_hot.drop(columns='y')
X_train, X_test, y_train, y_test = train_test_split(
    features, target, stratify=target, test_size=0.3, random_state=42)

In [3]:
# Load the trained classifier from the .pkl file.
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so
# only load model files that come from a trusted source.
# The `with` block closes the file handle as soon as loading finishes instead of
# leaving it open until the garbage collector happens to reclaim it.
with open("../model.pkl", "rb") as model_file:
    xgb_classifier = pickle.load(model_file)

In [4]:
# Un-encoded (pre one-hot) rows that correspond to X_train, without the target
# column 'y'. The rows are selected through X_train's index labels rather than
# by repeating train_test_split with hand-copied parameters, so data_train stays
# row-aligned with X_train even if the split settings are changed later.
# NOTE(review): assumes the index labels of `data` are unique (the default index
# produced by read_csv, filtered but never reset) — confirm if the loading
# code changes.
data_train = data.drop(columns='y').loc[X_train.index]

## Statistical Parity

### Race / Ethnicity

In [6]:
# Contingency table: race / ethnicity of each training row (rows) against the
# label the classifier predicts for that row (columns).
train_predictions = xgb_classifier.predict(X_train)
contingency_table = pd.crosstab(
    index=data_train['race / ethnicity'],
    columns=train_predictions,
)
print(contingency_table)

# Chi-square test on the table. Besides the statistic and p-value,
# chi2_contingency also returns the degrees of freedom and the expected
# frequencies; they land in `a` and `b` and are not used in this cell.
chi2, p, a, b = chi2_contingency(contingency_table)
print(f"Chi-square value: {chi2}", f"P-value: {p}", sep="\n")

col_0                0     1
race / ethnicity            
AMER IND/ALSK      124   112
ASIAN/PACIFIC       19    77
BLACK             6602  2427
HISPANIC          2432  1451
OTHER              101   144
UNKNOWN            156    73
WHITE             2815  4576
Chi-square value: 2183.3999567943815
P-value: 0.0


### Sex

In [7]:
# Contingency table: sex of each training row (rows) against the label the
# classifier predicts for that row (columns).
train_predictions = xgb_classifier.predict(X_train)
contingency_table = pd.crosstab(
    index=data_train['sex'],
    columns=train_predictions,
)
print(contingency_table)

# Chi-square test on the table. Besides the statistic and p-value,
# chi2_contingency also returns the degrees of freedom and the expected
# frequencies; they land in `a` and `b` and are not used in this cell.
chi2, p, a, b = chi2_contingency(contingency_table)
print(f"Chi-square value: {chi2}", f"P-value: {p}", sep="\n")

col_0       0     1
sex                
FEMALE    199  1167
MALE    12050  7693
Chi-square value: 1130.6801158733504
P-value: 7.092827025265842e-248


## Conditional Statistical Parity