In [None]:
%load_ext autoreload
%autoreload 2

In [82]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, train_test_split

from crepes import ConformalPredictiveSystem
from crepes.extras import binning, DifficultyEstimator

from calibrated_explanations import  VennAbers, __version__
from calibrated_explanations.utils import transform_to_numeric

print(f"calibrated_explanations {__version__}")


calibrated_explanations v0.3.1


# COMPAS
Data preprocessing from [this notebook](https://colab.research.google.com/github/pair-code/what-if-tool/blob/master/WIT_COMPAS_with_SHAP.ipynb#scrollTo=KF00pJvkeicT).

In [None]:
df = pd.read_csv('https://storage.googleapis.com/what-if-tool-resources/computefest2019/cox-violent-parsed_filt.csv')
print(df.shape)
# Preprocess the data

# Filter out entries with no indication of recidivism or no compass score
df = df[df['is_recid'] != -1]
df = df[df['decile_score'] != -1]


# Make the COMPASS label column numeric (0 and 1), for use in our model
df['score_text'] = np.where(df['score_text'] == 'Low', 'Low', 'Not Low')

target = 'score_text'

print(df.shape)
df.head()


In [None]:
features_to_keep = ['sex', 'age', 'race', 'juv_fel_count', 'juv_misd_count', 'juv_other_count', 'priors_count', 'score_text']
# 'age_cat', 'is_recid', 'vr_charge_desc',  'c_charge_desc',, 'is_violent_recid''vr_charge_degree', 'c_charge_degree', 'decile_score', 
df = df[features_to_keep]
df, categorical_features, categorical_labels, target_labels, mappings = transform_to_numeric(df, target)
print(categorical_features)
print(*[f"{key}: {value}" for key, value in mappings.items()], sep="\n")
print(target_labels)
df.head()


In [None]:
print(df.columns)
race = 2
age = 1
sex = 0

In [None]:
num_to_test = 20 # number of instances to test

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop([target],axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test

X, y = Xd.values, yd.values

In [None]:
kf = KFold(n_splits=10)

base = 'base'
attributes = [base, race, sex, age]
attribute_names = [base, 'race', 'sex', 'age']
va = {}
results = {}
results['proba'] = {}
results['low'] = {}
results['high'] = {}
for attr in attribute_names:
    va[attr] = None
    results['proba'][attr] = []
    results['low'][attr] = []
    results['high'][attr] = []

for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier()

    model.fit(X_train,y_train)
    age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
    age_test_bin = binning(X_test[:,age], age_cal_boundaries)
    cal_bins = {base: None, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'age': age_cal_bin}
    test_bins = {base: None, 'race': X_test[:,race], 'sex': X_test[:,sex], 'age': age_test_bin}
    
    for attr in attribute_names:
        va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
        proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
        results['proba'][attr].append(proba[:,1])
        results['low'][attr].append(low)
        results['high'][attr].append(high)


In [None]:
for attr in attribute_names:
    results['proba'][attr] = np.concatenate(results['proba'][attr]) 
    results['low'][attr] = np.concatenate(results['low'][attr]) 
    results['high'][attr] = np.concatenate(results['high'][attr]) 


In [None]:
widths = {}
for attr in attribute_names:
    widths[attr] = [results['high'][attr][i] - results['low'][attr][i] for i in range(len(results['high'][attr]))]
    print(f'{attr} width: {np.mean(widths[attr]): 2g} ({np.std(widths[attr]): 2g} )')

# Adult

In [None]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 
  
# metadata 
# print(adult.metadata) 
  
# # variable information 
# print(adult.variables) 


In [None]:
df = X
target = 'Income'
y = y.replace('<=50K.', '<=50K')
y = y.replace('>50K.', '>50K')
df[target] = y
df = df.dropna()
df, categorical_features, categorical_labels, target_labels, _ = transform_to_numeric(df, target)
print(target_labels)

In [None]:
num_to_test = 10 # number of instances to test, one from each class

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop(target,axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test
X, y = Xd.values, yd.values

In [None]:
print(df.columns)
edu = 3
race = 8
sex = 9

In [None]:
kf = KFold(n_splits=10)

base = 'base'
attributes = [base, race, sex, edu]
attribute_names = [base, 'race', 'sex', 'edu']
va = {}
results = {}
results['proba'] = {}
results['low'] = {}
results['high'] = {}
for attr in attribute_names:
    va[attr] = None
    results['proba'][attr] = []
    results['low'][attr] = []
    results['high'][attr] = []

for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier()

    model.fit(X_train,y_train)
    cal_bins = {base: None, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'edu': X_cal[:,edu]}
    test_bins = {base: None, 'race': X_test[:,race], 'sex': X_test[:,sex], 'edu': X_test[:,edu]}
    
    for attr in attribute_names:
        va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
        proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
        results['proba'][attr].append(proba[:,1])
        results['low'][attr].append(low)
        results['high'][attr].append(high)


In [None]:
for attr in attribute_names:
    results['proba'][attr] = np.concatenate(results['proba'][attr]) 
    results['low'][attr] = np.concatenate(results['low'][attr]) 
    results['high'][attr] = np.concatenate(results['high'][attr]) 


In [None]:
widths = {}
for attr in attribute_names:
    widths[attr] = [results['high'][attr][i] - results['low'][attr][i] for i in range(len(results['high'][attr]))]
    print(f'{attr} width: {np.mean(widths[attr]): 2g} ({np.std(widths[attr]): 2g} )')

# German


In [None]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
statlog_german_credit_data = fetch_ucirepo(id=144) 
  
# data (as pandas dataframes) 
X = statlog_german_credit_data.data.features 
y = statlog_german_credit_data.data.targets 
  
# # metadata 
# print(statlog_german_credit_data.metadata) 
  
# # variable information 
# print(statlog_german_credit_data.variables) 

In [None]:
feature_names = statlog_german_credit_data.variables.description[:-1]
feature_names[0] = 'Status'
feature_names[7] = 'Installment rate'
feature_names[15] = 'Existing credits'
feature_names[17] = 'Num people liable'
target_labels = {1: 'Good', 0: 'Bad'}

In [None]:
print(feature_names)
age = 12
sex = 8

In [None]:
df = X
target = 'Class'
df[target] = y
df = df.dropna()
df, categorical_features, categorical_labels, _, _ = transform_to_numeric(df, target)

In [72]:
num_to_test = 10 # number of instances to test, one from each class

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop(target,axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test
X, y = Xd.values, yd.values
y[y == 2] = 0

In [73]:
kf = KFold(n_splits=10)

base = 'base'
attributes = [base, sex, age]
attribute_names = [base, 'sex', 'age']
va = {}
results = {}
results['proba'] = {}
results['low'] = {}
results['high'] = {}
for attr in attribute_names:
    va[attr] = None
    results['proba'][attr] = []
    results['low'][attr] = []
    results['high'][attr] = []

for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier()

    model.fit(X_train,y_train)
    age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
    age_test_bin = binning(X_test[:,age], age_cal_boundaries)
    cal_bins = {base: None, 'sex': X_cal[:,sex], 'age': age_cal_bin}
    test_bins = {base: None, 'sex': X_test[:,sex], 'age': age_test_bin}
    
    for attr in attribute_names:
        va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
        proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
        results['proba'][attr].append(proba[:,1])
        results['low'][attr].append(low)
        results['high'][attr].append(high)


  if np.sum(np.isnan(np.nanmin(grads))) == 0:
  if np.sum(np.isnan(np.nanmin(grads))) == 0:
  if np.sum(np.isnan(np.nanmin(grads))) == 0:


In [74]:
for attr in attribute_names:
    results['proba'][attr] = np.concatenate(results['proba'][attr]) 
    results['low'][attr] = np.concatenate(results['low'][attr]) 
    results['high'][attr] = np.concatenate(results['high'][attr]) 


In [75]:
widths = {}
for attr in attribute_names:
    widths[attr] = [results['high'][attr][i] - results['low'][attr][i] for i in range(len(results['high'][attr]))]
    print(f'{attr} width: {np.mean(widths[attr]): 2g} ({np.std(widths[attr]): 2g} )')

base width:  0.0494171 ( 0.0621556 )
sex width:  0.100241 ( 0.102047 )
age width:  0.128648 ( 0.117976 )


# Boston

In [76]:
df = pd.read_csv('../data/reg/HousingData.csv', na_values='NA')
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,,36.2


In [77]:
feature_names = df.columns[:-1]
print(feature_names)
crm = 0

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')


In [78]:
target = 'MEDV'
df = df.dropna()
df, categorical_features, categorical_labels, target_labels, _ = transform_to_numeric(df, target)

In [80]:
num_to_test = 10 # number of instances to test, one from each class

Xd, yd = df.drop(target,axis=1), df[target]
X, y = df.drop(target,axis=1).values, df[target].values
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

### Standard Regression
Default confidence of 90%

In [88]:
kf = KFold(n_splits=10)

base = 'base'
attributes = [base, crm]
attribute_names = [base, 'crm']
cps = {}
results = {}
results['median'] = {}
results['low'] = {}
results['high'] = {}
for attr in attribute_names:
    cps[attr] = None
    results['median'][attr] = []
    results['low'][attr] = []
    results['high'][attr] = []

for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestRegressor()

    model.fit(X_train,y_train)
    crm_cal_bin, crm_cal_boundaries = binning(X_cal[:,crm], bins=3)
    crm_test_bin = binning(X_test[:,crm], crm_cal_boundaries)
    cal_bins = {base: None, 'crm': crm_cal_bin}
    test_bins = {base: None, 'crm': crm_test_bin}
    residuals_cal = model.predict(X_cal) - y_cal
    y_test_hat = model.predict(X_test)
    
    for attr in attribute_names:
        cps[attr] = ConformalPredictiveSystem()
        cps[attr].fit(residuals_cal, bins=cal_bins[attr])
        values = cps[attr].predict(y_test_hat, bins=test_bins[attr], lower_percentiles=[5, 50], higher_percentiles=[95, 50])
        results['median'][attr].append(np.mean(values[:,[1, 3]], axis=1))
        results['low'][attr].append(values[:,0])
        results['high'][attr].append(values[:,2])


In [89]:
for attr in attribute_names:
    results['median'][attr] = np.concatenate(results['median'][attr])
    results['low'][attr] = np.concatenate(results['low'][attr])
    results['high'][attr] = np.concatenate(results['high'][attr])

In [90]:
widths = {}
for attr in attribute_names:
    widths[attr] = [results['high'][attr][i] - results['low'][attr][i] for i in range(len(results['high'][attr]))]
    print(f'{attr} width: {np.mean(widths[attr]): 2g} ({np.std(widths[attr]): 2g} )')

base width:  9.86742 ( 1.08091 )
crm width:  13.6272 ( 2.86241 )


## Normalized Explanations

In [91]:
kf = KFold(n_splits=10)

base = 'base'
attributes = [base, crm]
attribute_names = [base, 'crm']
cps = {}
results = {}
results['median'] = {}
results['low'] = {}
results['high'] = {}
for attr in attribute_names:
    cps[attr] = None
    results['median'][attr] = []
    results['low'][attr] = []
    results['high'][attr] = []

for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestRegressor()

    model.fit(X_train,y_train)
    crm_cal_bin, crm_cal_boundaries = binning(X_cal[:,crm], bins=3)
    crm_test_bin = binning(X_test[:,crm], crm_cal_boundaries)
    cal_bins = {base: None, 'crm': crm_cal_bin}
    test_bins = {base: None, 'crm': crm_test_bin}
    residuals_cal = model.predict(X_cal) - y_cal
    y_test_hat = model.predict(X_test)
    
    difficulty = DifficultyEstimator().fit(X=X_train, learner=model, scaler=True)
    sigmas_cal = difficulty.apply(X_cal)
    sigmas_test = difficulty.apply(X_test)
    
    for attr in attribute_names:
        cps[attr] = ConformalPredictiveSystem()
        cps[attr].fit(residuals_cal, sigmas=sigmas_cal, bins=cal_bins[attr])
        values = cps[attr].predict(y_test_hat, sigmas=sigmas_test, bins=test_bins[attr], lower_percentiles=[5, 50], higher_percentiles=[95, 50])
        results['median'][attr].append(np.mean(values[:,[1, 3]], axis=1))
        results['low'][attr].append(values[:,0])
        results['high'][attr].append(values[:,2])


In [92]:
for attr in attribute_names:
    results['median'][attr] = np.concatenate(results['median'][attr])
    results['low'][attr] = np.concatenate(results['low'][attr])
    results['high'][attr] = np.concatenate(results['high'][attr])

In [93]:
widths = {}
for attr in attribute_names:
    widths[attr] = [results['high'][attr][i] - results['low'][attr][i] for i in range(len(results['high'][attr]))]
    print(f'{attr} width: {np.mean(widths[attr]): 2g} ({np.std(widths[attr]): 2g} )')

base width:  17.4181 ( 19.6481 )
crm width:  27.1914 ( 29.7635 )
