In [111]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [112]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import log_loss

from crepes import ConformalPredictiveSystem
from crepes.extras import binning, DifficultyEstimator

from calibrated_explanations import  VennAbers, __version__
from calibrated_explanations.utils import transform_to_numeric
from time import time

import warnings

# Ignore all warnings
warnings.filterwarnings('ignore')

print(f"calibrated_explanations {__version__}")



calibrated_explanations v0.3.1


In [113]:

datasets = {'COMPAS': 'classification', 'adult': 'classification', 'german': 'classification', 'boston': 'regression', }
results = {}
for name, type in datasets.items():
    results[name] = {}
    results[name]['type'] = type
    for category in ['mondrian', 'model_split']:
        results[name][category] = {'time': 0, 'base': 0, 'rand': 0, 'race': 0, 'sex': 0, 'age': 0, 'edu': 0, 'crm': 0}

num_rep = 10

# COMPAS
Data preprocessing from [this notebook](https://colab.research.google.com/github/pair-code/what-if-tool/blob/master/WIT_COMPAS_with_SHAP.ipynb#scrollTo=KF00pJvkeicT).

In [114]:
df = pd.read_csv('https://storage.googleapis.com/what-if-tool-resources/computefest2019/cox-violent-parsed_filt.csv')
print(df.shape)
# Preprocess the data

# Filter out entries with no indication of recidivism or no compass score
df = df[df['is_recid'] != -1]
df = df[df['decile_score'] != -1]


# Make the COMPASS label column numeric (0 and 1), for use in our model
df['score_text'] = np.where(df['score_text'] == 'Low', 'Low', 'Not Low')

target = 'score_text'

(18316, 40)


In [115]:
features_to_keep = ['sex', 'age', 'race', 'juv_fel_count', 'juv_misd_count', 'juv_other_count', 'priors_count', 'score_text']
# 'age_cat', 'is_recid', 'vr_charge_desc',  'c_charge_desc',, 'is_violent_recid''vr_charge_degree', 'c_charge_degree', 'decile_score', 
df = df[features_to_keep]
df, categorical_features, categorical_labels, target_labels, mappings = transform_to_numeric(df, target)


In [116]:
print(df.columns)
race = 2
age = 1
sex = 0

Index(['sex', 'age', 'race', 'juv_fel_count', 'juv_misd_count',
       'juv_other_count', 'priors_count', 'score_text'],
      dtype='object')


In [117]:
num_to_test = 20 # number of instances to test

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop([target],axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test

X, y = Xd.values, yd.values

In [118]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, race, sex, age]
attribute_names = [base, rand, 'race', 'sex', 'age']
va = {}
mondrian = {}
mondrian['proba'] = {}
mondrian['low'] = {}
mondrian['high'] = {}
mondrian['width'] = {}
mondrian['y'] = []
time_mondrian = time()
for attr in attribute_names:
    va[attr] = None
    mondrian['proba'][attr] = []
    mondrian['low'][attr] = []
    mondrian['high'][attr] = []
    mondrian['width'][attr] = []

for l in range(num_rep):
    np.random.seed = l
    indeces = np.random.permutation(no_of_instances)
    X = X[indeces,:]
    y = y[indeces]
    for train_index, test_index in kf.split(X):
        X_train_cal, X_test = X[train_index], X[test_index]
        y_train_cal, y_test = y[train_index], y[test_index]
        
        X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
        
        model = RandomForestClassifier()

        model.fit(X_train,y_train)
        age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
        age_test_bin = binning(X_test[:,age], age_cal_boundaries)
        rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
        rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
        cal_bins = {base: None, rand: rand_cal_bin, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'age': age_cal_bin}
        test_bins = {base: None, rand: rand_test_bin, 'race': X_test[:,race], 'sex': X_test[:,sex], 'age': age_test_bin}
        
        mondrian['y'].append(y_test)
        
        for attr in attribute_names:
            va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
            proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
            mondrian['proba'][attr].append(proba[:,1])
            mondrian['low'][attr].append(low)
            mondrian['high'][attr].append(high)
            mondrian['width'][attr].append(high-low)

results['COMPAS']['mondrian']['time'] = time() - time_mondrian

In [119]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, race, sex, age]
attribute_names = [base, rand, 'race', 'sex', 'age']
model_split = {}
model_split['proba'] = {}
model_split['low'] = {}
model_split['high'] = {}
model_split['width'] = {}
model_split['y'] = {}
for attr in attribute_names:
    model_split['proba'][attr] = []
    model_split['low'][attr] = []
    model_split['high'][attr] = []
    model_split['width'][attr] = []
    model_split['y'][attr] = []
    
time_split = time()
for l in range(num_rep):
    np.random.seed = l
    indeces = np.random.permutation(no_of_instances)
    X = X[indeces,:]
    y = y[indeces]
    for train_index, test_index in kf.split(X):
        X_train_cal, X_test = X[train_index], X[test_index]
        y_train_cal, y_test = y[train_index], y[test_index]
        
        X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
        
        age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
        age_train_bin = binning(X_train[:,age], age_cal_boundaries)
        age_test_bin = binning(X_test[:,age], age_cal_boundaries)
        rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
        rand_train_bin = binning(np.random.rand(len(y_train)), rand_cal_boundaries)
        rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
        train_bins = {base: np.ones(len(y_train)), rand: rand_train_bin, 'race': X_train[:,race], 'sex': X_train[:,sex], 'age': age_train_bin}
        cal_bins = {base: np.ones(len(y_cal)), rand: rand_cal_bin, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'age': age_cal_bin}
        test_bins = {base: np.ones(len(y_test)), rand: rand_test_bin, 'race': X_test[:,race], 'sex': X_test[:,sex], 'age': age_test_bin}
        
        
        for attr in attribute_names:
            for bin in np.unique(train_bins[attr]):
                X_train_bin = X_train[train_bins[attr] == bin]
                y_train_bin = y_train[train_bins[attr] == bin]
                X_cal_bin = X_cal[cal_bins[attr] == bin]
                y_cal_bin = y_cal[cal_bins[attr] == bin]
                X_test_bin = X_test[test_bins[attr] == bin]
                y_test_bin = y_test[test_bins[attr] == bin]
                
                if len(y_train_bin) == 0 or len(y_cal_bin) == 0 or len(y_test_bin) == 0:
                    continue
                
                model = RandomForestClassifier()
                
                model.fit(X_train_bin,y_train_bin)
                va = VennAbers(model.predict_proba(X_cal_bin), y_cal_bin, model)
                proba, low, high = va.predict_proba(X_test_bin, output_interval=True)
                model_split['proba'][attr].append(proba[:,1])
                model_split['low'][attr].append(low)
                model_split['high'][attr].append(high)
                model_split['width'][attr].append(high - low)
                model_split['y'][attr].append(y_test_bin)

results['COMPAS']['model_split']['time'] = time() - time_split

In [120]:
for attr in attribute_names:
    mondrian['proba'][attr] = np.concatenate(mondrian['proba'][attr]) 
    mondrian['low'][attr] = np.concatenate(mondrian['low'][attr]) 
    mondrian['high'][attr] = np.concatenate(mondrian['high'][attr]) 
    mondrian['width'][attr] = np.concatenate(mondrian['width'][attr]) 
    model_split['proba'][attr] = np.concatenate(model_split['proba'][attr]) 
    model_split['low'][attr] = np.concatenate(model_split['low'][attr]) 
    model_split['high'][attr] = np.concatenate(model_split['high'][attr]) 
    model_split['width'][attr] = np.concatenate(model_split['width'][attr]) 
    model_split['y'][attr] = np.concatenate(model_split['y'][attr]) 

mondrian['y'] = np.concatenate(mondrian['y']) 

In [121]:
for attr in attribute_names:
    results['COMPAS']['mondrian'][attr] = np.mean(mondrian['width'][attr])
    results['COMPAS']['model_split'][attr] = np.mean(model_split['width'][attr])
    print(f'{attr} width: \t{np.mean(mondrian["width"][attr]): 2g} \t{np.mean(model_split["width"][attr]): 2g}')

base width: 	 0.00736803 	 0.00746037
rand width: 	 0.0227246 	 0.0211287
race width: 	 0.0192262 	 0.0188397
sex width: 	 0.0111202 	 0.0115664
age width: 	 0.0215532 	 0.0210299


# Adult

In [122]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 

In [123]:
df = X
target = 'Income'
y = y.replace('<=50K.', '<=50K')
y = y.replace('>50K.', '>50K')
df[target] = y
df = df.dropna()
df, categorical_features, categorical_labels, target_labels, _ = transform_to_numeric(df, target)
print(target_labels)

{0: '<=50K', 1: '>50K'}


In [124]:
num_to_test = 10 # number of instances to test, one from each class

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop(target,axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test
X, y = Xd.values, yd.values

In [125]:
print(df.columns)
edu = 3
race = 8
sex = 9

Index(['age', 'workclass', 'fnlwgt', 'education', 'education-num',
       'marital-status', 'occupation', 'relationship', 'race', 'sex',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'Income'],
      dtype='object')


In [127]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, race, sex, edu]
attribute_names = [base, rand, 'race', 'sex', 'edu']
va = {}
mondrian = {}
mondrian['proba'] = {}
mondrian['low'] = {}
mondrian['high'] = {}
mondrian['width'] = {}
mondrian['y'] = []
for attr in attribute_names:
    va[attr] = None
    mondrian['proba'][attr] = []
    mondrian['low'][attr] = []
    mondrian['high'][attr] = []
    mondrian['width'][attr] = []

time_mondrian = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier()
    model.fit(X_train,y_train)
    
    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    cal_bins = {base: None, rand: rand_cal_bin, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'edu': X_cal[:,edu]}
    test_bins = {base: None, rand: rand_test_bin, 'race': X_test[:,race], 'sex': X_test[:,sex], 'edu': X_test[:,edu]}
    
    mondrian['y'].append(y_test)
    
    for attr in attribute_names:
        va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
        proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
        mondrian['proba'][attr].append(proba[:,1])
        mondrian['low'][attr].append(low)
        mondrian['high'][attr].append(high)
        mondrian['width'][attr].append(high-low)

results['adult']['mondrian']['time'] = time() - time_mondrian

In [128]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, race, sex, edu]
attribute_names = [base, rand, 'race', 'sex', 'edu']
model_split = {}
model_split['proba'] = {}
model_split['low'] = {}
model_split['high'] = {}
model_split['width'] = {}
model_split['y'] = {}
for attr in attribute_names:
    model_split['proba'][attr] = []
    model_split['low'][attr] = []
    model_split['high'][attr] = []
    model_split['width'][attr] = []
    model_split['y'][attr] = []

time_split = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    

    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_train_bin = binning(np.random.rand(len(y_train)), rand_cal_boundaries)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    train_bins = {base: np.ones(len(y_train)), rand: rand_train_bin, 'race': X_train[:,race], 'sex': X_train[:,sex], 'edu': X_train[:,edu]}
    cal_bins = {base: np.ones(len(y_cal)), rand: rand_cal_bin, 'race': X_cal[:,race], 'sex': X_cal[:,sex], 'edu': X_cal[:,edu]}
    test_bins = {base: np.ones(len(y_test)), rand: rand_test_bin, 'race': X_test[:,race], 'sex': X_test[:,sex], 'edu': X_test[:,edu]}
    
    try:
        for attr in attribute_names:
            for bin in np.unique(train_bins[attr]):
                X_train_bin = X_train[train_bins[attr] == bin]
                y_train_bin = y_train[train_bins[attr] == bin]
                X_cal_bin = X_cal[cal_bins[attr] == bin]
                y_cal_bin = y_cal[cal_bins[attr] == bin]
                X_test_bin = X_test[test_bins[attr] == bin]
                y_test_bin = y_test[test_bins[attr] == bin]
            
                if len(y_train_bin) == 0 or len(y_cal_bin) == 0 or len(y_test_bin) == 0:
                    continue
                
                model = RandomForestClassifier()
                model.fit(X_train_bin,y_train_bin)
                va = VennAbers(model.predict_proba(X_cal_bin), y_cal_bin, model)
                proba, low, high = va.predict_proba(X_test_bin, output_interval=True)
                model_split['proba'][attr].append(proba[:,1])
                model_split['low'][attr].append(low)
                model_split['high'][attr].append(high)
                model_split['width'][attr].append(high - low)
                model_split['y'][attr].append(y_test_bin)
    except Exception as e:
        print(e)
        print(f'attr: {attr} bin: {bin}')    

results['adult']['model_split']['time'] = time() - time_split

index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14
index 1 is out of bounds for axis 1 with size 1
attr: edu bin: 14


In [129]:
for attr in attribute_names:
    mondrian['proba'][attr] = np.concatenate(mondrian['proba'][attr]) 
    mondrian['low'][attr] = np.concatenate(mondrian['low'][attr]) 
    mondrian['high'][attr] = np.concatenate(mondrian['high'][attr]) 
    mondrian['width'][attr] = np.concatenate(mondrian['width'][attr]) 
    model_split['proba'][attr] = np.concatenate(model_split['proba'][attr]) 
    model_split['low'][attr] = np.concatenate(model_split['low'][attr]) 
    model_split['high'][attr] = np.concatenate(model_split['high'][attr]) 
    model_split['width'][attr] = np.concatenate(model_split['width'][attr]) 
    model_split['y'][attr] = np.concatenate(model_split['y'][attr]) 

mondrian['y'] = np.concatenate(mondrian['y']) 

In [130]:
for attr in attribute_names:
    results['adult']['mondrian'][attr] = np.mean(mondrian['width'][attr])
    results['adult']['model_split'][attr] = np.mean(model_split['width'][attr])
    print(f'{attr} width: \t{np.mean(mondrian["width"][attr]): 2g} \t{np.mean(model_split["width"][attr]): 2g}')

base width: 	 0.00326076 	 0.00354104
rand width: 	 0.0109691 	 0.0112308
race width: 	 0.00751947 	 0.00761529
sex width: 	 0.00522884 	 0.00529675
edu width: 	 0.0178852 	 0.0170594


# German


In [131]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
statlog_german_credit_data = fetch_ucirepo(id=144) 
  
# data (as pandas dataframes) 
X = statlog_german_credit_data.data.features 
y = statlog_german_credit_data.data.targets 

In [132]:
feature_names = statlog_german_credit_data.variables.description[:-1]
feature_names[0] = 'Status'
feature_names[7] = 'Installment rate'
feature_names[15] = 'Existing credits'
feature_names[17] = 'Num people liable'
target_labels = {1: 'Good', 0: 'Bad'}

In [133]:
print(feature_names)
age = 12
sex = 8

0                         Status
1                       Duration
2                 Credit history
3                        Purpose
4                  Credit amount
5          Savings account/bonds
6       Present employment since
7               Installment rate
8        Personal status and sex
9     Other debtors / guarantors
10       Present residence since
11                      Property
12                           Age
13       Other installment plans
14                       Housing
15              Existing credits
16                           Job
17             Num people liable
18                     Telephone
19                foreign worker
Name: description, dtype: object


In [134]:
df = X
target = 'Class'
df[target] = y
df = df.dropna()
df, categorical_features, categorical_labels, _, _ = transform_to_numeric(df, target)

In [135]:
num_to_test = 10 # number of instances to test, one from each class

df = df.sample(frac=1, random_state=42).sort_values(by=[target])
Xd, yd = df.drop(target,axis=1), df[target] 
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

# select test instances from each class and split into train, cal and test
X, y = Xd.values, yd.values
y[y == 2] = 0

In [136]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, sex, age]
attribute_names = [base, rand, 'sex', 'age']
va = {}
mondrian = {}
mondrian['proba'] = {}
mondrian['low'] = {}
mondrian['high'] = {}
mondrian['width'] = {}
mondrian['y'] = []
for attr in attribute_names:
    va[attr] = None
    mondrian['proba'][attr] = []
    mondrian['low'][attr] = []
    mondrian['high'][attr] = []
    mondrian['width'][attr] = []

time_mondrian = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestClassifier()
    model.fit(X_train,y_train)
    
    age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
    age_test_bin = binning(X_test[:,age], age_cal_boundaries)
    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    cal_bins = {base: None, rand: rand_cal_bin, 'sex': X_cal[:,sex], 'age': age_cal_bin}
    test_bins = {base: None, rand: rand_test_bin, 'sex': X_test[:,sex], 'age': age_test_bin}
    
    mondrian['y'].append(y_test)
    
    for attr in attribute_names:
        va[attr] = VennAbers(model.predict_proba(X_cal), y_cal, model, bins=cal_bins[attr])
        proba, low, high = va[attr].predict_proba(X_test, output_interval=True, bins=test_bins[attr])
        mondrian['proba'][attr].append(proba[:,1])
        mondrian['low'][attr].append(low)
        mondrian['high'][attr].append(high)
        mondrian['width'][attr].append(high-low)

results['german']['mondrian']['time'] = time() - time_mondrian

In [137]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, sex, age]
attribute_names = [base, rand, 'sex', 'age']
model_split = {}
model_split['proba'] = {}
model_split['low'] = {}
model_split['high'] = {}
model_split['width'] = {}
model_split['y'] = {}
for attr in attribute_names:
    model_split['proba'][attr] = []
    model_split['low'][attr] = []
    model_split['high'][attr] = []
    model_split['width'][attr] = []
    model_split['y'][attr] = []

time_split = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    age_cal_bin, age_cal_boundaries = binning(X_cal[:,age], bins=5)
    age_train_bin = binning(X_train[:,age], age_cal_boundaries)
    age_test_bin = binning(X_test[:,age], age_cal_boundaries)
    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_train_bin = binning(np.random.rand(len(y_train)), rand_cal_boundaries)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    train_bins = {base: np.ones(len(y_train)), rand: rand_train_bin, 'sex': X_train[:,sex], 'age': age_train_bin}
    cal_bins = {base: np.ones(len(y_cal)), rand: rand_cal_bin, 'sex': X_cal[:,sex], 'age': age_cal_bin}
    test_bins = {base: np.ones(len(y_test)), rand: rand_test_bin, 'sex': X_test[:,sex], 'age': age_test_bin}
    
    
    for attr in attribute_names:
        for bin in np.unique(train_bins[attr]):
            X_train_bin = X_train[train_bins[attr] == bin]
            y_train_bin = y_train[train_bins[attr] == bin]
            X_cal_bin = X_cal[cal_bins[attr] == bin]
            y_cal_bin = y_cal[cal_bins[attr] == bin]
            X_test_bin = X_test[test_bins[attr] == bin]
            y_test_bin = y_test[test_bins[attr] == bin]
            
            model = RandomForestClassifier()
            model.fit(X_train_bin,y_train_bin)
            va = VennAbers(model.predict_proba(X_cal_bin), y_cal_bin, model)
            proba, low, high = va.predict_proba(X_test_bin, output_interval=True)
            model_split['proba'][attr].append(proba[:,1])
            model_split['low'][attr].append(low)
            model_split['high'][attr].append(high)
            model_split['width'][attr].append(high - low)
            model_split['y'][attr].append(y_test_bin)

results['german']['model_split']['time'] = time() - time_split

In [138]:
for attr in attribute_names:
    mondrian['proba'][attr] = np.concatenate(mondrian['proba'][attr]) 
    mondrian['low'][attr] = np.concatenate(mondrian['low'][attr]) 
    mondrian['high'][attr] = np.concatenate(mondrian['high'][attr]) 
    mondrian['width'][attr] = np.concatenate(mondrian['width'][attr]) 
    model_split['proba'][attr] = np.concatenate(model_split['proba'][attr]) 
    model_split['low'][attr] = np.concatenate(model_split['low'][attr]) 
    model_split['high'][attr] = np.concatenate(model_split['high'][attr]) 
    model_split['width'][attr] = np.concatenate(model_split['width'][attr]) 
    model_split['y'][attr] = np.concatenate(model_split['y'][attr]) 

mondrian['y'] = np.concatenate(mondrian['y']) 

In [139]:
for attr in attribute_names:
    results['german']['mondrian'][attr] = np.mean(mondrian['width'][attr])
    results['german']['model_split'][attr] = np.mean(model_split['width'][attr])
    print(f'{attr} width: \t{np.mean(mondrian["width"][attr]): 2g} \t{np.mean(model_split["width"][attr]): 2g}')

base width: 	 0.0449486 	 0.0469801
rand width: 	 0.134121 	 0.119509
sex width: 	 0.099784 	 0.101003
age width: 	 0.131775 	 0.125053


# Boston

In [140]:
df = pd.read_csv('../data/reg/HousingData.csv', na_values='NA')
# df.head()

In [141]:
feature_names = df.columns[:-1]
print(feature_names)
crm = 0

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')


In [142]:
target = 'MEDV'
df = df.dropna()
df, categorical_features, categorical_labels, target_labels, _ = transform_to_numeric(df, target)

In [143]:
num_to_test = 10 # number of instances to test, one from each class

Xd, yd = df.drop(target,axis=1), df[target]
X, y = df.drop(target,axis=1).values, df[target].values
no_of_classes = len(np.unique(yd))
no_of_features = Xd.shape[1]
no_of_instances = Xd.shape[0]

### Standard Regression
Default confidence of 90%

In [144]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, crm]
attribute_names = [base, rand, 'crm']
cps = {}
mondrian = {}
mondrian['median'] = {}
mondrian['low'] = {}
mondrian['high'] = {}
mondrian['width'] = {}
mondrian['y'] = []
for attr in attribute_names:
    cps[attr] = None
    mondrian['median'][attr] = []
    mondrian['low'][attr] = []
    mondrian['high'][attr] = []
    mondrian['width'][attr] = []

time_mondrian = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    model = RandomForestRegressor()

    model.fit(X_train,y_train)
    crm_cal_bin, crm_cal_boundaries = binning(X_cal[:,crm], bins=3)
    crm_test_bin = binning(X_test[:,crm], crm_cal_boundaries)    
    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    cal_bins = {base: None, rand: rand_cal_bin, 'crm': crm_cal_bin}
    test_bins = {base: None, rand: rand_test_bin, 'crm': crm_test_bin}
    residuals_cal = model.predict(X_cal) - y_cal
    y_test_hat = model.predict(X_test)
    mondrian['y'].append(y_test)
    
    for attr in attribute_names:
        cps[attr] = ConformalPredictiveSystem()
        cps[attr].fit(residuals_cal, bins=cal_bins[attr])
        values = cps[attr].predict(y_test_hat, bins=test_bins[attr], lower_percentiles=[5, 50], higher_percentiles=[95, 50])
        mondrian['median'][attr].append(np.mean(values[:,[1, 3]], axis=1))
        mondrian['low'][attr].append(values[:,0])
        mondrian['high'][attr].append(values[:,2])
        mondrian['width'][attr].append(values[:,2]-values[:,0])

results['boston']['mondrian']['time'] = time() - time_mondrian

In [145]:
kf = KFold(n_splits=10)

base = 'base'
rand = 'rand'
attributes = [base, rand, crm]
attribute_names = [base, rand, 'crm']
cps = {}
model_split = {}
model_split['median'] = {}
model_split['low'] = {}
model_split['high'] = {}
model_split['width'] = {}
model_split['y'] = {}
for attr in attribute_names:
    cps[attr] = None
    model_split['median'][attr] = []
    model_split['low'][attr] = []
    model_split['high'][attr] = []
    model_split['width'][attr] = []
    model_split['y'][attr] = []

time_split = time()
for train_index, test_index in kf.split(X):
    X_train_cal, X_test = X[train_index], X[test_index]
    y_train_cal, y_test = y[train_index], y[test_index]
    
    X_train, X_cal, y_train, y_cal = train_test_split(X_train_cal, y_train_cal, test_size=0.3, random_state=42)
    
    
    crm_cal_bin, crm_cal_boundaries = binning(X_cal[:,crm], bins=3)
    crm_train_bin = binning(X_train[:,crm], crm_cal_boundaries)
    crm_test_bin = binning(X_test[:,crm], crm_cal_boundaries)    
    rand_cal_bin, rand_cal_boundaries = binning(np.random.rand(len(y_cal)), bins=5)
    rand_train_bin = binning(np.random.rand(len(y_train)), rand_cal_boundaries)
    rand_test_bin = binning(np.random.rand(len(y_test)), rand_cal_boundaries)
    train_bins = {base: np.ones(len(y_train)), rand: rand_train_bin, 'crm': crm_train_bin}
    cal_bins = {base: np.ones(len(y_cal)), rand: rand_cal_bin, 'crm': crm_cal_bin}
    test_bins = {base: np.ones(len(y_test)), rand: rand_test_bin, 'crm': crm_test_bin}
    
    
    for attr in attribute_names:
        for bin in np.unique(train_bins[attr]):
            X_train_bin = X_train[train_bins[attr] == bin,:]
            y_train_bin = y_train[train_bins[attr] == bin]
            X_cal_bin = X_cal[cal_bins[attr] == bin,:]
            y_cal_bin = y_cal[cal_bins[attr] == bin]
            X_test_bin = X_test[test_bins[attr] == bin,:]
            y_test_bin = y_test[test_bins[attr] == bin]
            
            if len(y_train_bin) == 0 or len(y_cal_bin) == 0 or len(y_test_bin) == 0:
                continue
            
            model = RandomForestRegressor()

            model.fit(X_train_bin, y_train_bin)
            
            residuals_cal = model.predict(X_cal_bin) - y_cal_bin
            y_test_bin_hat = model.predict(X_test_bin)
        
            cps[attr] = ConformalPredictiveSystem()
            cps[attr].fit(residuals_cal)
            values = cps[attr].predict(y_test_bin_hat, lower_percentiles=[5, 50], higher_percentiles=[95, 50])
            model_split['median'][attr].append(np.mean(values[:,[1, 3]], axis=1))
            model_split['low'][attr].append(values[:,0])
            model_split['high'][attr].append(values[:,2])
            model_split['width'][attr].append(values[:,2]-values[:,0])
            model_split['y'][attr].append(y_test_bin)

results['boston']['model_split']['time'] = time() - time_split

In [146]:
for attr in attribute_names:
    mondrian['median'][attr] = np.concatenate(mondrian['median'][attr]) 
    mondrian['low'][attr] = np.concatenate(mondrian['low'][attr]) 
    mondrian['high'][attr] = np.concatenate(mondrian['high'][attr]) 
    mondrian['width'][attr] = np.concatenate(mondrian['width'][attr]) 
    model_split['median'][attr] = np.concatenate(model_split['median'][attr]) 
    model_split['low'][attr] = np.concatenate(model_split['low'][attr]) 
    model_split['high'][attr] = np.concatenate(model_split['high'][attr]) 
    model_split['width'][attr] = np.concatenate(model_split['width'][attr]) 
    model_split['y'][attr] = np.concatenate(model_split['y'][attr]) 

mondrian['y'] = np.concatenate(mondrian['y']) 

In [147]:
for attr in attribute_names:
    results['boston']['mondrian'][attr] = np.mean(mondrian['width'][attr])
    results['boston']['model_split'][attr] = np.mean(model_split['width'][attr])
    print(f'{attr} width: \t{np.mean(mondrian["width"][attr]): 2g} \t{np.mean(model_split["width"][attr]): 2g} ')

base width: 	 9.90189 	 9.83211 
rand width: 	 12.7567 	 17.8389 
crm width: 	 13.8097 	 15.034 


## Summary
### Time

In [151]:
categories = ['model_split', 'mondrian', ]
print('Dataset\t Model Split\t Mondrian\tType')
for name, type in datasets.items():
    print(f'{name}\t', end='')
    for category in categories:
        print(f'{results[name][category]["time"]: 0.2f}\t\t', end='')
    print(f'{type}')

Dataset	 Model Split	 Mondrian	Type
COMPAS	 70.26		 12.93		classification
adult	 333.01		 72.51		classification
german	 29.83		 3.97		classification
boston	 22.18		 6.18		regression


### Width
#### Mondrian vs model split

In [152]:
print('', end='\t')
for attr in results[name]['mondrian'].keys():
    if attr == 'time':
        continue
    for category in categories:
        print(f' {category}', end='\t')
print('\nDataset', end='\t')
for attr in results[name]['mondrian'].keys():
    if attr == 'time':
        continue
    for category in categories:
        print(f' {attr}', end='\t\t')
print('Type')
for name, type in datasets.items():
    print(f'{name}\t', end='')
    for attr in results[name]['mondrian'].keys():
        if attr == 'time':
            continue
        for category in categories:
            val = results[name][category][attr]
            print(f'{val: 0.3f}', end='\t\t') if val > 0 else print(' -', end='\t\t')
    print(f'{type}')

	 model_split	 mondrian	 model_split	 mondrian	 model_split	 mondrian	 model_split	 mondrian	 model_split	 mondrian	 model_split	 mondrian	 model_split	 mondrian	
Dataset	 base		 base		 rand		 rand		 race		 race		 sex		 sex		 age		 age		 edu		 edu		 crm		 crm		Type
COMPAS	 0.007		 0.007		 0.021		 0.023		 0.019		 0.019		 0.012		 0.011		 0.021		 0.022		 -		 -		 -		 -		classification
adult	 0.004		 0.003		 0.011		 0.011		 0.008		 0.008		 0.005		 0.005		 -		 -		 0.017		 0.018		 -		 -		classification
german	 0.047		 0.045		 0.120		 0.134		 -		 -		 0.101		 0.100		 0.125		 0.132		 -		 -		 -		 -		classification
boston	 9.832		 9.902		 17.839		 12.757		 -		 -		 -		 -		 -		 -		 -		 -		 15.034		 13.810		regression


#### Only Mondrian

In [153]:
print('', end='\t')
for attr in results[name]['mondrian'].keys():
    if attr == 'time':
        continue
    for category in ['mondrian']:
        print(f' {category}', end='\t')
print('\nDataset', end='\t')
for attr in results[name]['mondrian'].keys():
    if attr == 'time':
        continue
    for category in ['mondrian']:
        print(f' {attr}', end='\t\t')
print('Type')
for name, type in datasets.items():
    print(f'{name}\t', end='')
    for attr in results[name]['mondrian'].keys():
        if attr == 'time':
            continue
        for category in ['mondrian']:
            val = results[name][category][attr]
            print(f'{val: 0.3f}', end='\t\t') if val > 0 else print(' -', end='\t\t')
    print(f'{type}')

	 mondrian	 mondrian	 mondrian	 mondrian	 mondrian	 mondrian	 mondrian	
Dataset	 base		 rand		 race		 sex		 age		 edu		 crm		Type
COMPAS	 0.007		 0.023		 0.019		 0.011		 0.022		 -		 -		classification
adult	 0.003		 0.011		 0.008		 0.005		 -		 0.018		 -		classification
german	 0.045		 0.134		 -		 0.100		 0.132		 -		 -		classification
boston	 9.902		 12.757		 -		 -		 -		 -		 13.810		regression
