In [1]:
from interpretml_tools import *

from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor, merge_ebms

import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Loading dataset
### German Credit (UCI Statlog)

In [2]:
# Read the German Credit (Statlog) file from the UCI repository.
# It is parsed as space-separated with no header row, so the column names
# are supplied explicitly below.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
columns = [
    'checking_status',
    'duration',
    'credit_history',
    'purpose',
    'credit_amount',
    'savings_account',
    'employment',
    'installment_rate',
    'personal_status_sex',
    'other_debtors',
    'present_residence',
    'property',
    'age',
    'other_installment_plans',
    'housing',
    'existing_credits',
    'job',
    'num_maintenance',
    'telephone',
    'foreign_worker',
    'target',
]
df = pd.read_csv(url, sep=' ', names=columns, header=None)

# Preprocessing
# Derive a string-valued 'sex' column: codes A91, A93 and A94 of
# 'personal_status_sex' are labelled 'male', every other code 'female'.
# (The values are the strings 'male'/'female', not 1/0.)
df['sex'] = (
    df['personal_status_sex']
    .isin(['A91', 'A93', 'A94'])
    .map({True: 'male', False: 'female'})
)

# Recode the target so that 1 stays 1 (good credit) and 2 becomes 0 (bad credit).
df['target'] = df['target'].replace({2: 0})

# Every column except the label is used as a model input; this includes both
# the raw 'personal_status_sex' code and the derived 'sex' column.
features = [name for name in df.columns if name != 'target']

X, y = df[features], df['target']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Training baseline models

In [3]:
# Group-specific baselines: each model is fitted only on the training rows
# belonging to one sex.
male_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
male_model.fit(
    X_train.loc[X_train['sex'].eq('male')],
    y_train.loc[X_train['sex'].eq('male')],
)

female_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
female_model.fit(
    X_train.loc[X_train['sex'].eq('female')],
    y_train.loc[X_train['sex'].eq('female')],
)

# Pooled baseline: fitted on the whole training split.
normal_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
normal_model.fit(X_train, y_train)

print("done")

done


In [4]:
# Wrap the two group-specific models with a 0.5 / 0.5 weight pair.
# NOTE(review): CombinedEBM comes from interpretml_tools; presumably the second
# argument is the per-model mixing weight -- confirm against its definition.
ff_model = CombinedEBM(
    [male_model, female_model],
    [0.5, 0.5],
)

In [5]:
# Merge the male and female EBMs into a single model using interpret's
# built-in merge_ebms helper.
combined = merge_ebms(
    [male_model, female_model],
)

# Displaying with custom EBMVisualizer

In [6]:
%matplotlib widget
plt.ioff()
visualizer = InterpretmlEBMVisualizer([male_model, female_model, normal_model, ff_model, combined], ["Male Model", "Female Model", "Normal Model", "50-50 Model", "Combined"])
visualizer.show()

HBox(children=(VBox(children=(Dropdown(description='Feature:', options=(('checking_status', 0), ('duration', 1…

# Group Performance Plots

In [3]:
# Group-specific models: each is fitted only on the training rows of one sex.
male_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
male_model.fit(
    X_train.loc[X_train['sex'].eq('male')],
    y_train.loc[X_train['sex'].eq('male')],
)

female_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
female_model.fit(
    X_train.loc[X_train['sex'].eq('female')],
    y_train.loc[X_train['sex'].eq('female')],
)

# Re-weighted models: fitted on the full training split, but one group's rows
# get weight eps and the other group's rows get weight 1 - eps.
eps = 1e-10

female_model_eps = ExplainableBoostingClassifier(feature_names=list(X.columns))
female_model_eps.fit(
    X_train,
    y_train,
    sample_weight=X_train['sex'].eq('male').map({True: eps, False: 1 - eps}),
)

male_model_eps = ExplainableBoostingClassifier(feature_names=list(X.columns))
male_model_eps.fit(
    X_train,
    y_train,
    sample_weight=X_train['sex'].eq('male').map({True: 1 - eps, False: eps}),
)

# Pooled baseline: whole training split, no sample weights.
normal_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
normal_model.fit(X_train, y_train)

print("done")

done


In [4]:
# Evaluation setup shared by the plotting cells below.
# The feature of interest is the column used to split rows into groups.
foi = 'sex'

# NOTE(review): the evaluation data is the *training* split, even though the
# plotting cells pass it on as X_test / y_test -- confirm this is intended.
_x, _y = X_train, y_train

# Boolean row selectors for the two groups within the evaluation data.
male_mask = _x[foi].eq('male')
female_mask = _x[foi].eq('female')

In [5]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
    ],
    baseline_models=[
    ],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Processing Group 1/3:   0%|          | 0/100 [00:00<?, ?it/s]

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 58.85it/s]
Processing Group 2/3: 100%|██████████| 100/100 [00:01<00:00, 71.94it/s]
Processing Group 3/3: 100%|██████████| 100/100 [00:00<00:00, 114.75it/s]

Plotting group: combination_group_0
x: [-0.3717883417190313, -0.46947013404517995, -0.3678328710181683, -0.3679864440356917, -0.31666355763211096, -0.43519853658237845, -0.37884064550624963, -0.3993504158535459, -0.38450833884283575, -0.37627820414358726, -0.30024957214426073, -0.3371499961444665, -0.33394674974645605, -0.4468027258337729, -0.48348552812579043, -0.3844119820544019, -0.37365652075636213, -0.3844411274777292, -0.35851090689439535, -0.36162008406344437, -0.384202727381546, -0.4135075398790273, -0.38358688512745964, -0.4051669661485202, -0.38032337844060726, -0.3382643423204916, -0.35982893229491775, -0.3281193168763148, -0.3491609234809819, -0.3747067730138439, -0.4280488218609826, -0.36211375383082905, -0.44540756084906946, -0.42132672737369614, -0.3933341933399665, -0.3385901275430678, -0.38057447408016976, -0.2959429472427394, -0.3243291146669097, -0.32727130101077795, -0.4178925993546543, -0.31512468412912714, -0.3563704291426934, -0.40543624429982894, -0.512692938348




HBox(children=(VBox(children=(HTML(value='<b>Model Details:</b>'), Output(), HTML(value='<b>Show/Hide Groups:<…

# Adding more trained models

In [6]:
import random

def generate_pairs(N, random_state=None):
    """Draw N complementary weight pairs ``(x, 1 - x)``.

    Each ``x`` is drawn with ``uniform(0, 1)``.

    Args:
        N: Number of pairs to generate.
        random_state: Optional seed. When given, a private ``random.Random``
            seeded with this value is used, so the same seed always yields the
            same pairs and the module-level ``random`` generator is not
            reseeded as a side effect. When ``None``, values are drawn from
            the module-level generator.

    Returns:
        A list of N ``(x, 1 - x)`` tuples of floats.
    """
    # A dedicated generator produces the same sequence as random.seed(seed)
    # followed by module-level calls, without clobbering global RNG state.
    rng = random if random_state is None else random.Random(random_state)
    draws = [rng.uniform(0, 1) for _ in range(N)]
    return [(x, 1 - x) for x in draws]

In [5]:
from tqdm.notebook import tqdm
import pickle

# Train one EBM per (male weight, female weight) pair. Every model is fitted on
# the full training split; only the per-row sample weights differ.
additional_models = []

for mw, fw in tqdm(generate_pairs(100, 42), desc="Training models"):
    # Rows with sex == 'male' get weight mw, all other rows get weight fw.
    sample_weights = X_train['sex'].eq('male').map({True: mw, False: fw})

    new_model = ExplainableBoostingClassifier(feature_names=list(X.columns))
    new_model.fit(X_train, y_train, sample_weight=sample_weights)

    # Store the model under a label that records the weights it was trained with.
    label = f"M: {mw:.2f}, F: {fw:.2f}"
    additional_models.append((label, new_model))

    # The whole list is re-written to disk after every model.
    # NOTE(review): this re-pickles all earlier models on each iteration --
    # confirm it is meant as a checkpoint rather than a single final save.
    # NOTE(review): this writes "additional_models.pkl", while the later load
    # cell reads "pickles/german/additional_models.pkl" -- confirm the paths.
    with open("additional_models.pkl", "wb") as f:
        pickle.dump(additional_models, f)

NameError: name 'generate_pairs' is not defined

In [5]:
import pickle

# Restore the previously trained (label, model) list from disk instead of
# retraining. Only unpickle files you produced yourself: pickle.load can
# execute arbitrary code from an untrusted file.
with open("pickles/german/additional_models.pkl", "rb") as pkl_file:
    additional_models = pickle.load(pkl_file)

n_loaded = len(additional_models)
print(f"Loaded {n_loaded} models")

Loaded 50 models


In [6]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model_eps),
        ("Normal Model", normal_model),
        ("Female Model", female_model_eps),
    ],
    baseline_models=additional_models,
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Processing Group 1/3:   0%|          | 0/100 [00:00<?, ?it/s]

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 74.66it/s]
Processing Group 2/3: 100%|██████████| 100/100 [00:00<00:00, 119.44it/s]
Processing Group 3/3: 100%|██████████| 100/100 [00:00<00:00, 119.15it/s]


Plotting group: combination_group_0
x: [-0.4130110843898731, -0.3397050069704964, -0.34953302320366014, -0.35479853417819834, -0.39464161320947466, -0.33910716229064175, -0.41824316155409935, -0.35712949704794855, -0.3572156401792701, -0.4117513063121821, -0.40873232384466157, -0.3773646094239637, -0.45606084018280996, -0.3457048069638752, -0.33143810586764527, -0.40505900157405605, -0.3584075566768362, -0.35689486241109325, -0.34210859535898736, -0.35415765695178114, -0.3176806547119956, -0.43552219976952644, -0.3373587627719486, -0.4709626300546987, -0.345508208819255, -0.3671556490254576, -0.36436619823378646, -0.41618792155768813, -0.33172748341665337, -0.31503806688460506, -0.3805353544936975, -0.33125842413287065, -0.39951357973608803, -0.369502346911483, -0.37502967227989514, -0.33954508698345703, -0.34928443278034116, -0.43565266813512415, -0.3154720755415833, -0.43967605850496977, -0.34773169065885967, -0.3663552741117681, -0.3335007129495658, -0.3364911211808214, -0.397369117

Output()

In [None]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
        *additional_models
    ],
    baseline_models=[],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Evaluating All Models:   0%|          | 0/100 [00:00<?, ?it/s]

Evaluating All Models:   5%|▌         | 5/100 [00:05<01:38,  1.04s/it]


KeyboardInterrupt: 