In [1]:
from interpretml_tools import *

from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor, merge_ebms

import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Loading dataset
### German Credit (Statlog) dataset from the UCI repository

In [2]:
# Load the German Credit (Statlog) data. The file is space-separated and is read
# without a header row, so the column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
columns = [
    'checking_status',
    'duration',
    'credit_history',
    'purpose',
    'credit_amount',
    'savings_account',
    'employment',
    'installment_rate',
    'personal_status_sex',
    'other_debtors',
    'present_residence',
    'property',
    'age',
    'other_installment_plans',
    'housing',
    'existing_credits',
    'job',
    'num_maintenance',
    'telephone',
    'foreign_worker',
    'target',
]

df = pd.read_csv(url, sep=' ', names=columns, header=None)

# Preprocessing
# Derive a string-valued 'sex' column ('male' / 'female') from the combined
# personal-status/sex code: A91, A93 and A94 map to 'male', anything else to 'female'.
male_codes = ['A91', 'A93', 'A94']
df['sex'] = df['personal_status_sex'].isin(male_codes).map({True: 'male', False: 'female'})

# Recode the target so that 1 stays 1 (good credit) and 2 becomes 0 (bad credit).
target_recode = {1: 1, 2: 0}
df['target'] = df['target'].replace(target_recode)

# Every column except the target is used as a model input.
# NOTE(review): this keeps 'personal_status_sex' next to the derived 'sex'
# column, so sex information reaches the models twice - confirm this is intended.
features = [name for name in df.columns if name != 'target']

X = df[features]
y = df['target']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training baseline models

In [3]:
# Boolean selectors for the two sex groups within the training split.
train_is_male = X_train['sex'] == 'male'
train_is_female = X_train['sex'] == 'female'

# One EBM per group, each fitted only on that group's training rows.
male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train[train_is_male], y_train[train_is_male])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train[train_is_female], y_train[train_is_female])

# Reference model fitted on the whole training split.
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [4]:
# Combine the two per-sex EBMs, passing an equal weight of 0.5 for each.
# CombinedEBM is not imported by name; presumably it comes from the
# `interpretml_tools` star import - TODO confirm.
equal_weights = [0.5, 0.5]
ff_model = CombinedEBM([male_model, female_model], equal_weights)

In [5]:
# Merge the two per-sex EBMs into one model with interpret's merge_ebms.
per_sex_models = [male_model, female_model]
combined = merge_ebms(per_sex_models)

# Displaying with custom EBMVisualizer

In [6]:
%matplotlib widget
plt.ioff()
visualizer = InterpretmlEBMVisualizer([male_model, female_model, normal_model, ff_model, combined], ["Male Model", "Female Model", "Normal Model", "50-50 Model", "Combined"])
visualizer.show()

HBox(children=(VBox(children=(Dropdown(description='Feature:', options=(('checking_status', 0), ('duration', 1…

# Group Performance Plots

In [3]:
# Weight given to the de-emphasised group in the "eps" models below.
eps = 1e-10


def _sex_sample_weights(male_weight, female_weight):
    """Return one weight per training row: `male_weight` for 'male' rows, `female_weight` otherwise."""
    return X_train['sex'].map(lambda sex: male_weight if sex == 'male' else female_weight)


train_is_male = X_train['sex'] == 'male'
train_is_female = X_train['sex'] == 'female'

# Hard split: each model only ever sees the rows of its own group.
male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train[train_is_male], y_train[train_is_male])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train[train_is_female], y_train[train_is_female])

# Soft split: both models see every training row, but the other group's rows
# carry a weight of eps while the model's own group carries 1 - eps.
female_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model_eps.fit(X_train, y_train, sample_weight=_sex_sample_weights(eps, 1 - eps))

male_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model_eps.fit(X_train, y_train, sample_weight=_sex_sample_weights(1 - eps, eps))

# Unweighted reference model on the full training split.
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [4]:
# Feature of interest, and the data the group-performance analysis runs on.
# NOTE(review): `_x` / `_y` are the *training* split, yet they are handed to the
# analyzer as X_test / y_test in the cells below - confirm this is intended.
foi = 'sex'
_x, _y = X_train, y_train

# Boolean masks selecting each group's rows in `_x`.
group_values = _x[foi]
male_mask = group_values == 'male'
female_mask = group_values == 'female'

In [6]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
    ],
    baseline_models=[
    ],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Processing Group 1/3:   0%|          | 0/100 [00:00<?, ?it/s]

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 85.05it/s]
Processing Group 2/3: 100%|██████████| 100/100 [00:00<00:00, 124.82it/s]
Processing Group 3/3: 100%|██████████| 100/100 [00:00<00:00, 126.27it/s]


Plotting group: combination_group_0
x: [-0.2920827133114866, -0.33388088357618706, -0.40925240227658527, -0.3546026966954601, -0.33946982123197994, -0.31968126959586524, -0.3720399134669217, -0.4106260816623078, -0.3272828253953994, -0.45223949795842744, -0.31264940743899533, -0.40139154729967297, -0.3859319918035215, -0.3840233423204378, -0.37891087033742904, -0.30222018171896603, -0.40946109405433, -0.498680573577322, -0.349775200025059, -0.3770713590051749, -0.481230601733841, -0.47504857207522116, -0.4064614581837298, -0.40265856814553985, -0.3344813733560821, -0.2999140436176358, -0.4822237893572128, -0.33489826800698597, -0.38054764582850104, -0.3167699826594459, -0.3570715558081565, -0.3918977657961417, -0.40251501620786495, -0.3319696589778909, -0.3713623116620817, -0.4617271313249563, -0.31811886892907165, -0.33745974667205536, -0.3934761411921109, -0.3648087934514013, -0.35923942584401714, -0.31477466968542595, -0.3043073165586108, -0.4585985313296553, -0.29388186366971114, -

Output()

# Adding more trained models

In [7]:
import random

def generate_pairs(N, random_state=None):
    """Draw N random weight pairs ``(x, 1 - x)`` with ``x`` taken from ``uniform(0, 1)``.

    Args:
        N: Number of pairs to generate. Zero or a negative value gives an
            empty list.
        random_state: Optional seed. When given, the values come from a
            private ``random.Random(random_state)`` generator, so the result
            is reproducible and the module-level ``random`` state is left
            untouched. The numbers are the same ones obtained by seeding the
            module-level generator with that value. When ``None``, the
            module-level generator is used as-is.

    Returns:
        A list of N ``(x, 1 - x)`` tuples of floats; the two entries of each
        pair sum to 1.
    """
    # A private generator avoids reseeding the shared module-level RNG as a
    # side effect of asking for reproducible pairs.
    rng = random if random_state is None else random.Random(random_state)
    first_weights = (rng.uniform(0, 1) for _ in range(N))
    return [(x, 1 - x) for x in first_weights]

In [5]:
from pathlib import Path
from tqdm.notebook import tqdm
import pickle

# Save to the same location the loading cell reads from
# ("pickles/german/additional_models.pkl"); create the folder if it is missing.
models_path = Path("pickles/german/additional_models.pkl")
models_path.parent.mkdir(parents=True, exist_ok=True)

# Collected as (label, model) tuples, one per weight pair.
additional_models = []

for (mw, fw) in tqdm(generate_pairs(100, 42), desc="Training models"):
    new_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
    # Per-row sample weights based on sex: mw for 'male' rows, fw for all others.
    sample_weights = X_train['sex'].map(lambda x: mw if x == 'male' else fw)

    # Fit the model on the full training split with those weights.
    new_model.fit(X_train, y_train, sample_weight=sample_weights)

    # Add this model to our collection, labelled with the weights used.
    additional_models.append((f"M: {mw:.2f}, F: {fw:.2f}", new_model))

    # Rewrite the pickle after every model, so the file always holds the
    # models trained so far even if the loop is interrupted.
    with open(models_path, "wb") as f:
        pickle.dump(additional_models, f)

NameError: name 'generate_pairs' is not defined

In [5]:
import pickle

# Read back the list of trained models from disk.
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.
models_file = "pickles/german/additional_models.pkl"
with open(models_file, "rb") as handle:
    additional_models = pickle.load(handle)

print(f"Loaded {len(additional_models)} models")

Loaded 50 models


In [None]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model_eps),
        ("Normal Model", normal_model),
        ("Female Model", female_model_eps),
        ("Additional Models", additional_models[0]),
    ],
    baseline_models=additional_models[1:],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 86.61it/s]
Processing Group 2/3: 100%|██████████| 100/100 [00:00<00:00, 129.80it/s]
Processing Group 3/3: 100%|██████████| 100/100 [00:00<00:00, 130.00it/s]


Plotting group: combination_group_0
x: [-0.3778732869314107, -0.36703579309634854, -0.36407785648794216, -0.3702333685798498, -0.37772886525535876, -0.3450588913519, -0.4016853890107789, -0.3272401920986516, -0.3185372770679507, -0.5508334727176343, -0.29381712201402854, -0.39342835006062443, -0.40288042468491686, -0.3315187205079045, -0.29387430445798174, -0.3931069636110529, -0.36706902196740376, -0.40298394042080155, -0.33638110633604695, -0.3662827742306121, -0.36467066463759906, -0.37437479079659136, -0.41818383448048607, -0.36732248921742466, -0.35602455022241125, -0.36021134185378284, -0.39025335089090724, -0.41646169333139393, -0.3859867876779593, -0.3258619355375601, -0.37714352524071076, -0.36407785648794216, -0.35401086684732574, -0.4161786329467377, -0.34228535074868083, -0.37309955806659106, -0.375018750485248, -0.33223949306340206, -0.34640955579884763, -0.36161941786410545, -0.42888528318598484, -0.40115183586531317, -0.3206812897016676, -0.3909592354973482, -0.551784894

Output()

In [7]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
        *additional_models
    ],
    baseline_models=[],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Processing Group 1/53:  83%|████████▎ | 83/100 [00:15<00:03,  5.29it/s]


KeyboardInterrupt: 