In [None]:
from interpretml_tools import *

from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor, merge_ebms

import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Loading the dataset
### German Credit (Statlog) dataset

In [2]:
# Fetch the raw German Credit file from the UCI repository. The file has no header
# row and is space-separated, so the column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
columns = [
    'checking_status',
    'duration',
    'credit_history',
    'purpose',
    'credit_amount',
    'savings_account',
    'employment',
    'installment_rate',
    'personal_status_sex',
    'other_debtors',
    'present_residence',
    'property',
    'age',
    'other_installment_plans',
    'housing',
    'existing_credits',
    'job',
    'num_maintenance',
    'telephone',
    'foreign_worker',
    'target',
]

df = pd.read_csv(url, sep=' ', names=columns, header=None)

# Preprocessing
# Derive a string-valued 'sex' column: the codes listed below are labelled 'male',
# every other code is labelled 'female'.
male_status_codes = ['A91', 'A93', 'A94']
df['sex'] = df['personal_status_sex'].isin(male_status_codes).map({True: 'male', False: 'female'})

# Recode the target so that 1 stays 1 (good credit) and 2 becomes 0 (bad credit).
df['target'] = df['target'].replace({2: 0})

# Every column except the label is used as a model input (this includes both the
# raw 'personal_status_sex' code and the derived 'sex' column).
features = [col for col in df.columns if col != 'target']

X = df[features]
y = df['target']

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training baseline models

In [4]:
# Fit three EBMs: one on the male training rows only, one on the female training
# rows only, and one on the full training split.
is_male_row = X_train['sex'] == 'male'
is_female_row = X_train['sex'] == 'female'

male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train[is_male_row], y_train[is_male_row])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train[is_female_row], y_train[is_female_row])

normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [5]:
# Combine the two subgroup EBMs via the project's CombinedEBM helper and keep the
# object it returns from get_model_object().
# NOTE(review): the second argument is presumably one weight per model (0.5 each);
# confirm against CombinedEBM's definition.
subgroup_models = [male_model, female_model]
subgroup_weights = [0.5, 0.5]
ff_model = CombinedEBM(subgroup_models, subgroup_weights).get_model_object()

In [6]:
# Merge the male-only and female-only EBMs into a single model with interpret's
# merge_ebms.
ebms_to_merge = [male_model, female_model]
combined = merge_ebms(ebms_to_merge)

# Displaying with custom EBMVisualizer

In [7]:
%matplotlib widget
plt.ioff()
visualizer = InterpretmlEBMVisualizer([male_model, female_model, normal_model, ff_model, combined], ["Male Model", "Female Model", "Normal Model", "50-50 Model", "Combined"])
visualizer.show()

HBox(children=(VBox(children=(Dropdown(description='Feature:', options=(('checking_status', 0), ('duration', 1…

# Group Performance Plots

In [3]:
# Row selectors for the two subgroups of the training split.
is_male_row = X_train['sex'] == 'male'
is_female_row = X_train['sex'] == 'female'

# Subgroup models: each sees only the rows of its own group.
male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train[is_male_row], y_train[is_male_row])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train[is_female_row], y_train[is_female_row])

# "eps" models: trained on every row, but the other group's rows get a sample
# weight of eps while the model's own group gets 1 - eps.
eps = 1e-10
female_heavy_weights = X_train['sex'].map(lambda sex: eps if sex == 'male' else 1 - eps)
male_heavy_weights = X_train['sex'].map(lambda sex: 1 - eps if sex == 'male' else eps)

female_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model_eps.fit(X_train, y_train, sample_weight=female_heavy_weights)

male_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model_eps.fit(X_train, y_train, sample_weight=male_heavy_weights)

# Pooled model: all rows, no sample weights.
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [4]:
# Feature whose groups are compared, and the data the analyzers are evaluated on.
# NOTE(review): _x / _y alias the *training* split, so the analyzer cells that
# receive them as X_test / y_test report in-sample metrics - confirm this is intended.
foi = 'sex'
_x, _y = X_train, y_train

# Boolean row selectors for each group of the feature of interest.
male_mask = _x[foi].eq('male')
female_mask = _x[foi].eq('female')

In [5]:
# Build a GroupPerformanceAnalyzer from the male, female and pooled EBMs together
# with the evaluation data and the two group masks, then request its plot for
# 100 combinations.
# NOTE(review): the saved output of this cell shows the evaluation progress bar
# reaching 100/100 and then
#   AttributeError: 'GroupPerformanceAnalyzer' object has no attribute 'scatter_plots'
# so this cell does not complete as saved. The failure is inside the project
# class, not in this cell's arguments as far as can be seen here.
%matplotlib widget
plt.ioff()  # Avoids duplicate plots
analyzer = GroupPerformanceAnalyzer(
    male_model, female_model, normal_model,
    _x, _y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    # NOTE(review): meaning of 'post' is defined by GroupPerformanceAnalyzer - confirm there.
    combine_strategy='post',
    metric='log_likelihood',
)
analyzer.generate_plot(n_combinations=100)

Evaluating combinations:   0%|          | 0/100 [00:00<?, ?it/s]

Evaluating combinations: 100%|██████████| 100/100 [00:01<00:00, 54.61it/s]


AttributeError: 'GroupPerformanceAnalyzer' object has no attribute 'scatter_plots'

In [6]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
    ],
    baseline_models=[
    ],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Evaluating All Models: 100%|██████████| 100/100 [00:02<00:00, 35.28it/s]
Evaluating Without Male Model: 100%|██████████| 33/33 [00:00<00:00, 36.24it/s]
Evaluating Without Female Model: 100%|██████████| 33/33 [00:00<00:00, 36.32it/s]
Evaluating Without Normal Model: 100%|██████████| 33/33 [00:00<00:00, 35.67it/s]


HBox(children=(VBox(children=(HTML(value='<b>Model Details:</b>'), Output(), HTML(value='<b>Show/Hide Groups:<…

# Adding more trained models

In [None]:
import random

def generate_pairs(N, random_state=None):
    """Draw N random weight pairs ``(x, 1 - x)`` with ``x`` sampled uniformly between 0 and 1.

    Args:
        N: Number of pairs to generate. ``0`` yields an empty list.
        random_state: Optional seed. When given, the same seed always yields the
            same pairs. When ``None``, values are drawn from the module-level
            ``random`` generator in whatever state it currently is.

    Returns:
        A list of N ``(x, 1 - x)`` tuples of floats.
    """
    # A seeded call uses its own generator so it does not reseed the process-wide
    # `random` state as a hidden side effect; the values drawn for a given seed are
    # the same as seeding the module-level generator would produce.
    rng = random if random_state is None else random.Random(random_state)

    # Single pass: draw x and build its complementary pair directly.
    first_weights = (rng.uniform(0, 1) for _ in range(N))
    return [(x, 1 - x) for x in first_weights]

In [None]:
from tqdm.notebook import tqdm
import pickle

# Train one EBM per (male weight, female weight) pair from generate_pairs(100, 42)
# and collect them as (label, model) tuples, where the label records the weights
# rounded to two decimals.
additional_models = []

try:
    for (mw, fw) in tqdm(generate_pairs(100, 42), desc="Training models"):
        new_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())

        # Per-row sample weight: mw for rows whose sex is 'male', fw for all others.
        sample_weights = X_train['sex'].map(lambda x: mw if x == 'male' else fw)

        # Fit the model with sample weights
        new_model.fit(X_train, y_train, sample_weight=sample_weights)

        # Add this model to our collection with the weights used
        additional_models.append((f"M: {mw:.2f}, F: {fw:.2f}", new_model))
finally:
    # Write the pickle once instead of re-serialising the whole growing list on
    # every iteration. Doing it in `finally` still saves the models trained so far
    # if the loop is interrupted or a fit fails. Nothing is written when no model
    # finished, so an existing cache file is not replaced by an empty list.
    if additional_models:
        with open("additional_models.pkl", "wb") as f:
            pickle.dump(additional_models, f)

Training models:   0%|          | 0/100 [00:00<?, ?it/s]

In [7]:
import pickle

# Restore the list written to "additional_models.pkl" by the training cell.
# NOTE: unpickling can execute arbitrary code - only load a file you produced yourself.
with open("additional_models.pkl", "rb") as models_file:
    additional_models = pickle.load(models_file)

n_loaded = len(additional_models)
print(f"Loaded {n_loaded} models")

Loaded 100 models


In [None]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model_eps),
        ("Female Model", female_model_eps),
        ("Normal Model", normal_model),
    ],
    baseline_models=additional_models,
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Evaluating All Models:   0%|          | 0/100 [00:00<?, ?it/s]

Evaluating All Models: 100%|██████████| 100/100 [00:02<00:00, 48.81it/s]
Evaluating Without Male Model: 100%|██████████| 33/33 [00:00<00:00, 64.35it/s]
Evaluating Without Female Model: 100%|██████████| 33/33 [00:00<00:00, 60.02it/s]
Evaluating Without Normal Model: 100%|██████████| 33/33 [00:00<00:00, 51.36it/s]


HBox(children=(VBox(children=(HTML(value='<b>Model Details:</b>'), Output(), HTML(value='<b>Show/Hide Groups:<…

In [9]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
        *additional_models
    ],
    baseline_models=[],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot(n_combinations=100)

Evaluating All Models:   0%|          | 0/100 [00:00<?, ?it/s]

Evaluating All Models:   5%|▌         | 5/100 [00:05<01:38,  1.04s/it]


KeyboardInterrupt: 