In [4]:
from interpretml_tools import *

from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor, merge_ebms

import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Loading dataset
### (German)

In [33]:
# Load German Credit Dataset
# url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
# Column labels handed to read_excel below; 'target' is the label column.
columns = [
    'checking_status',
    'duration',
    'credit_history',
    'purpose',
    'credit_amount',
    'savings_account',
    'employment',
    'installment_rate',
    'personal_status_sex',
    'other_debtors',
    'present_residence',
    'property',
    'age',
    'other_installment_plans',
    'housing',
    'existing_credits',
    'job',
    'num_maintenance',
    'telephone',
    'foreign_worker',
    'target',
]

df = pd.read_excel("../datasets/german.xlsx", names=columns)

# Preprocessing
# Derive a two-valued 'sex' column: status codes A91/A93/A94 become 'male',
# every other code becomes 'female'.
df['sex'] = (
    df['personal_status_sex']
    .isin(['A91', 'A93', 'A94'])
    .map({True: 'male', False: 'female'})
)

# Recode the label so that 1 stays 1 (good credit) and 2 becomes 0 (bad credit).
df['target'] = df['target'].replace({2: 0})

# Every column except the label is used as a model input (including the derived 'sex').
features = [name for name in df.columns if name != 'target']

X = df[features]
y = df['target']

# Fixed seed so the 80/20 split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training baseline models

In [34]:
# --- Group-specific models: each one only sees the training rows of its own group ---
male_rows = X_train['sex'] == 'male'
male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train.loc[male_rows], y_train.loc[male_rows])

female_rows = X_train['sex'] == 'female'
female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train.loc[female_rows], y_train.loc[female_rows])

# --- Reference model trained on the whole training split, unweighted ---
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

# --- Two ways of combining the group models ---
# CombinedEBM is not defined in this notebook (presumably provided by the
# interpretml_tools star import); here it receives equal 0.5 / 0.5 weights.
ff_model = CombinedEBM([male_model, female_model], [0.5, 0.5])
combined = merge_ebms([male_model, female_model])

# --- "Epsilon" variants: fit on ALL training rows, but give the other group a
# near-zero sample weight (eps) instead of removing its rows ---
eps = 1e-10
is_male_train = X_train['sex'] == 'male'

female_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model_eps.fit(
    X_train,
    y_train,
    sample_weight=is_male_train.map({True: eps, False: 1 - eps}),
)

male_model_eps = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model_eps.fit(
    X_train,
    y_train,
    sample_weight=is_male_train.map({True: 1 - eps, False: eps}),
)

print("done")

done


# Displaying with custom EBMVisualizer

In [4]:
%matplotlib widget
plt.ioff()
visualizer = InterpretmlEBMVisualizer([male_model, female_model, normal_model, ff_model, combined], ["Male Model", "Female Model", "Normal Model", "50-50 Model", "Combined"])
visualizer.show()

HBox(children=(VBox(children=(Dropdown(description='Feature:', options=(('checking_status', 0), ('duration', 1…

# Group Performance Plots

In [35]:
# Feature whose groups are compared in the performance plots.
foi = 'sex'

# Data the analyzers are evaluated on.
# NOTE(review): this is the TRAINING split, although the analyzer cells pass it
# through arguments named X_test / y_test — confirm that is intended.
_x, _y = X_train, y_train

# Boolean row selectors for the two groups of the evaluation data.
male_mask = _x[foi].eq('male')
female_mask = _x[foi].eq('female')

In [5]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model),
        ("Female Model", female_model),
        ("Normal Model", normal_model),
    ],
    baseline_models=[
    ],
    X_test=_x, y_test=_y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood'
)
analyzer.generate_plot()

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 83.45it/s]
Processing Group 2/3: 100%|██████████| 10/10 [00:00<00:00, 130.15it/s]
Processing Group 3/3: 100%|██████████| 10/10 [00:00<00:00, 62.20it/s]


Output()

# Adding more trained models

In [28]:
import random

def generate_pairs(N, random_state=None):
    """Draw N random weight pairs ``(x, 1 - x)`` with ``x`` uniform on [0, 1].

    Args:
        N: Number of pairs to generate. Zero (or a negative value) yields an
            empty list.
        random_state: Optional seed. When given, the pairs are reproducible and
            are the same values ``random.seed(random_state)`` followed by
            ``random.uniform(0, 1)`` calls would produce. When ``None``, the
            shared module-level generator is used.

    Returns:
        A list of N ``(x, 1 - x)`` tuples of floats.
    """
    # A private generator gives the same sequence as seeding the module-level
    # one, but does not reset the random state of unrelated code as a side effect.
    rng = random.Random(random_state) if random_state is not None else random
    return [(x, 1 - x) for x in (rng.uniform(0, 1) for _ in range(N))]

In [30]:
from pathlib import Path
from tqdm.notebook import tqdm
import pickle

# Checkpoint file. This is the same path the notebook later opens with
# pickle.load, so a fresh run loads the models trained here rather than a
# stale or missing file.
models_path = Path("pickles/german/additional_models.pkl")
models_path.parent.mkdir(parents=True, exist_ok=True)

# Collected as (label, fitted model) tuples.
additional_models = []

# 50 reproducible (male weight, female weight) pairs; each pair sums to 1.
for (mw, fw) in tqdm(generate_pairs(50, 42), desc="Training models"):
    new_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())

    # Per-row sample weight chosen by the row's sex: mw for 'male', fw otherwise.
    sample_weights = X_train['sex'].map(lambda x: mw if x == 'male' else fw)

    # Fit on the full training split with those weights.
    new_model.fit(X_train, y_train, sample_weight=sample_weights)

    # Label records the weights used, rounded to two decimals.
    additional_models.append((f"M: {mw:.2f}, F: {fw:.2f}", new_model))

    # Re-written after every model so an interrupted run keeps what was
    # trained so far.
    with open(models_path, "wb") as f:
        pickle.dump(additional_models, f)

Training models:   0%|          | 0/50 [00:00<?, ?it/s]

In [36]:
import pickle

# Restore the previously trained (label, model) list from disk.
# Unpickling executes code stored in the file, so only load files produced by
# this project.
with open("pickles/german/additional_models.pkl", "rb") as models_file:
    additional_models = pickle.load(models_file)

# Quick sanity check on how many models came back.
print(f"Loaded {len(additional_models)} models")

Loaded 50 models


In [37]:
%matplotlib widget
plt.ioff()
analyzer = GenericGroupPerformanceAnalyzer(
    models_to_combine=[
        ("Male Model", male_model_eps),
        ("Normal Model", normal_model),
        ("Female Model", female_model_eps),
    ],
    baseline_models=additional_models[5:],
    X_test=_x, y_test=_y,
    n_combination_main=100, n_combination_sub=10,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    metric='log_likelihood',
)
analyzer.generate_plot()

Processing Group 1/3: 100%|██████████| 100/100 [00:01<00:00, 73.50it/s]
Processing Group 2/3: 100%|██████████| 10/10 [00:00<00:00, 108.44it/s]
Processing Group 3/3: 100%|██████████| 10/10 [00:00<00:00, 106.42it/s]


Output()

In [38]:
import dill

# Load a saved snapshot of model records. As with pickle, dill executes code
# stored in the file, so only load files produced by this project.
with open("visible_models_20250408_120843.pkl", "rb") as snapshot_file:
    visible_models = dill.load(snapshot_file)

In [39]:
# Pull the fitted model out of each loaded record (stored under the 'model' key).
models = [record['model'] for record in visible_models]

In [None]:
import fairlearn.metrics

# For each model in models, compute the equalized odds difference between the
# 'sex' groups on the held-out test split, then summarise the spread.
y_true = y_test
y_preds = [model.predict(X_test) for model in models]

# First model's predictions, kept under the original name in case a later cell
# still expects it.
y_pred = y_preds[0]

eo_differences = [
    fairlearn.metrics.equalized_odds_difference(
        y_true, model_pred, sensitive_features=X_test['sex']
    )
    for model_pred in y_preds
]

print("Equalized Odds Difference Report:")
print(f"Min: {np.min(eo_differences)}")
print(f"Mean: {np.mean(eo_differences)}")
print(f"Max: {np.max(eo_differences)}")

Equalized Odds Difference Report:
Min: 0.017345399698340813
Mean: 0.08049897536756111
Max: 0.17086834733893563
