In [2]:
from interpretml_utils import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Loading dataset
### German Credit dataset (UCI Statlog)

In [3]:
# Load the German Credit dataset (UCI Statlog).
# The file is read as space-delimited with no header row, so the column names
# are supplied explicitly; the last column is the credit label.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
columns = [
    'checking_status',
    'duration',
    'credit_history',
    'purpose',
    'credit_amount',
    'savings_account',
    'employment',
    'installment_rate',
    'personal_status_sex',
    'other_debtors',
    'present_residence',
    'property',
    'age',
    'other_installment_plans',
    'housing',
    'existing_credits',
    'job',
    'num_maintenance',
    'telephone',
    'foreign_worker',
    'target',
]

df = pd.read_csv(url, sep=' ', names=columns, header=None)

# Preprocessing
# Derive a two-level 'sex' label (the strings 'male' / 'female') from the
# combined personal-status/sex code: A91, A93 and A94 are labelled male and
# every other code is labelled female.
male_codes = ['A91', 'A93', 'A94']
df['sex'] = df['personal_status_sex'].isin(male_codes).map({True: 'male', False: 'female'})

# Recode the target to binary: good credit (raw 1) -> 1, bad credit (raw 2) -> 0.
df['target'] = df['target'].replace({1: 1, 2: 0})

# Every column except the label is a feature. This includes the derived 'sex'
# column and the original 'personal_status_sex' code it was built from.
features = [name for name in df.columns if name != 'target']

X = df[features]
y = df['target']

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Training baseline models

In [10]:
# Fit three EBMs on the training split: one on the male rows only, one on the
# female rows only, and one on every row.
train_is_male = X_train['sex'] == 'male'
train_is_female = X_train['sex'] == 'female'

male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train.loc[train_is_male], y_train.loc[train_is_male])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train.loc[train_is_female], y_train.loc[train_is_female])

# Baseline trained without any group split.
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [5]:
# Combine the two sex-specific models in two different ways. Requires
# male_model and female_model from the training cell. CombinedEBM and
# merge_ebms are presumably provided by the `interpretml_utils` star import;
# their internals are not visible in this notebook.
# CombinedEBM receives the two models plus one weight per model (0.5 / 0.5) --
# presumably an equal-weight blend; TODO confirm against interpretml_utils.
ff_model = CombinedEBM([male_model, female_model], [0.5, 0.5])
# Keep the object returned by get_model_object(); that is what the visualizer
# cell below is given, not the CombinedEBM wrapper itself.
ff_model_obj = ff_model.get_model_object()

# Second combination: pass the same two models to merge_ebms.
merged_model = merge_ebms([male_model, female_model])

# Displaying with custom EBMVisualizer

In [6]:
# Pair each model with the label it carries in the visualizer. Insertion order
# is preserved, so the models and labels stay aligned.
labelled_models = {
    "Male Model": male_model,
    "Female Model": female_model,
    "Normal Model": normal_model,
    "50-50 Model": ff_model_obj,
    "Merged Model": merged_model,
}
visualizer = InterpretmlEBMVisualizer(
    list(labelled_models.values()),
    list(labelled_models.keys()),
)
visualizer.show()

HBox(children=(VBox(children=(Dropdown(description='Feature:', options=(('checking_status', 0), ('duration', 1…

# Group Performance Plots

In [12]:
# Re-fit the male-only, female-only and all-rows EBMs on the training split.
# NOTE(review): this repeats the "Training baseline models" cell verbatim --
# presumably to start the group analysis from freshly fitted models; confirm
# whether the retraining is still needed.
train_is_male = X_train['sex'] == 'male'
train_is_female = X_train['sex'] == 'female'

male_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
male_model.fit(X_train.loc[train_is_male], y_train.loc[train_is_male])

female_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
female_model.fit(X_train.loc[train_is_female], y_train.loc[train_is_female])

# Baseline trained without any group split.
normal_model = ExplainableBoostingClassifier(feature_names=X.columns.tolist())
normal_model.fit(X_train, y_train)

print("done")

done


In [13]:
# Feature that defines the two groups, and the data the group analysis is run
# on (here the training split).
foi = 'sex'
_x, _y = X_train, y_train

# Boolean row masks selecting each group within _x.
male_mask = _x[foi].eq('male')
female_mask = _x[foi].eq('female')

In [14]:
%matplotlib widget
plt.ioff()
analyzer = GroupPerformanceAnalyzer(
    male_model, female_model, normal_model,
    _x, _y,
    male_mask=male_mask, female_mask=female_mask,
    feature_of_interest='sex',
    combine_strategy='post',
    metric='log_likelihood',
)
analyzer.generate_plot(n_combinations=100)

Evaluating combinations: 100%|██████████| 100/100 [00:00<00:00, 107.44it/s]


HBox(children=(VBox(children=(HTML(value='<b>Model Details:</b>'), Output()), layout=Layout(margin='0 20px', w…