# Notebook 03: Fairness Baseline

Train a baseline model (Random Forest classifier) and compute standard performance and fairness metrics.

In [10]:
# Class balance of the target: number of rows per credit_risk label.
risk_label_counts = df['credit_risk'].value_counts()
risk_label_counts

credit_risk
good    700
bad     300
Name: count, dtype: int64

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from fairlearn.metrics import demographic_parity_difference

# Load the South German Credit table (path is relative to the notebook's working directory).
df = pd.read_parquet('../data/silver/SouthGermanCredit_en.parquet')

# Encode the target: 1 = good, 0 = bad.
# Series.map leaves NaN for every label that is missing from the dict, so check
# explicitly and fail here with a clear message instead of letting NaNs reach
# the stratified split or the model fit.
label_to_int = {"good": 1, "bad": 0}
y = df['credit_risk'].map(label_to_int)
unmapped_rows = y.isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'credit_risk'].unique().tolist()
    raise ValueError(
        f"credit_risk contains labels other than 'good'/'bad': {unexpected_labels}"
    )

# Features are every column except the target.
X = df.drop(columns=['credit_risk'])

# 70/30 split, stratified on the target; the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Object-dtype columns are the ones that get one-hot encoded.
categorical_cols = list(X.select_dtypes(include='object').columns)

# One-hot encode the categorical columns (first level of each dropped, dense output);
# every other column is passed through unchanged.
one_hot = OneHotEncoder(drop='first', sparse_output=False)
preprocessor = ColumnTransformer(
    transformers=[('cat', one_hot, categorical_cols)],
    remainder='passthrough',
)

# Preprocessing followed by a 100-tree random forest with a fixed seed.
forest = RandomForestClassifier(n_estimators=100, random_state=42)
pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('model', forest),
])

# Train on the training split, then predict labels for the held-out split.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Overall accuracy plus the per-class precision / recall / F1 table.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Encoded view of the test features, kept for inspection.
# index=X_test.index keeps its rows aligned with X_test / y_test; without it the
# frame would get a fresh 0..n-1 index that no longer matches the shuffled split.
fitted_preprocessor = pipeline.named_steps['preprocessing']
X_test_transformed = pd.DataFrame(
    fitted_preprocessor.transform(X_test),
    columns=fitted_preprocessor.get_feature_names_out(),
    index=X_test.index,
)

# Fairness evaluation: demographic parity difference across foreign_worker groups.
# The raw column from X_test is used as the grouping variable instead of looking up
# an encoded column by substring: it shares y_test's index, does not depend on how
# the encoder names its output columns, and raises a clear KeyError if the column
# is missing. For a two-level attribute the groups are the same as those of the
# single dummy column, so the reported value is expected to be unchanged.
dpd = demographic_parity_difference(
    y_test, y_pred, sensitive_features=X_test['foreign_worker']
)
print("Demographic Parity Difference (foreign_worker):", dpd)


Accuracy: 0.76
              precision    recall  f1-score   support

           0       0.69      0.37      0.48        90
           1       0.77      0.93      0.84       210

    accuracy                           0.76       300
   macro avg       0.73      0.65      0.66       300
weighted avg       0.75      0.76      0.73       300

Demographic Parity Difference (foreign_worker): 0.006802721088435382


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, KBinsDiscretizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from fairlearn.metrics import demographic_parity_difference
from fairlearn.metrics import MetricFrame
from sklearn.impute import SimpleImputer

# Attributes examined as sensitive in this cell, and a feature named as a possible proxy
# (neither list is consumed by the code below; they record intent).
sensitive_features = ['age', 'foreign_worker', 'personal_status_sex']
proxy_feature = 'credit_amount'

# Feature matrix: every column except the target. `y` is reused from the previous cell.
X = df.drop(columns='credit_risk')

# Stratified 70/30 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Two-bin (quantile) discretisation of age, needed as a group label for the fairness metric.
# The bin edge is learned from the training split only and then applied to both splits.
age_bins = KBinsDiscretizer(n_bins=2, encode='ordinal', strategy='quantile')
age_bins.fit(X_train[['age']])
X_train['age_bin'] = age_bins.transform(X_train[['age']])
X_test['age_bin'] = age_bins.transform(X_test[['age']])

# Column roles are derived from X (the frame before the split):
# object-dtype columns are one-hot encoded, all remaining columns are passed through.
categorical_cols = list(X.select_dtypes(include='object').columns)
not_numeric = set(categorical_cols) | {'credit_risk'}
numeric_cols = [name for name in X.columns if name not in not_numeric]

# Optional: to exclude the proxy feature, remove 'credit_amount' from numeric_cols here.

# Only the columns listed below reach the model; no `remainder` is given, so any
# other column present in X_train / X_test is left out by the ColumnTransformer.
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_cols),
    ('num', 'passthrough', numeric_cols),
])

# Random forest with 200 trees, depth capped at 8 and at least 5 samples per leaf.
forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=8,
    min_samples_leaf=5,
    random_state=42,
)
pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('model', forest),
])

# Train on the training split and predict the held-out split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Overall accuracy plus the per-class precision / recall / F1 table.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Encoded view of the test features, kept for inspection.
# index=X_test.index keeps its rows aligned with X_test / y_test; without it the
# frame would get a fresh 0..n-1 index that no longer matches the shuffled split.
fitted_preprocessor = pipeline.named_steps['preprocessing']
X_test_processed = pd.DataFrame(
    fitted_preprocessor.transform(X_test),
    columns=fitted_preprocessor.get_feature_names_out(),
    index=X_test.index,
)

# Group labels for each sensitive attribute, taken from the raw test columns so they
# share y_test's index. age_bin is cast to int so the groups are labelled 0 / 1.
sf_dict = {
    'foreign_worker': X_test['foreign_worker'],
    'personal_status_sex': X_test['personal_status_sex'],
    'age_bin': X_test['age_bin'].astype(int)
}

# Demographic parity difference of the predictions, one value per sensitive attribute.
for feature_name, feature_data in sf_dict.items():
    dpd = demographic_parity_difference(y_test, y_pred, sensitive_features=feature_data)
    print(f'DPD for {feature_name}: {dpd:.4f}')


Accuracy: 0.7533333333333333
              precision    recall  f1-score   support

           0       0.81      0.23      0.36        90
           1       0.75      0.98      0.85       210

    accuracy                           0.75       300
   macro avg       0.78      0.60      0.60       300
weighted avg       0.77      0.75      0.70       300

DPD for foreign_worker: 0.2517
DPD for personal_status_sex: 0.1512
DPD for age_bin: 0.0580
