# Introduction - German Credit Dataset

The German Credit Dataset classifies people described by a set of attributes as good or bad credit risks. 

It is commonly used for fairness tasks; "personal_status_sex", which combines gender and marital status, is usually treated as the protected attribute.

### Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference
import fairness_functions as fp


### Load dataset

In [18]:
from ucimlrepo import fetch_ucirepo
  
# Fetch the dataset with id 144 from the UCI repository.
statlog_german_credit_data = fetch_ucirepo(id=144)

# Data (as pandas dataframes)
X = statlog_german_credit_data.data.features
y = statlog_german_credit_data.data.targets


# Manually define readable column names based on the dataset's variable
# information. The order must match the order of the feature columns.
feature_col_names = [
    "checking_account_status",    # Attribute1: Status of existing checking account
    "duration",                   # Attribute2: Duration (months)
    "credit_history",             # Attribute3: Credit history
    "purpose",                    # Attribute4: Purpose
    "credit_amount",              # Attribute5: Credit amount
    "savings_account_bonds",      # Attribute6: Savings account/bonds
    "present_employment_since",   # Attribute7: Present employment since (Other)
    "installment_rate",           # Attribute8: Installment rate in percentage of disposable income
    "personal_status_sex",        # Attribute9: Marital Status / Personal status and sex
    "other_debtors",              # Attribute10: Other debtors / guarantors
    "present_residence_since",    # Attribute11: Present residence since
    "property",                   # Attribute12: Property
    "age",                        # Attribute13: Age (years)
    "other_installment_plans",    # Attribute14: Other installment plans
    "housing",                    # Attribute15: Housing (Other)
    "number_of_existing_credits", # Attribute16: Number of existing credits at this bank
    "occupation",                 # Attribute17: Occupation (Job)
    "number_of_people_liable",    # Attribute18: Number of people being liable to provide maintenance for
    "telephone",                  # Attribute19: Telephone (Binary)
    "foreign_worker"              # Attribute20: foreign worker (Binary)
]

# Replace the feature column labels with the readable names above.
X.columns = feature_col_names

sensitive_col ='personal_status_sex'

# Rows without a sensitive-attribute value cannot be used for the fairness
# evaluation, so drop them from the features.
X = X.dropna(subset=[sensitive_col])

# Keep the targets aligned with the surviving feature rows; without this, any
# dropped row would leave X and y with different lengths and break the later
# train/test split.
# NOTE(review): assumes y shares X's row index as loaded -- confirm.
y = y.loc[X.index]



#### Adjust target column to be binary

In [None]:
# y may still be a DataFrame at this point; collapse it so that the
# element-wise .map() below is available.
y = y.squeeze() if isinstance(y, pd.DataFrame) else y

# Recode the labels: raw value 1 becomes 0 and raw value 2 becomes 1.
# NOTE(review): presumably 1 = good and 2 = bad credit in the raw data --
# confirm against the dataset documentation.
target_recoding = {1: 0, 2: 1}
y = y.map(target_recoding)

print("Unique target values after mapping:", y.unique())

print(y.value_counts())



### Impute NaN values

Numeric NaN values are imputed with the column mean; NaN values in categorical columns are imputed with the column mode.

In [None]:
# Hand-picked list of the columns that are handled as categorical.
categorical_cols = [
    "checking_account_status",  # status of the checking account
    "credit_history",           # credit history categories
    "purpose",                  # purpose of the credit
    "savings_account_bonds",    # savings account/bonds categories
    "present_employment_since", # employment status
    "personal_status_sex",      # combined personal status and sex
    "other_debtors",            # other debtors/guarantors
    "property",                 # property information
    "other_installment_plans",  # other installment plans
    "housing",                  # housing situation
    "occupation",               # occupation categories
    "telephone",                # binary, handled as categorical
    "foreign_worker"            # binary, handled as categorical
]

# Whatever is not listed above is handled as numeric (original column order kept).
categorical_lookup = set(categorical_cols)
numeric_cols = [col for col in X.columns if col not in categorical_lookup]

print("Numeric columns:", numeric_cols)
print("Categorical columns:", categorical_cols)

# Coerce the numeric columns to a numeric dtype; entries that cannot be
# parsed become NaN.
X_numeric = X[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Mean-impute the numeric columns, each with its own column mean.
column_means = X_numeric.mean()
X_numeric = X_numeric.fillna(column_means)

# Keep only the categorical columns with at most `max_unique_threshold`
# distinct values; higher-cardinality columns are left out entirely.
max_unique_threshold = 20
filtered_categorical_cols = [
    col
    for col in categorical_cols
    if X[col].nunique() <= max_unique_threshold
]
print("Filtered Categorical columns (<=20 unique values):", filtered_categorical_cols)

# Mode-impute the retained categorical columns on a copy, so X stays untouched.
X_categorical = X[filtered_categorical_cols].copy()
for col in filtered_categorical_cols:
    most_frequent = X_categorical[col].mode()[0]
    X_categorical[col] = X_categorical[col].fillna(most_frequent)


### One-hot encode categorical features

In [None]:

# Dummy-encode the imputed categorical columns; drop_first=True leaves out the
# first level of every column.
X_categorical_encoded = pd.get_dummies(X_categorical, drop_first=True)

# Put numeric and encoded categorical columns side by side, then replace any
# NaN that is still present with 0.
X_processed = pd.concat([X_numeric, X_categorical_encoded], axis=1).fillna(0)

# Keep the raw (un-encoded) sensitive attribute for the fairness metrics.
sens = X[sensitive_col]

print("Shape of processed features:", X_processed.shape)


### Split data to train & test sets

In [None]:
# 70/30 train/test split with a fixed seed. The sensitive attribute is split
# together with the features and targets so the three stay row-aligned.
(
    X_train,
    X_test,
    y_train,
    y_test,
    sens_train,
    sens_test,
) = train_test_split(X_processed, y, sens, test_size=0.3, random_state=42)


print("X train shape: ", X_train.shape)
print("X test shape: ", X_test.shape)

### Train and evaluate baseline model

In [None]:
# Fit a logistic regression on the training split (fit() returns the estimator).
lr = LogisticRegression(random_state=42, max_iter=10000).fit(X_train, y_train)

# Baseline predictions for the held-out test split.
y_pred_baseline = lr.predict(X_test)

# Predictive performance of the baseline model.
baseline_accuracy = accuracy_score(y_test, y_pred_baseline)
f1_score_baseline = f1_score(y_test, y_pred_baseline)

# Fairness of the baseline model with respect to the sensitive attribute.
baseline_dp_diff = demographic_parity_difference(
    y_test, y_pred_baseline, sensitive_features=sens_test
)
baseline_eo_diff = equalized_odds_difference(
    y_test, y_pred_baseline, sensitive_features=sens_test
)

print("=== Baseline Model Metrics ===")
print("Accuracy:", baseline_accuracy)
print("F1 score:", f1_score_baseline)
print("Demographic Parity Difference:", baseline_dp_diff)
print("Equalized Odds Difference:", baseline_eo_diff)


### Naive solution - drop sensitive column

In [None]:
# Start from the fully processed feature matrix and remove every one-hot
# column that was derived from the sensitive attribute (name prefix match).
sensitive_encoded_cols = [
    col
    for col in X_processed.columns
    if col.startswith(sensitive_col + '_')
]
X_processed_no_sensitive = X_processed.drop(columns=sensitive_encoded_cols)

# Same 70/30 split and seed as before.
# NOTE: this rebinds X_train / X_test (and lr below), so every later cell works
# on the feature matrix WITHOUT the sensitive columns.
(
    X_train,
    X_test,
    y_train,
    y_test,
    sens_train,
    sens_test,
) = train_test_split(
    X_processed_no_sensitive, y, sens, test_size=0.3, random_state=42
)

# Fit the logistic regression on the reduced feature set.
lr = LogisticRegression(random_state=42, max_iter=10000).fit(X_train, y_train)

# Naive-model predictions for the test split.
y_pred_naive = lr.predict(X_test)

# Predictive performance of the naive model.
naive_accuracy = accuracy_score(y_test, y_pred_naive)
f1_score_naive = f1_score(y_test, y_pred_naive)

# Fairness of the naive model with respect to the sensitive attribute.
naive_dp_diff = demographic_parity_difference(
    y_test, y_pred_naive, sensitive_features=sens_test
)
naive_eo_diff = equalized_odds_difference(
    y_test, y_pred_naive, sensitive_features=sens_test
)

print("=== Naive Model Metrics ===")
print("Accuracy:", naive_accuracy)
print("F1 score:", f1_score_naive)
print("Demographic Parity Difference:", naive_dp_diff)
print("Equalized Odds Difference:", naive_eo_diff)


### Optimum fairness search

In [None]:
# Candidate methods per pipeline stage, keyed by a display label. All callables
# come from the local fairness_functions module (imported as fp).

# Stage 1: pre-processing candidates.
pre_methods = {
    "None": fp.pre_none,
    "CorrRemover": fp.pre_correlation_remover,
    "SensitiveResampling": fp.pre_sensitive_resampling,
}

# Stage 2: in-processing (training) candidates.
in_methods = {
    "Baseline": fp.in_baseline,
    "Reweighting": fp.in_reweighting,
    "ExpGrad_DP": fp.in_expgrad_dp,
    "ExpGrad_EO": fp.in_expgrad_eo,
}

# Stage 3: post-processing candidates.
post_methods = {
    "None": fp.post_none,
    "Threshold_DP": fp.post_threshold_dp,
    "Threshold_EO": fp.post_threshold_eo,
}

# Run the experiments over the candidate tables, using the current train/test
# split together with the matching sensitive-attribute splits.
results = fp.run_experiments(
    pre_methods, in_methods, post_methods,
    X_train, y_train, sens_train,
    X_test, y_test, sens_test,
)


### Select only pareto optimal methods

In [None]:

# Metrics considered by the Pareto filter, each with a boolean flag.
# NOTE(review): presumably True means "maximize" and False means "minimize" --
# confirm against fp.pareto_frontier.
objectives = {
    "f1_score": True,
    "Demographic_parity": False,
    "Equalized_odds": False,
}

# Reduce the experiment results to the Pareto frontier for these objectives.
frontier = fp.pareto_frontier(results, objectives)

print("Pareto Frontier configurations:")
for config, metrics in frontier.items():
    print(f"{config}: {metrics}")

### Apply thresholds on bias and portion of retained accuracy

### Set thresholds on F1 score, demographic parity and equalized odds

In [21]:
# Thresholds handed to fp.filter_results when filtering the Pareto frontier.
# NOTE(review): presumably the F1 value is a lower bound and the two fairness
# values are upper bounds -- confirm against fp.filter_results.
f1_threshold = 0.4  # threshold on the F1 score
demographic_parity_threshold = 0.2  # threshold on demographic parity
equalized_odds_threshold = 0.2  # threshold on equalized odds

In [None]:
# Keep only the Pareto-frontier configurations that pass the three thresholds
# defined earlier.
filtered = fp.filter_results(
    frontier,
    f1_threshold=f1_threshold,
    dp_threshold=demographic_parity_threshold,
    eo_threshold=equalized_odds_threshold,
)

print("\nFiltered Results (satisfying thresholds):")
for config, metrics in filtered.items():
    print(config, metrics)