In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from data_cleaning import fill_missing_values, rename_columns
from data_science_skript import preprocess_data

from sklearn.model_selection import train_test_split 
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, roc_auc_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import optuna
import time
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from optuna.samplers import TPESampler


from fairlearn.metrics import equalized_odds_difference
from fairlearn.metrics import demographic_parity_difference, demographic_parity_ratio
from fairlearn.postprocessing import ThresholdOptimizer



In [None]:
# Read the Adult CSV ("?" is parsed as NaN) and pass it through the two project
# cleaning helpers in sequence.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = (
    pd.read_csv(r"C:\Users\kimko\PortfolioProjekt\adult.csv", na_values=["?"])
    .pipe(fill_missing_values)
    .pipe(rename_columns)
)

In [None]:
# Discrimination in the dataset

plt.style.use("dark_background")
colors = ["silver", "teal"]

# Income distribution by gender (stacked counts per income class)
income_gender = df.groupby(["sex", "income"]).size().unstack()
ax_gender = income_gender.plot(kind="bar", stacked=True, color=colors, figsize=(8, 5))
ax_gender.set_title("Einkommensverteilung nach Geschlecht")
ax_gender.set_xlabel("Geschlecht")
ax_gender.set_ylabel("Anzahl")
ax_gender.tick_params(axis="x", labelrotation=0)
ax_gender.legend(title="Einkommen")
plt.show()

# Income distribution by ethnicity (stacked counts per income class)
income_race = df.groupby(["race", "income"]).size().unstack()
ax_race = income_race.plot(kind="bar", stacked=True, color=colors, figsize=(10, 5))
ax_race.set_title("Einkommensverteilung nach Ethnie")
ax_race.set_xlabel("Ethnie")
ax_race.set_ylabel("Anzahl")
ax_race.tick_params(axis="x", labelrotation=45)
ax_race.legend(title="Einkommen")
plt.show()


In [None]:
# Rows with income '>50K'; the shares below are computed within this subset only
high_earners = df[df["income"] == '>50K']

# Share of each gender among the high earners
gender_parity = high_earners['sex'].value_counts(normalize=True)
print(gender_parity)

# Share of each ethnicity among the high earners
race_parity = high_earners['race'].value_counts(normalize=True)
print(race_parity)

In [None]:
# Pairwise relationship plot over the columns of df (quick visual overview before modelling)
sns.pairplot(df)

Dies deutet auf starke Unterschiede in der Einkommensverteilung hin und muss beim Feature-Engineering beachtet werden
- Geschlecht und Ethnie sollten als sensible Merkmale beachtet werden
- verschiedene Korrekturen: Reweighting, Fairness Constraints

### Fairness in der Vorhersage messen:
- Falsch Positive und Falsch Negativ messen
- Disparate Impact Score = Rate der positiven Ergebnisse für die benachteiligte Gruppe / Rate der positiven Ergebnisse für die bevorzugte Gruppe (> 0.8)

# Modellauswahl
- Logistisches Modell 
- Decision Tree
- Random Forest
- Neuronale Netze?
### Fairness-optimierte Modelle
- Fair Logistic Regression

In [None]:
# Convert variables with the project helper preprocess_data
# NOTE(review): df is overwritten with its own transformed version, so re-running this
# cell applies the helper a second time — confirm the helper is idempotent.
df = preprocess_data(df)

In [None]:
# Correlation heatmap restricted to the int64 columns of df
df_numeric = df.select_dtypes(include=["int64"])
corr_matrix = df_numeric.corr()
sns.heatmap(data=corr_matrix, cmap="plasma", vmax=0.8)

### Bewertung der Feature-Correlation
- education_num
- age
- sex, hours_per_week, age, income scheinen eine gewisse Korrelation zu haben

In [None]:
# Train/test split: "income" is the label, every other column is a feature
target = df["income"]
features = df.drop(columns=["income"])

# 80/20 split, stratified on the label so both partitions keep the class ratio.
# The fixed random_state makes the split — and therefore every metric reported
# further down — reproducible after a kernel restart.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.2,
    stratify=target,
    random_state=42,
)

In [None]:
# Row/column counts of both partitions
print(f"Trainingsdaten:\n {features_train.shape}")
print(f"\nTestdaten:\n {features_test.shape}")

In [None]:
# Column dtypes of df; the later cells split features into int64 and object columns
df.dtypes

In [None]:
# Label counts in the training and test partitions, side by side
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

panels = [
    (target_train, "Klassenverteilung im Training-Set"),
    (target_test, "Klassenverteilung im Test-Set"),
]
for axis, (labels, title) in zip(ax, panels):
    sns.countplot(x=labels, ax=axis)
    axis.set_title(title)

plt.show()



In [None]:
# Relative label frequencies (normalised per column) for the training and test partitions
train_crosstab_income, test_crosstab_income = (
    pd.crosstab(index=labels, columns="count", normalize="columns")
    for labels in (target_train, target_test)
)
display(train_crosstab_income)
display(test_crosstab_income)

### Klassenverteilung
Es ist ein deutliches Ungleichgewicht der Zielkategorie zu erkennen:
- class_weight = balanced
- SMOTE

In [None]:
# Preparation: split the feature columns by dtype
num_cols = features_train.select_dtypes(include=["int64"]).columns
cat_cols = features_train.select_dtypes(include=["object"]).columns

# Preprocessor: standardise the int64 columns, one-hot encode the object columns.
# handle_unknown="ignore" keeps transform()/predict() from raising when the test split
# contains a category that never occurred in the training split, and it matches the
# encoder configuration used inside the hyperparameter-search objective.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
])

# Baseline model: logistic regression with balanced class weights
pipeline_log_base = Pipeline([
    ("preprocessor", preprocessor),
    ("model", LogisticRegression(class_weight="balanced", random_state=42))
])

# Fit on the training split, predict the held-out test split
pipeline_log_base.fit(features_train, target_train)
target_pred_log_base = pipeline_log_base.predict(features_test)

print("Accuracy", accuracy_score(target_test, target_pred_log_base))
print("Classification report\n", classification_report(target_test, target_pred_log_base))

# Erste Bewertung 
Das Modell zeigt eine klare Diskriminierung:

### Precision
    - Einkommen > 50K werden ungenauer vorhergesagt (nur mit 57 % Wahrscheinlichkeit richtig)
    --> Diskriminierung gegen Hochverdienende?

### Recall
    - relativ gut und ausgeglichen

### F1-Score
    - auch hier werden Hochverdiener deutlich schlechter erkannt

### Ursachen:
- Datenungleichgewicht
- Feature Bias
- andere Modelle können eventuell besser unterscheiden (RandomForest)


In [None]:
# Feature columns available for training (the label column "income" was dropped before the split)
features_train.columns

In [None]:
# Feature adjustment: refit the baseline without the sensitive attributes

crit_cols = ["sex", "race"]
features_train_crit = features_train.drop(columns=crit_cols)
features_test_crit = features_test.drop(columns=crit_cols)

num_cols_crit = features_train_crit.select_dtypes(include=["int64"]).columns
cat_cols_crit = features_train_crit.select_dtypes(include=["object"]).columns

# handle_unknown="ignore" keeps predict() from raising on categories that occur only in
# the test split, and matches the encoder used inside the hyperparameter-search objective.
preprocessor_crit = ColumnTransformer([
    ("num", StandardScaler(), num_cols_crit),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols_crit)
])

pipeline_log_base_crit = Pipeline([
    ("preprocessor", preprocessor_crit),
    ("model", LogisticRegression(class_weight="balanced", random_state=42))
])

# Fit on the reduced training features, predict the reduced test features
pipeline_log_base_crit.fit(features_train_crit, target_train)
target_pred_log_base_crit = pipeline_log_base_crit.predict(features_test_crit)

print("LogistischeRegression\n", classification_report(target_test, target_pred_log_base_crit))

In [None]:
# Coefficients of the baseline model (all features), sorted from largest to smallest.
# Column order follows the ColumnTransformer: numeric columns first, then the one-hot columns.
encoder_base = pipeline_log_base.named_steps['preprocessor'].named_transformers_['cat']
feature_names = list(num_cols) + list(encoder_base.get_feature_names_out(cat_cols))
coefficients = pipeline_log_base.named_steps["model"].coef_[0]
feature_importance = pd.Series(data=coefficients, index=feature_names).sort_values(ascending=False)

# Same for the model trained WITHOUT sex and race
encoder_crit = pipeline_log_base_crit.named_steps['preprocessor'].named_transformers_['cat']
feature_names_crit = list(num_cols_crit) + list(encoder_crit.get_feature_names_out(cat_cols_crit))
coefficients_crit = pipeline_log_base_crit.named_steps["model"].coef_[0]
feature_importance_crit = pd.Series(data=coefficients_crit, index=feature_names_crit).sort_values(ascending=False)

# Plot the 20 largest coefficients of each model
fig, ax = plt.subplots(2, 1, figsize=(10, 10))
top_panels = zip(ax, (feature_importance, feature_importance_crit))
for axis, importance in top_panels:
    importance.head(20).plot(kind='barh', ax=axis)

ax[0].set_title("Feature Importance mit allen Features")
ax[1].set_title("Feature Importance ohne 'sex' und 'race'")



### Bewertung der Feature-Importance
- Herkunftsländer scheinen eine auffällig große Rolle zu spielen
- das Geschlecht und die Rasse dafür nicht direkt.
- --> es gibt aber andere Metriken, die indirekt auf ein Geschlecht hinweisen(Education, Occupation, Relationship)

In [None]:
# Per-group metrics of the baseline logistic regression by gender: 0 - female / 1 - male

for group in features_test["sex"].unique():
    mask = features_test["sex"] == group
    # Restrict labels and predictions to the rows of the current group
    y_true_group, y_pred_group = target_test[mask], target_pred_log_base[mask]
    acc = accuracy_score(y_true_group, y_pred_group)
    prec = precision_score(y_true_group, y_pred_group)
    rec = recall_score(y_true_group, y_pred_group)

    print(f"{group} - Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")

In [None]:
# Per-group metrics of the baseline logistic regression by ethnicity

race_test_values = features_test["race"]
for group in race_test_values.unique():
    mask = race_test_values == group
    group_truth = target_test[mask]
    group_pred = target_pred_log_base[mask]

    acc = accuracy_score(group_truth, group_pred)
    prec = precision_score(group_truth, group_pred)
    rec = recall_score(group_truth, group_pred)

    print(f"{group} - Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")

# Fazit zum ersten Baselinemodell Logistische Regression
## Geschlecht

#### Leichte Diskriminierung erkennbar:
Recall (Female: 74,7%, Male: 86,7%)
- Frauen, die tatsächlich Hochverdiener sind, werden schlechter erkannt (niedrigerer Recall)
- Männer haben einen höheren Recall, das heißt Männer werden besser als Hochverdiener erkannt

Accuracy (Female: 91,3%, Male: 75,6%):
- Frauen werden besser klassifiziert als Männer:
    - die meisten Frauen sind in der Kategorie <=50K
    - weitere Features deuten auf Frauen hin (Relationship)

Precision (Female: 59,8%, Male: 56,4%):
- Ergebnisse sind sehr ähnlich

## Ethnie
- Das Modell erkennt Hochverdiener aus bestimmten ethnischen Gruppen schlechter:
    - Recall für White besonders gut (85,8%) im Gegensatz zu indigenen und "other"
    - Precision für Indigene besonders schlecht
    - Precision für Other extrem gut - Datenset-Problem?

In [None]:
# Decision tree with balanced class weights on top of the shared preprocessor
dt_model = DecisionTreeClassifier(class_weight="balanced", random_state=42)
pipeline_dt = Pipeline(steps=[("preprocessor", preprocessor), ("model", dt_model)])

# Fit on the training split, then predict the test split
target_pred_dt = pipeline_dt.fit(features_train, target_train).predict(features_test)

# Metrics
dt_report = classification_report(target_test, target_pred_dt)
print("DecisionTree:\n", dt_report)

In [None]:
# Random forest with balanced class weights on top of the shared preprocessor
rf_model = RandomForestClassifier(class_weight="balanced", random_state=42)
pipeline_rf = Pipeline(steps=[("preprocessor", preprocessor), ("model", rf_model)])

# Fit on the training split, then predict the test split
target_pred_rf = pipeline_rf.fit(features_train, target_train).predict(features_test)

# Metrics
rf_report = classification_report(target_test, target_pred_rf)
print("RandomForest:\n", rf_report)

In [None]:
# Feature importance of the two tree models.
# Both pipelines were built with the same preprocessor, so one list of output names serves both.
encoder_rf = pipeline_rf.named_steps['preprocessor'].named_transformers_['cat']
feature_names = list(num_cols) + list(encoder_rf.get_feature_names_out(cat_cols))

# Decision tree
dt_importances = pipeline_dt.named_steps["model"].feature_importances_
feature_importance_dt = pd.Series(data=dt_importances, index=feature_names).sort_values(ascending=False)

# Random forest
rf_importances = pipeline_rf.named_steps["model"].feature_importances_
feature_importance_rf = pd.Series(data=rf_importances, index=feature_names).sort_values(ascending=False)

# Plot the 20 most important features per model
fig, ax = plt.subplots(2, 1, figsize=(10, 10))
for axis, importance in zip(ax, (feature_importance_dt, feature_importance_rf)):
    importance.head(20).plot(kind='barh', ax=axis)

ax[0].set_title("Feature Importance Decision Tree")
ax[1].set_title("Feature Importance Random Forest")

In [None]:
# Per-group metrics of the decision tree
# First by gender (0 - female / 1 - male), then by ethnicity
print("DecisionTree")
for position, attribute in enumerate(("sex", "race")):
    if position:
        # Separator between the gender block and the ethnicity block
        print("\n")
    for group in features_test[attribute].unique():
        mask = features_test[attribute] == group

        acc = accuracy_score(target_test[mask], target_pred_dt[mask])
        prec = precision_score(target_test[mask], target_pred_dt[mask])
        rec = recall_score(target_test[mask], target_pred_dt[mask])

        print(f"{group} - Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")

In [None]:
# Per-group metrics of the random forest
# First by gender (0 - female / 1 - male), then by ethnicity
print("RandomForest")
sensitive_attributes = ["sex", "race"]
for attribute in sensitive_attributes:
    if attribute != sensitive_attributes[0]:
        # Separator between the gender block and the ethnicity block
        print("\n")
    attribute_values = features_test[attribute]
    for group in attribute_values.unique():
        mask = attribute_values == group
        group_truth, group_pred = target_test[mask], target_pred_rf[mask]

        acc = accuracy_score(group_truth, group_pred)
        prec = precision_score(group_truth, group_pred)
        rec = recall_score(group_truth, group_pred)

        print(f"{group} - Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")

In [None]:
# Overall accuracy, precision and recall of the three baseline models on the test split
baseline_predictions = {
    "LogisticRegression": target_pred_log_base,
    "DecisionTree": target_pred_dt,
    "RandomForest": target_pred_rf,
}
for label, predictions in baseline_predictions.items():
    print(
        f"{label}:\n",
        accuracy_score(target_test, predictions),
        precision_score(target_test, predictions),
        recall_score(target_test, predictions),
    )

### Bewertung der einzelnen Basismodelle
|Modell | Accuracy | Precision | Recall|
|:------|:---------|:----------|:------|
|Logistische Regression|0.808|0.568|0.848|
|DecisionTree|0.815|0.619|0.605|
|RandomForest|0.856|0.748|0.607|

Aufgrund dieser Werte werde ich mich weiter dem RandomForest widmen, da dieser die zuverlässigsten Ergebnisse liefert (hohe Precision und hohe Gesamtgenauigkeit).

## Implementierung von SMOTE, um den Bias weiter zu reduzieren

In [None]:
# Random forest with SMOTE oversampling inserted between preprocessing and the model
smote_step = SMOTE(sampling_strategy="auto", random_state=42)
rf_smote_model = RandomForestClassifier(class_weight="balanced", random_state=42)
pipeline_rf_smote = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("smote", smote_step),
    ("model", rf_smote_model),
])

# Fit on the training split, then predict the test split
target_pred_rf_smote = pipeline_rf_smote.fit(features_train, target_train).predict(features_test)

print("Classification Report Random Forest with SMOTE:\n", classification_report(target_test, target_pred_rf_smote))
print(
    "RandomForest mit SMOTE:\n",
    accuracy_score(target_test, target_pred_rf_smote),
    precision_score(target_test, target_pred_rf_smote),
    recall_score(target_test, target_pred_rf_smote),
)


#### Bewertung SMOTE
|Modell | Accuracy | Precision | Recall|
|:------|:---------|:----------|:------|
|RandomForest|0.856|0.748|0.607|
|RandomForest SMOTE|0.845|0.675|0.688|


In [None]:
# Equalized odds difference of the random forest, once per sensitive attribute
eq_odds_diff_sex, eq_odds_diff_race = (
    equalized_odds_difference(
        y_true=target_test,
        y_pred=target_pred_rf,
        sensitive_features=features_test[attribute],
    )
    for attribute in ("sex", "race")
)

print(f"\nEqualized Odds Difference (EOD) für 'sex': {eq_odds_diff_sex:.4f}")
print(f"\nEqualized Odds Difference (EOD) für 'race': {eq_odds_diff_race:.4f}")

In [None]:
# Equalized odds difference of the random forest with SMOTE, once per sensitive attribute
eq_odds_diff_sex, eq_odds_diff_race = (
    equalized_odds_difference(
        y_true=target_test,
        y_pred=target_pred_rf_smote,
        sensitive_features=features_test[attribute],
    )
    for attribute in ("sex", "race")
)

print(f"\nEqualized Odds Difference (EOD) für 'sex': {eq_odds_diff_sex:.4f}")
print(f"\nEqualized Odds Difference (EOD) für 'race': {eq_odds_diff_race:.4f}")

In [None]:
# Equalized odds difference of the decision tree, once per sensitive attribute
sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

eq_odds_diff_sex = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_dt, sensitive_features=sex_test_values
)
eq_odds_diff_race = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_dt, sensitive_features=race_test_values
)

print(f"\nEqualized Odds Difference (EOD) für 'sex': {eq_odds_diff_sex:.4f}")
print(f"\nEqualized Odds Difference (EOD) für 'race': {eq_odds_diff_race:.4f}")

In [None]:
# Equalized odds difference of the baseline logistic regression, once per sensitive attribute
sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

eq_odds_diff_sex = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_log_base, sensitive_features=sex_test_values
)
eq_odds_diff_race = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_log_base, sensitive_features=race_test_values
)

print(f"\nEqualized Odds Difference (EOD) für 'sex': {eq_odds_diff_sex:.4f}")
print(f"\nEqualized Odds Difference (EOD) für 'race': {eq_odds_diff_race:.4f}")

In [None]:
# Demographic parity difference of the random forest, once per sensitive attribute
dp_diff_sex, dp_diff_race = (
    demographic_parity_difference(
        y_true=target_test,
        y_pred=target_pred_rf,
        sensitive_features=features_test[attribute],
    )
    for attribute in ("sex", "race")
)

print(f"\nDemographic Parity Difference für 'sex': {dp_diff_sex:.4f}")
print(f"Demographic Parity Difference für 'race': {dp_diff_race:.4f}")

In [None]:
# Demographic parity difference of the random forest with SMOTE, once per sensitive attribute
dp_diff_sex, dp_diff_race = (
    demographic_parity_difference(
        y_true=target_test,
        y_pred=target_pred_rf_smote,
        sensitive_features=features_test[attribute],
    )
    for attribute in ("sex", "race")
)

print(f"\nDemographic Parity Difference für 'sex': {dp_diff_sex:.4f}")
print(f"Demographic Parity Difference für 'race': {dp_diff_race:.4f}")

In [None]:
# Demographic parity difference of the decision tree, once per sensitive attribute
sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

dp_diff_sex = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_dt, sensitive_features=sex_test_values
)
dp_diff_race = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_dt, sensitive_features=race_test_values
)

print(f"\nDemographic Parity Difference für 'sex': {dp_diff_sex:.4f}")
print(f"Demographic Parity Difference für 'race': {dp_diff_race:.4f}")

In [None]:
# Demographic parity difference of the baseline logistic regression, once per sensitive attribute
sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

dp_diff_sex = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_log_base, sensitive_features=sex_test_values
)
dp_diff_race = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_log_base, sensitive_features=race_test_values
)

print(f"\nDemographic Parity Difference für 'sex': {dp_diff_sex:.4f}")
print(f"Demographic Parity Difference für 'race': {dp_diff_race:.4f}")

In [None]:
def objective(trial):
    """Optuna objective for tuning the random forest.

    Draws one hyperparameter set from the search space, wraps the resulting
    random forest in the scaling / one-hot preprocessing pipeline and returns
    its mean balanced accuracy over a 3-fold cross-validation on the global
    training split (features_train / target_train).
    """
    # Search space — the suggest_* calls run in the same order as before
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 250),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "max_features": trial.suggest_categorical("max_features", ["log2", "sqrt"]),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10, step=2),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
    }

    # Column groups derived from the training features by dtype
    numeric_columns = features_train.select_dtypes(include=["int64"]).columns
    categorical_columns = features_train.select_dtypes(include=["object"]).columns

    # Preprocessing + model; unknown categories in a validation fold are encoded as all zeros
    pipeline = Pipeline([
        ("preprocessor", ColumnTransformer([
            ("num", StandardScaler(), numeric_columns),
            ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_columns),
        ])),
        ("model", RandomForestClassifier(class_weight="balanced", random_state=42, **params)),
    ])

    fold_scores = cross_val_score(
        estimator=pipeline,
        X=features_train,
        y=target_train,
        scoring="balanced_accuracy",
        cv=3,
        n_jobs=1,
    )
    return fold_scores.mean()

# Seeded TPE sampler so the search is reproducible; the objective is maximised
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))

# Run 20 trials of the objective defined above and measure the wall-clock time
time_start = time.time()
study.optimize(objective, n_trials=20)
time_bayesian = time.time() - time_start

# One-row summary of the run: trial budget, index and score of the best trial, elapsed time
best_trial = study.best_trial
values_bayesian = [20, best_trial.number, best_trial.value, time_bayesian]
summary_columns = [
    "Number of iterations",
    "Iteration Number of Optimal Hyperparameters",
    "Score",
    "Time Elapsed (s)",
]
results_bayesian = pd.DataFrame([values_bayesian], columns=summary_columns)

# Best hyperparameters
print("\nBeste Hyperparameter für Random Forest:")
print(best_trial.params)

# Show the summary
print("\nOptimierungsergebnisse:")
print(results_bayesian)


In [None]:
# Refit the random forest with the best hyperparameters found by the study

best_params = study.best_params

model_rf_ba = RandomForestClassifier(class_weight="balanced", random_state=42, **best_params)
pipeline_rf_ba = Pipeline(steps=[("preprocessor", preprocessor), ("model", model_rf_ba)])

# Fit on the training split, then predict the test split
target_pred_rf_ba = pipeline_rf_ba.fit(features_train, target_train).predict(features_test)


In [None]:
# Fairness metrics of the tuned random forest (predictions in target_pred_rf_ba)
sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

# Equalized odds difference per sensitive attribute
eq_odds_diff_sex = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_rf_ba, sensitive_features=sex_test_values
)
eq_odds_diff_race = equalized_odds_difference(
    y_true=target_test, y_pred=target_pred_rf_ba, sensitive_features=race_test_values
)

print(f"\nEqualized Odds Difference (EOD) für 'sex': {eq_odds_diff_sex:.4f}")
print(f"\nEqualized Odds Difference (EOD) für 'race': {eq_odds_diff_race:.4f}")

# Demographic parity difference per sensitive attribute
dp_diff_sex = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_rf_ba, sensitive_features=sex_test_values
)
dp_diff_race = demographic_parity_difference(
    y_true=target_test, y_pred=target_pred_rf_ba, sensitive_features=race_test_values
)

print(f"\nDemographic Parity Difference für 'sex': {dp_diff_sex:.4f}")
print(f"Demographic Parity Difference für 'race': {dp_diff_race:.4f}")

In [None]:
# Random forest trained without the "fnlwgt" column
# Features:
features_train_mod = features_train.drop("fnlwgt", axis=1)
features_test_mod = features_test.drop("fnlwgt", axis=1)

# Preprocessor for the reduced feature set, split by dtype
num_cols_mod = features_train_mod.select_dtypes(include=["int64"]).columns
cat_cols_mod = features_train_mod.select_dtypes(include=["object"]).columns

# handle_unknown="ignore" keeps predict() from raising on categories that occur only in
# the test split, and matches the encoder used inside the hyperparameter-search objective.
preprocessor_mod = ColumnTransformer([
    ("num", StandardScaler(), num_cols_mod),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols_mod)
])

# Model
pipeline_rf_mod = Pipeline([
    ("preprocessor", preprocessor_mod),
    ("model", RandomForestClassifier(class_weight="balanced", random_state=42))
])

# Fit on the reduced training features
pipeline_rf_mod.fit(features_train_mod, target_train)

# Predict the reduced test features
target_pred_rf_mod = pipeline_rf_mod.predict(features_test_mod)



In [None]:
def objective(trial):
    """Optuna objective for the random forest on the features without "fnlwgt".

    Samples one hyperparameter set, builds the preprocessing + random-forest
    pipeline for the reduced feature set and returns the mean balanced
    accuracy of a 3-fold cross-validation on features_train_mod / target_train.

    NOTE(review): this definition replaces the earlier function of the same name.
    """
    # Search space — the suggest_* calls run in the same order as before
    sampled = {}
    sampled["n_estimators"] = trial.suggest_int("n_estimators", 50, 250)
    sampled["max_depth"] = trial.suggest_int("max_depth", 3, 15)
    sampled["max_features"] = trial.suggest_categorical("max_features", ["log2", "sqrt"])
    sampled["min_samples_split"] = trial.suggest_int("min_samples_split", 2, 10, step=2)
    sampled["min_samples_leaf"] = trial.suggest_int("min_samples_leaf", 1, 4)

    forest = RandomForestClassifier(class_weight="balanced", random_state=42, **sampled)

    # Column groups of the reduced training features, split by dtype
    int_columns = features_train_mod.select_dtypes(include=["int64"]).columns
    object_columns = features_train_mod.select_dtypes(include=["object"]).columns

    transformers = [
        ("num", StandardScaler(), int_columns),
        ("cat", OneHotEncoder(handle_unknown="ignore"), object_columns),
    ]
    steps = [
        ("preprocessor", ColumnTransformer(transformers)),
        ("model", forest),
    ]

    cv_scores = cross_val_score(
        estimator=Pipeline(steps),
        X=features_train_mod,
        y=target_train,
        scoring="balanced_accuracy",
        cv=3,
        n_jobs=1,
    )
    return cv_scores.mean()

# Fresh seeded study for the reduced feature set (this rebinds the name `study`)
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))

# Run 20 trials of the current objective and measure the wall-clock time
time_start = time.time()
study.optimize(objective, n_trials=20)
time_bayesian = time.time() - time_start

# One-row summary of the run: trial budget, index and score of the best trial, elapsed time
best_trial = study.best_trial
values_bayesian = [20, best_trial.number, best_trial.value, time_bayesian]
summary_columns = [
    "Number of iterations",
    "Iteration Number of Optimal Hyperparameters",
    "Score",
    "Time Elapsed (s)",
]
results_bayesian = pd.DataFrame([values_bayesian], columns=summary_columns)

# Best hyperparameters
print("\nBeste Hyperparameter für Random Forest:")
print(best_trial.params)

# Show the summary
print("\nOptimierungsergebnisse:")
print(results_bayesian)

In [None]:
# Best hyperparameters of the study that was run on the features without "fnlwgt"
best_params_mod = study.best_params

# Build the model from best_params_mod. The name best_params holds the result of the
# earlier study on the full feature set, so passing it here would silently discard
# the search that was just run for the reduced feature set.
model_rf_ba_mod = RandomForestClassifier(class_weight="balanced", random_state=42, **best_params_mod)
pipeline_rf_ba_mod = Pipeline([
    ("preprocessor", preprocessor_mod),
    ("model", model_rf_ba_mod)
])

# Fit on the reduced training features, then predict the reduced test features
pipeline_rf_ba_mod.fit(features_train_mod, target_train)
target_pred_rf_ba_mod = pipeline_rf_ba_mod.predict(features_test_mod)

In [None]:
# Quality and fairness metrics of every model on the test split, one table row per model.
# NOTE: roc_auc_score receives hard class predictions (output of predict()), not
# probabilities, so each ROC-AUC value reflects a single operating point.
pred_target_list = [target_pred_log_base, target_pred_dt, target_pred_rf, target_pred_rf_ba, target_pred_rf_mod, target_pred_rf_ba_mod]
model_name = ["LogisticRegression", "DecisionTree", "RandomForest", "RandomForestBaysian", "Modifizierter RandomForest", "Modifizierter RandomForestBaysian"]
mod_qual = []

sex_test_values = features_test["sex"]
race_test_values = features_test["race"]

# Row masks of the sub-groups for which ROC-AUC is reported separately;
# they do not depend on the model, so they are built once outside the loop.
group_masks = {
    "ROC-AUC-Male": sex_test_values == 1,
    "ROC-AUC-Female": sex_test_values == 0,
    "ROC-AUC-White": race_test_values == "White",
    "ROC-AUC-Non-White": race_test_values != "White",
}

# The loop variable is deliberately not called `target`: that name holds the label
# column created in the train/test-split cell and must not be overwritten here.
for target_pred in pred_target_list:
    record = {
        "precision": precision_score(target_test, target_pred) * 100,
        "recall": recall_score(target_test, target_pred) * 100,
        "f1": f1_score(target_test, target_pred) * 100,
        "ROC-AUC": roc_auc_score(target_test, target_pred) * 100,
    }

    # ROC-AUC restricted to each sub-group
    for column, mask in group_masks.items():
        record[column] = roc_auc_score(target_test[mask], target_pred[mask]) * 100

    # Fairness metrics per sensitive attribute:
    # equalized odds difference, demographic parity ratio, demographic parity difference
    record["EOD 'sex'"] = equalized_odds_difference(
        y_true=target_test, y_pred=target_pred, sensitive_features=sex_test_values
    )
    record["EOD 'race'"] = equalized_odds_difference(
        y_true=target_test, y_pred=target_pred, sensitive_features=race_test_values
    )
    record["DI 'sex'"] = demographic_parity_ratio(
        y_true=target_test, y_pred=target_pred, sensitive_features=sex_test_values
    )
    record["DI 'race'"] = demographic_parity_ratio(
        y_true=target_test, y_pred=target_pred, sensitive_features=race_test_values
    )
    record["DPD 'sex'"] = demographic_parity_difference(
        y_true=target_test, y_pred=target_pred, sensitive_features=sex_test_values
    )
    record["DPD 'race'"] = demographic_parity_difference(
        y_true=target_test, y_pred=target_pred, sensitive_features=race_test_values
    )

    mod_qual.append(record)

df_mod_qual = pd.DataFrame(mod_qual, index=model_name)
df_mod_qual

#### Bewertung
1. Wichtigste Erkenntnisse aus den Fairness-Metriken:
- DecisionTree ist am fairsten für "race", hat aber die schlechteste ROC-AUC (74.71)
- RandomForest ist am fairsten für "sex", zeigt aber eine starke Diskriminierung für "race"
- Logistic Regression und Bayesian-Modelle sind die am stärksten diskriminierenden Modelle.
- Der Modifizierte RandomForest bietet eine gute Balance zwischen Fairness und Performance.

2. Abwägung zwischen Fairness und Performance
- Höchste Performance: RandomForestBayesian (83.03 ROC-AUC), aber hohe Diskriminierung.
- Fairstes Modell für "race": DecisionTree, aber schwache Performance.
- Fairstes Modell für "sex": RandomForest.
- Bester Kompromiss: Modifizierter RandomForest (gute Balance zwischen Fairness und Leistung).

3. Weiterführung:
- Fairness-Korrekturen an dem optimierten RandomForest 
    - Reweighing (durch die Spalte "fnlwgt") bereits implementiert
    - In-Processing-Techniken: Fairness Constraints 
    - Post-Processing: Fairness-Korrektur





In [None]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# In-processing mitigation: a random forest trained under a fairness constraint.
# DemographicParity is used here; EqualizedOdds() would be an alternative but runs even longer.
constraint = DemographicParity()
fair_model_rf = ExponentiatedGradient(RandomForestClassifier(n_estimators=100, random_state=42), constraints=constraint)

# Preprocessing: fit the preprocessor on the training split only, then apply it to the test split.
# Both transformed matrices are converted to dense arrays via .toarray().
X_train_processed = preprocessor.fit_transform(features_train)
X_train_processed = X_train_processed.toarray()

X_test_processed = preprocessor.transform(features_test)
X_test_processed = X_test_processed.toarray()

# Train the model; "sex" and "race" are passed together as the sensitive features.
fair_model_rf.fit(X_train_processed, target_train, sensitive_features=features_train[["sex", "race"]])

# Predictions. ExponentiatedGradient.predict is randomized, so a fixed random_state is passed
# to keep the evaluation tables further below reproducible across re-runs.
target_pred_fair_rf = fair_model_rf.predict(X_test_processed, random_state=42)

In [None]:
# Logistic regression trained under a fairness constraint.
# DemographicParity is used here; EqualizedOdds() would be an alternative.
constraint = DemographicParity()
fair_model_log = ExponentiatedGradient(LogisticRegression(max_iter=1000, random_state=42), constraints=constraint)

# Preprocessing is not repeated here: X_train_processed / X_test_processed must already exist
# from the cell that trains the fair random forest.

# Train the model; "sex" and "race" are passed together as the sensitive features.
fair_model_log.fit(X_train_processed, target_train, sensitive_features=features_train[["sex", "race"]])

# Predictions. ExponentiatedGradient.predict is randomized, so a fixed random_state is passed
# to keep the evaluation tables further below reproducible across re-runs.
target_pred_fair_log = fair_model_log.predict(X_test_processed, random_state=42)

In [None]:
# Test-set predictions to compare, and the row label used for each of them.
pred_target_list_fair = [target_pred_log_base, target_pred_fair_log, target_pred_rf, target_pred_rf_ba, target_pred_fair_rf]
model_name = ["LogisticRegression", "Faire LogiticRegression", "RandomForest", "RandomForestBaysian", "Fairer RandomForest"]

# Boolean test-set masks per subgroup, keyed by the result column they feed.
# They do not depend on the model, so they are built once outside the loop.
subgroup_masks = {
    "ROC-AUC-Male": features_test["sex"] == 1,
    "ROC-AUC-Female": features_test["sex"] == 0,
    "ROC-AUC-White": features_test["race"] == "White",
    "ROC-AUC-Non-White": features_test["race"] != "White",
}

# Fairness metrics keyed by the column prefix; each is evaluated for "sex" and then "race".
fairness_metrics = {
    "EOD": equalized_odds_difference,
    "DI": demographic_parity_ratio,
    "DPD": demographic_parity_difference,
}

mod_qual = []
for predicted in pred_target_list_fair:
    # Overall quality, reported in percent.
    # Note: roc_auc_score receives the predictions in the list above, not probability scores.
    row = {
        "precision": precision_score(target_test, predicted) * 100,
        "recall": recall_score(target_test, predicted) * 100,
        "f1": f1_score(target_test, predicted) * 100,
        "ROC-AUC": roc_auc_score(target_test, predicted) * 100,
    }

    # Same score restricted to each subgroup, also in percent.
    for column, in_group in subgroup_masks.items():
        row[column] = roc_auc_score(target_test[in_group], predicted[in_group]) * 100

    # Group-fairness metrics on the full test set (left unscaled).
    for prefix, metric in fairness_metrics.items():
        for attribute in ("sex", "race"):
            row[f"{prefix} '{attribute}'"] = metric(
                y_true=target_test,
                y_pred=predicted,
                sensitive_features=features_test[attribute],
            )

    mod_qual.append(row)

# One row per model, one column per metric.
df_mod_qual = pd.DataFrame(mod_qual, index=model_name)
df_mod_qual

### Bewertung nach Fairness Constraints
1. Fairness-Verbesserung
- Logistische Regression
    - EOD-Werte gesunken für "race", aber leicht gestiegen für "sex".
    - Disparate Impact (DI) hat sich stark verbessert → von 0.277 auf 0.838 (für "sex") und 0.375 auf 0.655 (für "race").
    - DPD-Werte sind stark gesunken, was bedeutet, dass das Modell jetzt deutlich fairere Vorhersageverteilungen macht.
    FAZIT: Starke Verbesserung der Fairness, aber auf Kosten der Performance.
- RandomForest
    - EOD-Werte haben sich verbessert (besonders für "race", von 0.464 auf 0.363).
    - Disparate Impact (DI) ist fast perfekt für "sex" (0.9487) und stark verbessert für "race" (0.7839).
    - DPD-Werte sind extrem gesunken, also kaum noch Unterschiede in den positiven Vorhersagen.
    FAZIT: Beste Fairness-Verbesserung mit minimalem Performance-Verlust!

2. Performance-Verlust durch Fairness-Optimierung
- Logistic Regression:
    - ROC-AUC ist stark gesunken von 82.51 auf 71.98 → Bedeutender Verlust an Modellqualität.
    - Precision ist gestiegen, aber Recall ist stark gefallen → Das Modell ist nun sehr konservativ mit positiven Vorhersagen.
    FAZIT: Starke Fairness-Verbesserung, aber das Modell ist insgesamt schwächer.

- Random Forest:
    - ROC-AUC hat sich nur leicht verschlechtert (77.19 → 73.91).
    - Precision hat stark gelitten (73.16 → 55.32), aber Recall ist leicht gestiegen.
    FAZIT: Besserer Trade-off zwischen Fairness und Leistung als Logistic Regression.

Fairer Random Forest ist der beste Kompromiss zwischen Fairness und Modellqualität.

In [None]:
# Check whether post-processing changes fairness any further.

# Threshold optimizer with a demographic-parity constraint, wrapped around the
# already trained fair random forest.
postprocess_model = ThresholdOptimizer(
    estimator=fair_model_rf,
    constraints="demographic_parity",  # alternative: "equalized_odds"
    prefit=True  # the estimator is already trained
)

# Fit the post-processor on the training split.
postprocess_model.fit(X_train_processed, target_train, sensitive_features=features_train[["sex", "race"]])

# Fair predictions on the test split. ThresholdOptimizer.predict is randomized, so a fixed
# random_state is passed for reproducibility.
# NOTE(review): the wrapped ExponentiatedGradient estimator is presumably still queried without
# a fixed seed internally, so small run-to-run differences may remain — confirm.
target_pred_fair_rf_post = postprocess_model.predict(
    X_test_processed,
    sensitive_features=features_test[["sex", "race"]],
    random_state=42,
)



In [None]:
# Threshold optimizer with an equalized-odds constraint, wrapped around the
# already trained fair random forest.
postprocess_model_eq = ThresholdOptimizer(
    estimator=fair_model_rf,
    constraints="equalized_odds",
    prefit=True  # the estimator is already trained
)

# Fit the post-processor on the training split.
postprocess_model_eq.fit(X_train_processed, target_train, sensitive_features=features_train[["sex", "race"]])

# Fair predictions on the test split.
# Fix: predict with postprocess_model_eq. The previous code called postprocess_model
# (the demographic-parity optimizer), so the equalized-odds model was fitted but never evaluated.
# ThresholdOptimizer.predict is randomized, so a fixed random_state is passed for reproducibility.
target_pred_fair_rf_post_eq = postprocess_model_eq.predict(
    X_test_processed,
    sensitive_features=features_test[["sex", "race"]],
    random_state=42,
)



In [None]:
# Test-set predictions to compare, and the row label used for each of them.
pred_target_list_fair = [target_pred_rf, target_pred_rf_ba, target_pred_fair_rf, target_pred_fair_rf_post, target_pred_fair_rf_post_eq]
model_name = ["RandomForest", "RandomForestBaysian", "Fairer RandomForest", "Fairer RandomForest Postprocessing", "Fairer RandomForest Postprocessing_Eq"]

# Boolean test-set masks per subgroup, keyed by the result column they feed.
# They do not depend on the model, so they are built once outside the loop.
subgroup_masks = {
    "ROC-AUC-Male": features_test["sex"] == 1,
    "ROC-AUC-Female": features_test["sex"] == 0,
    "ROC-AUC-White": features_test["race"] == "White",
    "ROC-AUC-Non-White": features_test["race"] != "White",
}

# Fairness metrics keyed by the column prefix; each is evaluated for "sex" and then "race".
fairness_metrics = {
    "EOD": equalized_odds_difference,
    "DI": demographic_parity_ratio,
    "DPD": demographic_parity_difference,
}

mod_qual = []
for predicted in pred_target_list_fair:
    # Overall quality, reported in percent.
    # Note: roc_auc_score receives the predictions in the list above, not probability scores.
    row = {
        "precision": precision_score(target_test, predicted) * 100,
        "recall": recall_score(target_test, predicted) * 100,
        "f1": f1_score(target_test, predicted) * 100,
        "ROC-AUC": roc_auc_score(target_test, predicted) * 100,
    }

    # Same score restricted to each subgroup, also in percent.
    for column, in_group in subgroup_masks.items():
        row[column] = roc_auc_score(target_test[in_group], predicted[in_group]) * 100

    # Group-fairness metrics on the full test set (left unscaled).
    for prefix, metric in fairness_metrics.items():
        for attribute in ("sex", "race"):
            row[f"{prefix} '{attribute}'"] = metric(
                y_true=target_test,
                y_pred=predicted,
                sensitive_features=features_test[attribute],
            )

    mod_qual.append(row)

# One row per model, one column per metric.
df_mod_qual = pd.DataFrame(mod_qual, index=model_name)
df_mod_qual

### Bewertung
Equalized Odds ist die beste Fairness-Korrektur für "race" (EOD stark verbessert von 0.7143 auf 0.2230).
Hinsichtlich Performance nur geringe Einbuße durch Postprocessing!

ToDos:
Visualisierung der Metriken
Aufräumen

Vergleich mit LangChain

Projekt-Struktur weiter aufbauen

optional: neuronales Netz
Claude Code?