# In [None]:
import aif360.datasets.german_credit_dataset as gcr
import alibi.explainers # ALIBI implementation
import gif # Google Impact Framework implementation
import numpy as np
import pandas as pd
from aif360.algorithms.preprocessing import Reweighing, DisparateImpactRemover
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
# NOTE(review): the stock AIF360 German credit loader exposes 'sex' and 'age'
# as protected attributes; the 'Gender'/'Race' names and the privileged class
# labels below presumably rely on the customised loader imported as `gcr` --
# confirm against that module.
data_orig = gcr.GermanCreditDataset(
    protected_attribute_names=['Gender', 'Race'],
    privileged_classes=[['Male'], ['White']],
)
# Split at the 70% mark into train/test. The shuffle is seeded so the
# partition (and every metric derived from it) is reproducible from run to
# run, in line with the fixed random_state used for the classifiers.
data_orig_train, data_orig_test = data_orig.split([0.7], shuffle=True, seed=42)

# Replace missing feature values with per-column means. The imputer learns
# the means from the training features only and is then applied, unchanged,
# to both the training and the test features.
imputer = SimpleImputer(strategy='mean')
imputer.fit(data_orig_train.features)
data_orig_train.features, data_orig_test.features = (
    imputer.transform(data_orig_train.features),
    imputer.transform(data_orig_test.features),
)

# Baseline classifier (no bias mitigation applied): a seeded 100-tree random
# forest fitted on the training features. The label column is flattened to
# 1-D before fitting.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    data_orig_train.features,
    data_orig_train.labels.ravel(),
)

# Statistical analysis with AIF360
# Group definitions shared by the metrics below; each dict is a conjunction
# of protected-attribute values.
privileged_groups = [{'Gender': 1, 'Race': 1}] # Assuming 1 represents privileged
unprivileged_groups = [{'Gender': 0, 'Race': 0}] # Assuming 0 represents unprivileged

# Create BinaryLabelDataset for AIF360 metrics
# The copy keeps the test features and protected attributes; only the labels
# are replaced by the baseline model's predictions. AIF360 stores labels as an
# (n, 1) column while predict() returns a flat (n,) array, so the predictions
# are reshaped before being assigned.
data_pred = data_orig_test.copy()
data_pred.labels = model.predict(data_orig_test.features).reshape(-1, 1)

# One metric object per dataset: ground-truth train, ground-truth test, and
# the test set relabelled with predictions.
metric_orig_train = BinaryLabelDatasetMetric(data_orig_train, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
metric_orig_test = BinaryLabelDatasetMetric(data_orig_test, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
metric_pred = BinaryLabelDatasetMetric(data_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

print("Original training set: Disparate Impact =", metric_orig_train.disparate_impact())
print("Original test set: Disparate Impact =", metric_orig_test.disparate_impact())
print("Predicted test set: Disparate Impact =", metric_pred.disparate_impact())

# Bias mitigation with AIF360
# Reweighing is fitted on the training set and returns a transformed copy
# whose per-instance weights are meant to balance the (group, label) cells.
rw = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
data_reweighted_train = rw.fit_transform(data_orig_train)

# Disparate impact removal at full repair level returns a training set with
# repaired feature values.
# NOTE(review): no sensitive_attribute is passed, so the remover falls back to
# its own default choice of protected attribute -- confirm that is intended.
di_removal = DisparateImpactRemover(repair_level=1.0)
data_repaired_train = di_removal.fit_transform(data_orig_train)

# Same hyper-parameters and seed as the baseline so differences in the
# results come from the mitigation step, not from the learner.
model_reweighted = RandomForestClassifier(n_estimators=100, random_state=42)
model_repaired = RandomForestClassifier(n_estimators=100, random_state=42)

# The reweighing result only takes effect through the instance weights, so
# they must be handed to fit(); without sample_weight this model would be
# trained exactly like the unmitigated baseline.
model_reweighted.fit(
    data_reweighted_train.features,
    data_reweighted_train.labels.ravel(),
    sample_weight=data_reweighted_train.instance_weights,
)
model_repaired.fit(data_repaired_train.features, data_repaired_train.labels.ravel())



# Use Google Impact Framework for broader ethical analysis
# NOTE(review): `gif.EthicalAnalyzer` and its `analyze(features, labels)` call
# could not be matched to any documented public API -- confirm which package
# provides `gif` here before relying on this step.
ethical_analyzer = gif.EthicalAnalyzer()
# Runs the analyzer on the test features and ground-truth test labels; the
# result is not used again in the visible part of this file.
contextual_insights = ethical_analyzer.analyze(data_orig_test.features, data_orig_test.labels)

# Comprehensive ethical analysis with AIF360
# Equal opportunity difference, average odds difference and the Theil index
# compare ground-truth labels with predictions, so they are computed with a
# ClassificationMetric over (test set, predicted test set). A
# BinaryLabelDatasetMetric only sees a single set of labels and does not
# provide those methods.
classified_test = data_pred.copy()
# Predictions must be an (n, 1) label column, like the ground-truth labels.
classified_test.labels = np.asarray(data_pred.labels).reshape(-1, 1)


def _classification_metric(unprivileged, privileged):
    """Build a ClassificationMetric for the test set and its predictions.

    Args:
        unprivileged: List of dicts mapping protected-attribute names to the
            values that define the unprivileged group.
        privileged: List of dicts mapping protected-attribute names to the
            values that define the privileged group.

    Returns:
        A ClassificationMetric comparing ``data_orig_test`` (ground truth)
        with ``classified_test`` (model predictions).
    """
    return ClassificationMetric(
        data_orig_test,
        classified_test,
        unprivileged_groups=unprivileged,
        privileged_groups=privileged,
    )


def _print_subgroup_report(metric):
    """Print the three group-difference metrics followed by a separator.

    Statistical parity difference is taken on the ground-truth labels of the
    metric's dataset; the other two compare ground truth with predictions.
    """
    print("Statistical Parity Difference:", metric.statistical_parity_difference())
    print("Equal Opportunity Difference:", metric.equal_opportunity_difference())
    print("Average Odds Difference:", metric.average_odds_difference())
    print("----------------------------------")


classification_metric_test = _classification_metric(unprivileged_groups, privileged_groups)

print("Group Fairness Metrics:")
print("----------------------------------")
print("Statistical Parity Difference:", metric_orig_test.statistical_parity_difference())
print("Equal Opportunity Difference:", classification_metric_test.equal_opportunity_difference())
print("Average Odds Difference:", classification_metric_test.average_odds_difference())
print("Theil Index:", classification_metric_test.theil_index())
print("----------------------------------")

# Subgroup Analysis for Intersectional Biases
subgroup_metric = _classification_metric(
    [{'Gender': 0, 'Race': 0}],
    [{'Gender': 1, 'Race': 1}],
)
print("Subgroup Analysis (Gender=0, Race=0):")
_print_subgroup_report(subgroup_metric)


# Expand to include more diverse subgroups
# Each (gender, race) combination is treated as the unprivileged group and is
# compared against the group that differs from it on both attributes.
for gender in [0, 1]:
    for race in [0, 1]:
        subgroup_metric = _classification_metric(
            [{'Gender': gender, 'Race': race}],
            [{'Gender': 1 - gender, 'Race': 1 - race}],
        )
        print(f"Subgroup Analysis (Gender={gender}, Race={race}):")
        _print_subgroup_report(subgroup_metric)


# Use ALIBI for model explainability
# alibi exposes tree-based SHAP through `TreeShap`; `alibi.explainers` has no
# `TreeExplainer`, and the explainer must be fitted before it can explain.
# NOTE(review): TreeShap needs alibi's optional `shap` dependency -- confirm
# it is installed in the target environment.
explainer = alibi.explainers.TreeShap(model_repaired, model_output='raw')
# No background data is supplied, so the explainer is fitted without a
# reference dataset.
explainer.fit()
# NOTE(review): model_repaired was trained on repaired features while the
# test features passed here are unrepaired -- confirm this is intended.
explanation = explainer.explain(data_orig_test.features)
shap_values = explanation.shap_values