In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

# Load the dataset from local file
file_path = "bank-additional/bank-additional-full.csv"  # Update the file path with your local file location
df = pd.read_csv(file_path, delimiter=";")

# Preprocess the data
# Convert categorical variables into dummy/indicator variables
df = pd.get_dummies(df, columns=["job", "marital", "education", "default", "housing", "loan", "contact", "month", "day_of_week", "poutcome"])

# Convert 'yes' and 'no' values to 1 and 0
df.replace({'yes': 1, 'no': 0}, inplace=True)

# Split data into features and target variable
X = df.drop(columns=["y"])
y = df["y"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest classifier
clf = RandomForestClassifier()
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Convert pandas DataFrame to AIF360 BinaryLabelDataset
train_dataset = BinaryLabelDataset(df=pd.concat([X_train, y_train], axis=1), label_names=['y'], protected_attribute_names=['job_blue-collar'],
                                   favorable_label=1, unfavorable_label=0)
test_dataset = BinaryLabelDataset(df=pd.concat([X_test, y_test], axis=1), label_names=['y'], protected_attribute_names=['job_blue-collar'],
                                  favorable_label=1, unfavorable_label=0)

# Train a Random Forest classifier on the AIF360 dataset
clf_aif360 = RandomForestClassifier()
clf_aif360.fit(train_dataset.features, train_dataset.labels.ravel())

# Predict on the AIF360 test set
y_pred_aif360 = clf_aif360.predict(test_dataset.features)

# Evaluate the AIF360 model
accuracy_aif360 = accuracy_score(test_dataset.labels.ravel(), y_pred_aif360)
print("Fairness-aware Accuracy:", accuracy_aif360)

# Calculate fairness metrics
metric = BinaryLabelDatasetMetric(test_dataset, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
print("Disparate Impact:", metric.disparate_impact())
print("Mean Difference:", metric.mean_difference())


  df.replace({'yes': 1, 'no': 0}, inplace=True)


Accuracy: 0.9075018208302986
Fairness-aware Accuracy: 0.9101723719349356
Disparate Impact: 0.5111180796504041
Mean Difference: -0.062185354028587866
