In [11]:
# Install required packages
!pip install pandas scikit-learn aif360 --quiet

# Some dependencies for AIF360
!apt-get install -y -qq libatlas-base-dev

# Verify installations
import pandas as pd
import sklearn
import aif360

print("Pandas version:", pd.__version__)
print("Scikit-learn version:", sklearn.__version__)
print("AIF360 version:", aif360.__version__)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/259.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m256.0/259.7 kB[0m [31m8.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hPandas version: 2.2.2
Scikit-learn version: 1.6.1
AIF360 version: 0.6.1


In [19]:
# Install dependencies if you haven't
# !pip install pandas scikit-learn aif360

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

# 1. Load dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
           'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, names=columns)



In [20]:
# 2. Define features and target
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# 3. Create a sensitive attribute for fairness checking
# Let's assume "Age >= 30" as privileged group, "Age < 30" as unprivileged
X['Age_group'] = (X['Age'] >= 30).astype(int)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.drop('Age_group', axis=1))
X_test_scaled = scaler.transform(X_test.drop('Age_group', axis=1))



In [21]:

# 4. Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)



In [22]:
# 5. Evaluate accuracy
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.4f}")

# 6. Prepare data for AIF360
# Combine features with sensitive attribute for AIF360
X_train_aif = pd.DataFrame(X_train_scaled, columns=X_train.drop('Age_group', axis=1).columns)
X_train_aif['Age_group'] = X_train['Age_group'].values

X_test_aif = pd.DataFrame(X_test_scaled, columns=X_test.drop('Age_group', axis=1).columns)
X_test_aif['Age_group'] = X_test['Age_group'].values

train_bld = BinaryLabelDataset(
    df=pd.concat([X_train_aif, y_train.reset_index(drop=True)], axis=1),
    label_names=['Outcome'],
    protected_attribute_names=['Age_group']
)

test_bld = BinaryLabelDataset(
    df=pd.concat([X_test_aif, y_test.reset_index(drop=True)], axis=1),
    label_names=['Outcome'],
    protected_attribute_names=['Age_group']
)

# 7. Fairness metrics
metric_train = BinaryLabelDatasetMetric(train_bld, unprivileged_groups=[{'Age_group': 0}],
                                        privileged_groups=[{'Age_group': 1}])
print("Train dataset mean outcome for unprivileged group:", metric_train.mean_outcome_unprivileged())
print("Train dataset mean outcome for privileged group:", metric_train.mean_outcome_privileged())
print("Disparate impact (train):", metric_train.disparate_impact())

# Evaluate model fairness on test set
test_bld_pred = test_bld.copy()
test_bld_pred.labels = y_pred.reshape(-1, 1)

classified_metric = ClassificationMetric(test_bld, test_bld_pred,
                                         unprivileged_groups=[{'Age_group': 0}],
                                         privileged_groups=[{'Age_group': 1}])
print("Accuracy:", acc)
print("Statistical parity difference:", classified_metric.statistical_parity_difference())
print("Equal opportunity difference:", classified_metric.equal_opportunity_difference())
print("Average odds difference:", classified_metric.average_odds_difference())

Accuracy: 0.7143


In [27]:
# Fairness metrics using current AIF360 API
metric_train = BinaryLabelDatasetMetric(train_bld,
                                        unprivileged_groups=[{'Age_group': 0}],
                                        privileged_groups=[{'Age_group': 1}])

# Disparate impact
print("Disparate impact (train):", metric_train.disparate_impact)

# Mean outcomes
unpriv_mean = metric_train.base_rate(privileged=False)  # unprivileged group mean
priv_mean = metric_train.base_rate(privileged=True)     # privileged group mean
print("Train dataset mean outcome for unprivileged group:", unpriv_mean)
print("Train dataset mean outcome for privileged group:", priv_mean)


Disparate impact (train): <bound method BinaryLabelDatasetMetric.disparate_impact of <aif360.metrics.binary_label_dataset_metric.BinaryLabelDatasetMetric object at 0x7f3dc41bacf0>>
Train dataset mean outcome for unprivileged group: 0.21864951768488747
Train dataset mean outcome for privileged group: 0.48184818481848185


In [28]:

# Apply Reweighing for bias mitigation
from aif360.algorithms.preprocessing import Reweighing

# Define privileged and unprivileged groups
privileged_groups = [{'Age_group': 1}]
unprivileged_groups = [{'Age_group': 0}]

# Initialize and fit Reweighing
rw = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
rw.fit(train_bld)

# Transform training dataset with instance weights
train_bld_transf = rw.transform(train_bld)

# Retrain logistic regression using instance weights
model_rw = LogisticRegression()
model_rw.fit(X_train_scaled, y_train, sample_weight=train_bld_transf.instance_weights)

# Predict and evaluate after mitigation
y_pred_rw = model_rw.predict(X_test_scaled)
print("\nAccuracy after mitigation:", accuracy_score(y_test, y_pred_rw))

# Fairness metrics after mitigation
test_bld_pred_rw = test_bld.copy()
test_bld_pred_rw.labels = y_pred_rw.reshape(-1, 1)

classified_metric_rw = ClassificationMetric(test_bld, test_bld_pred_rw,
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)

print("Statistical Parity Difference after mitigation:", classified_metric_rw.statistical_parity_difference())
print("Equal Opportunity Difference after mitigation:", classified_metric_rw.equal_opportunity_difference())
print("Average Odds Difference after mitigation:", classified_metric_rw.average_odds_difference())



Accuracy after mitigation: 0.7077922077922078
Statistical Parity Difference after mitigation: -0.17681159420289855
Equal Opportunity Difference after mitigation: -0.0625
Average Odds Difference after mitigation: -0.07168945769050958
