In [2]:
%pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 240.0/240.0 kB 3.9 MB/s eta 0:00:00
Installing collected packages: fairlearn
Successfully installed fairlearn-0.12.0


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import MetricFrame, selection_rate, demographic_parity_difference, demographic_parity_ratio
import zipfile
import os

# ---------------------------------------------------------------------------
# Load and preprocess
# ---------------------------------------------------------------------------
# Location of the zipped dataset and of the member once it is unpacked.
url = '/content/adult.zip'
extracted_file_path = '/content/adult.data'

# Unpack only 'adult.data' from the archive into /content/.
with zipfile.ZipFile(url, 'r') as zip_ref:
    zip_ref.extract('adult.data', '/content/')

# The file has no header row, so the column names are supplied here and ' ?'
# is read as missing. Three numeric features plus sex and income are kept;
# the text columns are then one-hot encoded with the first level dropped,
# producing the 'sex_ Male' and 'income_ >50K' indicators used below (the
# leading space is part of those column names). Finally a female indicator
# is derived as the complement of the male one.
data = (
    pd.read_csv(
        extracted_file_path,
        header=None,
        names=[
            'age', 'workclass', 'fnlwgt', 'education', 'education.num',
            'marital.status', 'occupation', 'relationship', 'race', 'sex',
            'capital.gain', 'capital.loss', 'hours.per.week',
            'native.country', 'income',
        ],
        na_values=' ?',
    )
    .loc[:, ['age', 'education.num', 'hours.per.week', 'sex', 'income']]
    .pipe(pd.get_dummies, drop_first=True)
    .assign(sex_Female=lambda frame: 1 - frame['sex_ Male'])
)

# Target is the >50K indicator; the sensitive attribute is the female flag.
# Features are every column except the target and the male indicator.
# NOTE(review): 'sex_Female' remains in X, so the classifier is trained on
# the sensitive attribute itself — confirm this is intended.
y = data['income_ >50K']
sensitive = data['sex_Female']
X = data.drop(['income_ >50K', 'sex_ Male'], axis='columns')

# ---------------------------------------------------------------------------
# Train-test split
# ---------------------------------------------------------------------------
# 70/30 split, stratified on the target, with a fixed seed; the sensitive
# attribute is split alongside so it stays row-aligned with X and y.
X_train, X_test, y_train, y_test, sens_train, sens_test = train_test_split(
    X,
    y,
    sensitive,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# fit() returns the estimator, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

# ---------------------------------------------------------------------------
# Fairness metrics
# ---------------------------------------------------------------------------
# Selection rate on the test predictions, broken down by the sensitive group.
metric_frame = MetricFrame(
    metrics=selection_rate,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sens_test,
)
print("Selection Rates by Gender (Female=1, Male=0):\n", metric_frame.by_group)

# Demographic parity summaries of the test predictions across the two groups.
dp_diff = demographic_parity_difference(y_test, y_pred, sensitive_features=sens_test)
dp_ratio = demographic_parity_ratio(y_test, y_pred, sensitive_features=sens_test)
print(f"Demographic Parity Difference: {dp_diff:.3f}")
print(f"Demographic Parity Ratio: {dp_ratio:.3f}")

Selection Rates by Gender (Female=1, Male=0):
 sex_Female
0    0.200913
1    0.015005
Name: selection_rate, dtype: float64
Demographic Parity Difference: 0.186
Demographic Parity Ratio: 0.075
