In [None]:
# Measurement of Demographic Parity in Machine Learning Predictions

In [None]:
!pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.13.0-py3-none-any.whl.metadata (7.3 kB)
Collecting scipy<1.16.0,>=1.9.3 (from fairlearn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading fairlearn-0.13.0-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy, fairlearn
  Attempting uninstall: scipy
    Found existing installation: scipy 1.16.3
    Uninstalling scipy-1.16.3:
      Successfully uninstalled scipy-1.16.3
Successfully installed fairlearn-0.13.0 sc

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, demographic_parity_ratio, demographic_parity_difference, selection_rate
import seaborn as sns

In [None]:
# Fetch seaborn's bundled 'titanic' example dataset as a DataFrame.
df = sns.load_dataset("titanic")

# Preview the first five rows to check the columns and their types.
df.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [None]:
# Step 2: Data preprocessing

# Columns removed before modelling. Per the df.head() preview, 'alive' is the
# target spelled out as yes/no, so keeping it would leak the label.
unused_columns = ['class', 'who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone']

# Build the cleaned frame as a new object instead of mutating in place.
# errors='ignore' lets this cell be re-run after the columns are already gone
# (the in-place drop raised KeyError on a second run).
# Rows with any remaining missing value are then discarded.
df = df.drop(columns=unused_columns, errors='ignore').dropna()

# Label-encode the categorical columns. fit_transform refits `le` on every
# call, so afterwards `le` only holds the 'embarked' mapping.
# LabelEncoder numbers classes in sorted order, i.e. female -> 0, male -> 1.
le = LabelEncoder()
df = df.assign(
    sex=le.fit_transform(df['sex']),
    embarked=le.fit_transform(df['embarked']),
)

# Creating feature and target variables
X = df.drop(columns=['survived'])
y = df['survived']

# 70/30 train/test split; the fixed seed keeps the split reproducible.
# y_train / y_test keep the original row labels of `df` as their index.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split: refitting on the test data
# would scale the two splits with different statistics and leak test
# information into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Data preprocessing done successfully")

Data preprocessing done successfully


In [None]:
# Train the Logistic Regression model

# Initialize the model with scikit-learn's default hyperparameters.
# (The previous line was cut off mid-identifier and raised NameError.)
model = LogisticRegression()

# Fit on the standardised training features
model.fit(X_train, y_train)

# Predict survival (0/1) for the held-out test features
y_pred = model.predict(X_test)

# Overall accuracy on the test split
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)



Accuracy:  0.7990654205607477


In [None]:
# Measure Demographic Parity

# Sensitive feature: the (label-encoded) 'sex' value of every test row,
# looked up through the row labels that y_test kept from df.
sensitive_feature = df['sex'].loc[y_test.index]

# Arguments shared by every fairness computation below.
fairness_inputs = dict(
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sensitive_feature,
)

# Per-group breakdown of accuracy and selection rate.
metric_frame = MetricFrame(
    metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
    **fairness_inputs,
)

# Aggregate demographic-parity scores across the groups of the sensitive feature.
dp_differ = demographic_parity_difference(**fairness_inputs)
dp_ratio = demographic_parity_ratio(**fairness_inputs)

print("Demographic Parity Difference: ", dp_differ)
print("Demographic Parity Ratio: ", dp_ratio)

print(metric_frame.by_group)


Demographic Parity Difference:  0.7825386292539577
Demographic Parity Ratio:  0.10066456040963069
     accuracy  selection_rate
sex                          
0    0.857143        0.870130
1    0.766423        0.087591
