### Demo

In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, selection_rate, demographic_parity_difference
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from fairlearn.postprocessing import ThresholdOptimizer
from imblearn.over_sampling import SMOTE

#### Load dataset

In [6]:
col_names = [
    'age','workclass','fnlwgt','education','education-num',
    'marital-status','occupation','relationship','race','sex',
    'capital-gain','capital-loss','hours-per-week','native-country','income'
]
df = pd.read_csv("adult.csv", names=col_names, na_values=" ?", skipinitialspace=True)
df = df.dropna()

In [7]:
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


#### Train Test Split

In [8]:
X = pd.get_dummies(df.drop('income', axis=1), drop_first=True)
y = (df['income'] == '>50K').astype(int)
sensitive_feature = df['sex']

X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
    X, y, sensitive_feature, test_size=0.3, random_state=42
)

#### Metrics Evaluation

In [15]:
def evaluate_model(model, X_t, y_t, s_t):
    if isinstance(model, ThresholdOptimizer):
        y_pred = model.predict(X_t, sensitive_features=s_t)
    else:
        y_pred = model.predict(X_t)

    mf = MetricFrame(
        metrics={"accuracy": accuracy_score, "selection_rate": selection_rate},
        y_true=y_t,
        y_pred=y_pred,
        sensitive_features=s_t
    )
    print(mf.by_group)
    print("Demographic Parity Difference:",
          demographic_parity_difference(y_t, y_pred, sensitive_features=s_t))
    print("-" * 50)


In [16]:
import warnings
warnings.filterwarnings('ignore')

#### BaseLine

In [17]:
print("\n=== BASELINE (No Mitigation) ===")
baseline_model = LogisticRegression(max_iter=1000)
baseline_model.fit(X_train, y_train)
evaluate_model(baseline_model, X_test, y_test, s_test)


=== BASELINE (No Mitigation) ===
        accuracy  selection_rate
sex                             
Female  0.918777        0.079988
Male    0.808452        0.238095
Demographic Parity Difference: 0.15810759139974703
--------------------------------------------------


#### Reweighting

In [18]:


print("\n=== MITIGATION 1: Reweighting ===")
constraint = DemographicParity()
reweight_model = ExponentiatedGradient(
    estimator=LogisticRegression(max_iter=1000),
    constraints=constraint
)

reweight_model.fit(X_train, y_train, sensitive_features=s_train)
evaluate_model(reweight_model, X_test, y_test, s_test)


=== MITIGATION 1: Reweighting ===
        accuracy  selection_rate
sex                             
Female  0.877085        0.168623
Male    0.811208        0.175318
Demographic Parity Difference: 0.006695108963390151
--------------------------------------------------


#### Oversampling

In [19]:
print("\n=== MITIGATION 2: Oversampling ===")
X_res, y_res = SMOTE().fit_resample(X_train, y_train)
oversample_model = LogisticRegression(max_iter=1000)
oversample_model.fit(X_res, y_res)
evaluate_model(oversample_model, X_test, y_test, s_test)


=== MITIGATION 2: Oversampling ===
        accuracy  selection_rate
sex                             
Female  0.876158        0.144225
Male    0.782116        0.304548
Demographic Parity Difference: 0.1603227123476017
--------------------------------------------------


#### Thershold Optimization

In [20]:
print("\n=== MITIGATION 3: Threshold Optimization ===")
threshold_model = ThresholdOptimizer(
    estimator=LogisticRegression(max_iter=1000),
    constraints="demographic_parity",
    prefit=False,
)

threshold_model.fit(X_train, y_train, sensitive_features=s_train)
evaluate_model(threshold_model, X_test, y_test, s_test)


=== MITIGATION 3: Threshold Optimization ===
        accuracy  selection_rate
sex                             
Female  0.866893        0.181285
Male    0.807840        0.175318
Demographic Parity Difference: 0.005967028158289894
--------------------------------------------------
