In [1]:
#!pip install mlflow fairlearn
#!pip install tensorflow

In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
import mlflow
import mlflow.sklearn
from fairlearn.metrics import MetricFrame
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
import requests
import io

In [3]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]
response = requests.get(url)
df = pd.read_csv(io.StringIO(response.text), names=column_names, na_values=" ?", skipinitialspace=True)


In [4]:
df['race'].value_counts(normalize=True)

race
White                 0.854274
Black                 0.095943
Asian-Pac-Islander    0.031909
Amer-Indian-Eskimo    0.009551
Other                 0.008323
Name: proportion, dtype: float64

In [5]:
df['sex'].value_counts(normalize=True)

sex
Male      0.669205
Female    0.330795
Name: proportion, dtype: float64

In [6]:
data= df.dropna() 
X = data.drop("income", axis=1)
y = (data["income"] == ">50K").astype(int) 


In [7]:
categorical_cols = X.select_dtypes(include=['object']).columns
le = LabelEncoder()
for col in categorical_cols:
    X[col] = le.fit_transform(X[col])

In [8]:
sensitive_features = X[["sex", "race"]]  # Sex: 0=Female, 1=Male; Race: multiple categories
X = X.drop(["sex", "race"], axis=1)  # Remove sensitive features from main feature set


In [9]:
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# MLflow experiment setup
mlflow.set_experiment("Adult_Income_Fairness_Classification")

def train_and_evaluate():
    fold = 0
    
    for train_idx, test_idx in kf.split(X):
        fold += 1
        print(f"\nFold {fold}")
        
        # Split data
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        sensitive_train = sensitive_features.iloc[train_idx]
        sensitive_test = sensitive_features.iloc[test_idx]
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        

        with mlflow.start_run(run_name=f"fold_{fold}"):
    
            base_model = LogisticRegression(random_state=42, max_iter=1000)
            base_model.fit(X_train_scaled, y_train)
            
         
            fair_constraint = DemographicParity()
            fair_model = ExponentiatedGradient(
                LogisticRegression(random_state=42, max_iter=1000),
                constraints=fair_constraint
            )
            fair_model.fit(X_train_scaled, y_train, sensitive_features=sensitive_train["sex"])
            
      
            base_pred = base_model.predict(X_test_scaled)
            fair_pred = fair_model.predict(X_test_scaled)
            
            base_accuracy = accuracy_score(y_test, base_pred)
            base_precision = precision_score(y_test, base_pred)
            base_recall = recall_score(y_test, base_pred)
            
            fair_accuracy = accuracy_score(y_test, fair_pred)
            fair_precision = precision_score(y_test, fair_pred)
            fair_recall = recall_score(y_test, fair_pred)
            
   
            mf_base_sex = MetricFrame(
                metrics={"accuracy": accuracy_score},
                y_true=y_test,
                y_pred=base_pred,
                sensitive_features=sensitive_test["sex"]
            )
            mf_fair_sex = MetricFrame(
                metrics={"accuracy": accuracy_score},
                y_true=y_test,
                y_pred=fair_pred,
                sensitive_features=sensitive_test["sex"]
            )
            
       
            mf_base_race = MetricFrame(
                metrics={"accuracy": accuracy_score},
                y_true=y_test,
                y_pred=base_pred,
                sensitive_features=sensitive_test["race"]
            )
            mf_fair_race = MetricFrame(
                metrics={"accuracy": accuracy_score},
                y_true=y_test,
                y_pred=fair_pred,
                sensitive_features=sensitive_test["race"]
            )
            
     
            mlflow.log_param("fold", fold)
            mlflow.log_param("model_type", "logistic_regression")
            
        
            mlflow.log_metric("base_accuracy", base_accuracy)
            mlflow.log_metric("base_precision", base_precision)
            mlflow.log_metric("base_recall", base_recall)
            

            mlflow.log_metric("fair_accuracy", fair_accuracy)
            mlflow.log_metric("fair_precision", fair_precision)
            mlflow.log_metric("fair_recall", fair_recall)
            
          
            base_sex_diff = mf_base_sex.by_group["accuracy"].max() - mf_base_sex.by_group["accuracy"].min()
            fair_sex_diff = mf_fair_sex.by_group["accuracy"].max() - mf_fair_sex.by_group["accuracy"].min()
            base_race_diff = mf_base_race.by_group["accuracy"].max() - mf_base_race.by_group["accuracy"].min()
            fair_race_diff = mf_fair_race.by_group["accuracy"].max() - mf_fair_race.by_group["accuracy"].min()
            
            mlflow.log_metric("base_sex_fairness_diff", base_sex_diff)
            mlflow.log_metric("fair_sex_fairness_diff", fair_sex_diff)
            mlflow.log_metric("base_race_fairness_diff", base_race_diff)
            mlflow.log_metric("fair_race_fairness_diff", fair_race_diff)
            
   
            mlflow.sklearn.log_model(base_model, "base_model")
            mlflow.sklearn.log_model(fair_model, "fair_model")
            
            print(f"Base Model - Accuracy: {base_accuracy:.3f}")
            print(f"Sex Fairness Diff: {base_sex_diff:.3f}, Race Fairness Diff: {base_race_diff:.3f}")
            print(f"Fair Model - Accuracy: {fair_accuracy:.3f}")
            print(f"Sex Fairness Diff: {fair_sex_diff:.3f}, Race Fairness Diff: {fair_race_diff:.3f}")

train_and_evaluate()


Fold 1




Base Model - Accuracy: 0.825
Sex Fairness Diff: 0.088, Race Fairness Diff: 0.147
Fair Model - Accuracy: 0.806
Sex Fairness Diff: 0.100, Race Fairness Diff: 0.183

Fold 2




Base Model - Accuracy: 0.823
Sex Fairness Diff: 0.118, Race Fairness Diff: 0.125
Fair Model - Accuracy: 0.804
Sex Fairness Diff: 0.121, Race Fairness Diff: 0.184

Fold 3




Base Model - Accuracy: 0.821
Sex Fairness Diff: 0.119, Race Fairness Diff: 0.150
Fair Model - Accuracy: 0.806
Sex Fairness Diff: 0.126, Race Fairness Diff: 0.170

Fold 4




Base Model - Accuracy: 0.816
Sex Fairness Diff: 0.107, Race Fairness Diff: 0.136
Fair Model - Accuracy: 0.798
Sex Fairness Diff: 0.112, Race Fairness Diff: 0.158

Fold 5




Base Model - Accuracy: 0.828
Sex Fairness Diff: 0.099, Race Fairness Diff: 0.108
Fair Model - Accuracy: 0.807
Sex Fairness Diff: 0.117, Race Fairness Diff: 0.111
