In [None]:
# Project 1: Gender Bias in Hiring – Logistic Regression

# Step 1: Install necessary libraries
!pip install fairlearn --quiet

# Step 2: Import libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from fairlearn.metrics import demographic_parity_difference

# Step 3: Generate synthetic dataset (biased towards males)
np.random.seed(42)  # fixed seed: every run draws the same applicants
size = 1000  # 1000 resumes

# The seeded global stream is consumed in this order:
# experience, test score, gender, then the noise term.
experience = np.random.randint(low=0, high=11, size=size)  # integers 0..10
test_score = np.random.normal(loc=70, scale=10, size=size)
gender = np.random.choice(["Male", "Female"], size=size, p=[0.6, 0.4])
noise = np.random.randn(size) * 5

# Male applicants receive a flat +5 on the hiring score; this is the bias
# deliberately built into the labels.
male_bonus = (gender == "Male") * 5
hiring_score = experience + test_score + male_bonus + noise

# An applicant is hired (1) when the score exceeds 80, otherwise not (0).
hired = (hiring_score > 80).astype(int)

# Step 4: Create DataFrame
# One row per synthetic applicant; columns are paired with their arrays in
# order, so the resulting column order is Experience, TestScore, Gender, Hired.
column_names = ["Experience", "TestScore", "Gender", "Hired"]
column_values = [experience, test_score, gender, hired]
df = pd.DataFrame(dict(zip(column_names, column_values)))

# Step 5: Encode Gender feature
# Build the feature matrix with `assign`, which returns a new DataFrame.
# Adding a column to `df[[...]]` in place writes into a selection of `df`
# and raises SettingWithCopyWarning on pandas versions without copy-on-write.
X = df[["Experience", "TestScore"]].assign(
    Gender=(df["Gender"] == "Male").astype(int)  # Encode Male as 1, Female as 0
)
y = df["Hired"]  # binary target: 1 = hired, 0 = not hired

# Step 6: Split data into train and test
# The raw "Gender" labels are split together with X and y so that each
# partition keeps a row-aligned sensitive attribute for the fairness steps.
# 30% of the rows are held out for testing; the split is seeded.
split_parts = train_test_split(
    X,
    y,
    df["Gender"],
    test_size=0.3,
    random_state=42,
)
(X_train, X_test, y_train, y_test, gender_train, gender_test) = split_parts

# Step 7: Train baseline Logistic Regression model
# No fairness constraint is applied here; the model is fit on every column
# of X_train and then scored on the held-out rows.
lr_model = LogisticRegression().fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Step 8: Evaluate baseline accuracy and fairness
# Accuracy is computed against the true labels; the demographic parity
# difference is computed per gender group from the test-set predictions.
baseline_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
baseline_fairness = demographic_parity_difference(
    y_test,
    y_pred,
    sensitive_features=gender_test,
)

print(
    f"Baseline Accuracy: {baseline_accuracy:.2f}",
    f"Baseline Demographic Parity Difference: {baseline_fairness:.2f}",
    sep="\n",
)

# Step 9: Apply Fairness-Constrained Training
# ExponentiatedGradient wraps a fresh LogisticRegression and fits it under a
# demographic-parity constraint, using the training-set gender labels as the
# sensitive feature.
fair_model = ExponentiatedGradient(
    LogisticRegression(),
    constraints=DemographicParity()
)
fair_model.fit(X_train, y_train, sensitive_features=gender_train)
# ExponentiatedGradient.predict is randomized: repeated calls on the same data
# can return different labels. Pin the seed so the metrics below are
# reproducible, like the seeded data generation and split earlier in the script.
y_pred_fair = fair_model.predict(X_test, random_state=42)

# Step 10: Evaluate mitigated model
# Same two metrics as the baseline, so the numbers are directly comparable.
fair_accuracy = accuracy_score(y_test, y_pred_fair)
fair_fairness = demographic_parity_difference(y_test, y_pred_fair, sensitive_features=gender_test)

print(f"\nFair Model Accuracy: {fair_accuracy:.2f}")
print(f"Fair Model Demographic Parity Difference: {fair_fairness:.2f}")
