<a href="https://colab.research.google.com/github/IamGayatri27/AIES_lab/blob/main/ex2_aies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Name : Gayatri Sagar Chougale
PRN  :22SC114501085
Class: B.tech 'B'
Title: Detecting algorithmic bias in hiring dataset


In [3]:
!pip install fairlearn



In [4]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from fairlearn.metrics import (
    MetricFrame,
    true_positive_rate,
    false_positive_rate,
    false_negative_rate,
    selection_rate,
    demographic_parity_difference,
    equalized_odds_difference
)
from fairlearn.reductions import ExponentiatedGradient, DemographicParity


In [5]:
df = pd.read_csv('/content/StudentsPerformance.csv')
df['average_score'] = df[['math score', 'reading score', 'writing score']].mean(axis=1)
df['pass_fail'] = (df['average_score'] >= 60).astype(int)
categorical_cols = ['race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
features = [col for col in df.columns if col not in ['gender', 'average_score', 'pass_fail']]
X = df[features]
y = df['pass_fail']
sensitive = df['gender']
X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
    X, y, sensitive, test_size=0.3, random_state=42, stratify=y
)


In [6]:

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [7]:
metric_frame = MetricFrame(
    metrics={
        'Accuracy': accuracy_score,
        'TPR': true_positive_rate,
        'FPR': false_positive_rate,
        'FNR': false_negative_rate,
        'Selection Rate': selection_rate
    },
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=s_test
)

In [8]:
print("=== Fairness Metrics by Gender ===")
print(metric_frame.by_group)
print("\nOverall Accuracy:", accuracy_score(y_test, y_pred))
print("Demographic Parity Difference:", demographic_parity_difference(y_test, y_pred, sensitive_features=s_test))
print("Equalized Odds Difference:", equalized_odds_difference(y_test, y_pred, sensitive_features=s_test))


=== Fairness Metrics by Gender ===
        Accuracy  TPR  FPR  FNR  Selection Rate
gender                                         
female       1.0  1.0  0.0  0.0        0.790850
male         1.0  1.0  0.0  0.0        0.632653

Overall Accuracy: 1.0
Demographic Parity Difference: 0.1581966119781245
Equalized Odds Difference: 0.0


In [9]:
base_estimator = LogisticRegression(max_iter=1000)
constraint = DemographicParity()


In [10]:
mitigator = ExponentiatedGradient(base_estimator, constraints=constraint)
mitigator.fit(X_train, y_train, sensitive_features=s_train)
y_pred_mitigated = mitigator.predict(X_test)


In [11]:
mitigated_frame = MetricFrame(
    metrics={
        'Accuracy': accuracy_score,
        'TPR': true_positive_rate,
        'FPR': false_positive_rate,
        'FNR': false_negative_rate,
        'Selection Rate': selection_rate
    },
    y_true=y_test,
    y_pred=y_pred_mitigated,
    sensitive_features=s_test
)

print("\n=== After Mitigation ===")
print(mitigated_frame.by_group)
print("\nOverall Accuracy:", accuracy_score(y_test, y_pred_mitigated))
print("Demographic Parity Difference:", demographic_parity_difference(y_test, y_pred_mitigated, sensitive_features=s_test))
print("Equalized Odds Difference:", equalized_odds_difference(y_test, y_pred_mitigated, sensitive_features=s_test))



=== After Mitigation ===
        Accuracy       TPR       FPR       FNR  Selection Rate
gender                                                        
female  0.960784  0.958678  0.031250  0.041322        0.764706
male    0.965986  1.000000  0.092593  0.000000        0.666667

Overall Accuracy: 0.9633333333333334
Demographic Parity Difference: 0.0980392156862745
Equalized Odds Difference: 0.06134259259259259
