In [31]:
import pandas as pd

# 1) Ingest CSV into a DataFrame
applicants = pd.read_csv("job_applicant_dataset.csv")

# 2) Glimpse a few random rows for sanity check
print(applicants.sample(5, random_state=0))


     Job Applicant Name  Age  Gender             Race Ethnicity  \
9394       Keiko Tanaka   30  Female  Mongoloid/Asian  Japanese   
898     Tristan Jackson   30    Male    Negroid/Black   African   
2398     Takahiro Honda   26    Male  Mongoloid/Asian    Korean   
5906       Zoe Mitchell   43  Female    Negroid/Black  Jamaican   
2343    Chinatsu Tanabe   50  Female  Mongoloid/Asian    Indian   

                                                 Resume       Job Roles  \
9394  Proficient in User Research, User Testing, Pro...     UX Designer   
898   Proficient in Analytics, Link Building, Conten...  SEO Specialist   
2398  Proficient in Negotiation, Case Management, Co...          Lawyer   
5906  Proficient in Google Analytics, SEO Optimizati...  SEO Specialist   
2343  Proficient in Conversion Optimization, Digital...  SEO Specialist   

                                        Job Description  Best Match  
9394  Design user interfaces, improve user experienc...           1  
898   

In [32]:
# Merge text columns into one corpus column
text_cols = ["Resume", "Job Description", "Job Roles"]
applicants[text_cols] = applicants[text_cols].fillna("")
applicants["full_text"] = applicants[text_cols].agg(" ".join, axis=1)


In [33]:
# Identify features
categorical_features = ["Gender", "Race", "Ethnicity"]
numerical_features   = ["Age"]
feature_list         = ["full_text"] + categorical_features + numerical_features

# Extract feature matrix and response vector
X = applicants[feature_list]
y = applicants["Best Match"]


In [34]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=y,
    test_size=0.20,
    random_state=0
)


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing        import OneHotEncoder, MinMaxScaler
from sklearn.impute               import SimpleImputer
from sklearn.pipeline             import Pipeline
from sklearn.compose              import ColumnTransformer

# Text → TF-IDF
tfidf_pipe = TfidfVectorizer(max_features=5000)

# Cats → one-hot (dense)
ohe_pipe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# Numbers → median impute + min-max scale
num_pipe = Pipeline([
    ("fill_numeric", SimpleImputer(strategy="median")),
    ("scale_numeric", MinMaxScaler())
])

preprocessor = ColumnTransformer([
    ("text_tfidf", tfidf_pipe,       "full_text"),
    ("cat_ohe",    ohe_pipe,         categorical_features),
    ("num_scale",  num_pipe,         numerical_features)
], remainder="drop")


In [36]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline      import Pipeline

clf_pipeline = Pipeline([
    ("prep", preprocessor),
    ("nb",   MultinomialNB(alpha=1.0))
])


In [37]:
clf_pipeline.fit(X_train, y_train)


In [38]:
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

# 1) Predictions & sensitive attribute
y_pred = clf_pipeline.predict(X_test)
y_true = y_test.values
groups = X_test["Gender"].values

# 2) Overall accuracy
acc = accuracy_score(y_true, y_pred)
print(f"Baseline accuracy: {acc:.3f}\n")

print("--- Baseline Fairness Metrics by Gender ---")
metrics = {}
for grp in ["Male", "Female"]:
    mask = (groups == grp)
    tn, fp, fn, tp = confusion_matrix(y_true[mask], y_pred[mask], labels=[0,1]).ravel()
    tpr = tp / (tp + fn) if (tp+fn)>0 else np.nan
    fpr = fp / (fp + tn) if (fp+tn)>0 else np.nan
    fnr = fn / (tp + fn) if (tp+fn)>0 else np.nan
    te  = fn / fp if fp>0 else np.nan
    metrics[grp] = dict(TPR=tpr, FPR=fpr, FNR=fnr, TE_ratio=te)
    print(f"{grp}:  TPR={tpr:.3f}  FPR={fpr:.3f}  FNR={fnr:.3f}  TE_ratio={te:.3f}")

# 3) Gaps & differences
male, female = metrics["Male"], metrics["Female"]
eo_tpr_diff = abs(male["TPR"] - female["TPR"])
eo_fpr_diff = abs(male["FPR"] - female["FPR"])
oaed        = 0.5 * ((male["FPR"] - female["FPR"]) + (male["FNR"] - female["FNR"]))
ted         = male["TE_ratio"] - female["TE_ratio"]

print(f"\nEqualized-Odds TPR diff (abs):         {eo_tpr_diff:.3f}")
print(f"Equalized-Odds FPR diff (abs):         {eo_fpr_diff:.3f}")
print(f"Average Odds Difference (OAED, abs):   {abs(oaed):.3f}")
print(f"Treatment-Equality Diff (TED, abs):    {abs(ted):.3f}")


Baseline accuracy: 0.548

--- Baseline Fairness Metrics by Gender ---
Male:  TPR=0.801  FPR=0.786  FNR=0.199  TE_ratio=0.409
Female:  TPR=0.125  FPR=0.264  FNR=0.875  TE_ratio=1.801

Equalized-Odds TPR diff (abs):         0.676
Equalized-Odds FPR diff (abs):         0.522
Average Odds Difference (OAED, abs):   0.077
Treatment-Equality Diff (TED, abs):    1.393


In [39]:
# Cell 9: === Mitigation Step 1: Instance re-weighting ===


In [40]:
# Cell 10: Re-weighting and retrain
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes         import MultinomialNB
from sklearn.metrics             import accuracy_score

# Tag privileged/favorable
applicants["is_priv"] = (applicants.Gender == "Male").astype(int)
applicants["is_fav"]  = (applicants["Best Match"] == 1).astype(int)

# Totals
N      = len(applicants)
n_priv = applicants.is_priv.sum()
n_unp  = N - n_priv
n_fav  = applicants.is_fav.sum()
n_ufav = N - n_fav

# Joint counts
joint = applicants.groupby(["is_priv","is_fav"]).size().to_dict()

# Calculate weight for each (priv,fav) cell
cell_weights = {
    (p,f): (cnt * N) / ((n_priv if p else n_unp) * (n_fav if f else n_ufav))
    for (p,f), cnt in joint.items()
}

applicants["rw"] = applicants.apply(
    lambda row: cell_weights[(row.is_priv, row.is_fav)], axis=1
)

# Vectorize resumes & fit
vec_rw = CountVectorizer()
X_rw   = vec_rw.fit_transform(applicants["Resume"])
y_all  = applicants["Best Match"]

nb_rw = MultinomialNB()
nb_rw.fit(X_rw, y_all, sample_weight=applicants["rw"])

pred_rw = nb_rw.predict(X_rw)
print("Re-weighting in-sample acc:", accuracy_score(y_all, pred_rw))


Re-weighting in-sample acc: 0.5587


In [41]:
# Cell 11: Fairness after re-weighting (with OAED & TED)
from sklearn.metrics import accuracy_score, confusion_matrix

df_eval = pd.DataFrame({
    "actual": y_all,
    "pred_rw": pred_rw,
    "Gender": applicants["Gender"]
})

print(f"\nIn-sample accuracy (re-weighted): {accuracy_score(df_eval.actual, df_eval.pred_rw):.3f}")
print("\n--- Fairness after Re-weighting ---")

stats_rw = {}
for grp in ["Male", "Female"]:
    sub = df_eval[df_eval.Gender == grp]
    tn, fp, fn, tp = confusion_matrix(sub.actual, sub.pred_rw, labels=[0,1]).ravel()
    tpr = tp / (tp + fn) if (tp+fn)>0 else np.nan
    fpr = fp / (fp + tn) if (fp+tn)>0 else np.nan
    fnr = fn / (tp + fn) if (tp+fn)>0 else np.nan
    te  = fn / fp if fp>0 else np.nan
    stats_rw[grp] = dict(TPR=tpr, FPR=fpr, FNR=fnr, TE_ratio=te)
    print(f"{grp}: SR={sub.pred_rw.mean():.3f}  TPR={tpr:.3f}  FPR={fpr:.3f}  FNR={fnr:.3f}  TE_ratio={te:.3f}")

m, f = stats_rw["Male"], stats_rw["Female"]
eo_tpr = abs(m["TPR"] - f["TPR"])
eo_fpr = abs(m["FPR"] - f["FPR"])
oaed   = 0.5 * ((m["FPR"] - f["FPR"]) + (m["FNR"] - f["FNR"]))
ted    = m["TE_ratio"] - f["TE_ratio"]

print(f"\nEqualized-Odds TPR diff (abs):       {eo_tpr:.3f}")
print(f"Equalized-Odds FPR diff (abs):       {eo_fpr:.3f}")
print(f"Average Odds Difference (OAED, abs): {abs(oaed):.3f}")
print(f"Treatment-Equality Diff (TED, abs):  {abs(ted):.3f}")



In-sample accuracy (re-weighted): 0.559

--- Fairness after Re-weighting ---
Male: SR=0.612  TPR=0.692  FPR=0.485  FNR=0.308  TE_ratio=1.005
Female: SR=0.585  TPR=0.609  FPR=0.571  FNR=0.391  TE_ratio=0.375

Equalized-Odds TPR diff (abs):       0.082
Equalized-Odds FPR diff (abs):       0.086
Average Odds Difference (OAED, abs): 0.084
Treatment-Equality Diff (TED, abs):  0.630


In [42]:
# Cell 12: === Mitigation Step 2: In-processing via Fairlearn reductions ===


In [43]:
# Cell 13: ExponentiatedGradient in-processing + metrics
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Vectorize full set
cv = CountVectorizer()
X_mat = cv.fit_transform(applicants["Resume"]).toarray()

A = (applicants.Gender == "Male").astype(int).values
Y = applicants["Best Match"].values

base_nb  = MultinomialNB()
exp_grad = ExponentiatedGradient(base_nb, DemographicParity(), eps=0.02)
exp_grad.fit(X_mat, Y, sensitive_features=A)

Y_pred_mit = exp_grad.predict(X_mat)
print(f"In-processing accuracy: {accuracy_score(Y, Y_pred_mit):.3f}\n")
print("--- Fairness after In-processing ---")

df_mit = pd.DataFrame({
    "actual": Y,
    "pred":   Y_pred_mit,
    "Gender": applicants.Gender
})

stats_mit = {}
for grp in ["Male", "Female"]:
    sub = df_mit[df_mit.Gender == grp]
    tn, fp, fn, tp = confusion_matrix(sub.actual, sub.pred, labels=[0,1]).ravel()
    tpr = tp/(tp+fn) if (tp+fn)>0 else np.nan
    fpr = fp/(fp+tn) if (fp+tn)>0 else np.nan
    fnr = fn/(tp+fn) if (tp+fn)>0 else np.nan
    te  = fn/fp if fp>0 else np.nan
    stats_mit[grp] = dict(TPR=tpr, FPR=fpr, FNR=fnr, TE_ratio=te)
    print(f"{grp}: TPR={tpr:.3f}  FPR={fpr:.3f}  FNR={fnr:.3f}  TE_ratio={te:.3f}")

m, f = stats_mit["Male"], stats_mit["Female"]
eo_tpr = abs(m["TPR"] - f["TPR"])
eo_fpr = abs(m["FPR"] - f["FPR"])
oaed   = 0.5 * ((m["FPR"] - f["FPR"]) + (m["FNR"] - f["FNR"]))
ted    = m["TE_ratio"] - f["TE_ratio"]

print(f"\nEqualized-Odds TPR diff (abs):       {eo_tpr:.3f}")
print(f"Equalized-Odds FPR diff (abs):       {eo_fpr:.3f}")
print(f"Average Odds Difference (OAED, abs): {abs(oaed):.3f}")
print(f"Treatment-Equality Diff (TED, abs):  {abs(ted):.3f}")


In-processing accuracy: 0.562

--- Fairness after In-processing ---
Male: TPR=0.704  FPR=0.520  FNR=0.296  TE_ratio=0.899
Female: TPR=0.666  FPR=0.582  FNR=0.334  TE_ratio=0.315

Equalized-Odds TPR diff (abs):       0.038
Equalized-Odds FPR diff (abs):       0.062
Average Odds Difference (OAED, abs): 0.050
Treatment-Equality Diff (TED, abs):  0.584


In [44]:
# Cell 14: === Mitigation Step 3: Post-processing via ThresholdOptimizer ===


In [45]:
# Cell 15: ThresholdOptimizer post‐processing + metrics

from fairlearn.postprocessing import ThresholdOptimizer
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd

# Preconditions from Cell 13:
#   exp_grad: fitted ExponentiatedGradient mitigator
#   cv:       CountVectorizer fitted on applicants["Resume"]
#   X_mat:    cv.transform(applicants["Resume"]).toarray()
#   Y:        applicants["Best Match"].values
#   A:        (applicants.Gender == "Male").astype(int).values
#   applicants: original DataFrame
#   X_test, y_test: test split from Cell 4

# 1) Initialize the post‐processor
post_opt = ThresholdOptimizer(
    estimator=exp_grad,
    constraints="demographic_parity",
    predict_method="predict",
    prefit=True
)

# 2) Fit with sensitive features
post_opt.fit(X_mat, Y, sensitive_features=A)

# 3) Prepare test inputs
test_resumes = applicants.loc[X_test.index, "Resume"]
X_test_arr  = cv.transform(test_resumes).toarray()

# Sensitive feature array for test set
A_test = (X_test["Gender"] == "Male").astype(int).values

# 4) Predict with sensitive_features argument
y_post = post_opt.predict(X_test_arr, sensitive_features=A_test)

# 5) Overall accuracy
print(f"Post‐processed accuracy: {accuracy_score(y_test, y_post):.3f}\n")
print("--- Fairness Metrics after Post‐processing by Gender ---")

# 6) Build evaluation DataFrame
df_post = pd.DataFrame({
    "actual": y_test.values,
    "pred":   y_post,
    "Gender": X_test["Gender"]
})

# 7) Compute per‐group metrics
stats_post = {}
for grp in ["Male", "Female"]:
    sub = df_post[df_post.Gender == grp]
    tn, fp, fn, tp = confusion_matrix(sub.actual, sub.pred, labels=[0,1]).ravel()
    tpr = tp / (tp + fn) if (tp + fn) > 0 else float("nan")
    fpr = fp / (fp + tn) if (fp + tn) > 0 else float("nan")
    fnr = fn / (tp + fn) if (tp + fn) > 0 else float("nan")
    te  = fn / fp if fp > 0 else float("nan")
    stats_post[grp] = dict(TPR=tpr, FPR=fpr, FNR=fnr, TE_ratio=te)
    print(f"{grp}: TPR={tpr:.3f}  FPR={fpr:.3f}  FNR={fnr:.3f}  TE_ratio={te:.3f}")

# 8) Calculate and display fairness gaps
m, f = stats_post["Male"], stats_post["Female"]
eo_tpr = abs(m["TPR"] - f["TPR"])
eo_fpr = abs(m["FPR"] - f["FPR"])
oaed   = 0.5 * ((m["FPR"] - f["FPR"]) + (m["FNR"] - f["FNR"]))
ted    = m["TE_ratio"] - f["TE_ratio"]

print(f"\nEqualized‐Odds TPR diff (abs):       {eo_tpr:.3f}")
print(f"Equalized‐Odds FPR diff (abs):       {eo_fpr:.3f}")
print(f"Average Odds Difference (OAED, abs): {abs(oaed):.3f}")
print(f"Treatment‐Equality Diff (TED, abs):  {abs(ted):.3f}")


Post‐processed accuracy: 0.544

--- Fairness Metrics after Post‐processing by Gender ---
Male: TPR=0.662  FPR=0.496  FNR=0.338  TE_ratio=1.100
Female: TPR=0.648  FPR=0.601  FNR=0.352  TE_ratio=0.319

Equalized‐Odds TPR diff (abs):       0.014
Equalized‐Odds FPR diff (abs):       0.105
Average Odds Difference (OAED, abs): 0.060
Treatment‐Equality Diff (TED, abs):  0.781
