In [2]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG before anything draws from it. Later cells call
# np.random.normal and np.random.rand, so their results are reproducible only
# when the notebook is run top to bottom starting from this cell.
np.random.seed(123)

# Load the resume-screening dataset (path is relative to the working directory).
df = pd.read_csv("AI_Resume_Screening_with_demographics.csv")

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,Resume_ID,Name,Skills,Experience (Years),Education,Certifications,Job Role,Recruiter Decision,Salary Expectation ($),Projects Count,AI Score (0-100),Gender,Race,Age,Disability_Status
0,1,Ashley Ali,"TensorFlow, NLP, Pytorch",10,B.Sc,,AI Researcher,Hire,104895,8,100,Male,Asian,30,No
1,2,Wesley Roman,"Deep Learning, Machine Learning, Python, SQL",10,MBA,Google ML,Data Scientist,Hire,113002,1,100,Female,White,33,No
2,3,Corey Sanchez,"Ethical Hacking, Cybersecurity, Linux",1,MBA,Deep Learning Specialization,Cybersecurity Analyst,Hire,71766,7,70,Female,Hispanic,26,No
3,4,Elizabeth Carney,"Python, Pytorch, TensorFlow",7,B.Tech,AWS Certified,AI Researcher,Hire,46848,0,95,Male,Asian,30,No
4,5,Julie Hill,"SQL, React, Java",4,PhD,,Software Engineer,Hire,87441,9,100,Male,Hispanic,34,No


In [4]:
def biased_ai_score(row):
    """Return a deliberately biased version of a candidate's AI score.

    SYNTHETIC AND UNFAIR BY DESIGN: the adjustment depends only on the
    candidate's demographic group, not on merit.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "AI Score (0-100)", "Gender", "Race" and
        "Disability_Status".

    Returns
    -------
    float
        ``score + bias + noise`` clipped to [0, 100] and rounded to one
        decimal, where ``noise`` is one draw from N(0, 2) taken from NumPy's
        global RNG (seed it beforehand for reproducible results).

    Notes
    -----
    Values that match none of the listed labels (including missing values)
    receive no adjustment for that attribute.
    """
    score = row["AI Score (0-100)"]
    gender = row["Gender"]
    race = row["Race"]
    disability = row["Disability_Status"]

    bias = 0.0

    # Gender bias
    if gender == "Female":
        bias -= 4
    elif gender == "Male":
        bias += 2

    # Race bias
    if race in ["Asian", "Black", "Hispanic", "Native American", "Other"]:
        bias -= 4
    elif race == "White":
        bias += 3

    # Disability bias. "No" is matched explicitly so that a missing or
    # unexpected status is left unadjusted instead of silently earning the
    # bonus, mirroring how unknown gender/race values are handled above.
    if disability == "Yes":
        bias -= 5
    elif disability == "No":
        bias += 1

    # Small random noise so it's not perfectly deterministic
    noise = np.random.normal(loc=0, scale=2)

    new_score = score + bias + noise

    # Clip to 0-100, then round to one decimal place
    new_score = float(np.clip(new_score, 0, 100))
    return round(new_score, 1)

# Apply the synthetic bias row by row. Every call to biased_ai_score draws
# noise from NumPy's global RNG, so re-running this cell without re-seeding
# produces different scores.
df["AI_Score_Biased"] = df.apply(biased_ai_score, axis=1)

# Show the original and biased scores next to the demographic columns.
df[["AI Score (0-100)", "AI_Score_Biased", "Gender", "Race", "Disability_Status"]].head()


Unnamed: 0,AI Score (0-100),AI_Score_Biased,Gender,Race,Disability_Status
0,100,96.8,Male,Asian,No
1,100,100.0,Female,White,No
2,70,63.6,Female,Hispanic,No
3,95,91.0,Male,Asian,No
4,100,97.8,Male,Hispanic,No


In [6]:
def biased_hire_probability(row):
    """Return a deliberately biased hire probability for one candidate.

    SYNTHETIC AND UNFAIR BY DESIGN: the base probability (biased AI score
    divided by 100) is shifted by fixed, group-based offsets.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "AI_Score_Biased", "Gender", "Race" and
        "Disability_Status".

    Returns
    -------
    float
        Probability clamped to [0.01, 0.99], so no decision is ever certain.
        A NaN score yields NaN rather than being clamped to a valid-looking
        probability.

    Notes
    -----
    Values that match none of the listed labels (including missing values)
    receive no adjustment for that attribute.
    """
    # Base probability from biased AI score
    score = row["AI_Score_Biased"]
    base_p = score / 100.0  # 0-1

    gender = row["Gender"]
    race = row["Race"]
    disability = row["Disability_Status"]

    # Group-based biases (THIS IS SYNTHETIC & UNFAIR BY DESIGN).
    # Offsets are absolute shifts in probability (percentage points).
    bias = 0.0

    # Gender bias
    if gender == "Female":
        bias -= 0.07   # 7 points less likely
    elif gender == "Male":
        bias += 0.05   # 5 points more likely

    # Race bias
    if race in ["Asian", "Black", "Hispanic", "Native American", "Other"]:
        bias -= 0.08   # 8-point penalty
    elif race == "White":
        bias += 0.06   # 6-point bonus

    # Disability bias. "No" is matched explicitly so that a missing or
    # unexpected status is left unadjusted instead of silently earning the
    # bonus, mirroring how unknown gender/race values are handled above.
    if disability == "Yes":
        bias -= 0.08   # 8-point penalty
    elif disability == "No":
        bias += 0.03   # 3-point bonus

    p = base_p + bias

    # Clamp to [0.01, 0.99]. np.clip propagates NaN, whereas the built-in
    # max(0.01, min(0.99, p)) would turn a NaN score into 0.99 because every
    # comparison against NaN is False.
    return float(np.clip(p, 0.01, 0.99))

def sample_biased_decision(row):
    """Sample a "Hire"/"Reject" label for one candidate row.

    The hire probability comes from ``biased_hire_probability(row)``; one
    uniform draw from NumPy's global RNG is compared against it, and the
    candidate is hired when the draw falls strictly below the probability.
    """
    hire_threshold = biased_hire_probability(row)
    draw = np.random.rand()
    if draw < hire_threshold:
        return "Hire"
    return "Reject"

# Sample one biased decision per row. Every call to sample_biased_decision
# consumes a draw from NumPy's global RNG, so the labels depend on the RNG
# state left by earlier cells; re-running this cell alone gives new labels.
df["Recruiter_Decision_Biased"] = df.apply(sample_biased_decision, axis=1)

# Show the sampled decisions next to the biased score and demographics.
df[["AI_Score_Biased", "Gender", "Race", "Disability_Status", "Recruiter_Decision_Biased"]].head()


Unnamed: 0,AI_Score_Biased,Gender,Race,Disability_Status,Recruiter_Decision_Biased
0,96.8,Male,Asian,No,Hire
1,100.0,Female,White,No,Hire
2,63.6,Female,Hispanic,No,Hire
3,91.0,Male,Asian,No,Hire
4,97.8,Male,Hispanic,No,Hire


In [8]:
# Demographic columns to summarise, paired with the label used in each
# printed heading. One loop per metric replaces six copy-pasted statements.
group_columns = [
    ("Gender", "gender"),
    ("Race", "race"),
    ("Disability_Status", "disability"),
]

# Average biased AI score by group
for column, label in group_columns:
    print(f"Mean biased AI score by {label}:")
    print(df.groupby(column)["AI_Score_Biased"].mean(), "\n")

# Hire rate by group (biased decision)
for column, label in group_columns:
    print(f"Hire rate (biased) by {label}:")
    decision_shares = df.groupby(column)["Recruiter_Decision_Biased"].value_counts(normalize=True)
    # fill_value=0: a group with no "Hire" (or no "Reject") rows reports a
    # share of 0 for that decision instead of NaN.
    print(decision_shares.unstack(fill_value=0), "\n")


Mean biased AI score by gender:
Gender
Female    79.643662
Male      85.081113
Name: AI_Score_Biased, dtype: float64 

Mean biased AI score by race:
Race
Asian              74.373846
Black              82.087975
Hispanic           78.768927
Native American    85.133333
Other              78.828000
White              84.621025
Name: AI_Score_Biased, dtype: float64 

Mean biased AI score by disability:
Disability_Status
No     82.256880
Yes    83.838961
Name: AI_Score_Biased, dtype: float64 

Hire rate (biased) by gender:
Recruiter_Decision_Biased      Hire    Reject
Gender                                       
Female                     0.738431  0.261569
Male                       0.874751  0.125249 

Hire rate (biased) by race:
Recruiter_Decision_Biased      Hire    Reject
Race                                         
Asian                      0.676923  0.323077
Black                      0.772152  0.227848
Hispanic                   0.728814  0.271186
Native American            0.7

In [10]:
# Write df in its current state to CSV (relative to the working directory).
# index=False keeps the DataFrame index out of the file.
output_path = "AI_Resume_Screening_with_demographics_BIASED.csv"
df.to_csv(output_path, index=False)

# Echo the path as the cell output.
output_path


'AI_Resume_Screening_with_demographics_BIASED.csv'

In [14]:
# Keep the rows whose biased score is not exactly equal to the original score.
score_differs = df["AI Score (0-100)"].ne(df["AI_Score_Biased"])
changed_ai = df.loc[score_differs]

# Preview the first 20 of them: demographics beside both scores.
ai_preview_columns = [
    "Name",
    "Gender",
    "Race",
    "Disability_Status",
    "AI Score (0-100)",
    "AI_Score_Biased",
]
changed_ai[ai_preview_columns].head(20)


Unnamed: 0,Name,Gender,Race,Disability_Status,AI Score (0-100),AI_Score_Biased
0,Ashley Ali,Male,Asian,No,100,96.8
2,Corey Sanchez,Female,Hispanic,No,70,63.6
3,Elizabeth Carney,Male,Asian,No,95,91.0
4,Julie Hill,Male,Hispanic,No,100,97.8
6,Tony Smith,Female,White,No,100,95.1
7,Anthony Harrison,Female,Hispanic,No,90,82.1
9,Courtney Gibson,Male,White,No,35,39.3
10,Laura French,Male,White,No,70,74.6
11,Ralph Barrera,Female,Other,No,50,42.8
12,David Diaz,Female,Hispanic,No,65,61.0


In [16]:
# Keep the rows where the sampled biased decision disagrees with the original one.
decision_differs = df["Recruiter Decision"].ne(df["Recruiter_Decision_Biased"])
changed_decision = df.loc[decision_differs]

# Preview the first 20 of them: demographics, both scores, both decisions.
decision_preview_columns = [
    "Name",
    "Gender",
    "Race",
    "Disability_Status",
    "AI Score (0-100)",
    "AI_Score_Biased",
    "Recruiter Decision",
    "Recruiter_Decision_Biased",
]
changed_decision[decision_preview_columns].head(20)


Unnamed: 0,Name,Gender,Race,Disability_Status,AI Score (0-100),AI_Score_Biased,Recruiter Decision,Recruiter_Decision_Biased
9,Courtney Gibson,Male,White,No,35,39.3,Reject,Hire
12,David Diaz,Female,Hispanic,No,65,61.0,Hire,Reject
15,Courtney Cook,Male,Hispanic,No,60,58.1,Reject,Hire
29,Steven Thomas,Female,Hispanic,Yes,100,85.3,Hire,Reject
37,Jason Flores,Female,Black,No,100,94.4,Hire,Reject
43,Denise Ramirez,Male,Black,No,70,70.1,Hire,Reject
48,David Nash,Female,Hispanic,No,80,75.0,Hire,Reject
50,Kenneth Skinner,Female,Hispanic,No,100,90.4,Hire,Reject
53,William Hill,Female,Hispanic,No,100,91.4,Hire,Reject
54,James Jones,Female,Other,No,60,53.1,Reject,Hire


In [21]:
# Signed shift introduced by the bias step: biased score minus original score.
score_shift = df["AI_Score_Biased"].sub(df["AI Score (0-100)"])
df["AI_Score_Difference"] = score_shift

# Preview both scores alongside the shift.
shift_preview_columns = ["Name", "AI Score (0-100)", "AI_Score_Biased", "AI_Score_Difference"]
df[shift_preview_columns].head()


Unnamed: 0,Name,AI Score (0-100),AI_Score_Biased,AI_Score_Difference
0,Ashley Ali,100,96.8,-3.2
1,Wesley Roman,100,100.0,0.0
2,Corey Sanchez,70,63.6,-6.4
3,Elizabeth Carney,95,91.0,-4.0
4,Julie Hill,100,97.8,-2.2
