In [41]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the recruitment CSV, then normalise every header by trimming
# surrounding whitespace and lower-casing it so later cells can use
# plain lower-case column names.
df = pd.read_csv("recruitment_data.csv")
df.columns = [header.strip().lower() for header in df.columns]
df

Unnamed: 0,age,gender,educationlevel,experienceyears,previouscompanies,distancefromcompany,interviewscore,skillscore,personalityscore,recruitmentstrategy,hiringdecision
0,26,1,2,0,3,26.783828,48,78,91,1,1
1,39,1,4,12,3,25.862694,35,68,80,2,1
2,48,0,2,3,2,9.920805,20,67,13,2,0
3,34,1,2,5,2,6.407751,36,27,70,3,0
4,30,0,1,6,1,43.105343,23,52,85,2,0
...,...,...,...,...,...,...,...,...,...,...,...
1495,48,0,2,3,4,9.183783,66,3,80,3,1
1496,27,1,2,10,3,14.847731,43,97,7,2,0
1497,24,1,1,1,2,4.289911,31,91,58,1,1
1498,48,0,2,4,4,36.299263,9,37,44,2,1


In [43]:
# Encode the gender column as an integer flag: 1 = male, 0 = female.
#
# The branch is chosen with is_numeric_dtype rather than `dtype == object`,
# because text columns are not always stored as `object` (pandas also has
# dedicated string dtypes); comparing against `object` would send such a
# column into the integer cast and fail on the text labels.
if pd.api.types.is_numeric_dtype(df["gender"]):
    # Already numeric: keep the values and only cast them below.
    gender_encoded = df["gender"]
else:
    # Text labels: trim, lower-case, then map to the 0/1 code.
    # Any label other than "male"/"female" becomes NaN here.
    gender_encoded = (
        df["gender"].str.strip().str.lower().map({"male": 1, "female": 0})
    )

# Remove rows without a usable gender BEFORE the integer cast: astype(int)
# raises on NaN, and the notebook-wide dropna() only runs in the next cell
# (which would discard these rows anyway).
df = (
    df.assign(gender=gender_encoded)
    .dropna(subset=["gender"])
    .astype({"gender": int})
)


In [45]:
# Discard every row that still has at least one missing value.
df = df.dropna(axis=0, how="any")

In [47]:
# Feature matrix: gender flag and years of experience.
X = df.loc[:, ["gender", "experienceyears"]]
# Target vector: the hiring decision column.
y = df.loc[:, "hiringdecision"]

In [49]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the
# split identical across runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.3)

In [51]:
# Fit a default logistic regression on the unmodified training split.
# fit() returns the estimator itself, so it can be chained; the bare name
# on the last line keeps the fitted estimator as the cell's display value.
model_normal = LogisticRegression().fit(X_train, y_train)
model_normal

In [53]:
# Accuracy of the normal model on its held-out test split.
print(
    "Normal Model Accuracy:",
    accuracy_score(y_true=y_test, y_pred=model_normal.predict(X_test)),
)

# -------- Create Biased Data --------
# Keep a row unless it is BOTH gender == 0 and hiringdecision == 1,
# i.e. remove every hired row whose gender flag is 0.
biased_df = df[(df["gender"] != 0) | (df["hiringdecision"] != 1)]

# Same two features and target as the normal model, taken from the
# filtered frame.
X_b = biased_df.loc[:, ["gender", "experienceyears"]]
y_b = biased_df.loc[:, "hiringdecision"]

# 70/30 split with the same seed as the normal split.
(X_train_b, X_test_b,
 y_train_b, y_test_b) = train_test_split(X_b, y_b, random_state=42, test_size=0.3)

Normal Model Accuracy: 0.7133333333333334


In [55]:
# Fit a default logistic regression on the biased training split.
# fit() returns the estimator itself, so it can be chained; the bare name
# on the last line keeps the fitted estimator as the cell's display value.
model_biased = LogisticRegression().fit(X_train_b, y_train_b)
model_biased

In [57]:
# Accuracy of the biased model, measured on the biased test split
# (not on the original test split used for the normal model).
print(
    "Biased Model Accuracy:",
    accuracy_score(y_true=y_test_b, y_pred=model_biased.predict(X_test_b)),
)

Biased Model Accuracy: 0.7889182058047494


In [59]:
# One hypothetical applicant: gender flag 0, five years of experience.
test_case = pd.DataFrame({"gender": [0], "experienceyears": [5]})

# Ask both models for a decision on the same applicant; a predicted
# class of 1 is reported as "Hired", anything else as "Not Hired".
print("\nPrediction for Female (5 years experience):")
print(
    "Normal Model:",
    {True: "Hired", False: "Not Hired"}[bool(model_normal.predict(test_case)[0] == 1)],
)
print(
    "Biased Model:",
    {True: "Hired", False: "Not Hired"}[bool(model_biased.predict(test_case)[0] == 1)],
)


Prediction for Female (5 years experience):
Normal Model: Not Hired
Biased Model: Not Hired
