In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle

# Train three classifiers (random forest, logistic regression, k-NN) on the
# recruitment dataset, report their hold-out accuracy, and persist the fitted
# models together with the fitted feature scaler.
df = pd.read_csv('recruitment_data.csv')

# Collapse EducationLevel 2 into 1 (the 1 -> 1 entry is a no-op).
# NOTE(review): the reason for merging these two levels is not visible in this
# cell -- confirm it is intentional.
df['EducationLevel'] = df['EducationLevel'].replace({1: 1, 2: 1})

feature_columns = [
    'Age',
    'Gender',
    'EducationLevel',
    'ExperienceYears',
    'PreviousCompanies',
    'DistanceFromCompany',
    'InterviewScore',
    'SkillScore',
    'PersonalityScore',
    'RecruitmentStrategy',
]
X = df[feature_columns]
y = df['HiringDecision']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only so no test-set statistics leak
# into the preprocessing, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

rf_model = RandomForestClassifier(max_depth=23, min_samples_split=6, n_estimators=727, n_jobs=-1, random_state=42)
rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)

logreg_model = LogisticRegression(max_iter=1000)
logreg_model.fit(X_train, y_train)

logreg_pred = logreg_model.predict(X_test)
logreg_accuracy = accuracy_score(y_test, logreg_pred)

knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

knn_pred = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)

with open('rf_model.pkl', 'wb') as rf_file:
    pickle.dump(rf_model, rf_file)

with open('logreg_model.pkl', 'wb') as logreg_file:
    pickle.dump(logreg_model, logreg_file)

with open('knn_model.pkl', 'wb') as knn_file:
    pickle.dump(knn_model, knn_file)

# The models above were fitted on standardized features, so the fitted scaler
# must be saved alongside them: whoever loads a model needs to apply the very
# same transform to new rows before calling predict().
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"Logistic Regression Accuracy: {logreg_accuracy:.4f}")
print(f"KNN Accuracy: {knn_accuracy:.4f}")


Random Forest Accuracy: 0.9367
Logistic Regression Accuracy: 0.9000
KNN Accuracy: 0.8833
