In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import RFE

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report, confusion_matrix

df = pd.read_csv("/content/WA_Fn-UseC_-HR-Employee-Attrition.csv")

df['Retention'] = df['Attrition'].apply(lambda x: 0 if x == 'Yes' else 1)

df.drop(['EmployeeNumber', 'EmployeeCount', 'Over18', 'StandardHours', 'Attrition'], axis=1, inplace=True)

cat_cols = df.select_dtypes(include=['object']).columns
df[cat_cols] = df[cat_cols].apply(LabelEncoder().fit_transform)

X = df.drop('Retention', axis=1)
y = df['Retention']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


lr_model = LogisticRegression(max_iter=1000)
rfe = RFE(estimator=lr_model, n_features_to_select=15)
X_train_rfe = rfe.fit_transform(X_train, y_train)
X_test_rfe = rfe.transform(X_test)

lr_params = {
    'C': [0.01, 0.1, 1, 10],
    'solver': ['liblinear', 'lbfgs']
}
lr_grid = GridSearchCV(LogisticRegression(max_iter=1000), lr_params, cv=5)
lr_grid.fit(X_train_rfe, y_train)

print("Best Logistic Regression Params:", lr_grid.best_params_)
lr_pred = lr_grid.predict(X_test_rfe)
print("\nLogistic Regression Report:\n", classification_report(y_test, lr_pred))


rf_params = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}
rf_grid = GridSearchCV(RandomForestClassifier(), rf_params, cv=5)
rf_grid.fit(X_train_rfe, y_train)

print("Best Random Forest Params:", rf_grid.best_params_)
rf_pred = rf_grid.predict(X_test_rfe)
print("\nRandom Forest Report:\n", classification_report(y_test, rf_pred))


Best Logistic Regression Params: {'C': 10, 'solver': 'liblinear'}

Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.60      0.31      0.41        39
           1       0.90      0.97      0.93       255

    accuracy                           0.88       294
   macro avg       0.75      0.64      0.67       294
weighted avg       0.86      0.88      0.86       294

Best Random Forest Params: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 200}

Random Forest Report:
               precision    recall  f1-score   support

           0       0.62      0.13      0.21        39
           1       0.88      0.99      0.93       255

    accuracy                           0.87       294
   macro avg       0.75      0.56      0.57       294
weighted avg       0.85      0.87      0.84       294

