In [2]:
# 📦 Step 1: Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# 📂 Step 2: Load Data
# Read the raw HR attrition table from the working directory.
df = pd.read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv")

# 🔍 Step 3: Clean and Encode
# Remove the four columns that are not used as model features.
# NOTE(review): presumably these are constant or identifier columns in this
# dataset -- confirm against the CSV before relying on that.
df = df.drop(columns=['Over18', 'EmployeeNumber', 'StandardHours', 'EmployeeCount'])

# Encode the target as 1 (left) / 0 (stayed). Series.map yields NaN for any
# label missing from the mapping, so fail loudly here instead of letting a
# NaN target surface later as an obscure error during model fitting.
attrition_codes = df['Attrition'].map({'Yes': 1, 'No': 0})
unmapped = attrition_codes.isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, 'Attrition'].astype(str).unique())
    raise ValueError(f"Unexpected Attrition labels: {unexpected}")
df['Attrition'] = attrition_codes

# 🔠 Step 4: One-Hot Encode Categorical Features
# drop_first=True drops the first level of every encoded column so the
# dummy columns for one feature are not perfectly collinear.
df = pd.get_dummies(df, drop_first=True)

# 🎯 Step 5: Split Data
# Target is the encoded 'Attrition' column; features are everything else.
# 20% of the rows are held out for evaluation with a fixed seed.
# NOTE(review): the split is not stratified and the recorded report shows
# low recall for class 1 -- consider stratify=y and/or
# class_weight='balanced'; either change will alter the reported metrics.
y = df['Attrition']
X = df.drop(columns='Attrition')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 🧠 Step 6: Train Model
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# 📊 Step 7: Evaluate
# Score the held-out rows and print each metric under its heading, computing
# every metric immediately before it is printed.
y_pred = model.predict(X_test)
for heading, metric in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
    ("\nAccuracy Score:", accuracy_score),
):
    print(heading, metric(y_test, y_pred))

# 💾 Step 8: Save Model
# Persist the fitted estimator next to the notebook.
model_path = "attrition_model.pkl"
joblib.dump(model, model_path)

# 📥 Step 9: Download Model
# google.colab is only importable inside a Colab runtime. The model is
# already on disk at this point, so outside Colab skip the browser download
# instead of ending the run with an ImportError.
try:
    from google.colab import files
except ImportError:
    print(f"Not running in Google Colab; model saved to {model_path}")
else:
    files.download(model_path)


Confusion Matrix:
 [[254   1]
 [ 35   4]]

Classification Report:
               precision    recall  f1-score   support

           0       0.88      1.00      0.93       255
           1       0.80      0.10      0.18        39

    accuracy                           0.88       294
   macro avg       0.84      0.55      0.56       294
weighted avg       0.87      0.88      0.83       294


Accuracy Score: 0.8775510204081632


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>