In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Path to the input CSV; it is relative, so it resolves against the kernel's working directory.
file_path = 'Employee-Attrition.csv'
# Read the whole file into a DataFrame using pandas' default parsing options.
data = pd.read_csv(file_path)

In [8]:
# --- Preprocessing ---------------------------------------------------------
# Columns this analysis treats as uninformative; they are removed before modelling.
uninformative_cols = ['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours']

# Drop those columns and binarise the target in one pass:
# 'Yes' -> 1, every other value -> 0. `data` itself is left untouched.
data_cleaned = data.drop(columns=uninformative_cols).assign(
    Attrition=lambda frame: frame['Attrition'].eq('Yes').astype('int64')
)

# Categorical variables: every remaining object-dtype column is one-hot encoded.
# The target is already numeric at this point, so it is not picked up here.
# drop_first=True omits the first level of each encoded column.
categorical_cols = data_cleaned.select_dtypes(include='object').columns
data_encoded = pd.get_dummies(
    data_cleaned,
    columns=categorical_cols,
    drop_first=True,
)

# --- Features / target -----------------------------------------------------
y = data_encoded['Attrition']
X = data_encoded.drop(columns='Attrition')

# 70/30 train/test split with a fixed seed; stratify=y keeps the class
# proportions of the target the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# --- Model -----------------------------------------------------------------
# Random forest with library-default hyperparameters and a fixed seed.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test)

# --- Evaluation ------------------------------------------------------------
# Overall accuracy plus a per-class precision / recall / F1 breakdown.
# NOTE(review): in the output recorded with this cell, recall for class 1 is
# only 0.13 despite ~83% accuracy - accuracy alone overstates model quality here.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", report)

# --- Feature importances ---------------------------------------------------
# Pair each feature column with the importance reported by the fitted forest,
# then order from most to least important.
feature_importances = pd.DataFrame({
    'Feature': X.columns,
    'Importance': model.feature_importances_,
})
feature_importances = feature_importances.sort_values(by='Importance', ascending=False)

print("\nTop Features:\n", feature_importances.head(10))


Accuracy: 83.45%

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.97      0.91       370
           1       0.45      0.13      0.20        71

    accuracy                           0.83       441
   macro avg       0.65      0.55      0.55       441
weighted avg       0.79      0.83      0.79       441


Top Features:
                  Feature  Importance
9          MonthlyIncome    0.078772
0                    Age    0.064982
16     TotalWorkingYears    0.054929
1              DailyRate    0.051431
5             HourlyRate    0.049992
10           MonthlyRate    0.048017
19        YearsAtCompany    0.045208
2       DistanceFromHome    0.043131
22  YearsWithCurrManager    0.041797
43          OverTime_Yes    0.039565
