In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [12]:
# Load the bank turnover dataset from the working directory and preview
# its first rows.
DATA_FILE = 'FAU_Bank_Turnover.csv'
turn = pd.read_csv(DATA_FILE)
turn.head()

Unnamed: 0,job_satisfaction_level,engagement_with_task,last_performance_evaluation,completed_projects,average_working_hours_monthly,years_spent_with_company,received_support,left,promotion_last_5years,job_role,salary
0,0.38,0.19,0.53,2,157,3,0,1,0,investment_banker,low
1,0.8,0.72,0.86,5,262,6,0,1,0,investment_banker,medium
2,0.11,0.82,0.88,7,272,4,0,1,0,investment_banker,medium
3,0.72,0.66,0.87,5,223,5,0,1,0,investment_banker,low
4,0.37,0.52,0.52,2,159,3,0,1,0,investment_banker,low


In [8]:
# Min-max scale the two continuous score columns into new *_normalized columns.
# Each column gets its own scaler instance so that its fitted parameters stay
# available afterwards; re-fitting one shared instance would overwrite the
# first column's parameters with the second's.
satisfaction_scaler = MinMaxScaler()
turn['job_satisfaction_level_normalized'] = satisfaction_scaler.fit_transform(
    turn[['job_satisfaction_level']]
)

# `scaler` keeps its original name and, as before, ends up fitted on
# last_performance_evaluation.
scaler = MinMaxScaler()
turn['last_performance_evaluation_normalized'] = scaler.fit_transform(
    turn[['last_performance_evaluation']]
)
# NOTE(review): both scalers are fitted on the full frame, i.e. before the
# train/test split, so held-out rows influence the fitted min/max.

In [10]:
# Discretise each normalized score into 5 equal-width bins; labels=False
# stores the integer bin index rather than an interval label.
N_BINS = 5
binning_plan = {
    'job_satisfaction_binned': 'job_satisfaction_level_normalized',
    'performance_evaluation_binned': 'last_performance_evaluation_normalized',
}
for binned_col, source_col in binning_plan.items():
    turn[binned_col] = pd.cut(turn[source_col], bins=N_BINS, labels=False)

In [14]:
# Feature engineering: interaction term formed by multiplying, row by row,
# the number of completed projects by the average monthly working hours.
workload_product = turn['completed_projects'].mul(turn['average_working_hours_monthly'])
turn['project_hours_interaction'] = workload_product


In [16]:

# Encode the salary band as an integer rank (low=1, medium=2, high=3).
salary_mapping = {'low': 1, 'medium': 2, 'high': 3}

# Series.map turns any label that is absent from the mapping into NaN without
# complaint, so reject unexpected labels up front instead of letting them
# reach the model as missing values. Salaries that are already missing are
# left as NaN, as before.
unexpected_salaries = set(turn['salary'].dropna().unique()) - set(salary_mapping)
if unexpected_salaries:
    raise ValueError(
        f"Unmapped salary levels: {sorted(map(str, unexpected_salaries))}"
    )
turn['salary_num'] = turn['salary'].map(salary_mapping)

In [20]:
# Integer-encode the job_role column. The encoder is fitted first and then
# applied, and it stays available afterwards as `job_role_encoder`.
job_role_encoder = LabelEncoder()
job_role_encoder.fit(turn['job_role'])
turn['job_role_num'] = job_role_encoder.transform(turn['job_role'])

In [22]:
# Drop the raw columns that have been superseded by engineered equivalents.
# Maps each raw column to the engineered column that replaces it.
replaced_by = {
    'job_satisfaction_level': 'job_satisfaction_level_normalized',
    'last_performance_evaluation': 'last_performance_evaluation_normalized',
    'job_role': 'job_role_num',
    'salary': 'salary_num',
}

# Guard against out-of-order execution: if the frame was reloaded after the
# feature engineering cells ran, dropping the raw columns would silently
# remove that information from the model's inputs altogether.
missing_derived = [new for new in replaced_by.values() if new not in turn.columns]
if missing_derived:
    raise RuntimeError(
        "Refusing to drop raw columns; engineered replacements are missing: "
        f"{missing_derived}. Re-run the feature engineering cells first."
    )

# Only drop raw columns that are still present, so re-running this cell is a
# no-op instead of a KeyError.
turn = turn.drop(columns=[raw for raw in replaced_by if raw in turn.columns])

In [24]:
# Separate the label column ('left') from the predictor columns.
target_column = 'left'
y = turn[target_column]
X = turn.drop(columns=[target_column])

In [26]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition repeatable between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [28]:
# Fit a random forest on the training partition. Only the seed is set
# explicitly; every other hyperparameter is left at the library default.
forest_params = {'random_state': 42}
rf_model = RandomForestClassifier(**forest_params)
rf_model.fit(X=X_train, y=y_train)

In [30]:
# Predict the class label for every row of the held-out test partition.
y_pred = rf_model.predict(X=X_test)

In [32]:
# Score the test-set predictions against the true labels.
# The four scalar metrics share the (y_true, y_pred) call signature, so they
# are computed in one pass and unpacked in order.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Tabular summaries of the same predictions.
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Per-feature importances as exposed by the fitted forest.
feature_importances = rf_model.feature_importances_

In [34]:
# Report the scalar metrics (two decimals each), then the longer summaries.
metric_lines = [
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
]
print("\n".join(metric_lines))

# Each summary is printed under its own heading, separated by a blank line.
for heading, payload in (
    ("\nConfusion Matrix:\n", conf_matrix),
    ("\nClassification Report:\n", class_report),
    ("\nFeature Importances:\n", feature_importances),
):
    print(heading, payload)

Accuracy: 0.96
Precision: 0.93
Recall: 0.91
F1 Score: 0.92

Confusion Matrix:
 [[2245   49]
 [  62  644]]

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.98      0.98      2294
           1       0.93      0.91      0.92       706

    accuracy                           0.96      3000
   macro avg       0.95      0.95      0.95      3000
weighted avg       0.96      0.96      0.96      3000


Feature Importances:
 [0.05330035 0.16161975 0.16515913 0.21616937 0.01197379 0.00198079
 0.3415851  0.01856245 0.02964928]


In [36]:
# Analysis of feature importances, most important first.
# Names and scores are both read from the fitted model so that each importance
# is labelled with the column the forest was actually trained on, even if `X`
# has been rebuilt since the model was fitted. `feature_names_in_` is only set
# when the model was fitted on a frame with string column names, so fall back
# to X.columns otherwise.
trained_feature_names = getattr(rf_model, 'feature_names_in_', X.columns)
feature_importance_turn = pd.DataFrame({
    'feature': trained_feature_names,
    'importance': rf_model.feature_importances_,
}).sort_values(by='importance', ascending=False)
print("\nFeature Importance Analysis:\n", feature_importance_turn)


Feature Importance Analysis:
                          feature  importance
6      project_hours_interaction    0.341585
3       years_spent_with_company    0.216169
2  average_working_hours_monthly    0.165159
1             completed_projects    0.161620
0           engagement_with_task    0.053300
8                   job_role_num    0.029649
7                     salary_num    0.018562
4               received_support    0.011974
5          promotion_last_5years    0.001981
