## Importing Modules

In [1]:
import pandas as pd      
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import Pipeline
import pickle

### Loading Dataset

In [2]:
df = pd.read_csv("HR_Dataset.csv")
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Departments,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


### Data Insights

In [3]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
satisfaction_level,14999.0,0.612834,0.248631,0.09,0.44,0.64,0.82,1.0
last_evaluation,14999.0,0.716102,0.171169,0.36,0.56,0.72,0.87,1.0
number_project,14999.0,3.803054,1.232592,2.0,3.0,4.0,5.0,7.0
average_montly_hours,14999.0,201.050337,49.943099,96.0,156.0,200.0,245.0,310.0
time_spend_company,14999.0,3.498233,1.460136,2.0,3.0,3.0,4.0,10.0
Work_accident,14999.0,0.14461,0.351719,0.0,0.0,0.0,0.0,1.0
left,14999.0,0.238083,0.425924,0.0,0.0,0.0,0.0,1.0
promotion_last_5years,14999.0,0.021268,0.144281,0.0,0.0,0.0,0.0,1.0


In [4]:
df.describe(include="object").T

Unnamed: 0,count,unique,top,freq
Departments,14999,10,sales,4140
salary,14999,3,low,7316


In [5]:
df.duplicated().sum()

3008

In [6]:
df.drop_duplicates(keep='first', inplace=True)

In [7]:
df.shape

(11991, 10)

In [8]:
df.columns=[i.lower().strip() for i in df.columns]
for i in df.columns:
    print(i)

satisfaction_level
last_evaluation
number_project
average_montly_hours
time_spend_company
work_accident
left
promotion_last_5years
departments
salary


In [10]:
salary_oe = OrdinalEncoder(categories=[['low', 'medium', 'high']])
departments_oe = OrdinalEncoder()

df['salary'] = salary_oe.fit_transform(df[['salary']])
df['departments'] = departments_oe.fit_transform(df[['departments']])

In [11]:
X = df.drop(['left'],axis=1)
y = df['left']

### K-Nearest Neighbor (K-NN)

In [13]:
operations = [("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=2, p = 3, weights= "uniform"))]

final_model = Pipeline(steps=operations)

final_model.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('knn', KNeighborsClassifier(n_neighbors=2, p=3))])

In [14]:
pickle.dump(final_model, open('knn_model', 'wb'))

### Random Forest Classifier

In [15]:
operations = [("scaler", StandardScaler()), ("rf_model", RandomForestClassifier(class_weight='balanced', max_depth=3,
                                        max_features=4, max_samples=0.8,
                                        min_samples_leaf=3, n_estimators=128))]

final_model = Pipeline(steps=operations)

final_model.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('rf_model',
                 RandomForestClassifier(class_weight='balanced', max_depth=3,
                                        max_features=4, max_samples=0.8,
                                        min_samples_leaf=3,
                                        n_estimators=128))])

In [16]:
pickle.dump(final_model, open('RF_model', 'wb'))

### XGBoost Classifier

In [17]:
operations = [("scaler", StandardScaler()), ("xgb", XGBClassifier(random_state=101))]

operations = [("scaler", StandardScaler()), ("xgb", XGBClassifier(colsample_bytree= 1, learning_rate= 0.2, max_depth= 4,
                                                                  n_estimators= 100, subsample= 0.8))]

final_model = Pipeline(steps=operations)

final_model.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('xgb',
                 XGBClassifier(base_score=None, booster=None, callbacks=None,
                               colsample_bylevel=None, colsample_bynode=None,
                               colsample_bytree=1, early_stopping_rounds=None,
                               enable_categorical=False, eval_metric=None,
                               feature_types=None, gamma=None, gpu_id=None,
                               grow_policy=None, importance_type=None,
                               interaction_constraints=None, learning_rate=0.2,
                               max_bin=None, max_cat_threshold=None,
                               max_cat_to_onehot=None, max_delta_step=None,
                               max_depth=4, max_leaves=None,
                               min_child_weight=None, missing=nan,
                               monotone_constraints=None, n_estimators=100,
                               n_jobs=None, 

In [23]:
pickle.dump(final_model, open('xgb_model', 'wb'))

## Model Deployement

### Save and Export the Model

In [20]:
mustafa_knn_model = pickle.load(open('knn_model', 'rb'))

In [21]:
mustafa_rf_model = pickle.load(open('rf_model', 'rb'))

In [24]:
mustafa_xgb_model = pickle.load(open('xgb_model', 'rb'))

In [25]:
my_dict = {
    'satisfaction_level': 0.55,
    'last_evaluation': 0.75,
    'number_project': 4,
    'average_montly_hours': 200,
    'time_spend_company': 3,
    'work_accident': 0,
    'promotion_last_5years': 1,  
    'departments': "IT",
    'salary': "medium"
}

In [28]:
df_pre = pd.DataFrame.from_dict([my_dict])
df_pre

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,work_accident,promotion_last_5years,departments,salary
0,0.55,0.75,4,200,3,0,1,IT,medium


In [29]:
salary_oe = OrdinalEncoder(categories=[['low', 'medium', 'high']])
departments_oe = OrdinalEncoder()

df_pre['salary'] = salary_oe.fit_transform(df_pre[['salary']])
df_pre['departments'] = departments_oe.fit_transform(df_pre[['departments']])

In [30]:
scaler = StandardScaler()
df_pre_scaled = scaler.fit_transform(df_pre)

In [31]:
prediction_knn = mustafa_knn_model.predict(df_pre_scaled)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [32]:
if prediction_knn[0] == 1:
    print("Your employee is predicted to not churn (churn = 0).")
else:
    print("Your employee is predicted to churn (churn = 1).")

Your employee is predicted to churn (churn = 1).


In [37]:
prediction_rf = mustafa_rf_model.predict(df_pre_scaled)



In [36]:
if prediction_rf[0] == 1:
    print("Your employee is predicted to not churn (churn = 0).")
else:
    print("Your employee is predicted to churn (churn = 1).")

Your employee is predicted to not churn (churn = 0).


In [38]:
prediction_xgb = mustafa_xgb_model.predict(df_pre_scaled)



In [39]:
if prediction_xgb[0] == 1:
    print("Your employee is predicted to not churn (churn = 0).")
else:
    print("Your employee is predicted to churn (churn = 1).")

Your employee is predicted to churn (churn = 1).
