In [1]:
pip install prettytable lazypredict shap imbalanced-learn tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Collecting shap
  Downloading shap-0.41.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap, lazypredict
Successfully installed lazypredict-0.2.12 shap-0.41.0 slicer-0.0.7


In [2]:
import pandas as pd
import seaborn as sb
import numpy as np
from matplotlib import pyplot as plt
import io
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error,r2_score, recall_score, precision_score, f1_score
from math import sqrt
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")
from prettytable import PrettyTable
import shap
from lazypredict.Supervised import LazyRegressor
from datetime import datetime
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [3]:
emp_past_data = pd.read_csv('past_emp_data_updated.csv',sep=r'\s*,\s*',header=0, engine='python') # removes the spaces in the data too to avoid naming issues (sep=r'\s*,\s*)
emp_past_data.head()

Unnamed: 0,age_category,gender,marital_status,educational_status,total_years_industry,years_work_current_hotel,number_of_years_current_role,department,joined_date,resigned_date,salary,opportunities,workload
0,20 - 30,Male,Single,Diploma holder,1 - 3 years,1 - 3 years,1 - 3 years,Food and Beverages,24/02/2021,03/03/2022,5,5,5
1,30 - 40,Male,Married with children,Diploma holder,5 - 10 years,1 - 3 years,1 - 3 years,Food and Beverages,19/02/2020,15/03/2022,1,5,5
2,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Food and Beverages,20/03/2020,22/03/2022,5,5,4
3,30 - 40,Male,Married with children,Diploma holder,10 - 15 years,3 - 5 years,3 - 5 years,Maintenance,22/03/2017,02/04/2022,5,4,4
4,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Front office,12/04/2020,06/05/2022,2,3,5


In [4]:
# summing up the worked days count using resigned date and joined date (resigned date-joined date)
days = []
for index, row in emp_past_data.iterrows():
    resigned_date = datetime.strptime(row['resigned_date'], '%d/%m/%Y')
    joined_date = datetime.strptime(row['joined_date'], '%d/%m/%Y')
    number_of_days = resigned_date-joined_date
    days.append(int(number_of_days.days))
emp_past_data['job_days']=days
emp_past_data =  emp_past_data.drop(columns=['joined_date','resigned_date'],inplace=False)
emp_past_data.head()

Unnamed: 0,age_category,gender,marital_status,educational_status,total_years_industry,years_work_current_hotel,number_of_years_current_role,department,salary,opportunities,workload,job_days
0,20 - 30,Male,Single,Diploma holder,1 - 3 years,1 - 3 years,1 - 3 years,Food and Beverages,5,5,5,372
1,30 - 40,Male,Married with children,Diploma holder,5 - 10 years,1 - 3 years,1 - 3 years,Food and Beverages,1,5,5,755
2,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Food and Beverages,5,5,4,732
3,30 - 40,Male,Married with children,Diploma holder,10 - 15 years,3 - 5 years,3 - 5 years,Maintenance,5,4,4,1837
4,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Front office,2,3,5,754


In [5]:
#for the categorical attributes
categorical_attributues = emp_past_data.loc[:, ['age_category','gender','marital_status','educational_status','total_years_industry','years_work_current_hotel','number_of_years_current_role','department']]
categorical_attributues.head()

Unnamed: 0,age_category,gender,marital_status,educational_status,total_years_industry,years_work_current_hotel,number_of_years_current_role,department
0,20 - 30,Male,Single,Diploma holder,1 - 3 years,1 - 3 years,1 - 3 years,Food and Beverages
1,30 - 40,Male,Married with children,Diploma holder,5 - 10 years,1 - 3 years,1 - 3 years,Food and Beverages
2,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Food and Beverages
3,30 - 40,Male,Married with children,Diploma holder,10 - 15 years,3 - 5 years,3 - 5 years,Maintenance
4,20 - 30,Female,Single,Diploma holder,,1 - 3 years,1 - 3 years,Front office


In [6]:
categorical_attributues_data_types = pd.DataFrame(emp_past_data[categorical_attributues.columns].dtypes,columns=['Data Type']) # data types
categorical_attributues_cardinality = pd.DataFrame(emp_past_data[categorical_attributues.columns].nunique(),columns=['Cardinality']) # number of unique categories in the column
categorical_attributues_analytics =  categorical_attributues_data_types.join(categorical_attributues_cardinality)
categorical_attributues_analytics

Unnamed: 0,Data Type,Cardinality
age_category,object,3
gender,object,2
marital_status,object,3
educational_status,object,5
total_years_industry,object,6
years_work_current_hotel,object,6
number_of_years_current_role,object,6
department,object,5


In [8]:
#for the continuous attributes
continous_attributues = emp_past_data.loc[:, ['salary',	'opportunities'	,'workload']]
continous_attributues.head()

Unnamed: 0,salary,opportunities,workload
0,5,5,5
1,1,5,5
2,5,5,4
3,5,4,4
4,2,3,5


In [9]:
continous_attributues_data_types = pd.DataFrame(emp_past_data[continous_attributues.columns].dtypes,columns=['Data Type']) # data types
continous_attributues_cardinality = pd.DataFrame(emp_past_data[continous_attributues.columns].nunique(),columns=['Cardinality']) # number of unique categories in the column
continous_attributues_min = pd.DataFrame(emp_past_data[continous_attributues.columns].min(),columns=['Min']) # minimum value in the column
continous_attributues_max = pd.DataFrame(emp_past_data[continous_attributues.columns].max(),columns=['Max']) # maximum value in the column
continous_attributues_mean = pd.DataFrame(emp_past_data[continous_attributues.columns].mean(),columns=['Mean']) # mean value in the column

continous_attributues_analytics =  continous_attributues_data_types.join(continous_attributues_cardinality).join(continous_attributues_min).join(continous_attributues_max).join(continous_attributues_mean)
continous_attributues_analytics

Unnamed: 0,Data Type,Cardinality,Min,Max,Mean
salary,int64,5,1,5,4.05
opportunities,int64,5,1,5,4.34
workload,int64,5,1,5,4.3


In [10]:
corr_matrix = emp_past_data.corr().abs()
upper_trangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
to_drop = [column for column in upper_trangle.columns if any(upper_trangle[column] > 0.95)]
print("columns to drop: ",to_drop)

columns to drop:  []


In [11]:
emp_past_data.isna().sum() # this step shows the number of missing/null values in the dataset

age_category                     0
gender                           0
marital_status                   0
educational_status               1
total_years_industry            22
years_work_current_hotel         0
number_of_years_current_role     0
department                       2
salary                           0
opportunities                    0
workload                         0
job_days                         0
dtype: int64

In [12]:
emp_past_data['total_years_industry'] = emp_past_data['total_years_industry'].fillna(emp_past_data['total_years_industry'].mode()[0])
emp_past_data['educational_status'] = emp_past_data['educational_status'].fillna(emp_past_data['educational_status'].mode()[0])
emp_past_data['department'] = emp_past_data['department'].fillna(emp_past_data['department'].mode()[0])
emp_past_data.isna().sum()

age_category                    0
gender                          0
marital_status                  0
educational_status              0
total_years_industry            0
years_work_current_hotel        0
number_of_years_current_role    0
department                      0
salary                          0
opportunities                   0
workload                        0
job_days                        0
dtype: int64

In [13]:
# using pandas get_dummies method non-numerical values encords into numerical values based on One-Hot technique
non_numeric_value_cols =['age_category','gender','marital_status','educational_status','total_years_industry','years_work_current_hotel','number_of_years_current_role','department']
encoded_emp_data = pd.DataFrame(pd.get_dummies(emp_past_data, dummy_na=False,columns=non_numeric_value_cols,dtype=np.int64))

encoded_emp_data.dtypes

salary                                             int64
opportunities                                      int64
workload                                           int64
job_days                                           int64
age_category_20 - 30                               int64
age_category_30 - 40                               int64
age_category_Above 40                              int64
gender_Female                                      int64
gender_Male                                        int64
marital_status_Married with children               int64
marital_status_Married without children            int64
marital_status_Single                              int64
educational_status_A/L passer                      int64
educational_status_Below O/L                       int64
educational_status_Degree holder                   int64
educational_status_Diploma holder                  int64
educational_status_O/L passer                      int64
total_years_industry_1 - 3 year

In [14]:
encoded_emp_data.head()

Unnamed: 0,salary,opportunities,workload,job_days,age_category_20 - 30,age_category_30 - 40,age_category_Above 40,gender_Female,gender_Male,marital_status_Married with children,marital_status_Married without children,marital_status_Single,educational_status_A/L passer,educational_status_Below O/L,educational_status_Degree holder,educational_status_Diploma holder,educational_status_O/L passer,total_years_industry_1 - 3 years,total_years_industry_10 - 15 years,total_years_industry_15 years and above,total_years_industry_3 - 5 years,total_years_industry_5 - 10 years,total_years_industry_Less than 1 year,years_work_current_hotel_1 - 3 years,years_work_current_hotel_10 - 15 years,years_work_current_hotel_15 years and above,years_work_current_hotel_3 - 5 years,years_work_current_hotel_5 - 10 years,years_work_current_hotel_Less than 1 year,number_of_years_current_role_1 - 3 years,number_of_years_current_role_10 - 15 years,number_of_years_current_role_15 years and above,number_of_years_current_role_3 - 5 years,number_of_years_current_role_5 - 10 years,number_of_years_current_role_Less than 1 year,department_Food and Beverages,department_Front office,department_Housekeeping,department_Maintenance,department_Security
0,5,5,5,372,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1,1,5,5,755,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
2,5,5,4,732,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
3,5,4,4,1837,0,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0
4,2,3,5,754,1,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0


In [15]:
print("before removal dupplication data: ",encoded_emp_data.shape[0])
encoded_emp_data.drop_duplicates(inplace = True)
print("after removal dupplication data: ",encoded_emp_data.shape[0])

before removal dupplication data:  61
after removal dupplication data:  61


In [16]:
encoded_emp_satisfaction_data_features = encoded_emp_data.drop("job_days", axis=1)
encoded_emp_satisfaction_data_output = encoded_emp_data[["job_days"]]

min_max_scaler_features=MinMaxScaler()
min_max_scaler_features.fit(encoded_emp_satisfaction_data_features)

encoded_balanced_mx_emp_features = pd.DataFrame(min_max_scaler_features.transform(encoded_emp_satisfaction_data_features.to_numpy()),columns=encoded_emp_satisfaction_data_features.columns)

min_max_scaler_output=MinMaxScaler()
min_max_scaler_output.fit(encoded_emp_satisfaction_data_output)

encoded_balanced_mx_emp_output = pd.DataFrame()
encoded_balanced_mx_emp_output['job_days'] = pd.DataFrame(min_max_scaler_output.transform(encoded_emp_satisfaction_data_output.to_numpy()))

encoded_balanced_mx_scaled_emp_data= encoded_balanced_mx_emp_features.join(encoded_balanced_mx_emp_output)
encoded_balanced_mx_scaled_emp_data.head()

Unnamed: 0,salary,opportunities,workload,age_category_20 - 30,age_category_30 - 40,age_category_Above 40,gender_Female,gender_Male,marital_status_Married with children,marital_status_Married without children,marital_status_Single,educational_status_A/L passer,educational_status_Below O/L,educational_status_Degree holder,educational_status_Diploma holder,educational_status_O/L passer,total_years_industry_1 - 3 years,total_years_industry_10 - 15 years,total_years_industry_15 years and above,total_years_industry_3 - 5 years,total_years_industry_5 - 10 years,total_years_industry_Less than 1 year,years_work_current_hotel_1 - 3 years,years_work_current_hotel_10 - 15 years,years_work_current_hotel_15 years and above,years_work_current_hotel_3 - 5 years,years_work_current_hotel_5 - 10 years,years_work_current_hotel_Less than 1 year,number_of_years_current_role_1 - 3 years,number_of_years_current_role_10 - 15 years,number_of_years_current_role_15 years and above,number_of_years_current_role_3 - 5 years,number_of_years_current_role_5 - 10 years,number_of_years_current_role_Less than 1 year,department_Food and Beverages,department_Front office,department_Housekeeping,department_Maintenance,department_Security,job_days
0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.03
1,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.1
2,1.0,1.0,0.75,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.09
3,1.0,0.75,0.75,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.28
4,0.25,0.5,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.1


In [17]:
encoded_features_train, encoded_features_test , encoded_output_train, encoded_output_test = train_test_split(encoded_emp_data.drop("job_days", axis=1),encoded_emp_data['job_days'],train_size=0.8) # encoded , balanced
mx_features_train, mx_features_test , mx_output_train, mx_output_test = train_test_split(encoded_balanced_mx_scaled_emp_data.drop("job_days", axis=1),encoded_balanced_mx_scaled_emp_data['job_days'],train_size=0.8) # encoded , balanced, sclaed

In [18]:
lazy_regressor = LazyRegressor(verbose=0,ignore_warnings=False, custom_metric=None )
models,predictions = lazy_regressor.fit(encoded_features_train, encoded_features_test, encoded_output_train, encoded_output_test)
models.head(250)

100%|██████████| 42/42 [00:03<00:00, 13.97it/s]


Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lars,10.93,-21.35,8357.13,0.03
GaussianProcessRegressor,1.78,-0.76,2347.87,0.02
LinearSVR,1.77,-0.73,2326.18,0.01
MLPRegressor,1.76,-0.71,2312.59,0.12
LarsCV,1.67,-0.52,2178.19,0.09
QuantileRegressor,1.54,-0.21,1946.46,0.11
SVR,1.54,-0.21,1945.08,0.01
NuSVR,1.47,-0.05,1812.24,0.01
DummyRegressor,1.45,-0.01,1777.02,0.04
LGBMRegressor,1.3,0.33,1448.07,0.06


In [19]:
mx_lazy_regressor = LazyRegressor(verbose=0,ignore_warnings=False, custom_metric=None )
mx_models,mx_predictions = mx_lazy_regressor.fit(mx_features_train, mx_features_test, mx_output_train, mx_output_test)
mx_models.head(150)

 31%|███       | 13/42 [00:00<00:01, 21.05it/s]

GammaRegressor model failed to execute
Some value(s) of y are out of the valid range of the loss 'HalfGammaLoss'.


100%|██████████| 42/42 [00:01<00:00, 27.54it/s]


Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lars,20.26,-42.34,2.11,0.02
MLPRegressor,3.21,-3.98,0.72,0.04
GaussianProcessRegressor,1.85,-0.92,0.44,0.02
QuantileRegressor,1.64,-0.44,0.38,0.06
DummyRegressor,1.51,-0.16,0.35,0.01
ElasticNet,1.51,-0.16,0.35,0.01
LassoLars,1.51,-0.16,0.35,0.01
Lasso,1.51,-0.16,0.35,0.01
HistGradientBoostingRegressor,1.29,0.36,0.26,0.07
LGBMRegressor,1.29,0.36,0.26,0.02


In [20]:
from xgboost import XGBRFRegressor
from sklearn.tree import ExtraTreeRegressor,DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor,BaggingRegressor,RandomForestRegressor,GradientBoostingRegressor
from sklearn.linear_model import ElasticNetCV,LassoLarsIC,LassoCV,OrthogonalMatchingPursuitCV,GammaRegressor,RidgeCV,PassiveAggressiveRegressor

In [29]:
ompcv =OrthogonalMatchingPursuitCV()
abr = AdaBoostRegressor()
gmr = GammaRegressor()
gbr = GradientBoostingRegressor()
rcv = RidgeCV() 
par = PassiveAggressiveRegressor()
rfr = RandomForestRegressor()


results_table = PrettyTable(["Algorithm Name","Train Score(R2 Score)","Test Score(R2 Score)", "MSE Score"])
for model in (ompcv,abr,gmr,gbr,rcv,par,rfr):


  model.fit(encoded_features_train,encoded_output_train)
  model_predictions_std = model.predict(encoded_features_test)

  model_train_score = model.score(encoded_features_train,encoded_output_train)
  model_test_score = model.score(encoded_features_test,encoded_output_test)
  model_mse_score=mean_squared_error(encoded_output_test, model_predictions_std)

  results_table.add_row([model.__class__.__name__,round(model_train_score*100,2), round(model_test_score*100,2), round(model_mse_score*100,2)])

print(results_table)

+-----------------------------+-----------------------+----------------------+-------------+
|        Algorithm Name       | Train Score(R2 Score) | Test Score(R2 Score) |  MSE Score  |
+-----------------------------+-----------------------+----------------------+-------------+
| OrthogonalMatchingPursuitCV |         84.45         |        80.46         | 61051044.72 |
|      AdaBoostRegressor      |         98.56         |        95.92         | 12747053.56 |
|        GammaRegressor       |         62.57         |         38.3         | 239254794.7 |
|  GradientBoostingRegressor  |         99.83         |        96.39         | 11289192.83 |
|           RidgeCV           |         98.48         |        95.99         | 12546692.05 |
|  PassiveAggressiveRegressor |         45.16         |        36.04         | 199885720.5 |
|    RandomForestRegressor    |         96.57         |        92.08         | 24760870.68 |
+-----------------------------+-----------------------+---------------

In [25]:
mx_etr = ExtraTreeRegressor()
mx_encv = ElasticNetCV()
mx_abr = AdaBoostRegressor()
mx_gbr = GradientBoostingRegressor()
mx_bgr = BaggingRegressor()
mx_lcv =  LassoCV()
mx_xgbr = XGBRFRegressor()
mx_rfr = RandomForestRegressor()


mx_results_table =PrettyTable(["Algorithm Name","Train Score(R2 Score)","Test Score(R2 Score)", "MSE Score"])
for model_mx in (mx_etr,mx_encv,mx_abr,mx_gbr,mx_bgr,mx_lcv,mx_xgbr,mx_rfr):

  model_mx.fit(mx_features_train,mx_output_train)
  model_predictions_mx = model_mx.predict(mx_features_test)

  model_train_score = model_mx.score(mx_features_train,mx_output_train)
  model_test_score = model_mx.score(mx_features_test,mx_output_test)
  model_mse_score=mean_squared_error(mx_output_test, model_predictions_mx)

  mx_results_table.add_row([model_mx.__class__.__name__,round(model_train_score*100,2), round(model_test_score*100,2), round(model_mse_score*100,2)])

print(mx_results_table)

+---------------------------+-----------------------+----------------------+-----------+
|       Algorithm Name      | Train Score(R2 Score) | Test Score(R2 Score) | MSE Score |
+---------------------------+-----------------------+----------------------+-----------+
|     ExtraTreeRegressor    |         99.93         |         91.4         |    0.89   |
|        ElasticNetCV       |         98.82         |        94.25         |    0.59   |
|     AdaBoostRegressor     |          98.0         |        93.43         |    0.68   |
| GradientBoostingRegressor |         99.84         |        89.68         |    1.06   |
|      BaggingRegressor     |         95.47         |        80.42         |    2.02   |
|          LassoCV          |         98.79         |        94.69         |    0.55   |
|       XGBRFRegressor      |          99.0         |         95.9         |    0.42   |
|   RandomForestRegressor   |         97.89         |        94.44         |    0.57   |
+--------------------

In [40]:
best_params ={
    'max_depth': 250,
    'n_estimators': 150}

tuned_results_table = PrettyTable(["Algorithm Name","Train Score(R2 Score)","Test Score(R2 Score)","MSE Score"])

tuned_classifier = XGBRFRegressor(**best_params)

tuned_classifier.fit(mx_features_train,mx_output_train)
model_predictions = tuned_classifier.predict(mx_features_test)

model_train_score = tuned_classifier.score(mx_features_train,mx_output_train)
model_test_score = tuned_classifier.score(mx_features_test,mx_output_test)
model_mse_score=mean_squared_error(mx_output_test, model_predictions)

tuned_results_table.add_row([tuned_classifier.__class__.__name__,round(model_train_score*100,2), round(model_test_score*100,2), round(model_mse_score*100,2)])

print(tuned_results_table)

+----------------+-----------------------+----------------------+-----------+
| Algorithm Name | Train Score(R2 Score) | Test Score(R2 Score) | MSE Score |
+----------------+-----------------------+----------------------+-----------+
| XGBRFRegressor |         99.43         |        96.33         |    0.38   |
+----------------+-----------------------+----------------------+-----------+


In [42]:
pickle.dump(tuned_classifier, open('employee_turnover_days_xgb_regression_updated_model.pkl', 'wb'))
pickle.dump(min_max_scaler_features, open('employee_turnover_days_features_min_max_feature_updated_scaler.pkl', 'wb'))
pickle.dump(min_max_scaler_output, open('employee_turnover_days_features_min_max_output_updated_scaler.pkl', 'wb'))