# Import libraries

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler,RobustScaler

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer, make_column_selector

from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split, cross_val_score, cross_validate

from imblearn.over_sampling import SMOTE

# Retrieve data

In [2]:
# Load the raw HR CSV; the path is relative to the notebook's working directory.
dataset = pd.read_csv('../raw_data/HR_Engagement_Sat_Sales_UpdatedV4.0.csv')

In [3]:
# Lift the column display limit so wide frames render in full, then preview the first rows.
pd.options.display.max_columns = None
dataset.head(n=5)

Unnamed: 0,ID,Name,Department,GEO,Role,Rising_Star,Will_Relocate,Critical,Trending Perf,Talent_Level,Validated_Talent_Level,Percent_Remote,EMP_Sat_OnPrem_1,EMP_Sat_OnPrem_2,EMP_Sat_OnPrem_3,EMP_Sat_OnPrem_4,EMP_Sat_OnPrem_5,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left_Company,CSR Factor,promotion_last_5years,sales,salary,Gender,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Women_Leave,Men_Leave,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,1,BRADDY,Operations,US,VP,,0,1.0,3,6,6,0.4,3.0,3.0,4.0,3.0,5.0,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,,0,sales,low,M,5,4,3,1,1,1,1,2,1,,1.0,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,2,BORST,Sales,UK,Senior Director,,0,,3,6,6,0.4,3.0,3.0,4.0,3.0,5.0,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,,0,accounting,low,F,58,3,3,4,3,2,1,2,1,0.0,,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,3,BIRDWELL,Finance,France,Senior Director,,0,,3,6,6,0.4,3.0,3.0,4.0,3.0,5.0,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,,0,product_mng,medium,F,42,2,4,1,3,1,1,2,2,1.0,,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,4,BENT,Human Resources,China,Senior Director,,0,,3,6,6,0.4,3.0,3.0,4.0,3.0,5.0,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,,0,product_mng,high,M,37,3,1,4,5,2,1,2,1,,1.0,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,5,BAZAN,IT,Korea,Director,,0,,3,6,6,0.4,3.0,3.0,4.0,3.0,5.0,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,,0,sales,low,F,6,4,5,4,5,1,1,2,1,1.0,,1,4,1,5,3,5,1,2,5,4,3149,80,10


# Exploring the dataset

## Hypothesis 0 : Drop NAME column

In [4]:
# Hypothesis 0: remove the 'Name' column, reporting the shape on both sides of the drop.
print(f"The dataset shape BEFORE Hypothesis 0 : {dataset.shape}")
dataset = dataset.drop(labels='Name', axis='columns')
print("The following column has been dropped : 'Name'")
print(f"The dataset shape AFTER Hypothesis 0 : {dataset.shape}")

The dataset shape BEFORE Hypothesis 0 : (14999, 62)
The following column has been dropped : 'Name'
The dataset shape AFTER Hypothesis 0 : (14999, 61)


## Count non-NA/null values of dataset

In [5]:
def extract_col_fulfillment(dataframe: pd.DataFrame,
                            above_below_equal: str = None,
                            threshold: float = None) -> pd.Series:
    '''
    Compute the fulfillment of each column of a dataframe, i.e. the percentage
    of its rows holding a non-NaN/null value, sorted in ascending order.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The frame whose columns are measured.
    above_below_equal : str, optional
        One of 'above', 'below' or 'equal'. Selects the strict comparison
        (>, <, ==) applied between each fulfillment and ``threshold``.
    threshold : int or float, optional
        Percentage the fulfillment is compared against.

    The filter is applied only when BOTH ``above_below_equal`` and
    ``threshold`` are provided; if either is missing, the unfiltered result
    is returned.

    Returns
    -------
    pd.Series
        Indexed by column name, values in percent, sorted ascending and
        restricted to the columns matching the filter when one is given.

    Raises
    ------
    AssertionError
        If an argument has the wrong type or ``above_below_equal`` is not one
        of the three accepted keywords.
    '''
    assert isinstance(dataframe, pd.DataFrame), \
        f"{type(dataframe).__name__} received: 'dataframe' should be a pandas DataFrame"
    if above_below_equal is not None:
        assert isinstance(above_below_equal, str), f"{above_below_equal} should be a string"
        assert above_below_equal in ('below', 'above', 'equal'), \
            f"{above_below_equal} should be either 'below', 'above' or 'equal'"
    if threshold is not None:
        # Fulfillment values are float percentages, so float thresholds are accepted too.
        assert isinstance(threshold, (int, float)), f"{threshold} should be a number"

    # Measure the frame passed in as argument (not a notebook-level global).
    fulfillment = (dataframe.count() / len(dataframe) * 100).sort_values()

    if above_below_equal is None or threshold is None:
        return fulfillment
    if above_below_equal == 'above':
        return fulfillment[fulfillment > threshold]
    if above_below_equal == 'below':
        return fulfillment[fulfillment < threshold]
    return fulfillment[fulfillment == threshold]

In [6]:
# Filter settings used to select columns by their fulfillment (percentage of non-NaN/null values).
# 'above_below_equal' picks the comparison ('above', 'below' or 'equal') and 'threshold' is the
# percentage the fulfillment is compared against.
above_below_equal = 'below'
threshold = 100

In [7]:
# print(f"Columns whose number of non NAN/null values are {above_below_equal} {threshold}% : \n ")
# print(extract_col_fulfillment(dataset,above_below_equal, threshold))

In [8]:
# Select the columns whose fulfillment matches the configured comparison and threshold,
# and keep their names as a plain list.
fulfilled_cols = extract_col_fulfillment(
    dataframe=dataset,
    above_below_equal=above_below_equal,
    threshold=threshold,
)
fulfilled_cols_names = fulfilled_cols.index.tolist()

In [9]:
# Report how many columns matched the filter and display them. The Series computed in the
# previous cell is reused instead of calling extract_col_fulfillment again with the same
# arguments, so the count and the table always come from the same object.
print(f"There are {len(fulfilled_cols_names)} columns, that are {above_below_equal} to {threshold}% : " )
fulfilled_cols

There are 10 columns, that are below to 100% : 


Rising_Star          0.140009
Critical             3.200213
CSR Factor          11.420761
Men_Leave           32.435496
Women_Leave         50.643376
EMP_Sat_OnPrem_5    99.146610
EMP_Sat_OnPrem_4    99.146610
EMP_Sat_OnPrem_3    99.146610
EMP_Sat_OnPrem_2    99.146610
EMP_Sat_OnPrem_1    99.146610
dtype: float64

## Hypothesis 1 : Drop low filled columns

In [10]:
# Hypothesis 1: remove every column selected by the fulfillment filter.
print(f"The dataset shape BEFORE Hypothesis 1 : {dataset.shape}")
dataset = dataset.drop(labels=fulfilled_cols_names, axis='columns')
print(f"The following columns have been dropped : {fulfilled_cols_names}")
print(f"The dataset shape AFTER Hypothesis 1 : {dataset.shape}")

The dataset shape BEFORE Hypothesis 1 : (14999, 61)
The following columns have been dropped : ['Rising_Star', 'Critical', 'CSR Factor', 'Men_Leave', 'Women_Leave', 'EMP_Sat_OnPrem_5', 'EMP_Sat_OnPrem_4', 'EMP_Sat_OnPrem_3', 'EMP_Sat_OnPrem_2', 'EMP_Sat_OnPrem_1']
The dataset shape AFTER Hypothesis 1 : (14999, 51)


## Check unique values in each column

In [11]:
# Check for constant values <=> standard deviation == 0.
# The per-column standard deviation is computed once and reused for both the mask and the values.
numeric_std = dataset.std(axis=0, numeric_only=True)
zero_std_cols = numeric_std[numeric_std == 0]
zero_std_cols

Trending Perf             0.0
Talent_Level              0.0
Validated_Talent_Level    0.0
dtype: float64

In [12]:
# Mean of each zero-standard-deviation column, i.e. the single value that column holds.
unique_value_cols = dataset[zero_std_cols.index].mean(axis=0)

In [13]:
# Report how many numeric columns are constant, then print the value each of them holds.
print(f"There are {len(zero_std_cols)} numerical columns which have a standard deviation equal to 0.\n")
print("These columns can be considered as unique value columns : \n")
print(unique_value_cols)

There are 3 numerical columns which have a standard deviation equal to 0.

These columns can be considered as unique value columns : 

Trending Perf             3.0
Talent_Level              6.0
Validated_Talent_Level    6.0
dtype: float64


In [14]:
# Names of the constant columns as a plain list, ready to be passed to DataFrame.drop.
unique_value_cols_names = unique_value_cols.index.tolist()
unique_value_cols_names

['Trending Perf', 'Talent_Level', 'Validated_Talent_Level']

## Hypothesis 2 : Drop constant columns (a single unique value)

In [15]:
# Hypothesis 2: remove the columns found to hold one constant value.
print(f"The dataset shape BEFORE Hypothesis 2 : {dataset.shape}")
dataset = dataset.drop(labels=unique_value_cols_names, axis='columns')
print(f"The following columns have been dropped : {unique_value_cols_names}")
print(f"The dataset shape AFTER Hypothesis 2 : {dataset.shape}")

The dataset shape BEFORE Hypothesis 2 : (14999, 51)
The following columns have been dropped : ['Trending Perf', 'Talent_Level', 'Validated_Talent_Level']
The dataset shape AFTER Hypothesis 2 : (14999, 48)


In [16]:
# Preview the columns that remain after Hypotheses 0 to 2.
dataset.head()

Unnamed: 0,ID,Department,GEO,Role,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left_Company,promotion_last_5years,sales,salary,Gender,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,1,Operations,US,VP,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,0,sales,low,M,5,4,3,1,1,1,1,2,1,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,2,Sales,UK,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,0,accounting,low,F,58,3,3,4,3,2,1,2,1,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,3,Finance,France,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,0,product_mng,medium,F,42,2,4,1,3,1,1,2,2,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,4,Human Resources,China,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,0,product_mng,high,M,37,3,1,4,5,2,1,2,1,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,5,IT,Korea,Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,0,sales,low,F,6,4,5,4,5,1,1,2,1,1,4,1,5,3,5,1,2,5,4,3149,80,10


In [17]:
# correlation_df = dataset.drop(columns=['left_Company'])
# sns.heatmap(correlation_df.corr(), cmap='coolwarm')

# Define X and y

In [18]:
# Remove the 'ID' column before the feature matrix is built, then preview the result.
dataset = dataset.drop(labels='ID', axis='columns')
dataset.head(n=5)

Unnamed: 0,Department,GEO,Role,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left_Company,promotion_last_5years,sales,salary,Gender,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,Operations,US,VP,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,0,sales,low,M,5,4,3,1,1,1,1,2,1,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,Sales,UK,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,0,accounting,low,F,58,3,3,4,3,2,1,2,1,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,Finance,France,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,0,product_mng,medium,F,42,2,4,1,3,1,1,2,2,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,Human Resources,China,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,0,product_mng,high,M,37,3,1,4,5,2,1,2,1,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,IT,Korea,Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,0,sales,low,F,6,4,5,4,5,1,1,2,1,1,4,1,5,3,5,1,2,5,4,3149,80,10


In [19]:
# Dimensions of the cleaned dataset, target column still included.
dataset.shape

(14999, 47)

In [20]:
# Feature matrix: every column except the target 'left_Company'.
X = dataset.drop(labels='left_Company', axis='columns')
X.shape

(14999, 46)

In [21]:
# Target vector: the 'left_Company' column.
y = dataset.loc[:, 'left_Company']
y.shape

(14999,)

In [22]:
# Column dtypes and non-null counts of the feature matrix.
X.info();

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14999 entries, 0 to 14998
Data columns (total 46 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Department                             14999 non-null  object 
 1   GEO                                    14999 non-null  object 
 2   Role                                   14999 non-null  object 
 3   Will_Relocate                          14999 non-null  int64  
 4   Percent_Remote                         14999 non-null  float64
 5   EMP_Sat_Remote_1                       14999 non-null  int64  
 6   EMP_Sat_Remote_2                       14999 non-null  int64  
 7   EMP_Sat_Remote_3                       14999 non-null  int64  
 8   EMP_Sat_Remote_4                       14999 non-null  int64  
 9   EMP_Sat_Remote_5                       14999 non-null  int64  
 10  EMP_Engagement_1                       14999 non-null  int64  
 11  EM

# Balancing of y (to be defined)

In [23]:
# Share of each target class, in percent of all rows.
y.value_counts().div(len(y)).mul(100)

left_Company
0    76.191746
1    23.808254
Name: count, dtype: float64

In [24]:
# smote = SMOTE()
# X_resampled, y_resampled = smote.fit_resample(X, y)

# Encoding object type columns

In [25]:
# Categorical features: the object-typed columns of X.
X_cat = X.select_dtypes(include='object')
X_cat.head(n=5)

Unnamed: 0,Department,GEO,Role,sales,salary,Gender
0,Operations,US,VP,sales,low,M
1,Sales,UK,Senior Director,accounting,low,F
2,Finance,France,Senior Director,product_mng,medium,F
3,Human Resources,China,Senior Director,product_mng,high,M
4,IT,Korea,Director,sales,low,F


In [26]:
# Number of rows and of categorical columns before encoding.
X_cat.shape

(14999, 6)

In [27]:
# sparse_output=False makes transform() return a dense array; drop="if_binary" keeps a single
# indicator column for two-category features (Gender becomes Gender_M in the generated names).
one_hot_encoder = OneHotEncoder(sparse_output = False, drop = "if_binary") 
one_hot_encoder.fit(X_cat)

In [28]:
# Display the output column names generated by the fitted encoder
print(f"The column names for the encoded values are {one_hot_encoder.get_feature_names_out()}")

The column names for the encoded values are ['Department_Finance' 'Department_Human Resources' 'Department_IT'
 'Department_Operations' 'Department_Sales' 'Department_Warehouse'
 'GEO_Australia' 'GEO_China' 'GEO_Colombia' 'GEO_France' 'GEO_Japan'
 'GEO_Korea' 'GEO_Turkey' 'GEO_UK' 'GEO_US' 'Role_Director' 'Role_Level 1'
 'Role_Level 2-4' 'Role_Manager' 'Role_Senior Director'
 'Role_Senior Manager' 'Role_VP' 'sales_IT' 'sales_RandD'
 'sales_accounting' 'sales_hr' 'sales_management' 'sales_marketing'
 'sales_product_mng' 'sales_sales' 'sales_support' 'sales_technical'
 'salary_high' 'salary_low' 'salary_medium' 'Gender_M']


In [29]:
# Encode the categorical columns and place the one-hot columns next to them.
# The result is built as a new frame from the encoder's own input columns instead of being
# written into X_cat in place: the in-place version fed the already-extended X_cat back into
# transform() when the cell was run a second time, which no longer matches the fitted input.
source_cols = one_hot_encoder.feature_names_in_
encoded = pd.DataFrame(
    one_hot_encoder.transform(X_cat[source_cols]),
    columns=one_hot_encoder.get_feature_names_out(),
    index=X_cat.index,  # keep row alignment with X_cat
)
X_cat = pd.concat([X_cat[source_cols], encoded], axis=1)
X_cat.head()

Unnamed: 0,Department,GEO,Role,sales,salary,Gender,Department_Finance,Department_Human Resources,Department_IT,Department_Operations,Department_Sales,Department_Warehouse,GEO_Australia,GEO_China,GEO_Colombia,GEO_France,GEO_Japan,GEO_Korea,GEO_Turkey,GEO_UK,GEO_US,Role_Director,Role_Level 1,Role_Level 2-4,Role_Manager,Role_Senior Director,Role_Senior Manager,Role_VP,sales_IT,sales_RandD,sales_accounting,sales_hr,sales_management,sales_marketing,sales_product_mng,sales_sales,sales_support,sales_technical,salary_high,salary_low,salary_medium,Gender_M
0,Operations,US,VP,sales,low,M,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
1,Sales,UK,Senior Director,accounting,low,F,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,Finance,France,Senior Director,product_mng,medium,F,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,Human Resources,China,Senior Director,product_mng,high,M,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,IT,Korea,Director,sales,low,F,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [30]:
# Original categorical columns plus the generated one-hot columns.
X_cat.shape

(14999, 42)

In [31]:
# Object-typed columns still present in X_cat (the un-encoded originals).
X_cat.select_dtypes(include=[object]).columns

Index(['Department', 'GEO', 'Role', 'sales', 'salary', 'Gender'], dtype='object')

In [32]:
# Keep only the non-object columns, which discards the original un-encoded categoricals.
X_cat = X_cat.select_dtypes(exclude='object')
X_cat.head(n=5)

Unnamed: 0,Department_Finance,Department_Human Resources,Department_IT,Department_Operations,Department_Sales,Department_Warehouse,GEO_Australia,GEO_China,GEO_Colombia,GEO_France,GEO_Japan,GEO_Korea,GEO_Turkey,GEO_UK,GEO_US,Role_Director,Role_Level 1,Role_Level 2-4,Role_Manager,Role_Senior Director,Role_Senior Manager,Role_VP,sales_IT,sales_RandD,sales_accounting,sales_hr,sales_management,sales_marketing,sales_product_mng,sales_sales,sales_support,sales_technical,salary_high,salary_low,salary_medium,Gender_M
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [33]:
# Shape once the object columns are removed: only the one-hot columns remain.
X_cat.shape

(14999, 36)

In [34]:
# Dtypes and non-null counts of the encoded columns.
X_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14999 entries, 0 to 14998
Data columns (total 36 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Department_Finance          14999 non-null  float64
 1   Department_Human Resources  14999 non-null  float64
 2   Department_IT               14999 non-null  float64
 3   Department_Operations       14999 non-null  float64
 4   Department_Sales            14999 non-null  float64
 5   Department_Warehouse        14999 non-null  float64
 6   GEO_Australia               14999 non-null  float64
 7   GEO_China                   14999 non-null  float64
 8   GEO_Colombia                14999 non-null  float64
 9   GEO_France                  14999 non-null  float64
 10  GEO_Japan                   14999 non-null  float64
 11  GEO_Korea                   14999 non-null  float64
 12  GEO_Turkey                  14999 non-null  float64
 13  GEO_UK                      149

In [35]:
# Same shape check as two cells earlier; X_cat has not been modified in between.
X_cat.shape

(14999, 36)

In [36]:
# Count of columns per dtype in the encoded frame.
X_cat.dtypes.value_counts()

float64    36
Name: count, dtype: int64

# Scaling

In [37]:
# Numerical features: every column of X that is not object-typed.
X_num = X.select_dtypes(exclude='object')
X_num.head(n=5)

Unnamed: 0,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,5,4,3,1,1,1,1,2,1,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,58,3,3,4,3,2,1,2,1,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,42,2,4,1,3,1,1,2,2,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,37,3,1,4,5,2,1,2,1,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,6,4,5,4,5,1,1,2,1,1,4,1,5,3,5,1,2,5,4,3149,80,10


In [38]:
# Number of rows and of numerical columns to be scaled.
X_num.shape

(14999, 40)

## Check Standard Scaler

In [39]:
# Standard scaling; set_output makes transform() return a DataFrame rather than an array.
# NOTE(review): fitted on the full X_num (no train/test split is visible before this point);
# refit on the training split only if this scaler is reused for modelling.
std_scaler = StandardScaler()
std_scaler.set_output(transform="pandas")
std_scaler.fit(X_num)

In [71]:
# Apply the fitted standard scaler; the result is a DataFrame with X_num's column names.
X_num_std = std_scaler.transform(X_num)
X_num_std

Unnamed: 0,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,-0.999667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.558997,0.558997,0.558997,0.558997,0.558997,-2.080478,-0.651538,-0.661782,-0.341235,2.432112,-0.147412,-0.713863,0.706023,-0.010208,-1.413786,-1.417496,-1.115417,-1.104762,0.001849,-1.156345,-0.151123,0.714878,-0.139760,-0.710533,-0.698923,-0.693458,0.374196,-0.708174,-1.421983,1.404261,-0.891785,-2.339462,1.004882
1,-0.999667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.558997,0.558997,0.558997,0.558997,0.558997,-2.080478,0.971113,-0.841993,-1.026126,-0.411165,-0.147412,2.077564,-0.001320,-0.010208,0.700761,-0.002029,-0.138132,-1.104762,0.001849,-1.156345,-0.151123,-0.696807,-1.110253,0.711860,-1.407004,0.014490,0.374196,1.419469,-1.421983,1.404261,-0.806918,1.659100,0.618484
2,-0.999667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.558997,0.558997,0.558997,0.558997,0.558997,-2.080478,-1.462863,1.620892,-0.341235,-0.411165,-0.147412,1.234869,-0.708664,0.695373,-1.413786,-0.002029,-1.115417,-1.104762,0.001849,0.011602,-1.119792,1.420721,-1.110253,-0.710533,1.425321,-0.693458,0.374196,0.710255,0.705460,0.696134,-0.121715,0.280286,0.618484
3,-0.999667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.558997,0.558997,0.558997,0.558997,0.558997,-2.080478,0.159788,1.540798,0.343655,2.432112,-0.147412,0.971527,-0.001320,-1.421371,0.700761,1.413438,-0.138132,-1.104762,0.001849,-1.156345,-1.119792,-1.402650,-0.139760,1.423057,-1.407004,-1.401407,0.374196,0.710255,0.705460,-1.428244,-0.090388,-0.133359,-0.154312
4,-0.999667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.558997,0.558997,0.558997,0.558997,0.558997,-2.080478,1.782438,1.100282,1.028546,-0.411165,-0.147412,-0.661195,0.706023,1.400954,0.700761,1.413438,-1.115417,-1.104762,0.001849,-1.156345,-1.119792,0.714878,-1.110253,1.423057,0.009158,1.430387,-0.781604,-0.708174,1.414607,0.696134,-0.146777,0.280286,1.391280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14994,1.000333,1.974255,2.783762,0.679940,0.679940,2.131859,0.019308,-1.788917,-1.788917,-1.788917,-1.788917,-1.788917,1.658639,0.159788,1.020188,1.028546,-0.411165,-0.147412,-0.924537,-1.416007,-0.715789,-0.708937,-0.709763,-0.138132,-1.104762,0.001849,-1.156345,-1.119792,-1.402650,-0.139760,0.000664,-0.698923,0.722439,0.374196,-0.708174,-0.003688,-1.428244,-1.064368,1.383337,1.004882
14995,1.000333,1.974255,2.783762,0.679940,0.679940,2.131859,0.019308,-1.788917,-1.788917,-1.788917,-1.788917,-1.788917,1.658639,0.159788,0.619719,1.028546,-0.411165,-0.147412,-0.661195,-1.416007,-1.421371,-0.708937,0.705705,-1.115417,-0.115075,0.001849,-1.156345,-1.119792,1.420721,-1.110253,0.711860,0.717240,0.014490,-0.781604,0.710255,1.414607,1.404261,-1.230115,1.521219,-0.927108
14996,-0.999667,1.974255,2.783762,0.679940,0.679940,2.131859,0.019308,-1.788917,-1.788917,-1.788917,-1.788917,-1.788917,1.658639,0.971113,0.819954,1.028546,-0.411165,-0.147412,-0.766531,0.706023,-0.010208,-1.413786,1.413438,-1.115417,-1.104762,0.001849,-1.156345,-0.151123,-0.696807,-1.110253,-0.710533,0.717240,0.722439,0.374196,0.001040,0.705460,1.404261,-1.282516,1.383337,-1.699904
14997,-0.999667,1.974255,2.783762,0.679940,0.679940,2.131859,0.019308,-1.788917,-1.788917,-1.788917,-1.788917,-1.788917,1.658639,0.971113,0.439508,1.713436,-0.411165,-0.147412,-0.871868,-0.708664,-0.715789,-0.004088,-0.709763,-1.115417,-0.115075,-1.153740,-1.156345,-1.119792,-1.402650,-0.139760,-1.421729,1.425321,-0.693458,-0.781604,0.001040,1.414607,-1.428244,-1.251190,1.107575,-0.540710


In [41]:
# Summary statistics of the standardised columns (means close to 0, std close to 1 expected).
X_num_std.describe()

Unnamed: 0,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
count,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0
mean,9.746928e-17,-3.031851e-16,1.819111e-16,2.425481e-16,2.425481e-16,1.21274e-16,-2.425481e-16,9.095553000000001e-17,-1.21274e-16,9.095553000000001e-17,9.095553000000001e-17,9.095553000000001e-17,-4.244592e-16,6.063702e-17,-6.063702e-17,-1.515926e-17,4.547777e-17,1.894907e-18,0.0,-9.474535e-18,-1.2316900000000001e-17,-5.210994e-17,-1.146419e-16,-1.21274e-16,0.0,9.095553000000001e-17,6.063702e-17,0.0,-1.847534e-17,-3.031851e-17,-2.439693e-17,-3.789814e-18,6.442684000000001e-17,-3.7898140000000005e-17,1.544349e-16,-6.016330000000001e-17,-1.6106710000000002e-17,-3.031851e-17,9.247146e-16,-6.063702e-17
std,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033
min,-0.9996667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,-1.788917,-1.788917,-1.788917,-1.788917,-1.788917,-2.080478,-1.462863,-2.103471,-1.026126,-0.4111653,-0.1474118,-0.924537,-1.416007,-1.421371,-1.413786,-1.417496,-1.115417,-1.104762,-1.15374,-1.156345,-1.119792,-1.40265,-1.110253,-1.421729,-1.407004,-1.401407,-0.7816045,-1.417389,-1.421983,-1.428244,-1.370801,-2.477344,-2.086302
25%,-0.9996667,-1.121168,-1.082078,-1.470718,-1.470718,-1.311578,-1.443304,0.5589974,0.5589974,0.5589974,0.5589974,0.5589974,-0.9120042,-0.6515376,-0.9020633,-0.3412352,-0.4111653,-0.1474118,-0.713863,-0.7086637,-0.7157893,-0.7089373,-0.7097627,-1.115417,-1.104762,-1.15374,-1.156345,-1.119792,-0.6968073,-1.110253,-0.7105327,-0.6989228,-0.6934581,-0.7816045,-0.7081742,-0.7128354,-0.7201179,-0.969533,-0.4091216,-0.54071
50%,-0.9996667,0.9424472,0.2065355,0.67994,0.67994,0.4101407,0.7506139,0.5589974,0.5589974,0.5589974,0.5589974,0.5589974,0.02277508,0.1597876,-0.02103137,-0.3412352,-0.4111653,-0.1474118,-0.503189,-0.001320462,-0.01020809,-0.004088396,-0.002028972,-0.1381321,-0.115075,0.001849065,0.01160238,-0.151123,0.009035389,-0.1397603,0.0006638276,0.009158461,0.01449031,0.3741961,0.001040251,-0.003687813,-0.01199174,-0.01748222,0.004522814,0.2320861
75%,1.000333,0.9424472,0.2065355,0.67994,0.67994,0.4101407,0.7506139,0.5589974,0.5589974,0.5589974,0.5589974,0.5589974,0.8991306,0.9711129,0.880024,0.3436553,-0.4111653,-0.1474118,0.655517,0.7060228,0.6953731,0.7007605,0.7057047,0.8391526,0.874612,0.001849065,0.01160238,0.817547,0.7148781,0.8307324,0.7118603,0.7172397,0.7224387,0.3741961,0.7102547,0.7054597,0.6961344,0.8465686,0.9696931,0.6184842
max,1.000333,1.974255,2.783762,0.67994,0.67994,2.131859,0.7506139,0.5589974,0.5589974,0.5589974,0.5589974,0.5589974,1.658639,2.593763,2.181549,4.452998,2.432112,6.783716,2.446244,1.413366,1.400954,1.405609,1.413438,2.793722,2.853986,3.468616,3.515444,2.754886,1.420721,2.771718,1.423057,1.425321,1.430387,3.841598,1.419469,1.414607,1.404261,2.901039,1.6591,1.39128


In [42]:
# Scaling must not change the number of rows or columns.
X_num_std.shape

(14999, 40)

## Check MinMaxScaler

In [43]:
# Min-max scaling; set_output makes transform() return a DataFrame rather than an array.
minmax_scaler = MinMaxScaler()
minmax_scaler.set_output(transform="pandas")
minmax_scaler.fit(X_num)

In [44]:
# Apply the fitted min-max scaler; the result is a DataFrame with X_num's column names.
X_num_minmax = minmax_scaler.transform(X_num)
X_num_minmax

Unnamed: 0,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.0,1.0,1.0,1.0,1.0,0.0,0.2,0.336449,0.125,1.0,0.0,0.062500,0.75,0.50,0.00,0.00,0.00,0.00,0.25,0.00,0.25,0.75,0.25,0.25,0.25,0.25,0.25,0.25,0.00,1.00,0.112133,0.033333,0.888889
1,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.0,1.0,1.0,1.0,1.0,0.0,0.6,0.294393,0.000,0.0,0.0,0.890625,0.50,0.50,0.75,0.50,0.25,0.00,0.25,0.00,0.25,0.25,0.00,0.75,0.00,0.50,0.25,1.00,0.00,1.00,0.132000,1.000000,0.777778
2,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.869159,0.125,0.0,0.0,0.640625,0.25,0.75,0.00,0.50,0.00,0.00,0.25,0.25,0.00,1.00,0.00,0.25,1.00,0.25,0.25,0.75,0.75,0.75,0.292400,0.666667,0.777778
3,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.0,1.0,1.0,1.0,1.0,0.0,0.4,0.850467,0.250,1.0,0.0,0.562500,0.50,0.00,0.75,1.00,0.25,0.00,0.25,0.00,0.00,0.00,0.25,1.00,0.00,0.00,0.25,0.75,0.75,0.00,0.299733,0.566667,0.555556
4,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.0,1.0,1.0,1.0,1.0,0.0,0.8,0.747664,0.375,0.0,0.0,0.078125,0.75,1.00,0.75,1.00,0.00,0.00,0.25,0.00,0.00,0.75,0.00,1.00,0.50,1.00,0.00,0.25,1.00,0.75,0.286533,0.666667,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14994,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,0.0,0.0,0.0,0.0,0.0,1.0,0.4,0.728972,0.375,0.0,0.0,0.000000,0.00,0.25,0.25,0.25,0.25,0.00,0.25,0.00,0.00,0.00,0.25,0.50,0.25,0.75,0.25,0.25,0.50,0.00,0.071733,0.933333,0.888889
14995,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,0.0,0.0,0.0,0.0,0.0,1.0,0.4,0.635514,0.375,0.0,0.0,0.078125,0.00,0.00,0.25,0.75,0.00,0.25,0.25,0.00,0.00,1.00,0.00,0.75,0.75,0.50,0.00,0.75,1.00,1.00,0.032933,0.966667,0.333333
14996,0.0,1.0,1.0,1.0,1.0,1.0,0.666667,0.0,0.0,0.0,0.0,0.0,1.0,0.6,0.682243,0.375,0.0,0.0,0.046875,0.75,0.50,0.00,1.00,0.00,0.00,0.25,0.00,0.25,0.25,0.00,0.25,0.75,0.75,0.25,0.50,0.75,1.00,0.020667,0.933333,0.111111
14997,0.0,1.0,1.0,1.0,1.0,1.0,0.666667,0.0,0.0,0.0,0.0,0.0,1.0,0.6,0.593458,0.500,0.0,0.0,0.015625,0.25,0.25,0.50,0.25,0.00,0.25,0.00,0.00,0.00,0.00,0.25,0.00,1.00,0.25,0.00,0.50,1.00,0.00,0.028000,0.866667,0.444444


## Check Robust Scaler (to be defined)

In [45]:
# Robust scaling; set_output makes transform() return a DataFrame rather than an array.
robust_scaler = RobustScaler()
robust_scaler.set_output(transform="pandas")
robust_scaler.fit(X_num)

In [46]:
# Apply the fitted robust scaler; the result is a DataFrame with X_num's column names.
X_num_robust = robust_scaler.transform(X_num)
X_num_robust

Unnamed: 0,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,0.0,0.0,0.0,0.0,0.0,-1.161290,-0.5,-0.359551,0.0,1.0,0.0,-0.153846,0.5,0.0,-1.0,-1.0,-0.5,-0.5,0.0,-1.0,0.0,0.5,0.0,-0.5,-0.5,-0.5,0.0,-0.5,-1.0,1.0,-0.481418,-1.7,0.666667
1,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,0.0,0.0,0.0,0.0,0.0,-1.161290,0.5,-0.460674,-1.0,0.0,0.0,1.884615,0.0,0.0,0.5,0.0,0.0,-0.5,0.0,-1.0,0.0,-0.5,-0.5,0.5,-1.0,0.0,0.0,1.0,-1.0,1.0,-0.434687,1.2,0.333333
2,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,0.0,0.0,0.0,0.0,0.0,-1.161290,-1.0,0.921348,0.0,0.0,0.0,1.269231,-0.5,0.5,-1.0,0.0,-0.5,-0.5,0.0,0.0,-0.5,1.0,-0.5,-0.5,1.0,-0.5,0.0,0.5,0.5,0.5,-0.057394,0.2,0.333333
3,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,0.0,0.0,0.0,0.0,0.0,-1.161290,0.0,0.876404,1.0,1.0,0.0,1.076923,0.0,-1.0,0.5,1.0,0.0,-0.5,0.0,-1.0,-0.5,-1.0,0.0,1.0,-1.0,-1.0,0.0,0.5,0.5,-1.0,-0.040144,-0.1,-0.333333
4,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.000000,0.0,0.0,0.0,0.0,0.0,-1.161290,1.0,0.629213,2.0,0.0,0.0,-0.115385,0.5,1.0,0.5,1.0,-0.5,-0.5,0.0,-1.0,-0.5,0.5,-0.5,1.0,0.0,1.0,-1.0,-0.5,1.0,0.5,-0.071193,0.2,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14994,1.0,0.5,2.0,0.0,0.0,1.0,-0.333333,-4.0,-3.0,-1.0,-2.0,-2.0,0.903226,0.0,0.584270,2.0,0.0,0.0,-0.307692,-1.0,-0.5,-0.5,-0.5,0.0,-0.5,0.0,-1.0,-0.5,-1.0,0.0,0.0,-0.5,0.5,0.0,-0.5,0.0,-1.0,-0.576447,1.0,0.666667
14995,1.0,0.5,2.0,0.0,0.0,1.0,-0.333333,-4.0,-3.0,-1.0,-2.0,-2.0,0.903226,0.0,0.359551,2.0,0.0,0.0,-0.115385,-1.0,-1.0,-0.5,0.5,-0.5,0.0,0.0,-1.0,-0.5,1.0,-0.5,0.5,0.5,0.0,-1.0,0.5,1.0,1.0,-0.667712,1.1,-1.000000
14996,0.0,0.5,2.0,0.0,0.0,1.0,-0.333333,-4.0,-3.0,-1.0,-2.0,-2.0,0.903226,0.5,0.471910,2.0,0.0,0.0,-0.192308,0.5,0.0,-1.0,1.0,-0.5,-0.5,0.0,-1.0,0.0,-0.5,-0.5,-0.5,0.5,0.5,0.0,0.0,0.5,1.0,-0.696566,1.0,-1.666667
14997,0.0,0.5,2.0,0.0,0.0,1.0,-0.333333,-4.0,-3.0,-1.0,-2.0,-2.0,0.903226,0.5,0.258427,3.0,0.0,0.0,-0.269231,-0.5,-0.5,0.0,-0.5,-0.5,0.0,-1.0,-1.0,-0.5,-1.0,0.0,-1.0,1.0,-0.5,-1.0,0.0,1.0,-1.0,-0.679316,0.8,-0.666667


# Define X_train, X_test, y_train, y_test

In [47]:
# Preview the first rows of the feature frame X (numeric and categorical columns).
X.head()

Unnamed: 0,Department,GEO,Role,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,sales,salary,Gender,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,Operations,US,VP,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,sales,low,M,5,4,3,1,1,1,1,2,1,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,Sales,UK,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,accounting,low,F,58,3,3,4,3,2,1,2,1,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,Finance,France,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,product_mng,medium,F,42,2,4,1,3,1,1,2,2,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,Human Resources,China,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,product_mng,high,M,37,3,1,4,5,2,1,2,1,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,IT,Korea,Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,sales,low,F,6,4,5,4,5,1,1,2,1,1,4,1,5,3,5,1,2,5,4,3149,80,10


# Pipeline structure

## Preprocessing structure

In [49]:
# Dtype-based column selectors: object-dtype columns are routed to the
# categorical branch, every other dtype to the numeric branch.
cat_col = make_column_selector(dtype_include=["object"])
num_col = make_column_selector(dtype_exclude=["object"])

In [50]:
# Numeric branch: standardise each column.
num_transformer = StandardScaler()
num_transformer.set_output(transform="pandas")

# Categorical branch: dense one-hot encoding; two-level columns keep a single
# indicator column (drop="if_binary").
# NOTE(review): handle_unknown is left at its default, so a category that was
# not seen during fit would raise at transform time — confirm this is intended.
cat_transformer = OneHotEncoder(drop="if_binary", sparse_output=False)

# Route each group of columns to its transformer.
preproc_basic = make_column_transformer(
    (num_transformer, num_col),
    (cat_transformer, cat_col),
)

preproc_basic

## Model selection 

In [51]:
# Class-weighted variant, kept for reference but currently disabled:
# classifier = LogisticRegression(class_weight='balanced')
# Active model: logistic regression with the library's default settings.
classifier = LogisticRegression()
classifier

## Pipeline (Baseline)

In [52]:
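# Disabled: the full pipeline (preprocessing + classifier) is not built here;
# the cells below fit the preprocessing step on its own instead.
# baseline_pipe = make_pipeline(preproc_basic, classifier)
# baseline_pipe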

In [53]:
# Same preview of X as in the "Define X_train, X_test, y_train, y_test" section,
# shown again before the preprocessing pipeline is fitted.
X.head()

Unnamed: 0,Department,GEO,Role,Will_Relocate,Percent_Remote,EMP_Sat_Remote_1,EMP_Sat_Remote_2,EMP_Sat_Remote_3,EMP_Sat_Remote_4,EMP_Sat_Remote_5,EMP_Engagement_1,EMP_Engagement_2,EMP_Engagement_3,EMP_Engagement_4,EMP_Engagement_5,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,sales,salary,Gender,LinkedIn_Hits,Emp_Work_Status2,Emp_Work_Status_3,Emp_Work_Status_4,Emp_Work_Status_5,Emp_Identity,Emp_Role,Emp_Position,Emp_Title,Emp_Competitive_1,Emp_Competitive_2,Emp_Competitive_3,Emp_Competitive_4,Emp_Competitive_5,Emp_Collaborative_1,Emp_Collaborative_2,Emp_Collaborative_3,Emp_Collaborative_4,Emp_Collaborative_5,Sensor_StepCount,Sensor_Heartbeat(Average/Min),Sensor_Proximity(1-highest/10-lowest)
0,Operations,US,VP,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,3,168,3,1,0,sales,low,M,5,4,3,1,1,1,1,2,1,2,4,2,2,2,2,2,2,1,5,1841,61,9
1,Sales,UK,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,5,159,2,0,0,accounting,low,F,58,3,3,4,3,2,1,2,1,2,2,1,4,1,3,2,5,1,5,1990,90,8
2,Finance,France,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,2,282,3,0,0,product_mng,medium,F,42,2,4,1,3,1,1,2,2,1,5,1,2,5,2,2,4,4,4,3193,80,8
3,Human Resources,China,Senior Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,4,278,4,1,0,product_mng,high,M,37,3,1,4,5,2,1,2,1,1,1,2,5,1,1,2,4,4,1,3248,77,6
4,IT,Korea,Director,0,0.4,2,3,2,3,2,5,5,4,4,3,0.36,6,256,5,0,0,sales,low,F,6,4,5,4,5,1,1,2,1,1,4,1,5,3,5,1,2,5,4,3149,80,10


In [54]:
# Preprocessing-only pipeline: fit it on the training features, then rebuild
# X_train as a DataFrame labelled with the generated feature names.
# NOTE(review): this cell overwrites X_train, so it is not idempotent —
# re-running it would feed the already-transformed frame back into the
# preprocessor.
baseline_pipe = make_pipeline(preproc_basic)
baseline_pipe.fit(X_train)
features_name = baseline_pipe.get_feature_names_out()
X_train = pd.DataFrame(
    data=baseline_pipe.transform(X_train),
    columns=features_name,
)
X_train.head()

Unnamed: 0,standardscaler__Will_Relocate,standardscaler__Percent_Remote,standardscaler__EMP_Sat_Remote_1,standardscaler__EMP_Sat_Remote_2,standardscaler__EMP_Sat_Remote_3,standardscaler__EMP_Sat_Remote_4,standardscaler__EMP_Sat_Remote_5,standardscaler__EMP_Engagement_1,standardscaler__EMP_Engagement_2,standardscaler__EMP_Engagement_3,standardscaler__EMP_Engagement_4,standardscaler__EMP_Engagement_5,standardscaler__last_evaluation,standardscaler__number_project,standardscaler__average_montly_hours,standardscaler__time_spend_company,standardscaler__Work_accident,standardscaler__promotion_last_5years,standardscaler__LinkedIn_Hits,standardscaler__Emp_Work_Status2,standardscaler__Emp_Work_Status_3,standardscaler__Emp_Work_Status_4,standardscaler__Emp_Work_Status_5,standardscaler__Emp_Identity,standardscaler__Emp_Role,standardscaler__Emp_Position,standardscaler__Emp_Title,standardscaler__Emp_Competitive_1,standardscaler__Emp_Competitive_2,standardscaler__Emp_Competitive_3,standardscaler__Emp_Competitive_4,standardscaler__Emp_Competitive_5,standardscaler__Emp_Collaborative_1,standardscaler__Emp_Collaborative_2,standardscaler__Emp_Collaborative_3,standardscaler__Emp_Collaborative_4,standardscaler__Emp_Collaborative_5,standardscaler__Sensor_StepCount,standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__Sensor_Proximity(1-highest/10-lowest),onehotencoder__Department_Finance,onehotencoder__Department_Human Resources,onehotencoder__Department_IT,onehotencoder__Department_Operations,onehotencoder__Department_Sales,onehotencoder__Department_Warehouse,onehotencoder__GEO_Australia,onehotencoder__GEO_China,onehotencoder__GEO_Colombia,onehotencoder__GEO_France,onehotencoder__GEO_Japan,onehotencoder__GEO_Korea,onehotencoder__GEO_Turkey,onehotencoder__GEO_UK,onehotencoder__GEO_US,onehotencoder__Role_Director,onehotencoder__Role_Level 1,onehotencoder__Role_Level 2-4,onehotencoder__Role_Manager,onehotencoder__Role_Senior Director,onehotencoder__Role_Senior 
Manager,onehotencoder__Role_VP,onehotencoder__sales_IT,onehotencoder__sales_RandD,onehotencoder__sales_accounting,onehotencoder__sales_hr,onehotencoder__sales_management,onehotencoder__sales_marketing,onehotencoder__sales_product_mng,onehotencoder__sales_sales,onehotencoder__sales_support,onehotencoder__sales_technical,onehotencoder__salary_high,onehotencoder__salary_low,onehotencoder__salary_medium,onehotencoder__Gender_M
0,1.00143,-0.605051,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,-0.266134,-0.650282,-1.726885,-0.343315,-0.411441,-0.146297,2.176088,1.42557,1.412297,-0.007851,-1.417657,-1.106323,-0.11654,-1.153238,-1.151759,-0.15106,1.425947,-1.104903,0.714402,-0.69373,1.424446,0.368239,-0.71594,-0.015507,-0.717802,-1.30334,-1.646643,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
1,-0.998572,-1.120838,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,-0.500572,-0.650282,-0.583133,0.33358,2.430481,-0.146297,0.758281,0.007899,-1.413508,0.696649,-0.709301,-0.135876,-0.11654,-1.153238,-1.151759,-1.124654,0.00864,-0.137479,-1.418645,-0.69373,-1.409062,-0.776267,-0.006889,1.400226,-1.426425,-0.808067,-1.784618,1.002317,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,0.55597,0.55597,0.55597,0.55597,0.55597,-1.145276,0.161913,-0.783791,-0.343315,-0.411441,-0.146297,1.493441,-0.700936,0.705846,-0.712351,1.415768,-1.106323,-1.1113,-1.153238,-1.151759,-0.15106,0.717293,-1.104903,-1.418645,1.4287,-1.409062,0.368239,-0.71594,-0.015507,1.408067,-0.990867,1.526777,0.228714,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,-0.998572,0.94231,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,1.199101,0.161913,1.303056,2.364266,-0.411441,-0.146297,-0.81706,0.716735,0.705846,-0.007851,0.707412,-0.135876,-0.11654,1.164941,2.337752,1.796126,0.00864,-0.137479,1.425417,1.4287,1.424446,-0.776267,-0.71594,-0.723373,-0.009179,1.295842,-0.40487,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,0.55597,0.55597,0.55597,0.55597,0.55597,-1.086667,-0.650282,-0.904186,-0.343315,-0.411441,-0.146297,1.020838,0.007899,-0.707057,0.696649,-0.000945,-0.135876,-1.1113,0.005851,-1.151759,-0.15106,-0.700014,-1.104903,1.425417,-1.401207,1.424446,0.368239,0.702162,-0.015507,1.408067,-1.113685,-2.198542,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [55]:
# (rows, columns) of the preprocessed training frame.
X_train.shape

(10499, 76)

In [56]:
# Summary statistics of the preprocessed training features
# (scaled numeric columns followed by the one-hot columns).
X_train.describe()

Unnamed: 0,standardscaler__Will_Relocate,standardscaler__Percent_Remote,standardscaler__EMP_Sat_Remote_1,standardscaler__EMP_Sat_Remote_2,standardscaler__EMP_Sat_Remote_3,standardscaler__EMP_Sat_Remote_4,standardscaler__EMP_Sat_Remote_5,standardscaler__EMP_Engagement_1,standardscaler__EMP_Engagement_2,standardscaler__EMP_Engagement_3,standardscaler__EMP_Engagement_4,standardscaler__EMP_Engagement_5,standardscaler__last_evaluation,standardscaler__number_project,standardscaler__average_montly_hours,standardscaler__time_spend_company,standardscaler__Work_accident,standardscaler__promotion_last_5years,standardscaler__LinkedIn_Hits,standardscaler__Emp_Work_Status2,standardscaler__Emp_Work_Status_3,standardscaler__Emp_Work_Status_4,standardscaler__Emp_Work_Status_5,standardscaler__Emp_Identity,standardscaler__Emp_Role,standardscaler__Emp_Position,standardscaler__Emp_Title,standardscaler__Emp_Competitive_1,standardscaler__Emp_Competitive_2,standardscaler__Emp_Competitive_3,standardscaler__Emp_Competitive_4,standardscaler__Emp_Competitive_5,standardscaler__Emp_Collaborative_1,standardscaler__Emp_Collaborative_2,standardscaler__Emp_Collaborative_3,standardscaler__Emp_Collaborative_4,standardscaler__Emp_Collaborative_5,standardscaler__Sensor_StepCount,standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__Sensor_Proximity(1-highest/10-lowest),onehotencoder__Department_Finance,onehotencoder__Department_Human Resources,onehotencoder__Department_IT,onehotencoder__Department_Operations,onehotencoder__Department_Sales,onehotencoder__Department_Warehouse,onehotencoder__GEO_Australia,onehotencoder__GEO_China,onehotencoder__GEO_Colombia,onehotencoder__GEO_France,onehotencoder__GEO_Japan,onehotencoder__GEO_Korea,onehotencoder__GEO_Turkey,onehotencoder__GEO_UK,onehotencoder__GEO_US,onehotencoder__Role_Director,onehotencoder__Role_Level 1,onehotencoder__Role_Level 2-4,onehotencoder__Role_Manager,onehotencoder__Role_Senior Director,onehotencoder__Role_Senior 
Manager,onehotencoder__Role_VP,onehotencoder__sales_IT,onehotencoder__sales_RandD,onehotencoder__sales_accounting,onehotencoder__sales_hr,onehotencoder__sales_management,onehotencoder__sales_marketing,onehotencoder__sales_product_mng,onehotencoder__sales_sales,onehotencoder__sales_support,onehotencoder__sales_technical,onehotencoder__salary_high,onehotencoder__salary_low,onehotencoder__salary_medium,onehotencoder__Gender_M
count,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0,10499.0
mean,-3.383859e-19,-5.414175e-17,-4.060631e-17,3.553052e-16,-7.309136000000001e-17,-2.619107e-16,7.816715e-17,1.88481e-16,1.88481e-16,1.88481e-16,1.88481e-16,1.88481e-16,-5.2111430000000005e-17,-1.136977e-16,-2.199508e-16,9.339451000000001e-17,-1.2012700000000001e-17,6.767718e-19,7.749037000000001e-17,-4.466694e-17,9.034904e-17,9.001065000000001e-17,-1.0828350000000001e-17,7.44449e-17,2.355166e-16,-7.782876e-17,-8.831872000000001e-17,-1.272331e-16,-7.512167000000001e-17,7.038427e-17,2.5378940000000002e-17,7.579844e-17,-7.512167000000001e-17,-1.028693e-16,-4.466694e-17,4.399017e-17,1.759607e-17,-1.4888980000000002e-17,7.984216e-16,7.850553000000001e-17,0.163063,0.168778,0.167445,0.167254,0.165159,0.168302,0.105629,0.112773,0.111249,0.110296,0.112106,0.112106,0.108772,0.116964,0.110106,0.043528,0.220592,0.455377,0.16011,0.023431,0.089723,0.007239,0.082198,0.052576,0.050671,0.050386,0.042575,0.05791,0.060387,0.271359,0.150395,0.181541,0.081151,0.486713,0.432136,0.496047
std,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,1.000048,0.369441,0.374574,0.37339,0.37322,0.371341,0.374152,0.307377,0.31633,0.314455,0.313274,0.315512,0.315512,0.311368,0.321392,0.313036,0.204052,0.414666,0.498028,0.366726,0.151275,0.285798,0.084777,0.27468,0.223197,0.219336,0.21875,0.201908,0.233585,0.238213,0.444682,0.357475,0.385484,0.273079,0.499847,0.495397,0.500008
min,-0.9985723,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,-1.798659,-1.798659,-1.798659,-1.798659,-1.798659,-2.083027,-1.462477,-2.108136,-1.020211,-0.4114412,-0.1462972,-0.9220825,-1.409772,-1.413508,-1.41685,-1.417657,-1.106323,-1.1113,-1.153238,-1.151759,-1.124654,-1.408667,-1.104903,-1.418645,-1.401207,-1.409062,-0.7762673,-1.42499,-1.43124,-1.426425,-1.372461,-2.474491,-2.092096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.9985723,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,0.5559696,0.5559696,0.5559696,0.5559696,0.5559696,-0.9108382,-0.6502821,-0.904186,-0.3433154,-0.4114412,-0.1462972,-0.712037,-0.7009364,-0.7070567,-0.7123506,-0.7093009,-1.106323,-1.1113,-1.153238,-1.151759,-1.124654,-0.7000138,-1.104903,-0.7076295,-0.6937303,-0.7006853,-0.7762673,-0.7159395,-0.7233735,-0.7178022,-0.9685881,-0.4048697,-0.5448895,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,-0.9985723,0.9423099,0.2121139,0.6807931,0.6807931,0.4147298,0.7501236,0.5559696,0.5559696,0.5559696,0.5559696,0.5559696,0.02691266,0.161913,-0.02128902,-0.3433154,-0.4114412,-0.1462972,-0.5019915,0.007899207,-0.0006055872,-0.007850888,-0.0009445651,-0.1358756,-0.1165401,0.005851199,0.01141124,-0.1510605,0.008639646,-0.1374794,0.003386111,0.01374657,0.007691683,0.3682392,-0.006888579,-0.01550712,-0.009179229,-0.01403164,0.009054636,0.2287136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.00143,0.9423099,0.2121139,0.6807931,0.6807931,0.4147298,0.7501236,0.5559696,0.5559696,0.5559696,0.5559696,0.5559696,0.9060541,0.974108,0.8816738,0.33358,-0.4114412,-0.1462972,0.6532587,0.7167348,0.7058456,0.6966489,0.7074118,0.8345718,0.8782199,0.005851199,0.01141124,0.822533,0.7172931,0.8299444,0.7144017,0.7212235,0.7160687,0.3682392,0.7021623,0.6923592,0.6994438,0.8479826,0.9748781,0.6155151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0
max,1.00143,1.973884,2.803137,0.6807931,0.6807931,2.141236,0.7501236,0.5559696,0.5559696,0.5559696,0.5559696,0.5559696,1.667977,2.598498,2.185953,4.394952,2.430481,6.835402,2.438645,1.42557,1.412297,1.401149,1.415768,2.775466,2.86774,3.483119,3.500923,2.76972,1.425947,2.764792,1.425417,1.4287,1.424446,3.801759,1.411213,1.400226,1.408067,2.911333,1.664752,1.389118,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [70]:
# Oversampling: generate synthetic minority-class rows with SMOTE on the
# preprocessed training set.
# random_state is fixed so the synthetic samples (and everything trained on
# them) are reproducible across kernel restarts.
# NOTE(review): SMOTE interpolates every column, including the one-hot ones;
# the describe() output of the resampled frame shows fractional values in
# those columns — verify whether a categorical-aware sampler is needed.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [58]:
# (rows, columns) of the training frame after oversampling.
X_train_resampled.shape

(16040, 76)

In [59]:
# Preview the first rows of the oversampled training frame.
X_train_resampled.head()

Unnamed: 0,standardscaler__Will_Relocate,standardscaler__Percent_Remote,standardscaler__EMP_Sat_Remote_1,standardscaler__EMP_Sat_Remote_2,standardscaler__EMP_Sat_Remote_3,standardscaler__EMP_Sat_Remote_4,standardscaler__EMP_Sat_Remote_5,standardscaler__EMP_Engagement_1,standardscaler__EMP_Engagement_2,standardscaler__EMP_Engagement_3,standardscaler__EMP_Engagement_4,standardscaler__EMP_Engagement_5,standardscaler__last_evaluation,standardscaler__number_project,standardscaler__average_montly_hours,standardscaler__time_spend_company,standardscaler__Work_accident,standardscaler__promotion_last_5years,standardscaler__LinkedIn_Hits,standardscaler__Emp_Work_Status2,standardscaler__Emp_Work_Status_3,standardscaler__Emp_Work_Status_4,standardscaler__Emp_Work_Status_5,standardscaler__Emp_Identity,standardscaler__Emp_Role,standardscaler__Emp_Position,standardscaler__Emp_Title,standardscaler__Emp_Competitive_1,standardscaler__Emp_Competitive_2,standardscaler__Emp_Competitive_3,standardscaler__Emp_Competitive_4,standardscaler__Emp_Competitive_5,standardscaler__Emp_Collaborative_1,standardscaler__Emp_Collaborative_2,standardscaler__Emp_Collaborative_3,standardscaler__Emp_Collaborative_4,standardscaler__Emp_Collaborative_5,standardscaler__Sensor_StepCount,standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__Sensor_Proximity(1-highest/10-lowest),onehotencoder__Department_Finance,onehotencoder__Department_Human Resources,onehotencoder__Department_IT,onehotencoder__Department_Operations,onehotencoder__Department_Sales,onehotencoder__Department_Warehouse,onehotencoder__GEO_Australia,onehotencoder__GEO_China,onehotencoder__GEO_Colombia,onehotencoder__GEO_France,onehotencoder__GEO_Japan,onehotencoder__GEO_Korea,onehotencoder__GEO_Turkey,onehotencoder__GEO_UK,onehotencoder__GEO_US,onehotencoder__Role_Director,onehotencoder__Role_Level 1,onehotencoder__Role_Level 2-4,onehotencoder__Role_Manager,onehotencoder__Role_Senior Director,onehotencoder__Role_Senior 
Manager,onehotencoder__Role_VP,onehotencoder__sales_IT,onehotencoder__sales_RandD,onehotencoder__sales_accounting,onehotencoder__sales_hr,onehotencoder__sales_management,onehotencoder__sales_marketing,onehotencoder__sales_product_mng,onehotencoder__sales_sales,onehotencoder__sales_support,onehotencoder__sales_technical,onehotencoder__salary_high,onehotencoder__salary_low,onehotencoder__salary_medium,onehotencoder__Gender_M
0,1.00143,-0.605051,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,-0.266134,-0.650282,-1.726885,-0.343315,-0.411441,-0.146297,2.176088,1.42557,1.412297,-0.007851,-1.417657,-1.106323,-0.11654,-1.153238,-1.151759,-0.15106,1.425947,-1.104903,0.714402,-0.69373,1.424446,0.368239,-0.71594,-0.015507,-0.717802,-1.30334,-1.646643,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
1,-0.998572,-1.120838,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,-0.500572,-0.650282,-0.583133,0.33358,2.430481,-0.146297,0.758281,0.007899,-1.413508,0.696649,-0.709301,-0.135876,-0.11654,-1.153238,-1.151759,-1.124654,0.00864,-0.137479,-1.418645,-0.69373,-1.409062,-0.776267,-0.006889,1.400226,-1.426425,-0.808067,-1.784618,1.002317,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,0.55597,0.55597,0.55597,0.55597,0.55597,-1.145276,0.161913,-0.783791,-0.343315,-0.411441,-0.146297,1.493441,-0.700936,0.705846,-0.712351,1.415768,-1.106323,-1.1113,-1.153238,-1.151759,-0.15106,0.717293,-1.104903,-1.418645,1.4287,-1.409062,0.368239,-0.71594,-0.015507,1.408067,-0.990867,1.526777,0.228714,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,-0.998572,0.94231,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,1.199101,0.161913,1.303056,2.364266,-0.411441,-0.146297,-0.81706,0.716735,0.705846,-0.007851,0.707412,-0.135876,-0.11654,1.164941,2.337752,1.796126,0.00864,-0.137479,1.425417,1.4287,1.424446,-0.776267,-0.71594,-0.723373,-0.009179,1.295842,-0.40487,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,0.55597,0.55597,0.55597,0.55597,0.55597,-1.086667,-0.650282,-0.904186,-0.343315,-0.411441,-0.146297,1.020838,0.007899,-0.707057,0.696649,-0.000945,-0.135876,-1.1113,0.005851,-1.151759,-0.15106,-0.700014,-1.104903,1.425417,-1.401207,1.424446,0.368239,0.702162,-0.015507,1.408067,-1.113685,-2.198542,0.615515,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [60]:
# Summary statistics after oversampling, for comparison with X_train.describe().
X_train_resampled.describe()

Unnamed: 0,standardscaler__Will_Relocate,standardscaler__Percent_Remote,standardscaler__EMP_Sat_Remote_1,standardscaler__EMP_Sat_Remote_2,standardscaler__EMP_Sat_Remote_3,standardscaler__EMP_Sat_Remote_4,standardscaler__EMP_Sat_Remote_5,standardscaler__EMP_Engagement_1,standardscaler__EMP_Engagement_2,standardscaler__EMP_Engagement_3,standardscaler__EMP_Engagement_4,standardscaler__EMP_Engagement_5,standardscaler__last_evaluation,standardscaler__number_project,standardscaler__average_montly_hours,standardscaler__time_spend_company,standardscaler__Work_accident,standardscaler__promotion_last_5years,standardscaler__LinkedIn_Hits,standardscaler__Emp_Work_Status2,standardscaler__Emp_Work_Status_3,standardscaler__Emp_Work_Status_4,standardscaler__Emp_Work_Status_5,standardscaler__Emp_Identity,standardscaler__Emp_Role,standardscaler__Emp_Position,standardscaler__Emp_Title,standardscaler__Emp_Competitive_1,standardscaler__Emp_Competitive_2,standardscaler__Emp_Competitive_3,standardscaler__Emp_Competitive_4,standardscaler__Emp_Competitive_5,standardscaler__Emp_Collaborative_1,standardscaler__Emp_Collaborative_2,standardscaler__Emp_Collaborative_3,standardscaler__Emp_Collaborative_4,standardscaler__Emp_Collaborative_5,standardscaler__Sensor_StepCount,standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__Sensor_Proximity(1-highest/10-lowest),onehotencoder__Department_Finance,onehotencoder__Department_Human Resources,onehotencoder__Department_IT,onehotencoder__Department_Operations,onehotencoder__Department_Sales,onehotencoder__Department_Warehouse,onehotencoder__GEO_Australia,onehotencoder__GEO_China,onehotencoder__GEO_Colombia,onehotencoder__GEO_France,onehotencoder__GEO_Japan,onehotencoder__GEO_Korea,onehotencoder__GEO_Turkey,onehotencoder__GEO_UK,onehotencoder__GEO_US,onehotencoder__Role_Director,onehotencoder__Role_Level 1,onehotencoder__Role_Level 2-4,onehotencoder__Role_Manager,onehotencoder__Role_Senior Director,onehotencoder__Role_Senior 
Manager,onehotencoder__Role_VP,onehotencoder__sales_IT,onehotencoder__sales_RandD,onehotencoder__sales_accounting,onehotencoder__sales_hr,onehotencoder__sales_management,onehotencoder__sales_marketing,onehotencoder__sales_product_mng,onehotencoder__sales_sales,onehotencoder__sales_support,onehotencoder__sales_technical,onehotencoder__salary_high,onehotencoder__salary_low,onehotencoder__salary_medium,onehotencoder__Gender_M
count,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0
mean,-0.019314,-0.003739,-0.030049,-0.103147,-0.103147,-0.061445,-0.114241,-0.621345,-0.621345,-0.621345,-0.621345,-0.621345,0.007526,0.014006,0.042175,0.088874,-0.109503,-0.037475,-0.010493,0.001362,0.001588,0.005581,-0.001814,-0.219966,-0.219695,-0.199813,-0.195472,-0.203703,0.001332,-0.206611,-0.008484,-0.005333,0.008835,-0.006835,0.006704,-0.008458,0.003892,-0.210086,0.040281,-0.002347,0.1629,0.170562,0.16488,0.164078,0.168369,0.169211,0.100736,0.11226,0.111306,0.110584,0.111859,0.112084,0.110591,0.115394,0.115187,0.043282,0.217026,0.465548,0.157333,0.022358,0.087303,0.00715,0.081539,0.041955,0.053562,0.052162,0.036443,0.055184,0.057016,0.28089,0.153413,0.187836,0.058729,0.53339,0.407881,0.489054
std,0.982362,1.010153,1.072014,1.034653,1.034653,1.060557,1.02593,1.177351,1.177351,1.177351,1.177351,1.177351,1.05426,1.174161,1.074322,0.897101,0.867554,0.863249,0.980262,0.967591,0.964275,0.966743,0.968306,0.906318,0.901763,0.904778,0.90269,0.903869,0.965583,0.904629,0.969316,0.966305,0.966921,0.961315,0.972613,0.963538,0.972952,1.100484,1.164122,0.973633,0.350997,0.357794,0.353416,0.352565,0.356385,0.356604,0.285919,0.299561,0.299096,0.297056,0.298939,0.299684,0.296742,0.303067,0.300677,0.192706,0.394596,0.477911,0.34758,0.140348,0.268366,0.078913,0.258884,0.19418,0.21237,0.209631,0.180099,0.217253,0.219916,0.428485,0.342605,0.371249,0.231221,0.488207,0.48165,0.476658
min,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,-1.798659,-1.798659,-1.798659,-1.798659,-1.798659,-2.083027,-1.462477,-2.108136,-1.020211,-0.411441,-0.146297,-0.922082,-1.409772,-1.413508,-1.41685,-1.417657,-1.106323,-1.1113,-1.153238,-1.151759,-1.124654,-1.408667,-1.104903,-1.418645,-1.401207,-1.409062,-0.776267,-1.42499,-1.43124,-1.426425,-1.372461,-2.474491,-2.092096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.998572,-1.120838,-1.083398,-1.468875,-1.468875,-1.311776,-1.441994,-1.798659,-1.798659,-1.798659,-1.798659,-1.798659,-1.028057,-1.462477,-1.007416,-0.343315,-0.411441,-0.146297,-0.712037,-0.700936,-0.707057,-0.712351,-0.709301,-1.106323,-1.1113,-1.153238,-1.151759,-1.124654,-0.700014,-1.104903,-0.707629,-0.69373,-0.700685,-0.776267,-0.71594,-0.723373,-0.717802,-1.099758,-0.818794,-0.544889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,-0.432719,0.94231,0.212114,0.680793,0.680793,0.41473,0.750124,-0.621345,-0.621345,-0.621345,-0.621345,-0.621345,0.085522,0.161913,0.07904,-0.343315,-0.411441,-0.146297,-0.501992,0.007899,-0.000606,-0.007851,-0.000945,-0.135876,-0.11654,0.005851,0.011411,-0.15106,0.00864,-0.137479,0.003386,0.013747,0.007692,0.067286,-0.006889,-0.015507,-0.009179,-0.850971,0.009055,0.228714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.211938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.415281
75%,1.00143,0.94231,0.212114,0.680793,0.680793,0.41473,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,0.964664,0.974108,1.022135,0.365513,-0.411441,-0.146297,0.653259,0.716735,0.705846,0.696649,0.707412,-0.135876,-0.11654,0.005851,0.011411,-0.15106,0.717293,-0.137479,0.714402,0.721223,0.716069,0.368239,0.702162,0.692359,0.699444,0.692746,1.195941,0.647745,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117414,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.784915,0.0,0.0,0.0,1.0,1.0,1.0
max,1.00143,1.973884,2.803137,0.680793,0.680793,2.141236,0.750124,0.55597,0.55597,0.55597,0.55597,0.55597,1.667977,2.598498,2.185953,4.394952,2.430481,6.835402,2.438645,1.42557,1.412297,1.401149,1.415768,2.775466,2.86774,3.483119,3.500923,2.76972,1.425947,2.764792,1.425417,1.4287,1.424446,3.801759,1.411213,1.400226,1.408067,2.911333,1.664752,1.389118,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [61]:
# Second preprocessing pass over the oversampled training frame.
#
# Fit and transform now go through the same pipeline object. The previous
# version fitted `baseline_pipe_2` but transformed with `baseline_pipe`, which
# only worked because both pipelines wrap the same `preproc_basic` instance.
#
# NOTE(review): X_train_resampled is already scaled and one-hot encoded, so
# this pass standardises every column a second time (including the one-hot
# columns) and produces doubly-prefixed names such as
# "standardscaler__onehotencoder__...". Confirm that this is intended.
# NOTE(review): make_pipeline does not copy `preproc_basic`, so this fit also
# refits the preprocessor used by `baseline_pipe`, now on the transformed
# columns rather than the raw ones — check how X_test is transformed later.
baseline_pipe_2 = make_pipeline(preproc_basic)
features_name = baseline_pipe_2.fit(X_train_resampled).get_feature_names_out()
X_train_resampled = pd.DataFrame(
    baseline_pipe_2.transform(X_train_resampled),
    columns=features_name,
)
X_train_resampled.head()

Unnamed: 0,standardscaler__standardscaler__Will_Relocate,standardscaler__standardscaler__Percent_Remote,standardscaler__standardscaler__EMP_Sat_Remote_1,standardscaler__standardscaler__EMP_Sat_Remote_2,standardscaler__standardscaler__EMP_Sat_Remote_3,standardscaler__standardscaler__EMP_Sat_Remote_4,standardscaler__standardscaler__EMP_Sat_Remote_5,standardscaler__standardscaler__EMP_Engagement_1,standardscaler__standardscaler__EMP_Engagement_2,standardscaler__standardscaler__EMP_Engagement_3,standardscaler__standardscaler__EMP_Engagement_4,standardscaler__standardscaler__EMP_Engagement_5,standardscaler__standardscaler__last_evaluation,standardscaler__standardscaler__number_project,standardscaler__standardscaler__average_montly_hours,standardscaler__standardscaler__time_spend_company,standardscaler__standardscaler__Work_accident,standardscaler__standardscaler__promotion_last_5years,standardscaler__standardscaler__LinkedIn_Hits,standardscaler__standardscaler__Emp_Work_Status2,standardscaler__standardscaler__Emp_Work_Status_3,standardscaler__standardscaler__Emp_Work_Status_4,standardscaler__standardscaler__Emp_Work_Status_5,standardscaler__standardscaler__Emp_Identity,standardscaler__standardscaler__Emp_Role,standardscaler__standardscaler__Emp_Position,standardscaler__standardscaler__Emp_Title,standardscaler__standardscaler__Emp_Competitive_1,standardscaler__standardscaler__Emp_Competitive_2,standardscaler__standardscaler__Emp_Competitive_3,standardscaler__standardscaler__Emp_Competitive_4,standardscaler__standardscaler__Emp_Competitive_5,standardscaler__standardscaler__Emp_Collaborative_1,standardscaler__standardscaler__Emp_Collaborative_2,standardscaler__standardscaler__Emp_Collaborative_3,standardscaler__standardscaler__Emp_Collaborative_4,standardscaler__standardscaler__Emp_Collaborative_5,standardscaler__standardscaler__Sensor_StepCount,standardscaler__standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__standardscaler__Sensor_Proximity(1-highest/10-lowest
),standardscaler__onehotencoder__Department_Finance,standardscaler__onehotencoder__Department_Human Resources,standardscaler__onehotencoder__Department_IT,standardscaler__onehotencoder__Department_Operations,standardscaler__onehotencoder__Department_Sales,standardscaler__onehotencoder__Department_Warehouse,standardscaler__onehotencoder__GEO_Australia,standardscaler__onehotencoder__GEO_China,standardscaler__onehotencoder__GEO_Colombia,standardscaler__onehotencoder__GEO_France,standardscaler__onehotencoder__GEO_Japan,standardscaler__onehotencoder__GEO_Korea,standardscaler__onehotencoder__GEO_Turkey,standardscaler__onehotencoder__GEO_UK,standardscaler__onehotencoder__GEO_US,standardscaler__onehotencoder__Role_Director,standardscaler__onehotencoder__Role_Level 1,standardscaler__onehotencoder__Role_Level 2-4,standardscaler__onehotencoder__Role_Manager,standardscaler__onehotencoder__Role_Senior Director,standardscaler__onehotencoder__Role_Senior Manager,standardscaler__onehotencoder__Role_VP,standardscaler__onehotencoder__sales_IT,standardscaler__onehotencoder__sales_RandD,standardscaler__onehotencoder__sales_accounting,standardscaler__onehotencoder__sales_hr,standardscaler__onehotencoder__sales_management,standardscaler__onehotencoder__sales_marketing,standardscaler__onehotencoder__sales_product_mng,standardscaler__onehotencoder__sales_sales,standardscaler__onehotencoder__sales_support,standardscaler__onehotencoder__sales_technical,standardscaler__onehotencoder__salary_high,standardscaler__onehotencoder__salary_low,standardscaler__onehotencoder__salary_medium,standardscaler__onehotencoder__Gender_M
0,1.039103,-0.595287,0.225903,0.757708,0.757708,0.448999,0.842544,1.0,1.0,1.0,1.0,1.0,-0.259584,-0.565774,-1.646727,-0.481777,-0.348045,-0.126065,2.230679,1.471958,1.463018,-0.013895,-1.462232,-0.978006,0.114396,-1.0538,-1.059408,0.058244,1.475439,-0.993026,0.745792,-0.712424,1.464085,0.39018,-0.743015,-0.007316,-0.741781,-0.993461,-1.44914,0.634614,-0.464119,-0.47672,2.363067,-0.465398,-0.472449,-0.474522,-0.352335,-0.374759,-0.372153,-0.372278,-0.374199,2.96293,-0.372696,-0.380764,-0.383103,-0.224607,-0.550013,1.118343,-0.452668,-0.159306,-0.325325,-0.090609,-0.314975,-0.216068,4.456692,-0.248835,-0.202358,-0.254016,-0.259271,-0.655561,-0.447797,-0.505973,-0.254004,-1.092583,1.229393,1.071967
1,-0.996872,-1.105906,0.225903,0.757708,0.757708,0.448999,0.842544,1.0,1.0,1.0,1.0,1.0,-0.481963,-0.565774,-0.582066,0.272782,2.927843,-0.126065,0.784279,0.006757,-1.467569,0.714864,-0.730667,0.092785,0.114396,-1.0538,-1.059408,-1.01893,0.007568,0.076422,-1.454845,-0.712424,-1.46645,-0.80042,-0.013976,1.462036,-1.470126,-0.543397,-1.567667,1.031903,-0.464119,-0.47672,-0.466547,-0.465398,-0.472449,2.329794,-0.352335,-0.374759,-0.372153,-0.372278,2.971073,-0.374017,-0.372696,-0.380764,-0.383103,-0.224607,-0.550013,-0.97416,2.424459,-0.159306,-0.325325,-0.090609,-0.314975,-0.216068,4.456692,-0.248835,-0.202358,-0.254016,-0.259271,-0.655561,-0.447797,-0.505973,-0.254004,0.955792,-0.846867,-1.026038
2,-0.996872,-1.105906,-0.982618,-1.320027,-1.320027,-1.178975,-1.294235,1.0,1.0,1.0,1.0,1.0,-1.093505,0.125972,-0.768849,-0.481777,-0.348045,-0.126065,1.534264,-0.725844,0.730371,-0.742653,1.464028,-0.978006,-0.988767,-1.0538,-1.059408,0.058244,0.741503,-0.993026,-1.454845,1.484084,-1.46645,0.39018,-0.743015,-0.007316,1.443256,-0.709511,1.276965,0.237325,-0.464119,-0.47672,-0.466547,-0.465398,-0.472449,2.329794,-0.352335,-0.374759,-0.372153,-0.372278,2.971073,-0.374017,-0.372696,-0.380764,-0.383103,4.964811,-0.550013,-0.97416,-0.452668,-0.159306,-0.325325,-0.090609,-0.314975,-0.216068,-0.252218,-0.248835,-0.202358,-0.254016,4.288066,-0.655561,-0.447797,-0.505973,-0.254004,0.955792,-0.846867,-1.026038
3,-0.996872,0.93657,0.225903,0.757708,0.757708,0.448999,0.842544,1.0,1.0,1.0,1.0,1.0,1.130284,0.125972,1.17369,2.536462,-0.348045,-0.126065,-0.822833,0.739357,0.730371,-0.013895,0.732463,0.092785,0.114396,1.508432,2.806393,2.212592,0.007568,0.076422,1.479338,1.484084,1.464085,-0.80042,-0.743015,-0.741992,-0.013435,1.368466,-0.382404,0.634614,-0.464119,-0.47672,2.363067,-0.465398,-0.472449,-0.474522,-0.352335,-0.374759,-0.372153,2.994199,-0.374199,-0.374017,-0.372696,-0.380764,-0.383103,-0.224607,1.984303,-0.97416,-0.452668,-0.159306,-0.325325,-0.090609,-0.314975,-0.216068,-0.252218,-0.248835,-0.202358,-0.254016,4.288066,-0.655561,-0.447797,-0.505973,-0.254004,-1.092583,1.229393,1.071967
4,-0.996872,-1.105906,-0.982618,-1.320027,-1.320027,-1.178975,-1.294235,1.0,1.0,1.0,1.0,1.0,-1.03791,-0.565774,-0.880919,-0.481777,-0.348045,-0.126065,1.052131,0.006757,-0.734922,0.714864,0.000898,0.092785,-0.988767,0.227316,-1.059408,0.058244,-0.726367,-0.993026,1.479338,-1.444593,1.464085,0.39018,0.715063,-0.007316,1.443256,-0.821118,-1.923246,0.634614,-0.464119,-0.47672,2.363067,-0.465398,-0.472449,-0.474522,-0.352335,-0.374759,-0.372153,-0.372278,-0.374199,2.96293,-0.372696,-0.380764,-0.383103,-0.224607,-0.550013,1.118343,-0.452668,-0.159306,-0.325325,-0.090609,-0.314975,-0.216068,-0.252218,-0.248835,-0.202358,-0.254016,4.288066,-0.655561,-0.447797,-0.505973,-0.254004,0.955792,-0.846867,-1.026038


In [62]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
# of the resampled training features.
# NOTE(review): every column reports std == 1.000031, which equals
# sqrt(n / (n - 1)) for n = 16040 — this suggests the scaler was fit on these
# resampled rows rather than on the pre-resampling split; confirm the order of
# scaling vs. resampling upstream.
X_train_resampled.describe()

Unnamed: 0,standardscaler__standardscaler__Will_Relocate,standardscaler__standardscaler__Percent_Remote,standardscaler__standardscaler__EMP_Sat_Remote_1,standardscaler__standardscaler__EMP_Sat_Remote_2,standardscaler__standardscaler__EMP_Sat_Remote_3,standardscaler__standardscaler__EMP_Sat_Remote_4,standardscaler__standardscaler__EMP_Sat_Remote_5,standardscaler__standardscaler__EMP_Engagement_1,standardscaler__standardscaler__EMP_Engagement_2,standardscaler__standardscaler__EMP_Engagement_3,standardscaler__standardscaler__EMP_Engagement_4,standardscaler__standardscaler__EMP_Engagement_5,standardscaler__standardscaler__last_evaluation,standardscaler__standardscaler__number_project,standardscaler__standardscaler__average_montly_hours,standardscaler__standardscaler__time_spend_company,standardscaler__standardscaler__Work_accident,standardscaler__standardscaler__promotion_last_5years,standardscaler__standardscaler__LinkedIn_Hits,standardscaler__standardscaler__Emp_Work_Status2,standardscaler__standardscaler__Emp_Work_Status_3,standardscaler__standardscaler__Emp_Work_Status_4,standardscaler__standardscaler__Emp_Work_Status_5,standardscaler__standardscaler__Emp_Identity,standardscaler__standardscaler__Emp_Role,standardscaler__standardscaler__Emp_Position,standardscaler__standardscaler__Emp_Title,standardscaler__standardscaler__Emp_Competitive_1,standardscaler__standardscaler__Emp_Competitive_2,standardscaler__standardscaler__Emp_Competitive_3,standardscaler__standardscaler__Emp_Competitive_4,standardscaler__standardscaler__Emp_Competitive_5,standardscaler__standardscaler__Emp_Collaborative_1,standardscaler__standardscaler__Emp_Collaborative_2,standardscaler__standardscaler__Emp_Collaborative_3,standardscaler__standardscaler__Emp_Collaborative_4,standardscaler__standardscaler__Emp_Collaborative_5,standardscaler__standardscaler__Sensor_StepCount,standardscaler__standardscaler__Sensor_Heartbeat(Average/Min),standardscaler__standardscaler__Sensor_Proximity(1-highest/10-lowest
),standardscaler__onehotencoder__Department_Finance,standardscaler__onehotencoder__Department_Human Resources,standardscaler__onehotencoder__Department_IT,standardscaler__onehotencoder__Department_Operations,standardscaler__onehotencoder__Department_Sales,standardscaler__onehotencoder__Department_Warehouse,standardscaler__onehotencoder__GEO_Australia,standardscaler__onehotencoder__GEO_China,standardscaler__onehotencoder__GEO_Colombia,standardscaler__onehotencoder__GEO_France,standardscaler__onehotencoder__GEO_Japan,standardscaler__onehotencoder__GEO_Korea,standardscaler__onehotencoder__GEO_Turkey,standardscaler__onehotencoder__GEO_UK,standardscaler__onehotencoder__GEO_US,standardscaler__onehotencoder__Role_Director,standardscaler__onehotencoder__Role_Level 1,standardscaler__onehotencoder__Role_Level 2-4,standardscaler__onehotencoder__Role_Manager,standardscaler__onehotencoder__Role_Senior Director,standardscaler__onehotencoder__Role_Senior Manager,standardscaler__onehotencoder__Role_VP,standardscaler__onehotencoder__sales_IT,standardscaler__onehotencoder__sales_RandD,standardscaler__onehotencoder__sales_accounting,standardscaler__onehotencoder__sales_hr,standardscaler__onehotencoder__sales_management,standardscaler__onehotencoder__sales_marketing,standardscaler__onehotencoder__sales_product_mng,standardscaler__onehotencoder__sales_sales,standardscaler__onehotencoder__sales_support,standardscaler__onehotencoder__sales_technical,standardscaler__onehotencoder__salary_high,standardscaler__onehotencoder__salary_low,standardscaler__onehotencoder__salary_medium,standardscaler__onehotencoder__Gender_M
count,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0,16040.0
mean,7.087708e-18,8.74889e-18,-6.378937e-17,4.252625e-17,-2.835083e-17,-4.961396e-17,-1.134033e-16,1.70105e-16,1.70105e-16,1.70105e-16,1.70105e-16,1.70105e-16,1.771927e-17,-1.9491200000000003e-17,-4.60701e-17,-1.4175420000000002e-17,-2.835083e-17,-2.126312e-17,-2.480698e-17,-7.796479e-17,4.075432e-17,3.1894690000000005e-17,-1.550436e-17,9.922791000000001e-17,-2.835083e-17,2.835083e-17,-5.670166e-17,-2.835083e-17,-1.550436e-17,-1.4175420000000002e-17,2.214909e-17,2.3921010000000002e-17,5.227185e-17,4.429818e-18,1.240349e-17,-5.315781e-18,5.315781e-18,-1.4175420000000002e-17,-2.835083e-17,-1.639032e-17,-4.695607e-17,-1.009998e-16,-4.429818e-17,-3.63245e-17,-8.328057e-17,-1.309011e-16,3.1894690000000005e-17,-2.9236800000000005e-17,2.480698e-17,4.739905e-17,2.68004e-17,3.1894690000000005e-17,1.993418e-17,-1.771927e-18,1.594734e-17,6.844068000000001e-17,7.53069e-17,2.037716e-17,-2.945829e-17,6.467534000000001e-17,4.8727990000000003e-17,4.385519e-17,4.695607e-17,-2.126312e-17,-9.036828e-17,1.5061380000000003e-17,1.771927e-18,4.075432e-17,1.9491200000000003e-17,1.594734e-17,5.4043770000000004e-17,3.0122760000000005e-17,5.670166e-17,-1.275787e-16,-7.796479e-17,9.568406e-17
std,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031
min,-0.9968719,-1.105906,-0.9826185,-1.320027,-1.320027,-1.178975,-1.294235,-1.0,-1.0,-1.0,-1.0,-1.0,-1.98302,-1.257519,-2.001614,-1.236337,-0.3480448,-0.1260649,-0.9299736,-1.458445,-1.467569,-1.471411,-1.462232,-0.9780055,-0.9887668,-1.0538,-1.059408,-1.01893,-1.460302,-0.9930262,-1.454845,-1.444593,-1.46645,-0.8004197,-1.472054,-1.476669,-1.470126,-1.056273,-2.160298,-2.146409,-0.4641194,-0.4767202,-0.4665467,-0.4653984,-0.4724494,-0.4745221,-0.3523347,-0.3747585,-0.3721529,-0.3722779,-0.3741993,-0.3740173,-0.3726964,-0.3807643,-0.3831031,-0.2246069,-0.5500131,-0.9741605,-0.452668,-0.1593059,-0.3253245,-0.09060942,-0.3149754,-0.2160675,-0.2522178,-0.2488348,-0.2023584,-0.254016,-0.2592707,-0.6555614,-0.4477972,-0.505973,-0.2540038,-1.092583,-0.8468665,-1.026038
25%,-0.9968719,-1.105906,-0.9826185,-1.320027,-1.320027,-1.178975,-1.294235,-1.0,-1.0,-1.0,-1.0,-1.0,-0.9823152,-1.257519,-0.9770102,-0.4817774,-0.3480448,-0.1260649,-0.7156921,-0.725844,-0.7349221,-0.7426531,-0.730667,-0.9780055,-0.9887668,-1.0538,-1.059408,-1.01893,-0.7263672,-0.9930262,-0.7212994,-0.712424,-0.7338163,-0.8004197,-0.7430151,-0.7419924,-0.7417806,-0.8084628,-0.7379825,-0.5572529,-0.4641194,-0.4767202,-0.4665467,-0.4653984,-0.4724494,-0.4745221,-0.3523347,-0.3747585,-0.3721529,-0.3722779,-0.3741993,-0.3740173,-0.3726964,-0.3807643,-0.3831031,-0.2246069,-0.5500131,-0.9741605,-0.452668,-0.1593059,-0.3253245,-0.09060942,-0.3149754,-0.2160675,-0.2522178,-0.2488348,-0.2023584,-0.254016,-0.2592707,-0.6555614,-0.4477972,-0.505973,-0.2540038,-1.092583,-0.8468665,-1.026038
50%,-0.4208406,0.9365697,0.2259026,0.7577081,0.7577081,0.4489991,0.8425439,2.220446e-16,2.220446e-16,2.220446e-16,2.220446e-16,2.220446e-16,0.07398433,0.1259718,0.03431635,-0.4817774,-0.3480448,-0.1260649,-0.5014106,0.006756643,-0.002275294,-0.01389473,0.0008978396,0.09278546,0.1143961,0.227316,0.2291921,0.05824372,0.007568075,0.07642192,0.01224644,0.01974544,-0.001182686,0.07710698,-0.01397592,-0.007316245,-0.01343494,-0.5823847,-0.02682462,0.2373253,-0.4641194,-0.4767202,-0.4665467,-0.4653984,-0.4724494,-0.4745221,-0.3523347,-0.3747585,-0.3721529,-0.3722779,-0.3741993,-0.3740173,-0.3726964,-0.3807643,-0.3831031,-0.2246069,-0.5500131,-0.530679,-0.452668,-0.1593059,-0.3253245,-0.09060942,-0.3149754,-0.2160675,-0.2522178,-0.2488348,-0.2023584,-0.254016,-0.2592707,-0.6555614,-0.4477972,-0.505973,-0.2540038,0.9557922,-0.8468665,-0.1547762
75%,1.039103,0.9365697,0.2259026,0.7577081,0.7577081,0.4489991,0.8425439,1.0,1.0,1.0,1.0,1.0,0.907905,0.8177173,0.9121947,0.3083787,-0.3480448,-0.1260649,0.6771378,0.7393573,0.7303715,0.7148636,0.7324627,0.09278546,0.1143961,0.227316,0.2291921,0.05824372,0.7415033,0.07642192,0.7457923,0.7519149,0.7314509,0.3901803,0.7150632,0.7273599,0.7149108,0.8204211,0.992762,0.6677186,-0.4641194,-0.4767202,-0.4665467,-0.4653984,-0.4724494,-0.4745221,-0.3523347,-0.3747585,-0.3721529,-0.3722779,-0.3741993,-0.3740173,-0.3726964,-0.3807643,-0.3831031,-0.2246069,-0.2524489,1.118343,-0.452668,-0.1593059,-0.3253245,-0.09060942,-0.3149754,-0.2160675,-0.2522178,-0.2488348,-0.2023584,-0.254016,-0.2592707,1.176333,-0.4477972,-0.505973,-0.2540038,0.9557922,1.229393,1.071967
max,1.039103,1.957807,2.642945,0.7577081,0.7577081,2.076973,0.8425439,1.0,1.0,1.0,1.0,1.0,1.575042,2.201208,1.995534,4.800141,2.927843,7.961886,2.498531,1.471958,1.463018,1.443622,1.464028,3.305158,3.423885,4.070665,4.094993,3.289765,1.475439,3.284766,1.479338,1.484084,1.464085,3.96198,1.444102,1.462036,1.443256,2.836495,1.395491,1.429193,2.384993,2.318275,2.363067,2.371045,2.33359,2.329794,3.145264,2.963564,2.971361,2.994199,2.971073,2.96293,2.997336,2.918936,2.942827,4.964811,1.984303,1.118343,2.424459,6.966073,3.401046,12.58204,3.547884,4.933949,4.456692,4.521598,5.350322,4.349052,4.288066,1.678314,2.471106,2.187719,4.070999,0.9557922,1.229393,1.071967


In [63]:
# Dimensions of the resampled feature matrix as (rows, columns).
X_train_resampled.shape

(16040, 76)

In [64]:
# Number of resampled target labels; expected to equal the feature row count.
y_train_resampled.shape

(16040,)

In [65]:
# Class balance of the resampled target, expressed as a percentage of all rows.
# `normalize=True` makes pandas return relative frequencies directly instead of
# dividing the raw counts by the length by hand; the result is renamed so the
# displayed Series is not labelled "count" while holding percentages.
y_train_resampled.value_counts(normalize=True).mul(100).rename("percent")

left_Company
0    50.0
1    50.0
Name: count, dtype: float64

In [66]:
# Train the classifier on the resampled training features and labels.
classifier.fit(X_train_resampled, y_train_resampled)

In [67]:
# NOTE(review): disabled cell — the baseline pipeline fit below is commented
# out and nothing runs here; restore it or remove the cell before sharing.
# baseline_pipe.fit(X_train,y_train)

In [68]:
# 10-fold cross-validated precision of `classifier` on the resampled training
# data; the cell displays one score per fold.
# NOTE(review): the folds are drawn from data that has already been resampled
# (and apparently scaled) as a whole, so validation folds may not be independent
# of the training folds and the scores can be optimistic. Consider running the
# resampling inside each fold (e.g. an imblearn.pipeline.Pipeline evaluated on
# the pre-resampling split) — confirm against the upstream cells.
cross_val_score(
    estimator=classifier,
    X=X_train_resampled,
    y=y_train_resampled,
    scoring='precision',
    cv=10,
)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [69]:
# NOTE(review): disabled correlation heatmap. `X_resampled` does not match the
# `X_train_resampled` name used by the surrounding cells — confirm the intended
# variable before re-enabling.
# sns.heatmap(X_resampled.corr(), cmap='coolwarm')