### Creating a logistic regression to predict absenteeism

In [1]:
import pandas as pd
import numpy as np

### Load the data

In [2]:
data_preprocessed = pd.read_csv('Absenteeism_preprocessed.csv')

In [3]:
data_preprocessed.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,False,False,False,True,7,1,289,36,33,239.554,30,0,2,1,4
1,False,False,False,False,7,1,118,13,50,239.554,31,0,1,0,0
2,False,False,False,True,7,2,179,51,38,239.554,31,0,0,0,2
3,True,False,False,False,7,3,279,5,39,239.554,24,0,2,0,4
4,False,False,False,True,7,3,289,36,33,239.554,30,0,2,1,2


### Create the targets

In [4]:
data_preprocessed['Absenteeism Time in Hours'].median()

np.float64(3.0)

In [5]:
#np.where(condition, value if true, value if false)
targets = np.where(data_preprocessed['Absenteeism Time in Hours'] >
                   data_preprocessed['Absenteeism Time in Hours'].median(),1,0)

In [6]:
targets


array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,

In [7]:
data_preprocessed['Excessive Absenteeism'] = targets

In [8]:
data_preprocessed.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Excessive Absenteeism
0,False,False,False,True,7,1,289,36,33,239.554,30,0,2,1,4,1
1,False,False,False,False,7,1,118,13,50,239.554,31,0,1,0,0,0
2,False,False,False,True,7,2,179,51,38,239.554,31,0,0,0,2,0
3,True,False,False,False,7,3,279,5,39,239.554,24,0,2,0,4,1
4,False,False,False,True,7,3,289,36,33,239.554,30,0,2,1,2,0


### A comment on the targets

In [9]:
targets.sum() / targets.shape[0]

np.float64(0.45571428571428574)

In [10]:
# that means 45 % of the targets are 1's s0 55 will be 0, so 45-55 is roughly balanced we can say

In [11]:
data_with_targets = data_preprocessed.drop(['Absenteeism Time in Hours','Day of the week',
                                            'Daily Work Load Average','Distance to Work'], axis=1)

In [12]:
data_with_targets is data_preprocessed

False

In [13]:
data_with_targets.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,Excessive Absenteeism
0,False,False,False,True,7,289,33,30,0,2,1,1
1,False,False,False,False,7,118,50,31,0,1,0,0
2,False,False,False,True,7,179,38,31,0,0,0,0
3,True,False,False,False,7,279,39,24,0,2,0,1
4,False,False,False,True,7,289,33,30,0,2,1,0


### selecting the inputs for the regression

In [14]:
data_with_targets.shape

(700, 12)

In [15]:
data_with_targets.iloc[:, 0:14]   # iloc excludes the ending index

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,Excessive Absenteeism
0,False,False,False,True,7,289,33,30,0,2,1,1
1,False,False,False,False,7,118,50,31,0,1,0,0
2,False,False,False,True,7,179,38,31,0,0,0,0
3,True,False,False,False,7,279,39,24,0,2,0,1
4,False,False,False,True,7,289,33,30,0,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
695,True,False,False,False,5,179,40,22,1,2,0,1
696,True,False,False,False,5,225,28,24,0,1,2,0
697,True,False,False,False,5,330,28,25,1,0,0,1
698,False,False,False,True,5,235,32,25,1,0,0,0


In [16]:
data_with_targets.iloc[:,:-1]

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Transportation Expense,Age,Body Mass Index,Education,Children,Pets
0,False,False,False,True,7,289,33,30,0,2,1
1,False,False,False,False,7,118,50,31,0,1,0
2,False,False,False,True,7,179,38,31,0,0,0
3,True,False,False,False,7,279,39,24,0,2,0
4,False,False,False,True,7,289,33,30,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...
695,True,False,False,False,5,179,40,22,1,2,0
696,True,False,False,False,5,225,28,24,0,1,2
697,True,False,False,False,5,330,28,25,1,0,0
698,False,False,False,True,5,235,32,25,1,0,0


In [17]:
unscaled_inputs = data_with_targets.iloc[:,:-1]

In [18]:
unscaled_inputs

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Transportation Expense,Age,Body Mass Index,Education,Children,Pets
0,False,False,False,True,7,289,33,30,0,2,1
1,False,False,False,False,7,118,50,31,0,1,0
2,False,False,False,True,7,179,38,31,0,0,0
3,True,False,False,False,7,279,39,24,0,2,0
4,False,False,False,True,7,289,33,30,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...
695,True,False,False,False,5,179,40,22,1,2,0
696,True,False,False,False,5,225,28,24,0,1,2
697,True,False,False,False,5,330,28,25,1,0,0
698,False,False,False,True,5,235,32,25,1,0,0


### Standardize the data 

In [19]:
#from sklearn.preprocessing import StandardScaler
#absenteeism_scaler = StandardScaler()

In [20]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

class CustomScaler(BaseEstimator, TransformerMixin):

    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        self.scaler = StandardScaler(copy=copy, with_mean=with_mean, with_std=with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    
    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self
    
    def transform(self, X, y=None, copy=None): 
        init_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis =1)[init_col_order]


In [21]:
unscaled_inputs.columns.values

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
       'Transportation Expense', 'Age', 'Body Mass Index', 'Education',
       'Children', 'Pets'], dtype=object)

In [22]:
#columns_to_scale = ['Month Value','Day of the week', 'Transportation Expense', 'Distance to Work',
      # 'Age', 'Daily Work Load Average', 'Body Mass Index','Children', 'Pets']

columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']

In [24]:
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]

In [25]:
absenteeism_scaler = CustomScaler(columns_to_scale)

In [26]:
absenteeism_scaler.fit(unscaled_inputs)

  return var(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


AttributeError: 'CustomScaler' object has no attribute 'copy'

AttributeError: 'CustomScaler' object has no attribute 'copy'

AttributeError: 'CustomScaler' object has no attribute 'copy'

In [27]:
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)

In [28]:
scaled_inputs

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Transportation Expense,Age,Body Mass Index,Education,Children,Pets
0,False,False,False,True,0.182726,1.005844,-0.536062,0.767431,0,0.880469,0.268487
1,False,False,False,False,0.182726,-1.574681,2.130803,1.002633,0,-0.019280,-0.589690
2,False,False,False,True,0.182726,-0.654143,0.248310,1.002633,0,-0.919030,-0.589690
3,True,False,False,False,0.182726,0.854936,0.405184,-0.643782,0,0.880469,-0.589690
4,False,False,False,True,0.182726,1.005844,-0.536062,0.767431,0,0.880469,0.268487
...,...,...,...,...,...,...,...,...,...,...,...
695,True,False,False,False,-0.388293,-0.654143,0.562059,-1.114186,1,0.880469,-0.589690
696,True,False,False,False,-0.388293,0.040034,-1.320435,-0.643782,0,-0.019280,1.126663
697,True,False,False,False,-0.388293,1.624567,-1.320435,-0.408580,1,-0.919030,-0.589690
698,False,False,False,True,-0.388293,0.190942,-0.692937,-0.408580,1,-0.919030,-0.589690


In [29]:
scaled_inputs.shape

(700, 11)

### Split the data into train & test and shuffle

### Import the relevant modfule

In [30]:
from sklearn.model_selection import train_test_split

### SPLIT

In [31]:
train_test_split(scaled_inputs, targets) 

[     Reason_1  Reason_2  Reason_3  Reason_4  Month Value  \
 513     False     False     False      True     1.039256   
 22       True     False     False     False     0.468236   
 193     False     False     False      True    -0.673803   
 565      True     False     False     False     1.610276   
 13       True     False     False     False     0.182726   
 ..        ...       ...       ...       ...          ...   
 328     False     False     False      True     1.324766   
 507      True     False     False     False     1.039256   
 88      False     False     False      True     1.324766   
 212      True     False     False     False    -0.388293   
 684     False     False     False      True    -0.388293   
 
      Transportation Expense       Age  Body Mass Index  Education  Children  \
 513                1.005844  1.973929         2.178644          0 -0.919030   
 22                 2.092381 -1.320435         0.061825          0 -0.019280   
 193                1.0360

In [32]:
x_train, x_test, y_train, y_test = train_test_split(scaled_inputs, targets, train_size = 0.8, random_state = 20) 

In [33]:
print(x_train.shape, y_train.shape)

(560, 11) (560,)


In [34]:
print(x_test.shape, y_test.shape)

(140, 11) (140,)


### LOgistic regression with sklearn

In [35]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

### Training the Model 

In [36]:
reg = LogisticRegression()

In [37]:
reg.fit(x_train, y_train)     #sklearn.linear_model.LogisticRegression.fit(x,y) fits the model according to the given training data 

In [38]:
reg.score(x_train, y_train)  #.score - returns the mean accuracy on the given test data and labels 

0.7732142857142857

### MAnually check the accuracy

In [39]:
model_outputs = reg.predict(x_train)
model_outputs

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,

In [40]:
y_train

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,

In [41]:
model_outputs == y_train

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True, False, False,  True,  True,  True,  True,
       False,  True, False,  True, False, False,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
       False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
       False,  True, False,  True,  True, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
       False,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,

In [42]:
np.sum(model_outputs==y_train)

np.int64(433)

In [43]:
# accuracy = correct prediction/ total no. of observations
# total no. of observations = model_outputs.shape[0]
model_outputs.shape[0]

560

In [44]:
np.sum(model_outputs==y_train)/model_outputs.shape[0]

np.float64(0.7732142857142857)

### FInding the intecept and coefficient

In [45]:
reg.intercept_

array([-1.6469898])

In [46]:
reg.coef_

array([[ 2.80000644,  0.95174778,  3.1140605 ,  0.83835931,  0.15897713,
         0.60513709, -0.16990589,  0.27998236, -0.21017416,  0.34842434,
        -0.27721907]])

In [47]:
unscaled_inputs.columns.values

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
       'Transportation Expense', 'Age', 'Body Mass Index', 'Education',
       'Children', 'Pets'], dtype=object)

In [48]:
feature_name = unscaled_inputs.columns.values

In [49]:
summary_table = pd.DataFrame(columns=['Feature name'], data = feature_name)

In [50]:
summary_table['Coefficient'] = np.transpose(reg.coef_)

In [51]:
summary_table

Unnamed: 0,Feature name,Coefficient
0,Reason_1,2.800006
1,Reason_2,0.951748
2,Reason_3,3.114061
3,Reason_4,0.838359
4,Month Value,0.158977
5,Transportation Expense,0.605137
6,Age,-0.169906
7,Body Mass Index,0.279982
8,Education,-0.210174
9,Children,0.348424


In [52]:
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg.intercept_[0]]
summary_table = summary_table.sort_index()
summary_table

Unnamed: 0,Feature name,Coefficient
0,Intercept,-1.64699
1,Reason_1,2.800006
2,Reason_2,0.951748
3,Reason_3,3.114061
4,Reason_4,0.838359
5,Month Value,0.158977
6,Transportation Expense,0.605137
7,Age,-0.169906
8,Body Mass Index,0.279982
9,Education,-0.210174


### Interpreting the coefficients 

In [53]:
#bias/intercept coefficient/weight
#odds_ratio - exponential of coefficients
summary_table['Odds_ratio'] = np.exp(summary_table.Coefficient)

In [54]:
summary_table

Unnamed: 0,Feature name,Coefficient,Odds_ratio
0,Intercept,-1.64699,0.192629
1,Reason_1,2.800006,16.444753
2,Reason_2,0.951748,2.590233
3,Reason_3,3.114061,22.51227
4,Reason_4,0.838359,2.31257
5,Month Value,0.158977,1.172311
6,Transportation Expense,0.605137,1.831503
7,Age,-0.169906,0.843744
8,Body Mass Index,0.279982,1.323106
9,Education,-0.210174,0.810443


In [55]:
summary_table.sort_values('Odds_ratio', ascending=False)

Unnamed: 0,Feature name,Coefficient,Odds_ratio
3,Reason_3,3.114061,22.51227
1,Reason_1,2.800006,16.444753
2,Reason_2,0.951748,2.590233
4,Reason_4,0.838359,2.31257
6,Transportation Expense,0.605137,1.831503
10,Children,0.348424,1.416833
8,Body Mass Index,0.279982,1.323106
5,Month Value,0.158977,1.172311
7,Age,-0.169906,0.843744
9,Education,-0.210174,0.810443


In [56]:
# Backward elimination - the idea is that we can simplify out model by removing all features which have clise to no contribution to the model 
#when we have the p-values, we get rid of all coefficients with p-values>0.05

### Testing the model 

In [57]:
reg.score(x_test, y_test)

0.75

In [62]:
predicted_proba = reg.predict_proba(x_test)
predicted_proba

array([[0.71342516, 0.28657484],
       [0.5873216 , 0.4126784 ],
       [0.44016153, 0.55983847],
       [0.78163061, 0.21836939],
       [0.08407928, 0.91592072],
       [0.3348226 , 0.6651774 ],
       [0.29971206, 0.70028794],
       [0.13112385, 0.86887615],
       [0.78627908, 0.21372092],
       [0.74906578, 0.25093422],
       [0.49395555, 0.50604445],
       [0.22492002, 0.77507998],
       [0.07135527, 0.92864473],
       [0.73173354, 0.26826646],
       [0.30957854, 0.69042146],
       [0.54726422, 0.45273578],
       [0.55051921, 0.44948079],
       [0.53926379, 0.46073621],
       [0.40197149, 0.59802851],
       [0.05365482, 0.94634518],
       [0.70030387, 0.29969613],
       [0.78163061, 0.21836939],
       [0.42028246, 0.57971754],
       [0.42028246, 0.57971754],
       [0.24801464, 0.75198536],
       [0.74567806, 0.25432194],
       [0.51026557, 0.48973443],
       [0.8569309 , 0.1430691 ],
       [0.20365204, 0.79634796],
       [0.78163061, 0.21836939],
       [0.

In [63]:
predicted_proba.shape

(140, 2)

In [64]:
predicted_proba[:,1]

array([0.28657484, 0.4126784 , 0.55983847, 0.21836939, 0.91592072,
       0.6651774 , 0.70028794, 0.86887615, 0.21372092, 0.25093422,
       0.50604445, 0.77507998, 0.92864473, 0.26826646, 0.69042146,
       0.45273578, 0.44948079, 0.46073621, 0.59802851, 0.94634518,
       0.29969613, 0.21836939, 0.57971754, 0.57971754, 0.75198536,
       0.25432194, 0.48973443, 0.1430691 , 0.79634796, 0.21836939,
       0.36947677, 0.67913195, 0.68508325, 0.52870791, 0.21836939,
       0.53505228, 0.22144744, 0.73673169, 0.40500758, 0.60504297,
       0.21072119, 0.45227108, 0.23749326, 0.39847178, 0.82763577,
       0.56771922, 0.69120847, 0.28657484, 0.2192347 , 0.2032712 ,
       0.57634482, 0.32954238, 0.6651774 , 0.26937528, 0.83323682,
       0.43484145, 0.88365871, 0.23125087, 0.33433749, 0.34451397,
       0.69915101, 0.6549938 , 0.29244583, 0.79186052, 0.20752232,
       0.26838009, 0.08710411, 0.22144744, 0.73215219, 0.30536526,
       0.22144744, 0.2900789 , 0.90443841, 0.46065771, 0.60175

### Save the model

In [65]:
#pickle[module] is a python module used to convert a python object into a charactrer stream
import pickle

In [66]:
with open('model', 'wb') as file:    #model-file name , wb-write byte
    pickle.dump(reg, file)           #dump-save, reg-object to be dumped

In [67]:
with open('scaler', 'wb') as file:
    pickle.dump(absenteeism_scaler, file)