# Creating a logistic regression to predict absenteeism

## Import the relevant libraries

In [1]:
import pandas as pd
import numpy as np

## Load the data

In [2]:
data_preprocessed = pd.read_csv('Absenteeism_preprocessed.csv')

In [3]:
data_preprocessed.head()

Unnamed: 0.1,Unnamed: 0,Month,Day of The Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,reason_1,reason_2,reason_3,reason_4
0,0,7,1,289,36,33,239.554,30,0,2,1,4,0,0,0,1
1,1,7,1,118,13,50,239.554,31,0,1,0,0,0,0,0,0
2,2,7,2,179,51,38,239.554,31,0,0,0,2,0,0,0,1
3,3,7,3,279,5,39,239.554,24,0,2,0,4,1,0,0,0
4,4,7,3,289,36,33,239.554,30,0,2,1,2,0,0,0,1


## Create the targets

In [4]:
data_preprocessed['Absenteeism Time in Hours'].median()

3.0

In [5]:
targets = np.where(data_preprocessed['Absenteeism Time in Hours'] > 
                   data_preprocessed['Absenteeism Time in Hours'].median(), 1, 0)

In [6]:
targets

array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,

In [7]:
data_preprocessed['Excessive Absenteeism'] = targets

In [8]:
data_preprocessed.head()

Unnamed: 0.1,Unnamed: 0,Month,Day of The Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,reason_1,reason_2,reason_3,reason_4,Excessive Absenteeism
0,0,7,1,289,36,33,239.554,30,0,2,1,4,0,0,0,1,1
1,1,7,1,118,13,50,239.554,31,0,1,0,0,0,0,0,0,0
2,2,7,2,179,51,38,239.554,31,0,0,0,2,0,0,0,1,0
3,3,7,3,279,5,39,239.554,24,0,2,0,4,1,0,0,0,1
4,4,7,3,289,36,33,239.554,30,0,2,1,2,0,0,0,1,0


## A comment on the targets

In [9]:
targets.sum() / targets.shape[0]

0.45571428571428574

In [10]:
data_with_targets = data_preprocessed.drop(['Absenteeism Time in Hours','Day of The Week',
                                            'Daily Work Load Average','Distance to Work'],axis=1)

In [11]:
data_with_targets is data_preprocessed

False

In [12]:
data_with_targets.head()

Unnamed: 0.1,Unnamed: 0,Month,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,reason_1,reason_2,reason_3,reason_4,Excessive Absenteeism
0,0,7,289,33,30,0,2,1,0,0,0,1,1
1,1,7,118,50,31,0,1,0,0,0,0,0,0
2,2,7,179,38,31,0,0,0,0,0,0,1,0
3,3,7,279,39,24,0,2,0,1,0,0,0,1
4,4,7,289,33,30,0,2,1,0,0,0,1,0


## Select the inputs for the regression

In [13]:
data_with_targets.shape

(700, 13)

In [14]:
data_with_targets.iloc[:,:14]

Unnamed: 0.1,Unnamed: 0,Month,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,reason_1,reason_2,reason_3,reason_4,Excessive Absenteeism
0,0,7,289,33,30,0,2,1,0,0,0,1,1
1,1,7,118,50,31,0,1,0,0,0,0,0,0
2,2,7,179,38,31,0,0,0,0,0,0,1,0
3,3,7,279,39,24,0,2,0,1,0,0,0,1
4,4,7,289,33,30,0,2,1,0,0,0,1,0
5,5,7,179,38,31,0,0,0,0,0,0,1,0
6,6,7,361,28,27,0,1,4,0,0,0,1,1
7,7,7,260,36,23,0,4,0,0,0,0,1,1
8,8,7,155,34,25,0,2,0,0,0,1,0,1
9,9,7,235,37,29,1,1,1,0,0,0,1,1


In [15]:
data_with_targets.iloc[:,:-1]

Unnamed: 0.1,Unnamed: 0,Month,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,reason_1,reason_2,reason_3,reason_4
0,0,7,289,33,30,0,2,1,0,0,0,1
1,1,7,118,50,31,0,1,0,0,0,0,0
2,2,7,179,38,31,0,0,0,0,0,0,1
3,3,7,279,39,24,0,2,0,1,0,0,0
4,4,7,289,33,30,0,2,1,0,0,0,1
5,5,7,179,38,31,0,0,0,0,0,0,1
6,6,7,361,28,27,0,1,4,0,0,0,1
7,7,7,260,36,23,0,4,0,0,0,0,1
8,8,7,155,34,25,0,2,0,0,0,1,0
9,9,7,235,37,29,1,1,1,0,0,0,1


In [16]:
unscaled_inputs = data_with_targets.iloc[:,:-1]

## Standardize the data

In [17]:
from sklearn.preprocessing import StandardScaler

absenteeism_scaler = StandardScaler()

In [18]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler

class CustomScaler(BaseEstimator,TransformerMixin): 
    
    def __init__(self,columns,copy=True,with_mean=True,with_std=True):
        self.scaler = StandardScaler(copy,with_mean,with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self

    def transform(self, X, y=None, copy=None):
        init_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:,~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]

In [19]:
unscaled_inputs.columns.values

array(['Unnamed: 0', 'Month', 'Transportation Expense', 'Age',
       'Body Mass Index', 'Education', 'Children', 'Pets', 'reason_1',
       'reason_2', 'reason_3', 'reason_4'], dtype=object)

In [20]:
#columns_to_scale = ['Month Value','Day of the Week', 'Transportation Expense', 'Distance to Work',
       #'Age', 'Daily Work Load Average', 'Body Mass Index', 'Children', 'Pet']

columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']

In [21]:
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]

In [22]:
absenteeism_scaler = CustomScaler(columns_to_scale)

In [23]:
absenteeism_scaler.fit(unscaled_inputs)

CustomScaler(columns=['Unnamed: 0', 'Month', 'Transportation Expense', 'Age',
                      'Body Mass Index', 'Children', 'Pets', 'reason_1',
                      'reason_2', 'reason_3', 'reason_4'],
             copy=None, with_mean=None, with_std=None)

In [24]:
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)

In [25]:
scaled_inputs

Unnamed: 0.1,Unnamed: 0,Month,Transportation Expense,Age,Body Mass Index,Education,Children,Pets,reason_1,reason_2,reason_3,reason_4
0,-1.729578,0.182726,1.005844,-0.536062,0.767431,0,0.880469,0.268487,-0.577350,-0.092981,-0.314485,0.821365
1,-1.724629,0.182726,-1.574681,2.130803,1.002633,0,-0.019280,-0.589690,-0.577350,-0.092981,-0.314485,-1.217485
2,-1.719681,0.182726,-0.654143,0.248310,1.002633,0,-0.919030,-0.589690,-0.577350,-0.092981,-0.314485,0.821365
3,-1.714732,0.182726,0.854936,0.405184,-0.643782,0,0.880469,-0.589690,1.732051,-0.092981,-0.314485,-1.217485
4,-1.709783,0.182726,1.005844,-0.536062,0.767431,0,0.880469,0.268487,-0.577350,-0.092981,-0.314485,0.821365
5,-1.704835,0.182726,-0.654143,0.248310,1.002633,0,-0.919030,-0.589690,-0.577350,-0.092981,-0.314485,0.821365
6,-1.699886,0.182726,2.092381,-1.320435,0.061825,0,-0.019280,2.843016,-0.577350,-0.092981,-0.314485,0.821365
7,-1.694937,0.182726,0.568211,-0.065439,-0.878984,0,2.679969,-0.589690,-0.577350,-0.092981,-0.314485,0.821365
8,-1.689988,0.182726,-1.016322,-0.379188,-0.408580,0,0.880469,-0.589690,-0.577350,-0.092981,3.179797,-1.217485
9,-1.685040,0.182726,0.190942,0.091435,0.532229,1,-0.019280,0.268487,-0.577350,-0.092981,-0.314485,0.821365


In [26]:
scaled_inputs.shape

(700, 12)

## Split the data into train & test and shuffle

### Import the relevant module

In [27]:
from sklearn.model_selection import train_test_split

### Split

In [28]:
train_test_split(scaled_inputs, targets)

[     Unnamed: 0     Month  Transportation Expense       Age  Body Mass Index  \
 685    1.660296 -0.388293               -1.574681  2.130803         1.002633   
 494    0.715090  0.468236                0.568211 -0.065439        -0.878984   
 663    1.551424 -0.673803                2.092381 -1.320435         0.061825   
 8     -1.689988  0.182726               -1.016322 -0.379188        -0.408580   
 41    -1.526681  0.753746               -1.574681  0.091435         0.297027   
 192   -0.779424 -0.673803               -1.574681  2.130803         1.002633   
 522    0.853654  1.039256                1.036026  0.562059        -0.408580   
 71    -1.378219  1.039256                1.036026  0.562059        -0.408580   
 544    0.962526  1.324766               -0.654143  0.248310         1.002633   
 162   -0.927885 -0.959313                0.387122  1.660180         1.237836   
 167   -0.903142 -0.959313               -1.016322 -0.379188        -0.408580   
 32    -1.571219  0.468236  

In [29]:
x_train, x_test, y_train, y_test = train_test_split(scaled_inputs, targets, #train_size = 0.8, 
                                                                            test_size = 0.2, random_state = 20)

In [30]:
print (x_train.shape, y_train.shape)

(560, 12) (560,)


In [31]:
print (x_test.shape, y_test.shape)

(140, 12) (140,)


## Logistic regression with sklearn

In [32]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

### Training the model

In [33]:
reg = LogisticRegression()

In [34]:
reg.fit(x_train,y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [35]:
reg.score(x_train,y_train)

0.7839285714285714

### Manually check the accuracy

In [36]:
model_outputs = reg.predict(x_train)
model_outputs

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,

In [37]:
y_train

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,

In [38]:
model_outputs == y_train

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True, False, False,  True,  True,  True,  True,
       False,  True, False,  True, False, False,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True, False,  True, False,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
       False,  True, False,  True,  True, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,

In [39]:
np.sum((model_outputs==y_train))

439

In [40]:
model_outputs.shape[0]

560

In [41]:
np.sum((model_outputs==y_train)) / model_outputs.shape[0]

0.7839285714285714

### Finding the intercept and coefficients

In [42]:
reg.intercept_

array([-0.19112896])

In [43]:
reg.coef_

array([[-0.21330678,  0.14524549,  0.68300095, -0.1871582 ,  0.34246972,
        -0.19767121,  0.36265285, -0.30997392,  2.08188898,  0.34609206,
         1.54141448,  1.32163478]])

In [44]:
unscaled_inputs.columns.values

array(['Unnamed: 0', 'Month', 'Transportation Expense', 'Age',
       'Body Mass Index', 'Education', 'Children', 'Pets', 'reason_1',
       'reason_2', 'reason_3', 'reason_4'], dtype=object)

In [45]:
feature_name = unscaled_inputs.columns.values

In [46]:
summary_table = pd.DataFrame (columns=['Feature name'], data = feature_name)

summary_table['Coefficient'] = np.transpose(reg.coef_)

summary_table

Unnamed: 0,Feature name,Coefficient
0,Unnamed: 0,-0.213307
1,Month,0.145245
2,Transportation Expense,0.683001
3,Age,-0.187158
4,Body Mass Index,0.34247
5,Education,-0.197671
6,Children,0.362653
7,Pets,-0.309974
8,reason_1,2.081889
9,reason_2,0.346092


In [47]:
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg.intercept_[0]]
summary_table = summary_table.sort_index()
summary_table

Unnamed: 0,Feature name,Coefficient
0,Intercept,-0.191129
1,Unnamed: 0,-0.213307
2,Month,0.145245
3,Transportation Expense,0.683001
4,Age,-0.187158
5,Body Mass Index,0.34247
6,Education,-0.197671
7,Children,0.362653
8,Pets,-0.309974
9,reason_1,2.081889


## Interpreting the coefficients

In [48]:
summary_table['Odds_ratio'] = np.exp(summary_table.Coefficient)

In [49]:
summary_table

Unnamed: 0,Feature name,Coefficient,Odds_ratio
0,Intercept,-0.191129,0.826026
1,Unnamed: 0,-0.213307,0.807908
2,Month,0.145245,1.156323
3,Transportation Expense,0.683001,1.97981
4,Age,-0.187158,0.829313
5,Body Mass Index,0.34247,1.408422
6,Education,-0.197671,0.82064
7,Children,0.362653,1.437137
8,Pets,-0.309974,0.733466
9,reason_1,2.081889,8.019603


In [50]:
summary_table.sort_values('Odds_ratio', ascending=False)

Unnamed: 0,Feature name,Coefficient,Odds_ratio
9,reason_1,2.081889,8.019603
11,reason_3,1.541414,4.671193
12,reason_4,1.321635,3.749546
3,Transportation Expense,0.683001,1.97981
7,Children,0.362653,1.437137
10,reason_2,0.346092,1.413533
5,Body Mass Index,0.34247,1.408422
2,Month,0.145245,1.156323
4,Age,-0.187158,0.829313
0,Intercept,-0.191129,0.826026


## Testing the model

In [51]:
reg.score(x_test,y_test)

0.7357142857142858

In [52]:
predicted_proba = reg.predict_proba(x_test)
predicted_proba

array([[0.75552031, 0.24447969],
       [0.56720051, 0.43279949],
       [0.39771569, 0.60228431],
       [0.82865746, 0.17134254],
       [0.05080024, 0.94919976],
       [0.36614971, 0.63385029],
       [0.22234776, 0.77765224],
       [0.11322093, 0.88677907],
       [0.75132389, 0.24867611],
       [0.67791034, 0.32208966],
       [0.51775983, 0.48224017],
       [0.20301379, 0.79698621],
       [0.03976709, 0.96023291],
       [0.73187874, 0.26812126],
       [0.26883896, 0.73116104],
       [0.53042122, 0.46957878],
       [0.60924576, 0.39075424],
       [0.48350331, 0.51649669],
       [0.46483076, 0.53516924],
       [0.03295126, 0.96704874],
       [0.65934641, 0.34065359],
       [0.8283575 , 0.1716425 ],
       [0.40754356, 0.59245644],
       [0.41009478, 0.58990522],
       [0.24461256, 0.75538744],
       [0.73512636, 0.26487364],
       [0.53617392, 0.46382608],
       [0.8912575 , 0.1087425 ],
       [0.17272807, 0.82727193],
       [0.82925615, 0.17074385],
       [0.

In [53]:
predicted_proba.shape

(140, 2)

In [54]:
predicted_proba[:,1]

array([0.24447969, 0.43279949, 0.60228431, 0.17134254, 0.94919976,
       0.63385029, 0.77765224, 0.88677907, 0.24867611, 0.32208966,
       0.48224017, 0.79698621, 0.96023291, 0.26812126, 0.73116104,
       0.46957878, 0.39075424, 0.51649669, 0.53516924, 0.96704874,
       0.34065359, 0.1716425 , 0.59245644, 0.58990522, 0.75538744,
       0.26487364, 0.46382608, 0.1087425 , 0.82727193, 0.17074385,
       0.29478103, 0.73678581, 0.74615805, 0.49399494, 0.17059444,
       0.62220172, 0.17359692, 0.83619992, 0.40639967, 0.6937232 ,
       0.17070127, 0.40202153, 0.25823352, 0.10577936, 0.83951812,
       0.63263559, 0.63953826, 0.34344527, 0.20570957, 0.16872522,
       0.51635511, 0.08074855, 0.7379615 , 0.22954254, 0.88970792,
       0.44282022, 0.90394095, 0.17665368, 0.08936662, 0.09147489,
       0.65516359, 0.63622707, 0.24364715, 0.87911668, 0.23175146,
       0.33034222, 0.01315807, 0.25090843, 0.79275201, 0.39392592,
       0.25031369, 0.07607071, 0.92691276, 0.43505654, 0.59574

## Save the model

In [55]:
import pickle

In [56]:
with open('model', 'wb') as file:
    pickle.dump(reg, file)

In [57]:
with open('scaler', 'wb') as file:
    pickle.dump(absenteeism_scaler, file)