In [1]:
#importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
#Reading the client data sheet (insurance_pre.csv, a path relative to the working directory) into a DataFrame
dataset = pd.read_csv("insurance_pre.csv")

In [3]:
#displaying the data from the data sheet.
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
#displaying the data columns from dataset
dataset.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'charges'], dtype='object')

In [5]:
#converting the text columns (sex, smoker) to numeric indicator columns using the dummies method
#drop_first = True keeps a single indicator per text column (sex_male, smoker_yes) and drops the first category
#NOTE: this rebinds `dataset`, so the raw frame is no longer available once this cell has run
dataset = pd.get_dummies(dataset, drop_first = True)

In [6]:
#displaying data post the conversion
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [7]:
#splitting the frame into the model inputs (independent) and the prediction target (dependent)
#both selections use a column list, so each result is a DataFrame rather than a Series
independent = dataset.loc[:, ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
dependent = dataset.loc[:, ['charges']]

In [8]:
#displaying independent assigned variable values.
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,False,True
1,18,33.770,1,True,False
2,28,33.000,3,True,False
3,33,22.705,0,True,False
4,32,28.880,0,True,False
...,...,...,...,...,...
1333,50,30.970,3,True,False
1334,18,31.920,0,False,False
1335,18,36.850,0,False,False
1336,21,25.800,0,False,False


In [9]:
#displaying dependent variable values
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [11]:
#holding out 30% of the rows as a test set; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    test_size=0.30,
    random_state=0,
)

In [12]:
#display X_train training set data values
X_train

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
1163,18,28.215,0,False,False
196,39,32.800,0,False,False
438,52,46.750,5,False,False
183,44,26.410,0,False,False
1298,33,27.455,2,True,False
...,...,...,...,...,...
763,27,26.030,0,True,False
835,42,35.970,2,True,False
1216,40,25.080,0,True,False
559,19,35.530,0,True,False


In [13]:
#display the size of training set data
X_train.shape

(936, 5)

In [14]:
#display training set target values (y_train)
y_train

Unnamed: 0,charges
1163,2200.83085
196,5649.71500
438,12592.53450
183,7419.47790
1298,5261.46945
...,...
763,3070.80870
835,7160.33030
1216,5415.66120
559,1646.42970


In [15]:
#importing LinearRegression from sklearn's linear_model module
#creating a LinearRegression estimator in the regressor variable and fitting it on the training split
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [16]:
#assigning the fitted coefficients (one slope per feature column) to the weight variable
weight = regressor.coef_

In [17]:
#displaying weight variable value
weight

array([[  257.8006705 ,   321.06004271,   469.58113407,   -41.74825718,
        23418.6671912 ]])

In [18]:
#assigning the fitted intercept (the bias term, not a coefficient) to the bais variable
#NOTE: "bais" is a misspelling of "bias"; the name is kept because the next cell displays it
bais = regressor.intercept_

In [19]:
#displaying the bais variable value
bais

array([-12057.244846])

In [20]:
#predicting on the held-out test features (X_test) and assigning the predictions to the y_pred variable
y_pred = regressor.predict(X_test)

In [21]:
#importing r2_score from sklearn.metrics to evaluate the fitted model
#r_score holds the R^2 score (coefficient of determination) of y_pred against the held-out y_test,
#not the predictions themselves
from sklearn.metrics import r2_score
r_score = r2_score(y_test, y_pred)

In [22]:
#displaying the r_score (R^2) value to see how well the predictions fit the test data.
r_score

0.7894790349867009

In [23]:
#importing pickle library for the deployment phase
import pickle

In [24]:
#assigning the saved-model file name to the filename variable
filename = "finalized_ML_RegressionAssignment_LinearRegressor.sav"
#using pickle.dump to write the fitted model to disk
#the with-block closes the file handle as soon as the dump finishes, so the bytes are
#flushed to disk before any later cell reads the file back
with open(filename, 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [25]:
#loading the saved model from disk into the loaded_model variable
#the with-block closes the file handle once the model has been read
#NOTE: pickle.load can execute arbitrary code, so only load model files from a trusted source
with open("finalized_ML_RegressionAssignment_LinearRegressor.sav", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [28]:
#predicting the charges for one hand-written record and assigning the prediction to the result variable
#the record is passed as a DataFrame carrying the training column names, so each value is matched
#to its feature by name and sklearn does not warn about missing feature names
#NOTE(review): bmi=2020.60 is far outside the bmi values shown in the dataset (roughly 22-47) and
#dominates the prediction; it looks like a typo (perhaps 20.60) - confirm the intended value
new_record = pd.DataFrame(
    [[42, 2020.60, 3, True, False]],
    columns=['age', 'bmi', 'children', 'sex_male', 'smoker_yes'],
)
result = loaded_model.predict(new_record)



In [29]:
#displaying the predicted value for the given 5 data values
result

array([[648871.30075237]])