## Importing the libraries

In [1]:
import numpy as np
import pandas as pd

## Importing the dataset

In [2]:
# Read the insurance data; every column except the last is used as a feature
# and the last column (charges) is the regression target.
dataset = pd.read_csv('insurance.csv')
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
X = feature_frame.values
y = target_series.values

## Checking for null data

In [3]:
# Count the missing entries in each column of the loaded frame.
dataset.isna().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

## Encoding categorical data

In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# One-hot encode the categorical columns (1 = sex, 4 = smoker, 5 = region) and
# pass the remaining columns through unchanged.
categorical_columns = [1, 4, 5]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)
# Encode from the raw frame rather than from X itself, so that re-running this
# cell does not one-hot encode the already-encoded matrix a second time.
X = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [6]:
from sklearn.preprocessing import StandardScaler
# Standardise the columns from index 8 onward only.
# NOTE(review): presumably columns 0-7 are the one-hot indicators produced by
# the encoder (which should not be scaled) and 8+ are the numeric features --
# confirm against the encoder output.
numeric_columns = slice(8, None)
sc = StandardScaler()
# Fit on the training rows only, then reuse the same statistics for the test
# rows so that no test-set information leaks into the scaler.
X_train[:, numeric_columns] = sc.fit_transform(X_train[:, numeric_columns])
X_test[:, numeric_columns] = sc.transform(X_test[:, numeric_columns])

## Training the Stochastic Gradient Descent Regression model on the Training set

In [7]:
from sklearn.linear_model import SGDRegressor
# Linear regression fitted by stochastic gradient descent with an L2 penalty.
# `loss` is left at its default (squared error): the former spelling
# 'squared_loss' was removed in scikit-learn 1.2 and raises an error there,
# whereas the default works on both older and newer releases.
# random_state pins the data shuffling so repeated runs give the same model;
# outputs recorded from an unseeded run may differ slightly from a fresh run.
SGDreg = SGDRegressor(alpha=0.001, penalty='l2', random_state=0)
reg = SGDreg.fit(X_train, y_train)




## Predicting the Test set results

In [8]:
# Predict on the held-out rows and print predictions next to the true values
# (left column: predicted, right column: actual), rounded to two decimals.
y_pred = reg.predict(X_test)
np.set_printoptions(precision=2)
predicted_vs_actual = np.column_stack((y_pred, y_test))
print(predicted_vs_actual)

[[11295.67  9724.53]
 [ 9641.37  8547.69]
 [38101.51 45702.02]
 [16400.92 12950.07]
 [ 7091.77  9644.25]
 [ 4161.1   4500.34]
 [ 1752.18  2198.19]
 [14411.27 11436.74]
 [ 9056.87  7537.16]
 [ 7564.87  5425.02]
 [ 4580.55  6753.04]
 [10367.02 10493.95]
 [ 8915.45  7337.75]
 [ 4001.98  4185.1 ]
 [27949.54 18310.74]
 [10804.86 10702.64]
 [11461.6  12523.6 ]
 [ 6239.79  3490.55]
 [ 8335.33  6457.84]
 [27176.62 33475.82]
 [33606.6  23967.38]
 [14421.3  12643.38]
 [11893.41 23045.57]
 [32187.96 23065.42]
 [ 4363.45  1674.63]
 [ 9375.91  4667.61]
 [ 1280.18  3732.63]
 [ 9882.13  7682.67]
 [ 3971.4   3756.62]
 [10521.94  8413.46]
 [ 9057.46  8059.68]
 [40067.74 48970.25]
 [15827.93 12979.36]
 [14019.59 20630.28]
 [24805.17 14571.89]
 [ 5333.3   4137.52]
 [12721.64  8347.16]
 [30738.99 51194.56]
 [33478.68 40003.33]
 [ 3813.33  1880.49]
 [ 4163.61  5458.05]
 [ 4195.52  2867.12]
 [30569.81 20149.32]
 [39434.15 47496.49]
 [27840.38 36149.48]
 [ 5166.27 26018.95]
 [10694.88 19749.38]
 [ 7965.14  6

## Evaluating the Model Performance

In [9]:
from sklearn.metrics import r2_score
# Coefficient of determination of the predictions on the held-out test set.
r2_score(y_true=y_test, y_pred=y_pred)

0.7996754318546645