In [11]:
# Import libraries

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
from sklearn import set_config

# Render estimators as interactive HTML diagrams when they are the last
# expression of a cell (e.g. the fitted pipeline further down).
set_config(display="diagram")

In [5]:
# Load the claims data; the first CSV column holds the row labels, so it is
# used as the index rather than kept as a feature.
df = pd.read_csv(
    'claim.csv',
    index_col=0,
)

# Preview the first rows to sanity-check the column layout.
df.head()

Unnamed: 0,age,gender,bmi,smoker,claim
0,39.0,male,23.2,No,1121.87
1,24.0,male,30.1,No,1131.51
2,38.0,male,33.3,No,1135.94
3,38.0,male,33.7,No,1136.4
4,38.0,male,34.1,No,1137.01


In [14]:
# Preprocessing for the feature frame. Columns are selected by position in the
# frame that remains after 'claim' is dropped (per the df.head() preview:
# 0 = age, 1 = gender, 2 = bmi, 3 = smoker).
#   tnf1: one-hot encode the categorical columns (gender, smoker); categories
#         not seen during fit are encoded as all zeros instead of raising.
#   tnf2: standardise the numeric columns (age, bmi).
# Dense output is requested on the ColumnTransformer (sparse_threshold=0)
# instead of via OneHotEncoder(sparse=False): the `sparse` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4, while sparse_threshold is
# accepted by both old and new releases.
transformer = ColumnTransformer(
    transformers=[
        ('tnf1', OneHotEncoder(handle_unknown='ignore'), [1, 3]),
        ('tnf2', StandardScaler(), [0, 2]),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Separate predictors from the regression target ('claim').
features = df.drop(columns=['claim'])
target = df['claim']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)

In [17]:
# End-to-end estimator: preprocessing followed by a gradient-boosted regressor.
# Keeping both steps in one Pipeline means the transformer is fitted on the
# training data only and is applied automatically at predict time.
# random_state is fixed (same seed as the train/test split) so that a fresh
# "Restart & Run All" reproduces the same fitted model and predictions.
model = Pipeline(
    steps=[
        ('transformer', transformer),
        ('model', GradientBoostingRegressor(random_state=1)),
    ]
)

In [18]:
# Fit the whole pipeline (preprocessing + regressor) on the training split.
# Left as the cell's last expression so the fitted estimator is displayed.
model.fit(xtrain, ytrain)

In [19]:
# Predict the target for the held-out test features; the pipeline applies the
# fitted preprocessing before calling the regressor.
ypred = model.predict(xtest)

In [20]:
# Show the predictions for the test split.
ypred

array([ 8474.66681044,  9114.83897362, 20211.74662864,  7830.66498743,
        8237.84367411,  9419.52620348,  8581.76516069, 11166.23322075,
        8518.54929479,  9194.92556964,  6528.75735523,  7649.69079832,
       21888.65677857,  9884.67005443,  9195.23083876,  9734.82609214,
        8518.54929479,  6401.06958498, 11113.2130072 ,  8381.85189449,
        8933.70146608,  8713.19407018,  7478.14745035,  7952.4282894 ,
       10463.38474658,  8097.07111022,  6554.7278558 ,  8581.13256833,
        7716.25184446,  8062.56863242,  8332.52598254,  8342.66036698,
        8114.79314794,  8056.84928602, 19134.1321477 ,  7664.61256797,
        7932.71429586,  6833.99322765,  9920.16568291, 20530.5806324 ,
        6856.06275868,  8645.25184076,  9041.76746077,  9702.79492119,
        5093.08538377,  7382.95662653,  6746.19195409,  7070.18630872,
        7295.6416097 ,  9711.87697251,  7226.89241219,  8651.67512912,
        7712.7784904 , 41349.275564  , 25282.14481665,  7087.23702708,
      

In [21]:
import pickle

# Persist the fitted pipeline. The context manager guarantees the file is
# flushed and closed even if pickling raises; the previous bare open() left
# the handle to be closed whenever the garbage collector got to it.
# NOTE: only unpickle this file in a trusted environment with a compatible
# scikit-learn version installed.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)