In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv("insurance - insurance.csv")

In [3]:
df.isnull().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [4]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [5]:
x=df.drop(columns=["charges",'region'])
y=df['charges']

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [7]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [8]:
numeric_features=x_train.select_dtypes(include='int').columns.tolist()
categorical_features=x_train.select_dtypes(include='object').columns.tolist()

In [9]:
categorical_features

['sex', 'smoker']

In [10]:
numeric_features

['age', 'children']

In [11]:
numeric_trans=Pipeline(steps=[
    ('trf1',StandardScaler())
])
cat_trans=Pipeline(steps=[
    ('trf1',OneHotEncoder(drop='first',sparse_output=False,handle_unknown='ignore'))
])

In [12]:
preprocessor=ColumnTransformer(transformers=[
    ('numeric',numeric_trans,numeric_features),
    ('categorical',cat_trans,categorical_features)
],remainder='passthrough')

Linear Regression

In [13]:
from sklearn.linear_model import  LinearRegression

In [14]:
pipe_LR=Pipeline(steps=[
    ('preprocessing',preprocessor),
    ('Linear_Model',LinearRegression())
])

In [15]:
pipe_LR.fit(x_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [16]:
y_pred=pipe_LR.predict(x_test)

In [17]:
from sklearn.metrics import r2_score

In [18]:
r2_score(y_test,y_pred)

0.7811302113434094

DecisionTree Regressor

In [19]:
from sklearn.tree import DecisionTreeRegressor

In [20]:
pipe_DTR=Pipeline(steps=[
    ('preprocessing',preprocessor),
    ('tree_Model',DecisionTreeRegressor())
])

In [21]:
pipe_DTR.fit(x_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [22]:
y_pred=pipe_DTR.predict(x_test)

In [23]:
r2_score(y_test,y_pred)

0.7220537940974023

RandomForestRegressor

In [24]:
from sklearn.ensemble import RandomForestRegressor

In [25]:
pipe_RFR=Pipeline(steps=[
    ('preprocessing',preprocessor),
    ('RFR_Model',RandomForestRegressor())
])

In [26]:
pipe_RFR.fit(x_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [27]:
y_pred=pipe_RFR.predict(x_test)

In [28]:
r2_score(y_test,y_pred)

0.8556754920297629

XGBoost Regressor

In [29]:
from xgboost import XGBRegressor

In [30]:
pipe_XGR=Pipeline(steps=[
    ('preprocessing',preprocessor),
    ('XGR_Model',XGBRegressor(n_estimators=100,     # number of boosting rounds
    max_depth=3,          # depth of each tree
    learning_rate=0.1,    # step size shrinkage
    subsample=0.8,        # fraction of samples used per tree
    colsample_bytree=0.8, # fraction of features used per tree
    random_state=42
))
])

In [32]:
pipe_XGR.fit(x_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [33]:
y_pred=pipe_XGR.predict(x_test)

In [34]:
r2_score(y_test,y_pred)

0.8795938784280184