# LASSO Model

In [18]:
## -----------
# READ DATA
## -----------
import pandas as pd

# Load the raw insurance records; the path is relative to the notebook's
# working directory.
insurance_df = pd.read_csv(filepath_or_buffer="insurance.csv")

In [19]:
## -----------
# DATA TRANSFORMATION
## -----------

def sex_function(x):
    """Encode a raw ``sex`` value as a binary indicator.

    Parameters
    ----------
    x : object
        A single raw value taken from the ``sex`` column.

    Returns
    -------
    int
        ``0`` when ``x`` equals the exact string ``'female'``, ``1`` for
        every other value.
    """
    # The comparison is exact and case-sensitive: anything that is not
    # literally 'female' (other spellings, missing values, ...) maps to 1.
    return int(x != 'female')

# Binary transformation: recode the two two-valued text columns as 0/1 integers.
insurance_df = insurance_df.assign(
    sex=insurance_df['sex'].apply(sex_function),
    smoker=insurance_df['smoker'].apply(lambda x: int(x != 'no')),
)

# Multiclass transformation: one indicator column per region, with the first
# level dropped (drop_first=True) so it acts as the baseline category.
region_dummies = pd.get_dummies(insurance_df['region'], drop_first=True)

# Swap the original text column for its indicator columns (appended on the right).
insurance_df = pd.concat(
    [insurance_df.drop(columns=["region"]), region_dummies],
    axis=1,
)

In [20]:
## -----------
# TRAIN AND TEST SPLIT
## -----------

from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every metric computed from it, is the same on
# each Restart & Run All instead of changing from run to run.
RANDOM_STATE = 42

# Target is the `charges` column; every remaining column is a feature.
y = insurance_df['charges']
X = insurance_df.drop(columns=['charges'])

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Recombined frames with the target as the first column, followed by the features.
train = pd.concat([y_train, X_train], axis=1)
test = pd.concat([y_test, X_test], axis=1)

In [44]:
## -----------
# MODEL
## -----------

from sklearn import linear_model

# Lasso = linear regression with an L1 penalty; alpha sets the penalty strength.
# NOTE(review): the features are passed in unscaled here, and an L1 penalty is
# sensitive to feature scale - confirm this is intentional.
lasso_params = {"alpha": 8}
reg = linear_model.Lasso(**lasso_params)

# Last expression of the cell, so the fitted estimator is echoed as the output.
reg.fit(X=X_train, y=y_train)

Lasso(alpha=8)

In [45]:
## -----------
# EVALUATION
## -----------

from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out features.
yhat = reg.predict(X_test)

# Score the predictions against the held-out targets with R^2.
r2 = r2_score(y_true=y_test, y_pred=yhat)

# Bare expression so the score is shown as the cell output.
r2

0.6723316141312212

In [46]:
import pickle

# Persist the fitted estimator to a file named "lasso" in the working directory.
# Only unpickle this file from a trusted location: loading a pickle can execute
# arbitrary code.
with open("lasso", mode="wb") as model_file:
    pickle.dump(reg, model_file)