In [1]:
import numpy as np
import pandas as pd 

from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score
from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.linear_model import LinearRegression, Lasso, Ridge

import seaborn as sns
import matplotlib.pyplot as plt





In [2]:
# Location of the raw data, relative to the notebook's working directory.
# The recorded run failed with FileNotFoundError, so the CSV must be present
# at this path (or DATA_PATH adjusted) before the notebook is executed.
DATA_PATH = 'medical_insurance.csv'
medical_df = pd.read_csv(DATA_PATH)
# Preview the first rows only instead of rendering the whole frame.
medical_df.head()


FileNotFoundError: [Errno 2] No such file or directory: 'medical_insurance.csv'

In [None]:
# Print column names, dtypes and non-null counts of the loaded frame.
medical_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the frame's columns.
medical_df.describe()

In [None]:
# Encode the smoker flag as integers: 'yes' -> 1, 'no' -> 0.
# The explicit cast keeps the column numeric without relying on replace()'s
# deprecated silent downcasting of object columns, and fails loudly if any
# unexpected label is left over. replace() leaves existing 0/1 values alone,
# so re-running the cell is harmless.
medical_df['smoker'] = (
    medical_df['smoker']
    .replace({'yes':1,'no':0})
    .astype('int64')
)

In [None]:
# Encode gender as integers: 'female' -> 0, 'male' -> 1.
# The explicit cast keeps the column numeric without relying on replace()'s
# deprecated silent downcasting of object columns, and fails loudly if any
# unexpected label is left over. replace() leaves existing 0/1 values alone,
# so re-running the cell is harmless.
medical_df['gender'] = (
    medical_df['gender']
    .replace({'female':0,'male':1})
    .astype('int64')
)
# Confirm the resulting dtypes.
medical_df.info()

In [None]:
# Remove the 'region' column, which this notebook never encodes numerically.
# Reassigning (instead of inplace=True) with errors='ignore' keeps the cell
# idempotent: a second run no longer raises KeyError for the missing column.
medical_df = medical_df.drop(columns=['region'], errors='ignore')

In [None]:
# Re-check columns and dtypes after dropping 'region'.
medical_df.info()

## Feature Selection

In [None]:
# Display the frame as it stands after the encoding and column-drop cells above.
medical_df

## Assumption Check: No Multicollinearity

In [None]:
# Variance inflation factor (VIF) of every predictor.
# The target 'charges' is dropped by name before building the design matrix:
# VIF measures collinearity among the independent variables only, and dropping
# by name does not depend on 'charges' being the last column.
# NOTE(review): statsmodels does not add an intercept column itself; add a
# constant column first if intercept-adjusted VIFs are wanted.
vif_features = medical_df.drop(columns='charges')
vif_matrix = vif_features.to_numpy(dtype=float)  # built once, reused for every column
vif_list = [
    variance_inflation_factor(vif_matrix, col_idx)
    for col_idx in range(vif_matrix.shape[1])
]


In [None]:
# Print the VIF of each predictor next to its column name.
# The target 'charges' is dropped by name so that it is neither scored nor
# used as a regressor when scoring the other columns.
vif_features = medical_df.drop(columns='charges')
vif_matrix = vif_features.to_numpy(dtype=float)  # built once, reused for every column
for col_idx, col_name in enumerate(vif_features.columns):
    vif_score = variance_inflation_factor(vif_matrix, col_idx)
    print(f"VIF score for {col_name}", vif_score)

## Model Training

In [None]:
# Predictors: every column except the target.
x = medical_df.drop(columns=['charges'])
# Target: the 'charges' column.
y = medical_df.loc[:, 'charges']


In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=22,
)

In [None]:
# Inspect the training predictors produced by the split.
x_train

In [None]:
# Inspect the held-out test predictors.
x_test

In [None]:
# Inspect the training target values.
y_train

In [None]:
# Inspect the held-out test target values.
y_test

## Instantiating Linear Regression Model

In [None]:
# Create the linear regression estimator and fit it on the training split.
linear_reg = LinearRegression().fit(x_train, y_train)
# fit() returns the estimator itself, so displaying it shows the same output.
linear_reg

In [None]:
# Fitted intercept (the "c" in y = m*x + c). LinearRegression solves ordinary
# least squares directly; no gradient descent is involved.
linear_reg.intercept_  # intercept term c; the slopes m are in coef_

In [None]:
# Fitted slope coefficients, one per column of x_train.
linear_reg.coef_

#### Evaluation on Training Data

In [None]:
# Predictions of the fitted linear model on the data it was trained on.
y_pred_train = linear_reg.predict(x_train)
y_pred_train

In [None]:
# Mean squared error of the linear model on the training split.
# NOTE(review): the name `mse` is reassigned by later cells for other models.
mse = mean_squared_error(y_train, y_pred_train)
mse

In [None]:
# Mean absolute error of the linear model on the training split.
# NOTE(review): the name `mae` is reassigned by later cells for other models.
mae = mean_absolute_error(y_train,y_pred_train)
mae

In [None]:
# Root mean squared error of the linear model on the training split.
# Computed straight from the predictions rather than from `mse`, because later
# cells reuse the name `mse` for other models' errors; re-running this cell out
# of order would otherwise report the wrong model's RMSE.
root_mse = np.sqrt(mean_squared_error(y_train, y_pred_train))
root_mse

In [None]:
# R^2 (coefficient of determination) of the linear model on the training split.
r2score = r2_score(y_train,y_pred_train)
r2score

#### Evaluation on Testing Data

In [None]:
# Predictions of the fitted linear model on the held-out test split.
y_pred_test = linear_reg.predict(x_test)


In [None]:
# Mean squared error of the linear model on the test split.
mse_test = mean_squared_error(y_test,y_pred_test)
mse_test

In [None]:
# Mean absolute error of the linear model on the test split.
mae_test = mean_absolute_error(y_test, y_pred_test)
mae_test

In [None]:
# R^2 of the linear model on the test split; compare with the training R^2
# to judge how well the fit generalizes.
r2_test = r2_score(y_test, y_pred_test)
r2_test 

### Lasso Regression 

##### Evaluation on Training Data 

In [None]:
# Create a Lasso estimator with its default settings and fit it on the training split.
linear_reg_lasso = Lasso().fit(x_train, y_train)
# fit() returns the estimator itself, so displaying it shows the same output.
linear_reg_lasso

In [None]:
# Score the Lasso model on the training split.
y_pred_train_lasso = linear_reg_lasso.predict(x_train)
# Apply the three metrics in one pass; the assigned names match the original
# cell (`mse` and `mae` are overwritten here, as before).
mse, mae, r_square_lasso = (
    metric(y_train, y_pred_train_lasso)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
r_square_lasso

#### Evaluation on Testing Data

In [None]:
# Score the Lasso model on the held-out test split.
y_pred_test_lasso = linear_reg_lasso.predict(x_test)
# Apply the three metrics in one pass; the assigned names match the original
# cell (`mse`, `mae` and `r_square_lasso` are overwritten here, as before).
mse, mae, r_square_lasso = (
    metric(y_test, y_pred_test_lasso)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
r_square_lasso

## RIDGE Regression

In [None]:
# Create a Ridge estimator with its default settings and fit it on the training split.
ridge_model = Ridge().fit(x_train, y_train)
# fit() returns the estimator itself, so displaying it shows the same output.
ridge_model

#### Fitted Coefficients and Intercept

In [None]:
# Fitted Ridge slope coefficients, one per column of x_train.
ridge_model.coef_

In [None]:
# Fitted Ridge intercept term.
ridge_model.intercept_


## Evaluation on Training Data 

In [None]:
# Predictions of the fitted Ridge model on the training split.
y_pred_train_ridge = ridge_model.predict(x_train)


In [None]:
# Score the Ridge model on the training split.
mse = mean_squared_error(y_train,y_pred_train_ridge)
mae = mean_absolute_error(y_train,y_pred_train_ridge)
# Stored under a ridge-specific name: the original reused `r_square_lasso`,
# which overwrote the Lasso score with a Ridge result under the wrong label.
r_square_ridge = r2_score(y_train,y_pred_train_ridge)
r_square_ridge

## Evaluation on Testing Data

In [None]:
# Score the Ridge model on the held-out test split.
# Predictions must come from `ridge_model`; the original called
# `linear_reg_lasso.predict`, so this section silently re-reported the Lasso
# test metrics as Ridge results.
y_pred_test_ridge = ridge_model.predict(x_test)
mse = mean_squared_error(y_test,y_pred_test_ridge)
mae = mean_absolute_error(y_test,y_pred_test_ridge)
# Stored under a ridge-specific name so the Lasso score is not overwritten.
r_square_ridge = r2_score(y_test,y_pred_test_ridge)
r_square_ridge