## Importing libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV

from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.datasets import load_boston


In [6]:
# Load the Boston housing data. The returned object is dict-like with 'data' and
# 'target' entries (see the echoed output), not a DataFrame, despite the name `df`.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell presumably needs an older pinned scikit-learn — confirm.
df=load_boston()
# NOTE(review): echoing the whole object prints every array; df.keys() would be a lighter check.
df

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3

In [7]:
# Target stays as the raw array from the loader; features become a DataFrame
# whose columns are labelled with the loader's feature names.
y = df.target
x = pd.DataFrame(data=df.data, columns=df.feature_names)

In [8]:
# Hold out 30% of the rows for testing. random_state=1 pins the split so the
# same train/test partition is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)

# Linear Regression

In [9]:
# Ordinary least squares baseline, trained on the training split.
# fit() returns the estimator itself, so the chained form binds the fitted model.
lr_model = LinearRegression().fit(x_train, y_train)
lr_model

LinearRegression()

###### testing evaluation

In [10]:
# Held-out (test split) scores for the plain linear model.
y_pred = lr_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.7836295385076297
mean squared error: 19.83132367206308


###### training evaluation

In [11]:
# Same metrics on the training split, for comparison with the test scores.
y_pred_train = lr_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.7103879080674731
mean squared error: 23.513334449327022


# Lasso Regression

In [12]:
# L1-regularised model with a hand-picked penalty strength of 0.5.
# fit() returns the estimator itself, so the chained form binds the fitted model.
ls_model = Lasso(alpha=0.5).fit(x_train, y_train)
ls_model

Lasso(alpha=0.5)

###### testing evaluation

In [13]:
# Held-out (test split) scores for Lasso(alpha=0.5).
y_pred = ls_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.7390320547059956
mean squared error: 23.91888317593272


###### Training Data

In [14]:
# Training-split scores for Lasso(alpha=0.5).
y_pred_train = ls_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.6767887090980561
mean squared error: 26.241221939540807


# Ridge Regression

In [15]:
# L2-regularised model with a hand-picked penalty strength of 0.5.
# fit() returns the estimator itself, so the chained form binds the fitted model.
rd_model = Ridge(alpha=0.5).fit(x_train, y_train)
rd_model

Ridge(alpha=0.5)

###### Testing Data

In [16]:
# Held-out (test split) scores for Ridge(alpha=0.5).
y_pred = rd_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.7880378162997098
mean squared error: 19.427285232027728


###### Training data

In [17]:
# Training-split scores for Ridge(alpha=0.5).
y_pred_train = rd_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('r2 squared value:', r2)
print('mean squared error:', mse)


r2 squared value: 0.7086646054727523
mean squared error: 23.65324777268543


# Hyperparameter tuning

### Lasso Regression

#### GridSearchCV

In [18]:
# Exhaustive 5-fold search over the Lasso penalty strength.
# The grid holds 100 candidates: 0.10, 0.11, ..., 1.09.
hyp = {'alpha': np.arange(0.1, 1.1, 0.01)}
ls_model = Lasso()

gscv_ls_model = GridSearchCV(estimator=ls_model, param_grid=hyp, cv=5)
gscv_ls_model.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=Lasso(),
             param_grid={'alpha': array([0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ,
       0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31,
       0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42,
       0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,
       0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64,
       0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75,
       0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
       0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
       0.98, 0.99, 1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08,
       1.09])})

In [19]:
# Show the configuration that won the grid search.
# NOTE(review): the recorded winner is alpha=0.1, the smallest value in the grid,
# so the optimum may lie below the searched range — consider extending the grid downward.
gscv_ls_model.best_estimator_

Lasso(alpha=0.1)

In [20]:
# GridSearchCV refits the winning configuration on the whole of x_train/y_train
# (refit=True is the default and the search above does not override it), so
# best_estimator_ is already trained; calling .fit() on it again only repeats work.
ls_model = gscv_ls_model.best_estimator_
ls_model

Lasso(alpha=0.1)

In [21]:
## Testing Evaluation
# Held-out scores for the Lasso model chosen by the grid search.
y_pred = ls_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7793374045694275
Mean Squared Error: 20.22471700674124


In [22]:
## Training Evaluation
# Training-split scores for the Lasso model chosen by the grid search.
y_pred_train = ls_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.690635383653484
Mean Squared Error: 25.117023403284197


#### RandomizedSearchCV

In [23]:
# Randomised 5-fold search over the Lasso penalty strength.
# RandomizedSearchCV evaluates only n_iter (default 10) of the 100 candidates, so
# without a seed the sampled alphas - and therefore the winner - can change on
# every run. random_state pins the draw; 1 matches the seed used for train_test_split.
ls_model = Lasso()
hyp = {'alpha': np.arange(0.1, 1.1, 0.01)}
rscv_ls_model = RandomizedSearchCV(ls_model, hyp, cv=5, random_state=1)
rscv_ls_model.fit(x_train, y_train)

RandomizedSearchCV(cv=5, estimator=Lasso(),
                   param_distributions={'alpha': array([0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ,
       0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31,
       0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42,
       0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,
       0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64,
       0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75,
       0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
       0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
       0.98, 0.99, 1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08,
       1.09])})

In [24]:
# Show the best configuration among the candidates the randomised search evaluated.
rscv_ls_model.best_estimator_

Lasso(alpha=0.13)

In [25]:
# RandomizedSearchCV refits the winning configuration on the whole of
# x_train/y_train (refit=True is the default and the search above does not
# override it), so best_estimator_ is already trained; a second .fit() is redundant.
ls_model = rscv_ls_model.best_estimator_
ls_model

Lasso(alpha=0.13)

In [26]:
## Testing Evaluation
# Held-out scores for the Lasso model chosen by the randomised search.
y_pred = ls_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7762053788072244
Mean Squared Error: 20.51178122156559


In [27]:
## Training Evaluation
# Training-split scores for the Lasso model chosen by the randomised search.
y_pred_train = ls_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.6892880633593488
Mean Squared Error: 25.226411075860195


### Ridge Regression

#### GridSearchCV

In [28]:
# Exhaustive 5-fold search over the Ridge penalty strength.
# The grid holds 100 candidates: 0.10, 0.11, ..., 1.09.
hyp = {'alpha': np.arange(0.1, 1.1, 0.01)}
rd_model = Ridge()

gscv_rd_model = GridSearchCV(estimator=rd_model, param_grid=hyp, cv=5)
gscv_rd_model.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=Ridge(),
             param_grid={'alpha': array([0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ,
       0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31,
       0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42,
       0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,
       0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64,
       0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75,
       0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
       0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
       0.98, 0.99, 1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08,
       1.09])})

In [29]:
# Show the configuration that won the Ridge grid search.
# The long decimal in the recorded alpha comes from the floating-point steps of np.arange.
gscv_rd_model.best_estimator_

Ridge(alpha=0.14999999999999997)

In [30]:
# GridSearchCV refits the winning configuration on the whole of x_train/y_train
# (refit=True is the default and the search above does not override it), so
# best_estimator_ is already trained; calling .fit() on it again only repeats work.
rd_model = gscv_rd_model.best_estimator_
rd_model

Ridge(alpha=0.14999999999999997)

In [31]:
## Testing Evaluation
# Held-out scores for the Ridge model chosen by the grid search.
y_pred = rd_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7856850129049255
Mean Squared Error: 19.642930220428056


In [32]:
## Training Evaluation
# Training-split scores for the Ridge model chosen by the grid search.
y_pred_train = rd_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7101347189100609
Mean Squared Error: 23.5338906398419


#### RandomizedSearchCV

In [33]:
# Randomised 5-fold search over the Ridge penalty strength.
# RandomizedSearchCV evaluates only n_iter (default 10) of the 100 candidates, so
# without a seed the sampled alphas - and therefore the winner - can change on
# every run. random_state pins the draw; 1 matches the seed used for train_test_split.
rd_model = Ridge()
hyp = {'alpha': np.arange(0.1, 1.1, 0.01)}

rscv_rd_model = RandomizedSearchCV(rd_model, hyp, cv=5, random_state=1)
rscv_rd_model.fit(x_train, y_train)

RandomizedSearchCV(cv=5, estimator=Ridge(),
                   param_distributions={'alpha': array([0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ,
       0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31,
       0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42,
       0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,
       0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64,
       0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75,
       0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
       0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
       0.98, 0.99, 1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08,
       1.09])})

In [34]:
# Show the best configuration among the candidates the randomised search evaluated.
rscv_rd_model.best_estimator_

Ridge(alpha=0.13999999999999999)

In [35]:
# RandomizedSearchCV refits the winning configuration on the whole of
# x_train/y_train (refit=True is the default and the search above does not
# override it), so best_estimator_ is already trained; a second .fit() is redundant.
rd_model = rscv_rd_model.best_estimator_
rd_model

Ridge(alpha=0.13999999999999999)

In [36]:
## Testing Evaluation
# Held-out scores for the Ridge model chosen by the randomised search.
y_pred = rd_model.predict(x_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7855757714339014
Mean Squared Error: 19.65294269142501


In [37]:
## Training Evaluation
# Training-split scores for the Ridge model chosen by the randomised search.
y_pred_train = rd_model.predict(x_train)
r2, mse = r2_score(y_train, y_pred_train), mean_squared_error(y_train, y_pred_train)

print('R2 Squared value:', r2)
print('Mean Squared Error:', mse)

R2 Squared value: 0.7101638092528592
Mean Squared Error: 23.53152882216054
