In [1]:
#Data handling libraries
import pandas as pd
import numpy as np

#Using sklearn boston dataset
from sklearn.datasets import load_boston

#Train-test split
from sklearn.model_selection import train_test_split, GridSearchCV

#importing ml libraries
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error

#Ignore warnings
import warnings
warnings.filterwarnings('ignore')

## Loading the feature data from scikit-learn's Boston housing dataset

In [2]:
# Load the dataset object; its .data, .feature_names and .target attributes are used below.
# NOTE(review): load_boston was deprecated and later removed from scikit-learn —
# confirm the pinned version still provides it, otherwise the import cell fails on a fresh kernel.
boston = load_boston()

In [3]:
# Wrap the feature matrix in a DataFrame, labelling columns with the dataset's own feature names.
df = pd.DataFrame(boston.data, columns=boston.feature_names)
# Preview the first rows (rendered as the cell output).
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


## Loading the target variable (price) from the Boston housing dataset

In [4]:
# Single-column DataFrame holding the regression target, labelled 'price'.
y = pd.DataFrame(boston.target, columns=['price'])
# Preview the first rows (rendered as the cell output).
y.head()

Unnamed: 0,price
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [6]:
# Train/test split: hold out 30% of the rows for testing (test_size=0.3);
# random_state=40 fixes the shuffle so the same split is produced on every run.
xtrain, xtest, ytrain, ytest = train_test_split(df,y,test_size=0.3,random_state=40)

# Linear regression

In [None]:
# Ordinary least-squares baseline with default settings (no regularisation).
lr = LinearRegression()

In [None]:
# Fit the baseline on the training split only.
lr.fit(xtrain,ytrain)

In [None]:
# Baseline predictions for the held-out test features.
pred = lr.predict(xtest)

In [None]:
# Test-set mean squared error of the linear baseline (shown as the cell output).
mean_squared_error(ytest,pred)

# Lasso Regression

In [None]:
# Lasso (L1-regularised) regression with a hand-picked alpha of 0.1.
ls = Lasso(alpha=0.1) # author's note: test MSE was ~23 before tuning alpha and ~22 with alpha=0.1 (no saved output to confirm)

In [None]:
# Fit the Lasso model on the training split only.
ls.fit(xtrain,ytrain)

In [None]:
# Lasso predictions for the held-out test features.
pred_la = ls.predict(xtest)

In [None]:
# Test-set mean squared error of the Lasso model (shown as the cell output).
mean_squared_error(ytest, pred_la)

# Ridge Regression

In [None]:
# Ridge (L2-regularised) regression with a hand-picked alpha of 40.
rr = Ridge(alpha=40)  # author's note: test MSE was ~22.35 before tuning alpha and ~22.3 with alpha=40 (no saved output to confirm)

In [None]:
# Fit the Ridge model on the training split only.
rr.fit(xtrain,ytrain)

In [None]:
# Ridge predictions for the held-out test features.
pred_rr = rr.predict(xtest)

In [None]:
# Test-set mean squared error of the Ridge model (shown as the cell output).
mean_squared_error(ytest, pred_rr)

# Manual hyperparameter tuning

In [None]:
# Manual sweep over Ridge regularisation strengths: fit one model per alpha on
# the training split and print its test-set MSE.
# NOTE(review): the alphas are compared on the test split, so whichever value
# wins has been tuned against the held-out data; prefer cross-validation on the
# training split when choosing alpha.
ridge_alphas = [0.001, 0.01, 0.1, 1, 10]  # not named `list`, which would shadow the builtin
for alpha in ridge_alphas:
    # Sweep-local names, so the Ridge(alpha=40) model (`rr`) and its predictions
    # (`pred_rr`) from the earlier section are not silently overwritten.
    ridge_trial = Ridge(alpha=alpha)
    ridge_trial.fit(xtrain, ytrain)
    ridge_trial_pred = ridge_trial.predict(xtest)
    print(f'the parameter {alpha} gives mse as:', mean_squared_error(ytest, ridge_trial_pred))

In [None]:
# Manual sweep over Lasso regularisation strengths: fit one model per alpha on
# the training split and print its test-set MSE.
# NOTE(review): the alphas are compared on the test split, so whichever value
# wins has been tuned against the held-out data; prefer cross-validation on the
# training split when choosing alpha.
lasso_alphas = [0.001, 0.01, 0.1, 1, 10]  # not named `list`, which would shadow the builtin
for alpha in lasso_alphas:
    # Sweep-local names, so the Lasso(alpha=0.1) model (`ls`) and its predictions
    # (`pred_la`) from the earlier section are not silently overwritten.
    lasso_trial = Lasso(alpha=alpha)
    lasso_trial.fit(xtrain, ytrain)
    lasso_trial_pred = lasso_trial.predict(xtest)
    print(f'the parameter {alpha} gives mse as:', mean_squared_error(ytest, lasso_trial_pred))

# Hyperparameter tuning using GridSearchCV

In [None]:
# Candidate alpha values for the Ridge grid search, in ascending order
# (1e-15 means 1 x 10^-15).
# alpha=0 is deliberately left out: it is plain least squares, which the
# scikit-learn docs advise fitting with LinearRegression instead of Ridge for
# numerical reasons, and 1e-15 already covers the "almost no penalty" case.
param_rd = {
    'alpha': [
        1e-15, 1e-13, 1e-11, 1e-9, 1e-7, 1e-5, 1e-3, 1e-1,
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        20, 30, 40, 50, 60, 70, 80, 90, 100,
        200, 300, 400, 500,
    ]
}

In [None]:
# Candidate alpha values for the Lasso grid search, in ascending order
# (1e-15 means 1 x 10^-15).
# alpha=0 is deliberately left out: the scikit-learn docs advise against
# alpha=0 with Lasso (the coordinate-descent solver does not converge well
# without a penalty; use LinearRegression for that case), and the resulting
# warnings would be hidden by the notebook-wide warnings filter.
param_ls = {
    'alpha': [
        1e-15, 1e-13, 1e-11, 1e-9, 1e-7, 1e-5, 1e-3, 1e-1,
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        20, 30, 40, 50, 60, 70, 80, 90, 100,
        200, 300, 400, 500,
    ]
}

In [None]:
# 10-fold cross-validated grid search over the Ridge alpha candidates in param_rd.
# NOTE(review): no `scoring` is passed, so the estimator's default scorer is used
# (R^2 for scikit-learn regressors — confirm); best_score_ is then not an MSE and
# cannot be compared directly with the mean_squared_error values computed earlier.
model_rd = GridSearchCV(Ridge(), param_rd, cv=10)

In [None]:
# Run the search on the full dataset (df, y), not on the xtrain/ytrain split.
model_rd.fit(df,y)    # author's rationale: with cv=10 the search makes its own train/validation folds, so the whole dataset is passed in

In [None]:
# Alpha that achieved the best mean cross-validated score for Ridge (shown as the cell output).
model_rd.best_params_

In [None]:
# Mean cross-validated score of that best Ridge alpha, in whatever metric the search's scorer uses.
model_rd.best_score_

In [None]:
# 10-fold cross-validated grid search over the Lasso alpha candidates in param_ls.
# NOTE(review): no `scoring` is passed, so the estimator's default scorer is used
# (R^2 for scikit-learn regressors — confirm); best_score_ is then not an MSE and
# cannot be compared directly with the mean_squared_error values computed earlier.
model_ls = GridSearchCV(Lasso(), param_ls, cv=10)

In [None]:
# Run the Lasso search on the full dataset (df, y); the 10 CV folds provide the train/validation splits.
model_ls.fit(df,y)

In [None]:
# Alpha that achieved the best mean cross-validated score for Lasso (shown as the cell output).
model_ls.best_params_

In [None]:
# Mean cross-validated score of that best Lasso alpha, in whatever metric the search's scorer uses.
model_ls.best_score_