In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, so any deprecation or convergence warning raised by a later cell
# will not be shown. Consider narrowing the filter to a specific category.
warnings.filterwarnings('ignore')

In [2]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older releases — confirm the pinned version.
boston = load_boston()

In [3]:
# Wrap the raw feature matrix in a DataFrame labelled with the dataset's
# own feature names.
df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
)

In [4]:
# Preview the first rows of the feature table.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [5]:
# Attach the regression target to the frame as the 'MEDV' column.
# NOTE(review): this mutates `df` in place, so the preview shown by the
# earlier cell no longer reflects the frame's current columns.
df['MEDV'] = boston.target

In [6]:
# Preview again to confirm the 'MEDV' column is now present.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [8]:
# Separate the target series from the predictors (every column except 'MEDV').
y = df['MEDV']
X = df.drop(columns=['MEDV'])

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=66,
)

In [10]:
# Two-step pipeline: standardise the features, then fit a linear regression.
pipe = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('linreg', LinearRegression()),
    ]
)

In [11]:
# Parameter grid for the 'linreg' pipeline step.
# NOTE(review): n_jobs only sets how many jobs LinearRegression uses for the
# computation, so the three candidates should fit the same model and tie on
# score. Consider searching a parameter that changes the fit (e.g.
# fit_intercept) or dropping the grid search for this model.
param = {'linreg__n_jobs':[1,2,3]}

In [12]:
# Cross-validated search over `param` with cv=3, ranking candidates by R^2.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param,
    scoring='r2',
    cv=3,
)

In [13]:
# Run the search on the training split; the fitted search object is the
# cell's displayed value.
grid.fit(X_train, y_train)

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('scale', StandardScaler()),
                                       ('linreg', LinearRegression())]),
             param_grid={'linreg__n_jobs': [1, 2, 3]}, scoring='r2')

In [14]:
# Score the fitted search on the held-out test split (the search was
# configured with scoring='r2').
grid.score(X_test, y_test)

0.8111288663608667

In [15]:
# Show the parameter combination selected by the cross-validated search.
grid.best_params_

{'linreg__n_jobs': 1}

In [16]:
def ridge_pipe(cv):
    """Grid-search the Ridge penalty on scaled features and score the winner.

    Builds a StandardScaler -> Ridge pipeline and searches five evenly spaced
    ``alpha`` values between 0 and 0.2 (inclusive), ranking candidates by R^2.
    The best parameters are printed and the fitted search is scored on the
    test split.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    notebook's global namespace.

    Parameters
    ----------
    cv
        Forwarded unchanged to ``GridSearchCV`` as its ``cv`` argument.

    Returns
    -------
    The value of ``GridSearchCV.score`` on ``(X_test, y_test)``.
    """
    model = Pipeline(steps=[('scale', StandardScaler()), ('ridge', Ridge())])
    alpha_grid = {'ridge__alpha': np.linspace(0, 0.2, 5)}

    search = GridSearchCV(
        estimator=model,
        param_grid=alpha_grid,
        scoring='r2',
        cv=cv,
    )
    search.fit(X_train, y_train)

    print(search.best_params_)
    return search.score(X_test, y_test)

# Run the ridge search with cv=4.
# NOTE(review): no output is recorded for this cell and the execution counter
# jumps from 16 to 18 — re-run the notebook top to bottom to refresh it.
ridge_pipe(4)

In [18]:
def lasso_pipe(cv):
    """Grid-search the Lasso penalty on scaled features and score the winner.

    Builds a StandardScaler -> Lasso pipeline and searches strictly positive
    ``alpha`` values (0.05 to 0.2 in steps of 0.05), ranking candidates by
    R^2. The best parameters are printed and the fitted search is scored on
    the test split.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    notebook's global namespace.

    Parameters
    ----------
    cv
        Forwarded unchanged to ``GridSearchCV`` as its ``cv`` argument.

    Returns
    -------
    The value of ``GridSearchCV.score`` on ``(X_test, y_test)``.
    """
    pipe = Pipeline(steps=[('scale', StandardScaler()), ('lasso', Lasso())])

    # Keep the original spacing but drop the leading alpha=0 candidate:
    # scikit-learn advises against alpha=0 with Lasso (the coordinate-descent
    # solver converges poorly there; plain least squares is LinearRegression's
    # job). Slicing the same linspace keeps the remaining values bit-identical.
    alphas = np.linspace(0, 0.2, 5)[1:]
    params = {'lasso__alpha': alphas}

    grid = GridSearchCV(estimator=pipe, cv=cv, param_grid=params, scoring='r2')
    grid.fit(X_train, y_train)

    print(grid.best_params_)
    return grid.score(X_test, y_test)

# Run the lasso search with cv=5; the call prints the best parameters and the
# returned test score is the cell's displayed value.
lasso_pipe(5)         

{'lasso__alpha': 0.05}


0.8133101251641616