In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# Loading Data

In [2]:
# Load the Boston housing bunch and assemble one frame: a column per feature
# name, with the regression target appended as 'MEDV'.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins a release that still ships it.
loaded_data = load_boston()
dataset = pd.DataFrame(
    data=loaded_data.data,
    columns=loaded_data.feature_names,
).assign(MEDV=loaded_data.target)
dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# Count the missing values in each column (reduce down the rows).
dataset.isnull().sum(axis=0)

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64

In [4]:
# Separate predictors from the target by column label rather than by
# position, so the split stays correct if columns are added or reordered
# (and fails loudly with a KeyError if 'MEDV' is missing).
X = dataset.drop(columns='MEDV')
y = dataset['MEDV']

# Training the models

In [5]:
from sklearn.model_selection import train_test_split
# Hold out a test split; the fixed seed keeps the partition reproducible.
# No test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=100,
)

## 1. Linear Regression

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score

# Baseline model: standardise the features, then fit a linear regression.
# The parameter grid is empty, so no hyperparameters are tuned here; the
# GridSearchCV wrapper only keeps this cell parallel to the later models.
pipeline_lr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('estimator', LinearRegression()),
])
clf1 = GridSearchCV(estimator=pipeline_lr, param_grid={})
clf1.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = clf1.predict(X_test)
print('R^2 = {}'.format(r2_score(y_true=y_test, y_pred=y_pred)))

R^2 = 0.7246154314616731


## 2. Ridge Regression with Polynomial Features

In [7]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# Polynomial expansion -> standardisation -> ridge regression.
pipeline_ridge = Pipeline(steps=[
    ('poly', PolynomialFeatures()),
    ('scaler', StandardScaler()),
    ('estimator', Ridge()),
])

# Candidate hyperparameters; grid keys follow '<step name>__<parameter>'.
alpha_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10]
degree_vals = [1, 2, 3, 4]
clf2 = GridSearchCV(
    estimator=pipeline_ridge,
    param_grid={'poly__degree': degree_vals, 'estimator__alpha': alpha_vals},
)
clf2.fit(X_train, y_train)

# Report the selected combination, then score it on the held-out split.
print('Best alpha is {} with degree {} polynomial'.format(
    clf2.best_params_['estimator__alpha'],
    clf2.best_params_['poly__degree'],
))
y_pred = clf2.predict(X_test)
print('R^2 = {}'.format(r2_score(y_true=y_test, y_pred=y_pred)))

Best alpha is 0.1 with degree 2 polynomial
R^2 = 0.8550462920272358


## 3. Support Vector Regression

In [8]:
from sklearn.svm import SVR

# Standardise the features, then fit a support vector regressor (SVR).
pipeline_svr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('estimator', SVR()),
])

# Candidate gamma values: numeric settings plus the 'scale' option.
gamma_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 'scale']
clf3 = GridSearchCV(
    estimator=pipeline_svr,
    param_grid={'estimator__gamma': gamma_vals},
)
clf3.fit(X_train, y_train)

# Report the selected gamma, then score it on the held-out split.
print('Best gamma is {}'.format(clf3.best_params_['estimator__gamma']))
y_pred = clf3.predict(X_test)
print('R^2 = {}'.format(r2_score(y_true=y_test, y_pred=y_pred)))

Best gamma is 0.03
R^2 = 0.6197451954854953
