In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn import svm,datasets
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
Boston = datasets.load_boston()

In [3]:
X = Boston.data
sc = StandardScaler()
X_std = sc.fit_transform(X)

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X_std, Boston.target)

## LinearRegression

In [5]:
clfLR = LinearRegression()
grid = {'n_jobs' : [1,2,3]}
LR = GridSearchCV(clfLR,grid)
LR.fit(X_train,Y_train)

final_clf = LR.best_estimator_
print(final_clf)

final_clf.fit(X_train,Y_train)
final_prediction = final_clf.predict(X_test)
r2_score(Y_test,final_prediction)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)


0.71163889531520175

## Logistic Regression Using SGD

In [6]:
# Used warnings just to remove the deprecated warning generated due to the use of n_iter
import warnings
warnings.filterwarnings('ignore')

clfSGD = SGDRegressor()
grid = { 'alpha' : [0.001, 0.0001, 0.00001],
        'n_iter' : [100,1000,10000],
        'learning_rate' : ['constant','optimal','invscaling']
       }
SGD = GridSearchCV(clfSGD,grid)
SGD.fit(X_train,Y_train)


final_SGD = SGD.best_estimator_
print(final_SGD)

final_SGD.fit(X_train,Y_train)
final_prediction = final_SGD.predict(X_test)
r2_score(Y_test,final_prediction)

SGDRegressor(alpha=0.001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
       loss='squared_loss', max_iter=None, n_iter=10000, penalty='l2',
       power_t=0.25, random_state=None, shuffle=True, tol=None, verbose=0,
       warm_start=False)


0.71175025604763309

## Decision Tree Regressor

In [7]:
clfDT = DecisionTreeRegressor()
grid = { 'max_depth' : [2,3,4,5,6,7,8,9],
       'criterion' : ["mse", "friedman_mse", "mae"]}
DT = GridSearchCV(clfDT,grid)
DT.fit(X_train,Y_train)

final_DT = DT.best_estimator_
print(final_DT)

final_DT.fit(X_train,Y_train)
final_prediction = final_DT.predict(X_test)
r2_score(Y_test,final_prediction)

DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')


0.80609010093116262

## SVM Regressor

In [8]:
clfSVM = svm.SVR()
grid = {'kernel' : [ 'linear', 'poly', 'rbf', 'sigmoid']}
SVM = GridSearchCV(clfSVM,grid)
SVM.fit(X_train,Y_train)

final_SVM = SVM.best_estimator_
print(final_SVM)

final_SVM.fit(X_train,Y_train)
final_prediction = final_SVM.predict(X_test)
r2_score(Y_test,final_prediction)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)


0.65539195025561703