In [29]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [30]:
from sklearn.datasets import load_boston

In [31]:
X, y = load_boston(return_X_y = True)

In [32]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state = 0)

In [33]:
lr = LinearRegression().fit(X_train, y_train)

In [34]:
lr.score(X_train, y_train)

0.7645451026942549

In [35]:
lr.score(X_valid, y_valid)

0.6733825506400194

In [36]:
lr_params = {
    'fit_intercept':[True,False], 
    'normalize':[True,False],
    'copy_X':('True', 'False')
}

In [38]:
grid_lr = GridSearchCV(lr, lr_params, cv=10)
grid_lr.fit(X_train, y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'fit_intercept': [True, False], 'normalize': [True, False], 'copy_X': ('True', 'False')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [39]:
print(grid_lr.best_params_)
print(grid_lr.best_score_)
print(grid_lr.best_estimator_)

{'copy_X': 'True', 'fit_intercept': True, 'normalize': True}
0.7134653190143675
LinearRegression(copy_X='True', fit_intercept=True, n_jobs=None,
         normalize=True)


Ridge

In [44]:
from sklearn.linear_model import Ridge

In [45]:
alphas = np.array([0.001, 0.01, 0.1, 1, 10, 100])

In [47]:
model_Ridge = Ridge()
grid_Ridge = GridSearchCV(estimator=model_Ridge, param_grid=dict(alpha=alphas))
grid_Ridge.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'alpha': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [48]:
print(grid_Ridge.best_params_)
print(grid_Ridge.best_score_)
print(grid_Ridge.best_estimator_)

{'alpha': 0.1}
0.7159209994990412
Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)


Lasso

In [51]:
from sklearn.linear_model import Lasso

In [52]:
alphas = np.array([0.001, 0.01, 0.1, 1, 10, 100])

In [53]:
model_Lasso = Lasso()
grid_Lasso = GridSearchCV(estimator=model_Lasso, param_grid=dict(alpha=alphas))
grid_Lasso.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'alpha': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [54]:
print(grid_Lasso.best_params_)
print(grid_Lasso.best_score_)
print(grid_Lasso.best_estimator_)

{'alpha': 0.001}
0.7155049180359111
Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)


Сравниваем результаты на валидационной выборке

In [55]:
estimators = {
    'regression': grid_lr,
    'Ridge': grid_Ridge,
    'Lasso': grid_Lasso
}

In [56]:
for k in estimators:
    v = estimators[k]
    print(k, "CV Accuracy:", v.best_score_, "Validation Accuracy:", v.best_estimator_.score(X_valid, y_valid))

regression CV Accuracy: 0.7134653190143675 Validation Accuracy: 0.6733825506400194
Ridge CV Accuracy: 0.7159209994990412 Validation Accuracy: 0.6725462491471208
Lasso CV Accuracy: 0.7155049180359111 Validation Accuracy: 0.6731947086233128
