Для выполнения домашнего задания необходимо взять boston house-prices datase (sklearn.datasets.load_boston) и сделать тоже самое для задачи регрессии (попробовать разные алгоритмы, поподбирать параметры, вывести итоговое качество).

In [29]:
from sklearn import datasets
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso, Ridge, HuberRegressor, ElasticNet
from sklearn.tree import DecisionTreeRegressor
%matplotlib inline
import numpy as np
import pandas as pd
import random
from jupyterthemes import jtplot
jtplot.style()

In [30]:
boston = datasets.load_boston()
print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [31]:
boston.data.shape

(506, 13)

In [32]:
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [33]:
X, y = boston['data'], boston['target']

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [35]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Лассо регрессия

In [36]:
lasso_reg = Lasso()

In [37]:
lasso_params = {
    'alpha': np.logspace(-7, 2, 1000)
}
grid_lasso = GridSearchCV(lasso_reg, lasso_params, cv=10, verbose=2, n_jobs=-1)
grid_lasso.fit(X_train, y_train)

Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


GridSearchCV(cv=10, estimator=Lasso(), n_jobs=-1,
             param_grid={'alpha': array([1.00000000e-07, 1.02096066e-07, 1.04236067e-07, 1.06420924e-07,
       1.08651577e-07, 1.10928986e-07, 1.13254132e-07, 1.15628013e-07,
       1.18051653e-07, 1.20526094e-07, 1.23052400e-07, 1.25631660e-07,
       1.28264983e-07, 1.30953502e-07, 1.33698374e-07, 1.36500781e-07,
       1.39361927e-07, 1.42283046e-07, 1.4526...
       6.20572881e+01, 6.33580499e+01, 6.46860766e+01, 6.60419396e+01,
       6.74262224e+01, 6.88395207e+01, 7.02824426e+01, 7.17556092e+01,
       7.32596543e+01, 7.47952252e+01, 7.63629826e+01, 7.79636013e+01,
       7.95977700e+01, 8.12661920e+01, 8.29695852e+01, 8.47086827e+01,
       8.64842328e+01, 8.82969996e+01, 9.01477631e+01, 9.20373200e+01,
       9.39664831e+01, 9.59360829e+01, 9.79469667e+01, 1.00000000e+02])},
             verbose=2)

In [38]:
print(grid_lasso.best_params_)
print(grid_lasso.best_score_)
print(grid_lasso.best_estimator_)

{'alpha': 0.00046415888336127773}
0.7032567859768619
Lasso(alpha=0.00046415888336127773)


### Ридж-регрессия

In [12]:
rige_reg = Ridge()

In [13]:
rige_params = {
    'alpha': np.logspace(-7, 2, 1000),
    'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
}
grid_rige = GridSearchCV(rige_reg, rige_params, cv=10, verbose=2, n_jobs=-1)
grid_rige.fit(X_train, y_train)

Fitting 10 folds for each of 6000 candidates, totalling 60000 fits


GridSearchCV(cv=10, estimator=Ridge(), n_jobs=-1,
             param_grid={'alpha': array([1.00000000e-07, 1.02096066e-07, 1.04236067e-07, 1.06420924e-07,
       1.08651577e-07, 1.10928986e-07, 1.13254132e-07, 1.15628013e-07,
       1.18051653e-07, 1.20526094e-07, 1.23052400e-07, 1.25631660e-07,
       1.28264983e-07, 1.30953502e-07, 1.33698374e-07, 1.36500781e-07,
       1.39361927e-07, 1.42283046e-07, 1.4526...
       6.74262224e+01, 6.88395207e+01, 7.02824426e+01, 7.17556092e+01,
       7.32596543e+01, 7.47952252e+01, 7.63629826e+01, 7.79636013e+01,
       7.95977700e+01, 8.12661920e+01, 8.29695852e+01, 8.47086827e+01,
       8.64842328e+01, 8.82969996e+01, 9.01477631e+01, 9.20373200e+01,
       9.39664831e+01, 9.59360829e+01, 9.79469667e+01, 1.00000000e+02]),
                         'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg',
                                    'sag', 'saga']},
             verbose=2)

In [14]:
print(grid_rige.best_params_)
print(grid_rige.best_score_)
print(grid_rige.best_estimator_)

{'alpha': 13.650078065460137, 'solver': 'sparse_cg'}
0.7004182684937927
Ridge(alpha=13.650078065460137, solver='sparse_cg')


### Регрессия Хьюберта

In [24]:
huber_reg = HuberRegressor()

In [25]:
huber_params = {
    'alpha': np.logspace(-7, 2, 100),
    'epsilon': np.linspace(1.35, 2, 50)
}
grid_huber = GridSearchCV(huber_reg, huber_params, cv=10, verbose=2, n_jobs=-1)
grid_huber.fit(X_train, y_train)

Fitting 10 folds for each of 5000 candidates, totalling 50000 fits


GridSearchCV(cv=10, estimator=HuberRegressor(), n_jobs=-1,
             param_grid={'alpha': array([1.00000000e-07, 1.23284674e-07, 1.51991108e-07, 1.87381742e-07,
       2.31012970e-07, 2.84803587e-07, 3.51119173e-07, 4.32876128e-07,
       5.33669923e-07, 6.57933225e-07, 8.11130831e-07, 1.00000000e-06,
       1.23284674e-06, 1.51991108e-06, 1.87381742e-06, 2.31012970e-06,
       2.84803587e-06, 3.51119173e-...
       1.54897959, 1.5622449 , 1.5755102 , 1.58877551, 1.60204082,
       1.61530612, 1.62857143, 1.64183673, 1.65510204, 1.66836735,
       1.68163265, 1.69489796, 1.70816327, 1.72142857, 1.73469388,
       1.74795918, 1.76122449, 1.7744898 , 1.7877551 , 1.80102041,
       1.81428571, 1.82755102, 1.84081633, 1.85408163, 1.86734694,
       1.88061224, 1.89387755, 1.90714286, 1.92040816, 1.93367347,
       1.94693878, 1.96020408, 1.97346939, 1.98673469, 2.        ])},
             verbose=2)

In [26]:
print(grid_huber.best_params_)
print(grid_huber.best_score_)
print(grid_huber.best_estimator_)

{'alpha': 0.35111917342151344, 'epsilon': 2.0}
0.7006519794987502
HuberRegressor(alpha=0.35111917342151344, epsilon=2.0)


### ElasticNet регрессия

Как и регрессия Хьюберта объединяет l1 и l2 регуляризации

In [18]:
elast_reg = ElasticNet()

In [19]:
elast_params = {
    'alpha': np.logspace(-7, 2, 200),
    'l1_ratio': np.linspace(0, 1, 50)
}
grid_elast = GridSearchCV(elast_reg, elast_params, cv=10, verbose=2, n_jobs=-1)
grid_elast.fit(X_train, y_train)

Fitting 10 folds for each of 10000 candidates, totalling 100000 fits


  model = cd_fast.enet_coordinate_descent(


GridSearchCV(cv=10, estimator=ElasticNet(), n_jobs=-1,
             param_grid={'alpha': array([1.00000000e-07, 1.10975250e-07, 1.23155060e-07, 1.36671636e-07,
       1.51671689e-07, 1.68318035e-07, 1.86791360e-07, 2.07292178e-07,
       2.30043012e-07, 2.55290807e-07, 2.83309610e-07, 3.14403547e-07,
       3.48910121e-07, 3.87203878e-07, 4.29700470e-07, 4.76861170e-07,
       5.29197874e-07, 5.87278661e-07, 6...
       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,
       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,
       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,
       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,
       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,
       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,
       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])},
             verbose=2)

In [20]:
print(grid_elast.best_params_)
print(grid_elast.best_score_)
print(grid_elast.best_estimator_)

{'alpha': 0.040554607358408275, 'l1_ratio': 0.0}
0.7003032608428936
ElasticNet(alpha=0.040554607358408275, l1_ratio=0.0)


### DecisionTree

In [21]:
tree_reg = DecisionTreeRegressor()

In [22]:
tree_params = {
    'max_depth': range(1, 11),
    'splitter': ['best', 'random'],
    'criterion': ['mse', 'mae', 'friedman_mse'],
    'min_samples_leaf': [1, 2, 4, 8, 16]
}
grid_tree = GridSearchCV(tree_reg, tree_params, cv=10, verbose=2, n_jobs=-1)
grid_tree.fit(X_train, y_train)

Fitting 10 folds for each of 300 candidates, totalling 3000 fits


GridSearchCV(cv=10, estimator=DecisionTreeRegressor(), n_jobs=-1,
             param_grid={'criterion': ['mse', 'mae', 'friedman_mse'],
                         'max_depth': range(1, 11),
                         'min_samples_leaf': [1, 2, 4, 8, 16],
                         'splitter': ['best', 'random']},
             verbose=2)

In [23]:
print(grid_tree.best_params_)
print(grid_tree.best_score_)
print(grid_tree.best_estimator_)

{'criterion': 'mse', 'max_depth': 9, 'min_samples_leaf': 1, 'splitter': 'random'}
0.7583003181193312
DecisionTreeRegressor(max_depth=9, splitter='random')


# Сравниваем на валидационной выборке

In [27]:
estimators = {
    'lasso': grid_lasso,
    'rige': grid_rige,
    'huber': grid_huber,
    'elasticNet': grid_elast,
    'tree': grid_tree
}

In [39]:
for k in estimators:
    v = estimators[k]
    print(k, "CV R^2:", v.best_score_, "Validation R^2:", v.best_estimator_.score(X_test, y_test))

lasso CV R^2: 0.699343937197225 Validation R^2: 0.7341340766298776
rige CV R^2: 0.7004182684937927 Validation R^2: 0.7363378197591282
huber CV R^2: 0.7006519794987502 Validation R^2: 0.7349964508691502
elasticNet CV R^2: 0.7003032608428936 Validation R^2: 0.7361187401757827
tree CV R^2: 0.7583003181193312 Validation R^2: 0.8994619976237317
