In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Boston housing data and expose the feature matrix as a DataFrame.
# `boston_dataset` keeps the attributes later cells read: .data, .target and
# .feature_names.
try:
    from sklearn.datasets import load_boston
    boston_dataset = load_boston()
except ImportError:
    # load_boston was deprecated in scikit-learn 1.0 and removed in 1.2.
    # Rebuild the same arrays from the original StatLib file, following the
    # recipe given in scikit-learn's deprecation notice (needs network access).
    from sklearn.utils import Bunch

    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    # Every record spans two physical lines: 11 values, then 3 more
    # (the last one being the target).
    boston_dataset = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

df = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
df.head()

In [None]:
# Features: the whole DataFrame (x is a second name for df, not a copy).
x = df
# Target: the value array that ships with the dataset object.
y = boston_dataset.target

In [None]:
# One-hot encode the two columns handled as categorical, one step at a time:
# x1 has RAD expanded into indicator columns, x2 additionally has TAX expanded.
x1 = pd.get_dummies(data=x, columns=['RAD'])
x2 = pd.get_dummies(data=x1, columns=['TAX'])

In [None]:
from sklearn.preprocessing import StandardScaler
# Unfitted scaler with default settings; it is fitted in a later cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on x2 and standardize it in a single step.
# The fit uses all rows of x2, i.e. it runs before any train/test split.
x_scal = scaler.fit_transform(x2)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the unscaled encoded features first, then fit the scaler on the
# training rows only and reuse those statistics for the test rows. Fitting the
# scaler on the complete data set would let test rows influence the scaling
# (data leakage) and make the held-out score optimistic.
# The split itself (30% test, seed 42) is unchanged; x_train / x_test are
# still standardized NumPy arrays.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x2, y, test_size=0.3, random_state=42
)
x_train = scaler.fit_transform(x_train_raw)  # re-fits `scaler` on train rows only
x_test = scaler.transform(x_test_raw)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingRegressor

In [None]:
from scipy.stats import uniform

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

#### RandomizedSearchCV

In [None]:
# Candidate models for the randomized search. Each entry holds a short label
# ('name'), an unfitted estimator ('model') and the lists/distributions that
# RandomizedSearchCV samples hyper-parameters from ('params').

# BaggingRegressor rejects an integer max_samples larger than the number of
# rows it is fitted on. With the 10-fold CV that is run on x_train, a training
# fold holds at least floor(0.9 * len(x_train)) rows, so the candidates stop
# there; larger values would only produce failed (NaN-scored) fits.
bagging_max_samples = len(x_train) * 9 // 10

models = [
    # 'normalize' is not searched: it was deprecated in scikit-learn 1.0 and
    # removed in 1.2, and the features are already standardized upstream.
    {'name': 'Lr', 'model': LinearRegression(),
     'params': {'fit_intercept': [True, False]}},
    {'name': 'R', 'model': Ridge(),
     'params': {'alpha': uniform(loc=0, scale=4),
                'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']}},
    {'name': 'L', 'model': Lasso(),
     'params': {'alpha': uniform(loc=0, scale=4),
                'selection': ['cyclic', 'random']}},
    {'name': 'RF', 'model': RandomForestRegressor(),
     'params': {'n_estimators': [10, 25, 50, 100, 150, 200],
                'criterion': ['squared_error', 'absolute_error', 'poisson'],
                'max_depth': [3, 5, 7, 9, 11]}},
    {'name': 'KN', 'model': KNeighborsRegressor(),
     'params': {'n_neighbors': list(range(1, 30)),
                'weights': ['uniform', 'distance'],
                'p': [1, 2, 3]}},
    {'name': 'DT', 'model': DecisionTreeRegressor(),
     'params': {'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                'max_depth': [3, 5, 7, 9, 11]}},
    {'name': 'BR', 'model': BaggingRegressor(),
     'params': {'n_estimators': [10, 25, 50, 100, 150, 200],
                'max_samples': list(range(100, bagging_max_samples + 1, 10))}},
]

In [None]:
# Run a 10-fold randomized hyper-parameter search for every candidate model
# and collect (name, fitted search) pairs in `res`, in the order of `models`.
# random_state pins which parameter candidates are sampled, so re-running the
# notebook explores the same candidates (same seed as the train/test split).
# Estimators constructed without their own random_state can still vary
# slightly between runs.
res = []
for spec in models:
    search = RandomizedSearchCV(spec['model'], spec['params'], cv=10, random_state=42)
    res.append((spec['name'], search.fit(x_train, y_train)))

In [None]:
# Report, per model, the best mean cross-validated score and the parameters
# that achieved it.
for name, search in res:
    print(name, search.best_score_, search.best_params_)

In [None]:
# Pick the search with the highest mean cross-validated score instead of
# relying on a fixed position in `res`: which model wins depends on the
# sampled candidates. A NaN score (every candidate failed) ranks last.
best_name, best_search = max(
    res,
    key=lambda entry: -np.inf if np.isnan(entry[1].best_score_) else entry[1].best_score_,
)
# best_estimator_ is the winning configuration refitted on all of x_train.
best_model = best_search.best_estimator_

In [None]:
# Predict on the held-out rows; y_pred is not used again in the visible cells.
y_pred = best_model.predict(x_test)
# Cell output: the estimator's own score on the test split
# (R^2 for scikit-learn regressors).
best_model.score(x_test, y_test)

#### Попробуем улучшить качество в лучших окрестностях пространства гиперпараметров, которые показал RandomizedSearch, с помощью GridSearch

Оставим только RandomForest и BaggingRegressor

In [None]:
# Narrowed, exhaustive grids for the two models kept for the second stage.
# Same entry layout as the first-stage list: 'name', 'model', 'params'.
# n_estimators covers 180..219 for both; max_depth covers 9..14 and
# max_samples covers 200..299.
models_2 = [
    {
        'name': 'RF',
        "model": RandomForestRegressor(),
        'params': {
            'n_estimators': list(range(180, 220)),
            'max_depth': list(range(9, 15)),
        },
    },
    {
        'name': 'BR',
        "model": BaggingRegressor(),
        'params': {
            'n_estimators': list(range(180, 220)),
            'max_samples': list(range(200, 300)),
        },
    },
]

#### GridSearchCV

In [None]:
# models_2=[ 
#       {'name':'Lr',"model": LinearRegression()  , 'params':{'fit_intercept':[True, False], 'normalize':[True, False]}},
#       {'name':'R',"model": Ridge(), 'params':{'alpha': np.linspace(0, 2, 50), 'solver':['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']}},
#       {'name':'L',"model": Lasso(), 'params':{'alpha': np.linspace(0, 2, 50), 'selection':['cyclic', 'random']}},
#       {'name':'RF',"model": RandomForestRegressor(), 'params':{'n_estimators':[10,25,50,100,150,200], 'criterion':['squared_error', 'absolute_error', 'poisson'], 'max_depth':[3,5,7,9,11]}},
#       {'name':'KN',"model": KNeighborsRegressor(), 'params':{'n_neighbors':list(range(1,30)),'weights': ['uniform', 'distance'], 'p':[1,2,3]}},
#       {'name':'DT',"model": DecisionTreeRegressor(), 'params':{'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'], 'max_depth':[3,5,7,9,11]}},
#       {'name':'BR',"model": BaggingRegressor(), 'params':{'n_estimators':[10,25,50,100,150,200], 'max_samples':list(range(100, 1000, 10))}}
# ]

In [None]:
# res_2=[]
# for v in  models_2:
#     res_2.append((v['name'], GridSearchCV(v['model'], v['params'], cv=10).fit(x_train, y_train)))

In [None]:
# for r in res_2:
#     print(r[0], r[1].best_score_, r[1].best_params_)