In [45]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import tree
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor,GradientBoostingRegressor, VotingRegressor
from sklearn.model_selection import train_test_split,GridSearchCV, KFold, RandomizedSearchCV
from sklearn.pipeline import make_pipeline

In [2]:
# Load the Boston housing data bundled with scikit-learn and copy the feature
# matrix and the target vector into float64 NumPy arrays.
# NOTE(review): `load_boston` is not shipped with recent scikit-learn
# releases -- confirm the environment pins a version that still provides it.
dataset = datasets.load_boston()
x_data = np.array(dataset.data, dtype=np.float64)
y_data = np.array(dataset.target, dtype=np.float64)

In [3]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=2152,
)

In [4]:
# Sanity check: display the shape of the held-out target array.
y_test.shape

(102,)

In [24]:
# 5-fold cross-validation splitter passed as `cv=` to the hyper-parameter
# searches in this notebook. Shuffling with a fixed seed keeps the fold
# assignment reproducible.
# `get_n_splits` only returns the fold count and leaves the splitter
# untouched, so nothing needs to be called on `kfold` before using it.
kfold = KFold(n_splits=5, shuffle=True, random_state=2152)
print(kfold)

KFold(n_splits=5, random_state=2152, shuffle=True)
KFold(n_splits=5, random_state=2152, shuffle=True)


In [6]:
# Baseline single regression tree. It is seeded like every other estimator in
# this notebook: scikit-learn permutes the candidate features at each split,
# so an unseeded tree (and any search built on it) can differ between runs.
DT = tree.DecisionTreeRegressor(random_state=2152)

In [7]:
# Train the decision tree on the training split; rebinding keeps `DT`
# pointing at the fitted estimator.
DT = DT.fit(X=x_train, y=y_train)

In [8]:
# Show the per-feature importance scores of the fitted decision tree.
print(DT.feature_importances_)

[3.98884024e-02 1.57838703e-03 3.09693408e-03 3.97139269e-05
 1.94050567e-02 2.82520117e-01 7.27240928e-03 6.96803495e-02
 6.92411925e-04 1.03307287e-02 3.51175249e-02 5.70749197e-03
 5.24670473e-01]


In [9]:
# Random forest regressor with default hyper-parameters and a fixed seed.
RF = RandomForestRegressor(random_state=2152)

In [10]:
# Train the random forest on the training split; rebinding keeps `RF`
# pointing at the fitted estimator.
RF = RF.fit(X=x_train, y=y_train)

In [11]:
# Show the per-feature importance scores of the fitted random forest.
print(RF.feature_importances_)

[0.03925089 0.00134612 0.0052216  0.00062473 0.01911799 0.34688514
 0.0129015  0.05466519 0.00406036 0.01394559 0.0199461  0.01179777
 0.47023702]


In [12]:
# AdaBoost regressor: 60 boosting rounds, fixed seed for reproducibility.
AB = AdaBoostRegressor(
    n_estimators=60,
    random_state=2152,
)

In [13]:
# Train the AdaBoost model on the training split; rebinding keeps `AB`
# pointing at the fitted estimator.
AB = AB.fit(X=x_train, y=y_train)

In [14]:
# Show the per-feature importance scores of the fitted AdaBoost model.
print(AB.feature_importances_)

[2.89181049e-02 3.33908987e-04 2.58223733e-02 2.50551918e-03
 4.83905160e-02 1.91327837e-01 7.45383677e-03 1.43458163e-01
 1.69572147e-02 5.03242986e-02 7.73607890e-02 9.64783398e-03
 3.97499604e-01]


In [15]:
# Gradient boosting regressor: 100 boosting stages, fixed seed for
# reproducibility.
GB = GradientBoostingRegressor(
    n_estimators=100,
    random_state=2152,
)

In [16]:
# Train the gradient boosting model on the training split; rebinding keeps
# `GB` pointing at the fitted estimator.
GB = GB.fit(X=x_train, y=y_train)

In [17]:
# Show the per-feature importance scores of the fitted gradient boosting model.
print(GB.feature_importances_)

[0.02274596 0.0006442  0.00657924 0.00080208 0.03217446 0.33840867
 0.00613826 0.08254749 0.00487854 0.00726185 0.03862516 0.01035625
 0.44883784]


In [18]:
# Combine the four tree-based regressors into one voting ensemble. The string
# labels become the prefixes used to address each member's hyper-parameters
# (e.g. 'RandomForest__max_depth') in the searches further down.
ensemble = VotingRegressor(
    estimators=[
        ('DecisionTree', DT),
        ('RandomForest', RF),
        ('AdaBoost', AB),
        ('GradientBoost', GB),
    ]
)

In [59]:
# Exhaustive search space for the voting ensemble. Keys follow the
# '<member label>__<parameter>' convention; two candidate values per key
# gives 2**6 = 64 combinations.
parameters = [
    {
        # single decision tree
        'DecisionTree__max_depth': [4, 6],
        # random forest
        'RandomForest__max_depth': [4, 6],
        'RandomForest__n_estimators': [50, 100],
        # AdaBoost
        'AdaBoost__n_estimators': [50, 100],
        # gradient boosting
        'GradientBoost__max_depth': [4, 6],
        'GradientBoost__n_estimators': [50, 100],
    }
]

# Grid search over `parameters`, cross-validated with the shared `kfold`
# splitter and run on four worker processes.
model = GridSearchCV(
    estimator=ensemble,
    param_grid=parameters,
    cv=kfold,
    n_jobs=4,
)

In [26]:
# Run the grid search on the training split; the fitted search object is
# displayed as the cell output.
model.fit(X=x_train, y=y_train)

GridSearchCV(cv=KFold(n_splits=5, random_state=2152, shuffle=True),
             error_score=nan,
             estimator=VotingRegressor(estimators=[('DecisionTree',
                                                    DecisionTreeRegressor(ccp_alpha=0.0,
                                                                          criterion='mse',
                                                                          max_depth=None,
                                                                          max_features=None,
                                                                          max_leaf_nodes=None,
                                                                          min_impurity_decrease=0.0,
                                                                          min_impurity_split=None,
                                                                          min_samples_leaf=1,
                                                                          min_samp

In [67]:
# List every tunable parameter name of the ensemble, including the
# '<member label>__<parameter>' keys usable in a search grid.
ensemble.get_params().keys()

dict_keys(['estimators', 'n_jobs', 'weights', 'DecisionTree', 'RandomForest', 'AdaBoost', 'GradientBoost', 'DecisionTree__ccp_alpha', 'DecisionTree__criterion', 'DecisionTree__max_depth', 'DecisionTree__max_features', 'DecisionTree__max_leaf_nodes', 'DecisionTree__min_impurity_decrease', 'DecisionTree__min_impurity_split', 'DecisionTree__min_samples_leaf', 'DecisionTree__min_samples_split', 'DecisionTree__min_weight_fraction_leaf', 'DecisionTree__presort', 'DecisionTree__random_state', 'DecisionTree__splitter', 'RandomForest__bootstrap', 'RandomForest__ccp_alpha', 'RandomForest__criterion', 'RandomForest__max_depth', 'RandomForest__max_features', 'RandomForest__max_leaf_nodes', 'RandomForest__max_samples', 'RandomForest__min_impurity_decrease', 'RandomForest__min_impurity_split', 'RandomForest__min_samples_leaf', 'RandomForest__min_samples_split', 'RandomForest__min_weight_fraction_leaf', 'RandomForest__n_estimators', 'RandomForest__n_jobs', 'RandomForest__oob_score', 'RandomForest__ra

In [27]:
# Evaluate the fitted search object on the held-out test split.
model.score(X=x_test, y=y_test)

0.8905115130311674

In [30]:
# Best mean cross-validated score found by the grid search.
model.best_score_

0.8230225846556081

In [31]:
# Hyper-parameter combination that achieved the best cross-validated score.
model.best_params_

{'AdaBoost__n_estimators': 100,
 'DecisionTree__max_depth': 6,
 'GradientBoost__max_depth': 4,
 'GradientBoost__n_estimators': 100,
 'RandomForest__max_depth': 6,
 'RandomForest__n_estimators': 100}

In [60]:
# Feature scalers to compare, one fresh instance of each.
scaler = [
    MinMaxScaler(),
    StandardScaler(),
    RobustScaler(),
    MaxAbsScaler(),
]

In [62]:
# Compare feature scalers by re-running the grid search on scaled features.
#
# The split is done once on the *unscaled* data (it is identical for every
# scaler because the seed is fixed) and each scaler is fitted on the training
# rows only. Fitting the scaler on the full dataset before splitting would let
# test-set statistics leak into preprocessing and bias the reported test score.
# Note: inside cross-validation the scaler has still seen every training fold;
# wrapping scaler and ensemble in a Pipeline would remove that as well.
x_data = np.array(dataset.data, dtype='f8')
y_data = np.array(dataset.target, dtype='f8')
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=2152)

for sc in scaler:
    x_train = sc.fit_transform(x_train_raw)  # learn scaling from training rows only
    x_test = sc.transform(x_test_raw)        # apply the same scaling to the test rows
    model.fit(x_train, y_train)
    print(f"Scaler : {sc} \nBestScore : {model.best_score_} \nBestParams : {model.best_params_}")
    print(f"ModelScore : {model.score(x_test,y_test)} \n ==========")

Scaler : MinMaxScaler(copy=True, feature_range=(0, 1)) 
BestScore : 0.8254755887006932 
BestParams : {'AdaBoost__n_estimators': 50, 'DecisionTree__max_depth': 6, 'GradientBoost__max_depth': 4, 'GradientBoost__n_estimators': 100, 'RandomForest__max_depth': 6, 'RandomForest__n_estimators': 50}
ModelScore : 0.8925245533916384 
Scaler : StandardScaler(copy=True, with_mean=True, with_std=True) 
BestScore : 0.8257818512234415 
BestParams : {'AdaBoost__n_estimators': 50, 'DecisionTree__max_depth': 6, 'GradientBoost__max_depth': 4, 'GradientBoost__n_estimators': 100, 'RandomForest__max_depth': 6, 'RandomForest__n_estimators': 100}
ModelScore : 0.8919608904181375 
Scaler : RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
             with_scaling=True) 
BestScore : 0.8247918165814984 
BestParams : {'AdaBoost__n_estimators': 50, 'DecisionTree__max_depth': 6, 'GradientBoost__max_depth': 4, 'GradientBoost__n_estimators': 100, 'RandomForest__max_depth': 6, 'RandomForest__n_

In [56]:
# Wider search space for the voting ensemble: three candidate values per key
# (3**6 = 729 combinations), sampled randomly instead of searched exhaustively.
parameters2 = [{
    'DecisionTree__max_depth': [4, 6, 8],
    'RandomForest__max_depth': [4, 6, 8],
    'RandomForest__n_estimators': [50, 100, 150],
    'AdaBoost__n_estimators': [50, 100, 150],
    'GradientBoost__max_depth': [4, 6, 8],
    'GradientBoost__n_estimators': [50, 100, 150]
}]

# Randomized search: 20 sampled candidates, cross-validated with the shared
# `kfold` splitter on four worker processes. `random_state` fixes which
# candidates are drawn so the search is reproducible, matching the seed used
# everywhere else in this notebook.
model2 = RandomizedSearchCV(
    estimator=ensemble,
    param_distributions=parameters2,
    cv=kfold,
    n_iter=20,
    n_jobs=4,
    random_state=2152,
)

In [57]:
# Compare feature scalers by re-running the randomized search on scaled
# features.
scaler = [MinMaxScaler(), StandardScaler(), RobustScaler(), MaxAbsScaler()]

# The split is done once on the *unscaled* data (it is identical for every
# scaler because the seed is fixed) and each scaler is fitted on the training
# rows only. Fitting the scaler on the full dataset before splitting would let
# test-set statistics leak into preprocessing and bias the reported test score.
# Note: inside cross-validation the scaler has still seen every training fold;
# wrapping scaler and ensemble in a Pipeline would remove that as well.
x_data = np.array(dataset.data, dtype='f8')
y_data = np.array(dataset.target, dtype='f8')
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=2152)

for sc in scaler:
    x_train = sc.fit_transform(x_train_raw)  # learn scaling from training rows only
    x_test = sc.transform(x_test_raw)        # apply the same scaling to the test rows
    model2.fit(x_train, y_train)
    print(f"Scaler : {sc} \nBestScore : {model2.best_score_} \nBestParams : {model2.best_params_}")
    print(f"ModelScore : {model2.score(x_test,y_test)} \n ==========")

Scaler : MinMaxScaler(copy=True, feature_range=(0, 1)) 
BestScore : 0.8267439841654742 
BestParams : {'RandomForest__n_estimators': 150, 'RandomForest__max_depth': 6, 'GradientBoost__n_estimators': 100, 'GradientBoost__max_depth': 4, 'DecisionTree__max_depth': 6, 'AdaBoost__n_estimators': 150}
ModelScore : 0.8933073091803947 
Scaler : StandardScaler(copy=True, with_mean=True, with_std=True) 
BestScore : 0.8248214085075027 
BestParams : {'RandomForest__n_estimators': 150, 'RandomForest__max_depth': 8, 'GradientBoost__n_estimators': 100, 'GradientBoost__max_depth': 4, 'DecisionTree__max_depth': 8, 'AdaBoost__n_estimators': 50}
ModelScore : 0.8832947372494151 
Scaler : RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
             with_scaling=True) 
BestScore : 0.8172226671216208 
BestParams : {'RandomForest__n_estimators': 50, 'RandomForest__max_depth': 8, 'GradientBoost__n_estimators': 50, 'GradientBoost__max_depth': 4, 'DecisionTree__max_depth': 6, 'AdaBoost__n