# Import

In [1]:
# Basic: tabular containers and numerics
import pandas as pd
import numpy as np

import warnings

# Suppress all Python warnings for the rest of the session.
warnings.simplefilter('ignore')

# ML Toolkit: robusta optimizers / cross-validation helpers plus a few sklearn utilities
from robusta.optimizer import GridSearchCV, RandomSearchCV, OptunaCV
from robusta.crossval import crossval_predict
# NOTE(review): the two star imports below hide where names come from.
# `make_pipeline` and `StackingRegressor` are used later in this notebook without an
# explicit import, so they presumably come from these modules -- confirm and import explicitly.
from robusta.pipeline import *
from robusta.stack import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
from dask_ml.preprocessing import RobustScaler

# Model: estimator lookup helpers from robusta.testing
from robusta.testing import get_estimator, get_estimator_name
from robusta.testing import ESTIMATORS, PARAM_SPACE

# Plot: notebook plot theme and matplotlib backend selection
from jupyterthemes import jtplot
import matplotlib

# Apply the 'gruvboxd' jupyterthemes style to matplotlib figures.
jtplot.style('gruvboxd')
# Select the 'nbagg' (interactive notebook) matplotlib backend.
matplotlib.use('nbagg')

Using TensorFlow backend.


# Data

In [36]:
# Synthetic regression problem: 200 samples, 10 features of which only 2 are
# informative, with noise=0.2. The fixed random_state pins the generated data.
X, y = make_regression(
    n_samples=200,
    n_features=10,
    n_informative=2,
    random_state=666,
    noise=0.2,
)

# Wrap the generated features / target in pandas containers.
X, y = pd.DataFrame(X), pd.Series(y)

In [37]:
# Display the feature matrix (the saved output shows only the first and last rows).
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.135661,-0.430775,0.585261,-2.744542,-2.091578,-0.527383,-0.104947,-0.539386,-1.133136,0.290818
1,-1.162613,0.487434,1.891200,1.681331,1.465606,-0.827095,0.800757,-0.345333,-0.089977,0.300130
2,1.479978,-1.492034,0.783114,0.876042,0.860270,-0.510942,1.000717,0.904345,-0.215002,1.320471
3,0.499060,-0.794232,0.282603,1.123826,-0.086081,-1.915057,-1.025033,1.696374,-0.890490,-0.510916
4,0.081877,0.196557,0.778831,-0.079289,-1.728170,0.826598,-1.777999,-0.420409,0.939043,-0.131891
...,...,...,...,...,...,...,...,...,...,...
195,0.943557,-0.082599,-0.867088,0.295086,-0.404854,1.203706,1.032915,1.760090,-0.981685,-1.473304
196,-1.615359,-0.748560,0.209846,1.371128,0.688954,-0.666664,1.705340,0.176047,1.115852,-0.868293
197,0.861696,-2.155197,-0.749322,1.224041,-0.355526,-0.155421,-0.849080,0.519710,0.035508,1.253223
198,1.037315,-0.395199,-2.165005,0.370486,-0.039071,0.047767,0.075644,0.247760,2.023676,-0.760564


# Task

In [38]:
# Evaluation settings handed to the optimizer further down.
# `cv` is the cross-validation setting (presumably the number of folds -- confirm
# against the optimizer's API).
cv = 5
# Negated mean squared error: scores are <= 0 and values closer to zero are better.
scoring = 'neg_mean_squared_error'

# Optimizer

## Optuna

In [46]:
# First-level estimators of the stack, as (name, estimator) pairs.
base_estimators = [
    ('knn', get_estimator('KNeighbors', 'regressor')),
    ('lin', get_estimator('LinearRegression', 'regressor')),
]

# Stack the base estimators under a Ridge meta-estimator.
stack = StackingRegressor(
    base_estimators,
    meta_estimator=get_estimator('Ridge', 'regressor'),
    n_jobs=None,
)

# Single-step pipeline; the saved repr shows the step is auto-named
# 'stackingregressor', which is the prefix of the tuned parameter names.
model = make_pipeline(stack)

model

Pipeline(memory=None,
         steps=[('stackingregressor',
                 StackingRegressor(cv=5,
                                   estimators=[('knn',
                                                KNeighborsRegressor(algorithm='auto',
                                                                    leaf_size=30,
                                                                    metric='minkowski',
                                                                    metric_params=None,
                                                                    n_jobs=None,
                                                                    n_neighbors=5,
                                                                    p=2,
                                                                    weights='uniform')),
                                               ('lin',
                                                LinearRegression(copy_X=True,
                                         

In [48]:
# Search options. With param_space=None the saved run prints an auto-discovered
# search space ("FOUND PARAMETERS" in the cell output).
search_options = dict(
    param_space=None,
    max_iter=10,
    verbose=2,
    n_jobs=-1,
)

# Tune the stacked pipeline with the shared cv / scoring settings.
optimizer = OptunaCV(model, cv, scoring, **search_options)
optimizer.fit(X, y)

# NOTE(review): the saved output ends in KeyboardInterrupt during fit, so this
# assignment was not reached in that run.
params = optimizer.best_params_

FOUND MODELS:


{'stackingregressor__meta_estimator': Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
       normalize=False, random_state=None, solver='auto', tol=0.001),
 'stackingregressor__knn': KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform'),
 'stackingregressor__lin': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)}


FOUND PARAMETERS:


{'stackingregressor__meta_estimator__alpha': (1e-06, 1000000.0, 'log'),
 'stackingregressor__meta_estimator__fit_intercept': {False, True},
 'stackingregressor__meta_estimator__normalize': {False, True},
 'stackingregressor__knn__n_neighbors': (1, 30, 1),
 'stackingregressor__knn__weights': {'distance', 'uniform'},
 'stackingregressor__knn__algorithm': {'ball_tree', 'kd_tree'},
 'stackingregressor__knn__leaf_size': (10, 100, 10),
 'stackingregressor__knn__p': {1, 2},
 'stackingregressor__lin__fit_intercept': {False, True}}


[02:06:49] ITER: 1/10      SCORE: -0.0694 ± 0.0228      ETA: 37 sec
stackingregressor__meta_estimator__alpha            3.8526720932705167
stackingregressor__meta_estimator__fit_intercept                  True
stackingregressor__meta_estimator__normalize                     False
stackingregressor__knn__n_neighbors                                  4
stackingregressor__knn__weights                               distance
stackingregressor__knn__algorithm                              kd_tree
stackingregressor__knn__leaf_size                                   10
stackingregressor__knn__p                                            2
stackingregressor__lin__fit_intercept                             True
dtype: object

[02:06:53] ITER: 2/10      SCORE: -0.0455 ± 0.0095      ETA: 33 sec
stackingregressor__meta_estimator__alpha            0.040980805223454105
stackingregressor__meta_estimator__fit_intercept                    True
stackingregressor__meta_estimator__normalize                   

KeyboardInterrupt: 