In [1]:
import warnings

# Suppress every warning from here on so the cell outputs below show only
# results. NOTE(review): this also hides deprecation notices — confirm that
# is intended before reusing this cell outside a demo.
warnings.simplefilter('ignore')

### Import the Boston dataset

In [2]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release, or this import fails.
from sklearn.datasets import load_boston as load_dataset

# Load the dataset; the next cell reads its .data and .target attributes.
dataset = load_dataset()

### Import `train_test_split` and generate `X_train`, `X_test`, `y_train`, `y_test`

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for the final evaluation; the fixed seed keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.3,
    random_state=0,
)

### Import the metric and the estimators

In [5]:
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

# NOTE: These starting hyperparameters are deliberately poor so that the
# BTBSession demo further down has plenty of room to improve on them.
default_hyperparams = dict(
    n_estimators=2,
    max_features='log2',
    min_samples_split=2,
    min_samples_leaf=2,
)

# Baseline: random forest built with the defaults above, fitted on the
# training split and scored (R^2) on the held-out split.
rf = RandomForestRegressor(random_state=0, **default_hyperparams)
pred = rf.fit(X_train, y_train).predict(X_test)

r2_score(y_test, pred)

0.7149946643194653

In [7]:
# Baseline: extra-trees model built with the same defaults, fitted on the
# training split and scored (R^2) on the held-out split.
et = ExtraTreesRegressor(random_state=0, **default_hyperparams)
pred = et.fit(X_train, y_train).predict(X_test)

r2_score(y_test, pred)

0.6108880572971567

In [8]:
# Search space handed to BTBSession. Both tree ensembles expose the same four
# hyperparameters, so one spec is generated per model name (each model still
# gets its own independent dict objects). Every default matches the value
# used in `default_hyperparams`.
tunables = {
    model_name: {
        'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
        'max_features': {
            'type': 'str',
            'default': 'log2',
            'range': [None, 'auto', 'log2', 'sqrt'],
        },
        'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
        'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
    }
    for model_name in ('random_forest', 'extra_trees')
}

In [9]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Registry mapping each key of `tunables` to the estimator class it tunes.
models = dict(
    random_forest=RandomForestRegressor,
    extra_trees=ExtraTreesRegressor,
)

def build_model(name, hyperparameters):
    """Instantiate the estimator registered under ``name``.

    Args:
        name: Key into the module-level ``models`` registry.
        hyperparameters: Mapping of keyword arguments forwarded to the
            estimator constructor.

    Returns:
        A new, unfitted estimator created with ``random_state=0`` plus the
        given hyperparameters.

    Raises:
        KeyError: If ``name`` is not a key of ``models``.
    """
    return models[name](random_state=0, **hyperparameters)

def score_model(name, hyperparameters):
    """Cross-validate one model configuration on the training split.

    Args:
        name: Model key, passed through to ``build_model``.
        hyperparameters: Hyperparameter mapping, passed through to
            ``build_model``.

    Returns:
        The mean of the per-fold scores returned by ``cross_val_score`` when
        run on ``X_train`` / ``y_train`` with a scorer built from ``r2_score``.
    """
    fold_scores = cross_val_score(
        build_model(name, hyperparameters),
        X_train,
        y_train,
        scoring=make_scorer(r2_score),
    )
    return fold_scores.mean()

In [10]:
from btb.session import BTBSession

# Create the tuning session from the search-space spec (`tunables`) and the
# scoring callback (`score_model`) defined in the cells above.
session = BTBSession(tunables, score_model, verbose=True)

In [11]:
# Run two iterations. As the recorded proposals show, each of the two models
# is scored once with its default hyperparameters; the displayed return value
# is the higher-scoring of those proposals.
session.run(iterations=2)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




{'id': 'd02e055a1279d42b74169151047c542c',
 'name': 'extra_trees',
 'config': {'n_estimators': 2,
  'max_features': 'log2',
  'min_samples_split': 2,
  'min_samples_leaf': 2},
 'score': 0.7294475145162741}

In [12]:
# Show every proposal evaluated so far, keyed by proposal id.
session.proposals

{'40e9094fa4b1901410b62c95ecbe9a21': {'id': '40e9094fa4b1901410b62c95ecbe9a21',
  'name': 'random_forest',
  'config': {'n_estimators': 2,
   'max_features': 'log2',
   'min_samples_split': 2,
   'min_samples_leaf': 2},
  'score': 0.7096417128432014},
 'd02e055a1279d42b74169151047c542c': {'id': 'd02e055a1279d42b74169151047c542c',
  'name': 'extra_trees',
  'config': {'n_estimators': 2,
   'max_features': 'log2',
   'min_samples_split': 2,
   'min_samples_leaf': 2},
  'score': 0.7294475145162741}}

In [13]:
# Run 100 more iterations on the same session and keep the proposal dict
# (id, name, config, score) that run() returns.
best_proposal = session.run(iterations=100)

HBox(children=(IntProgress(value=0), HTML(value='')))




In [14]:
# Display the proposal returned by the 100-iteration run.
best_proposal

{'id': '9bf55747610213f8f5818736ee9ab4d3',
 'name': 'extra_trees',
 'config': {'n_estimators': 963,
  'max_features': 'log2',
  'min_samples_split': 2,
  'min_samples_leaf': 1},
 'score': 0.8681428320368964}

In [15]:
# Rebuild an unfitted estimator from the winning proposal's model name and
# hyperparameter configuration.
best_model = build_model(
    name=best_proposal['name'],
    hyperparameters=best_proposal['config'],
)

In [16]:
# Fit the tuned model on the full training split and score (R^2) on the
# held-out split, which took no part in the tuning session.
pred = best_model.fit(X_train, y_train).predict(X_test)

r2_score(y_test, pred)

0.8002871087417817