In [16]:

# Choosing the right parameters for your machine learning algorithm is a hard and crucial task, since it can make a big difference on the performance of a model. These parameters can be tuned in a manual or automatic way.

# The manual way implies training and testing models, manually changing the parameters at each step. This could end up being a time consuming task and maybe you’ll never find the optimal parameters. On the other hand, we could use algorithms that start with a potential set of hyperparameters, and try to optimize them automatically.

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from bayes_opt import BayesianOptimization
h2o.init()
h2o.remove_all()

data = h2o.upload_file('C:\\winequality_red.csv')
train_cols = [x for x in data.col_names if x not in ['quality']]
target = "quality"
train, test = data.split_frame(ratios=[0.7])


def train_model(max_depth, 
                ntrees,
                min_rows, 
                learn_rate, 
                sample_rate, 
                col_sample_rate):
    params = {
        'max_depth': int(max_depth),
        'ntrees': int(ntrees),
        'min_rows': int(min_rows),
        'learn_rate':learn_rate,
        'sample_rate':sample_rate,
        'col_sample_rate':col_sample_rate
    }
    model = H2OGradientBoostingEstimator(nfolds=5,**params)
    model.train(x=train_cols, y=target, training_frame=train)
    return -model.rmse()


Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 13.0.2+8, mixed mode, sharing)
  Starting server from C:\Users\ryans\anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\ryans\AppData\Local\Temp\tmplyu415se
  JVM stdout: C:\Users\ryans\AppData\Local\Temp\tmplyu415se\h2o_ryans_started_from_python.out
  JVM stderr: C:\Users\ryans\AppData\Local\Temp\tmplyu415se\h2o_ryans_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,America/New_York
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.30.0.1
H2O_cluster_version_age:,3 months and 9 days
H2O_cluster_name:,H2O_from_python_ryans_w53o2h
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.939 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |█████████████████████████████████████████████████████████| 100%


In [17]:

bounds = {
    'max_depth':(5,10),
    'ntrees': (100,500),
    'min_rows':(10,30),
    'learn_rate':(0.001, 0.01),
    'sample_rate':(0.5,0.8),
    'col_sample_rate':(0.5,0.8)
}


In [18]:

optimizer = BayesianOptimization(
    f=train_model,
    pbounds=bounds,
    random_state=1,
)
optimizer.maximize(init_points=10, n_iter=50)


optimizer.max


|   iter    |  target   | col_sa... | learn_... | max_depth | min_rows  |  ntrees   | sample... |
-------------------------------------------------------------------------------------------------
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  1        | -0.5939   |  0.6251   |  0.007483 |  5.001    |  16.05    |  158.7    |  0.5277   |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  2        | -0.5875   |  0.5559   |  0.00411  |  6.984    |  20.78    |  267.7    |  0.7056   |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  3        | -0.5404   |  0.5613   |  0.008903 |  5.137    |  23.41    |  266.9    |  0.6676   |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  4        | -0.6567   |  0.5421   |  0.002783 |  9.004    |  29.37    |  225.4    |  0.7077   |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  5   

|  45       | -0.4804   |  0.8      |  0.01     |  10.0     |  10.0     |  186.7    |  0.5      |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  46       | -0.5498   |  0.7717   |  0.009277 |  5.666    |  29.92    |  281.4    |  0.5729   |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  47       | -0.5556   |  0.5      |  0.01     |  5.0      |  30.0     |  303.5    |  0.5      |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  48       | -0.5954   |  0.8      |  0.01     |  5.0      |  30.0     |  148.5    |  0.5      |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  49       | -0.3376   |  0.5      |  0.01     |  10.0     |  10.0     |  364.4    |  0.8      |
gbm Model Build progress: |███████████████████████████████████████████████| 100%
|  50       | -0.5087   |  0.5      |  0.01     |  5.0      |  30.0     |  487.6    |  0.8      |
gbm Mod

{'target': -0.30338474254205666,
 'params': {'col_sample_rate': 0.5,
  'learn_rate': 0.01,
  'max_depth': 10.0,
  'min_rows': 10.0,
  'ntrees': 500.0,
  'sample_rate': 0.8}}

In [19]:

# Data:
# https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009
