In [None]:
import h5py
import numpy as np
import pandas as pd
import json
import yaml
from ludwig import LudwigModel
import copy
import ray
from ludwig.utils.misc import merge_dict

In [2]:
# Load the base Ludwig model definition that every trial starts from.
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated (PyYAML >= 5.1) and raises TypeError on PyYAML >= 6.
with open("titanic_full.yaml", 'r') as stream:
    base_model = yaml.safe_load(stream)



  


In [3]:
#base should contain special markup so we know what parameters need to be updated.

def build_trial(base, config):
    """Build a model definition for one trial from a base definition.

    Args:
        base: dict with 'input_features', 'output_features', 'combiner'
            and 'training' entries.
        config: dict providing 'num_fc_layers' and 'batch_size' for this trial.

    Returns:
        A new dict with the four sections above, where 'combiner' carries the
        trial's 'num_fc_layers' and 'training' carries the trial's 'batch_size'.
        The feature lists are passed through by reference.
    """
    base_combiner = base['combiner']
    base_training = base['training']

    # NOTE(review): presumably merge_dict returns a merged copy and leaves
    # its first argument untouched -- confirm against ludwig.utils.misc.
    trial_combiner = merge_dict(
        base_combiner, {'num_fc_layers': config['num_fc_layers']}
    )
    trial_training = merge_dict(
        base_training, {'batch_size': config['batch_size']}
    )

    return {
        'input_features': base['input_features'],
        'output_features': base['output_features'],
        'combiner': trial_combiner,
        'training': trial_training,
    }

In [6]:
# Preprocessed dataset and its metadata, consumed by train() below.
# NOTE(review): these are absolute paths on one developer's machine.
data = '/Users/bm255022/Projects/Teradata/ludwig/examples/hyperparameters/titanic.hdf5'
metadata = '/Users/bm255022/Projects/Teradata/ludwig/examples/hyperparameters/titanic.json'

def train(base, config, reporter):
    """Train one trial and report its final validation accuracy.

    Args:
        base: base model definition passed on to build_trial.
        config: trial hyperparameters passed on to build_trial.
        reporter: callable invoked once with ``mean_accuracy`` and ``done=True``.

    Returns:
        Whatever ``reporter`` returns.
    """
    trial_definition = build_trial(base, config)
    trial_model = LudwigModel(trial_definition)
    stats = trial_model.train(data_hdf5=data, train_set_metadata_json=metadata)

    # Last recorded validation accuracy of the 'Survived' output feature.
    final_accuracy = stats['validation']['Survived']['accuracy'][-1]
    return reporter(mean_accuracy=final_accuracy, done=True)



    


In [None]:
# Grid search over the two hyperparameters that build_trial knows how to set.
from ray.tune import register_trainable, grid_search, run_experiments

# Shut down before init so that re-running this cell does not call
# ray.init() on top of an earlier session.
ray.shutdown()
ray.init()
# 4 values of num_fc_layers x 5 values of batch_size.
# NOTE(review): presumably Tune runs one trial per combination -- confirm.
grid_search_space = {
    'num_fc_layers': grid_search([1,2,3,4]),
    'batch_size': grid_search([4,16,32,64,128])
}

# The lambda binds base_model so the registered trainable only takes the
# (config, reporter) pair.
register_trainable('train', lambda cfg, rptr: train(base_model, cfg, rptr))
run_experiments({'my_experiment': {
    'run': 'train',
    'stop': {'mean_accuracy': 0.9},
    'config': grid_search_space}
    })
                

A more elaborate approach to specifying the parameters to search over.

In [33]:
#version 1, doesn't handle lists.  extracts parameters that we want to search over.  


import re
# Marks a string value as a search placeholder, e.g. '{{[24,48,64]}}'.
pattern = "^{{.*}}"

def get_keys(dct, path=""):
    """Collect the placeholder values found in a nested dict.

    Walks ``dct`` depth-first in key order. Nested dicts are descended into;
    string values matching ``pattern`` are collected; everything else
    (including lists) is ignored.

    Args:
        dct: the (possibly nested) dict to scan.
        path: prefix prepended to every reported key path; nested keys are
            joined with '->'.

    Returns:
        A list of ``[key_path, placeholder_string]`` pairs.
    """
    found = []
    for key, value in dct.items():
        if isinstance(value, dict):
            # Descend, extending the path with this key.
            found.extend(get_keys(value, path + key + "->"))
        elif isinstance(value, str) and re.match(pattern, value, flags=0) is not None:
            found.append([path + key, value])

    return found

# Updates parameters in place; doesn't handle lists.

def update_param(dct, path, value):
    """Set ``value`` at the nested location ``path`` inside ``dct``, in place.

    Args:
        dct: nested dict to modify.
        path: sequence of keys; all but the last must already exist in
            ``dct``, the last one is assigned (created or overwritten).
        value: the value to store.

    Raises:
        KeyError: if an intermediate key is missing.
        IndexError: if ``path`` is empty.
    """
    node = dct
    # Walk down to the container that holds the final key.
    for key in path[:-1]:
        node = node[key]
    node[path[-1]] = value
        
def build_model(base_model, config):
    """Write each configured value into ``base_model``, in place.

    Args:
        base_model: nested model-definition dict; it is modified directly.
        config: dict mapping '->'-joined key paths (for example
            'training->batch_size') to the value to store at that path.

    Returns:
        None.
    """
    for key_path, new_value in config.items():
        update_param(base_model, key_path.split('->'), new_value)



In [29]:
# Load the model definition whose searchable values are written as
# '{{...}}' placeholder strings.
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated (PyYAML >= 5.1) and raises TypeError on PyYAML >= 6.
with open("titanic_full_h.yaml", 'r') as stream:
    base_model_h = yaml.safe_load(stream)
base_model_h
        

  


{'combiner': {'fc_size': '{{[24,48,64]}}',
  'num_fc_layers': 1,
  'type': 'concat'},
 'input_features': [{'name': 'Pclass', 'type': 'category'},
  {'name': 'Sex', 'type': 'category'},
  {'missing_value_strategy': 'fill_with_mean',
   'name': 'Age',
   'type': 'numerical'},
  {'name': 'SibSp', 'type': 'numerical'},
  {'name': 'Parch', 'type': 'numerical'},
  {'missing_value_strategy': 'fill_with_mean',
   'name': 'Fare',
   'type': 'numerical'},
  {'name': 'Embarked', 'representation': 'sparse', 'type': 'category'}],
 'output_features': [{'name': 'Survived', 'type': 'binary'}],
 'training': {'batch_size': '{{[8,16,32,64]}}',
  'bucketing_field': None,
  'decay': False,
  'decay_rate': 0.96,
  'decay_steps': 10000,
  'dropout_rate': 0.0,
  'early_stop': 5,
  'epochs': 100,
  'gradient_clipping': None,
  'increase_batch_size_on_plateau': 0,
  'increase_batch_size_on_plateau_max': 512,
  'increase_batch_size_on_plateau_patience': 5,
  'increase_batch_size_on_plateau_rate': 2,
  'learning_

In [34]:
# List every [key_path, placeholder] pair found in the template definition.
keys = get_keys(base_model_h)
keys

[['combiner->fc_size', '{{[24,48,64]}}'],
 ['training->batch_size', '{{[8,16,32,64]}}'],
 ['training->optimizer->type', '{{[adam,sgd]}}']]

In [27]:
# One concrete value per '->'-joined key path.
config = {'combiner->fc_size': 48,
          'training->batch_size': 64,
          'training->optimizer->type': 'adam'
         }

# build_model writes into base_model_h itself, so after this call the
# '{{...}}' placeholders at these paths are overwritten.
build_model(base_model_h, config)
base_model_h

{'combiner': {'fc_size': 48, 'num_fc_layers': 1, 'type': 'concat'},
 'input_features': [{'name': 'Pclass', 'type': 'category'},
  {'name': 'Sex', 'type': 'category'},
  {'missing_value_strategy': 'fill_with_mean',
   'name': 'Age',
   'type': 'numerical'},
  {'name': 'SibSp', 'type': 'numerical'},
  {'name': 'Parch', 'type': 'numerical'},
  {'missing_value_strategy': 'fill_with_mean',
   'name': 'Fare',
   'type': 'numerical'},
  {'name': 'Embarked', 'representation': 'sparse', 'type': 'category'}],
 'output_features': [{'name': 'Survived', 'type': 'binary'}],
 'training': {'batch_size': 64,
  'bucketing_field': None,
  'decay': False,
  'decay_rate': 0.96,
  'decay_steps': 10000,
  'dropout_rate': 0.0,
  'early_stop': 5,
  'epochs': 100,
  'gradient_clipping': None,
  'increase_batch_size_on_plateau': 0,
  'increase_batch_size_on_plateau_max': 512,
  'increase_batch_size_on_plateau_patience': 5,
  'increase_batch_size_on_plateau_rate': 2,
  'learning_rate': 0.001,
  'learning_rate_war