# Hyperparameter with hyperas

In the talos notebook, we have done some hyperparameter optimization via talos.

I also wanted to investigate the use of other libraries and found hyperas.

## Import stuff

In [50]:
from __future__ import print_function

import numpy as np
import pandas as pd

import ipynb.fs.defs.common as common

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

Hyperas needs two functions, a data function and a model function.

Since the data function can not use default arguments (it's parsed and stored in new python file),
make sure you change the second csv argument while running the optimization.

The default function arguments is only used while calling it from a notebook.

In [1]:
def data(fn='data/B1_processed.csv', drop_shots=False):
    import pandas as pd
    import ipynb.fs.defs.common as common

    try:
        df = pd.read_csv(fn)
    except NameError:
        df = pd.read_csv('data/B1_processed.csv')
    df = df.set_index('date')
    df.index = pd.to_datetime(df.index)
    
    if drop_shots:
        df = df.drop(columns=['away-opposition-shots','away-opposition-shots-on-target','away-shots','away-shots-on-target',
                              'home-opposition-shots','home-opposition-shots-on-target','home-shots','home-shots-on-target'])
    
    train = df[:'2017']
    val = df['2018':]
    
    x_train, y_train = common.get_feables(train, as_odds=True, normalize=True)
    x_val, y_val = common.get_feables(val, as_odds=True, normalize=True)

    return x_train, y_train, x_val, y_val

The model is defined like this. 

The parameter space is defined within the function. 
For now, we have used the same parameter space as in the talos experiment.

In [3]:
def bet_model(x_train, y_train, x_val, y_val):
    from IPython.display import clear_output
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.layers import BatchNormalization
    from keras_tqdm import TQDMNotebookCallback
    from keras import regularizers
    import ipynb.fs.defs.common as common

    # next we can build the model exactly like we would normally do it
    model = Sequential()
    model.add(Dense({{choice([10, 20, 30])}}, input_dim=x_train.shape[1],
                    kernel_regularizer=regularizers.l1(0.001)))

    normalize = {{choice([False, True])}}
    
    if normalize:
        model.add(BatchNormalization())

    nr_hidden_layers = {{choice([0, 1, 2])}}
    
    for i in range(nr_hidden_layers):
        model.add(Dense({{choice([10, 20, 30])}},
                        kernel_regularizer=regularizers.l1(0.001)))
        if normalize:
            model.add(BatchNormalization())

    model.add(Dense(3, 
                    kernel_regularizer=regularizers.l1(0.001),
                    activation = 'softmax'
                   ))
       
    model.compile(loss='categorical_crossentropy', optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

    model.fit(x_train, y_train.clip(0,1),
              batch_size={{choice([64, 256, 512])}},
              epochs={{choice([50, 100, 200])}},
              verbose=0,
              validation_data=(x_val, y_val.clip(0,1)),
              callbacks=[TQDMNotebookCallback(show_inner=False)]
             )
       
    clear_output()    
    acc = model.evaluate(x_val, y_val.clip(0,1), verbose=0)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

## Optimize

In [4]:
trials = Trials()
best_run, best_model = optim.minimize(model=bet_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=5,
                                      trials=trials,
                                     notebook_name="hyperas")
x_train, y_train, x_val, y_val = data()
print("Evalutation of best performing model:")
print(best_model.evaluate(x_val, y_val))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:17<00:00,  4.16s/it, best loss: -1.2271560116818077]
Evalutation of best performing model:
-13.646820447598284


These are the indices of the best run.

In [5]:
best_run

{'Dense': 2,
 'Dense_1': 1,
 'batch_size': 2,
 'epochs': 0,
 'normalize': 1,
 'nr_hidden_layers': 2,
 'optimizer': 0}

### Evaluation

In [6]:
def evaluate(model, x, y):
    y_ = np.clip(y, 0, 1)
    odds = np.abs(y)
    r = (model.predict(x) > (1 / odds + 0.05))  * (odds * y_ - 1)
    total = r.sum().sum()
    cnt = (r != 0).sum().sum()
    tdf = pd.Series([total, cnt, total/cnt], index=r.sum().index)
    r_ = pd.concat([r.sum(), (r != 0).sum(), r.sum() / (r !=0).sum(), tdf],axis=1, ignore_index=True)
    r_ .columns = ['Profit','Bets','Margins', 'Total']
    return r_

In [7]:
evaluate(best_model, x_train, y_train)

Unnamed: 0,Profit,Bets,Margins,Total
odds-home,8.15,393,0.020738,30.88
odds-draw,18.39,284,0.064754,762.0
odds-away,4.34,85,0.051059,0.040525


In [8]:
evaluate(best_model, x_val, y_val)

Unnamed: 0,Profit,Bets,Margins,Total
odds-home,-6.0,32,-0.1875,-6.61
odds-draw,-0.61,26,-0.023462,58.0
odds-away,0.0,0,,-0.113966


## Saving models

Hyperas has no way to save models.

The following function implements a function which is capable of deploying models.
In this function, you will see that we check if a model is an instance of h5py.File.

In future experiments, we have notices that keras does not clear the graph in tensorflow.
If we call clear_session to clear the graph, the weights are missing.

I have solved this problem by storing the keras model to a h5 file in memory during hyperparameter optimization.

In [12]:
def deploy_model(best, fn):
    from keras import backend as K
    import h5py
    import json
    with h5py.File(fn, 'w') as f:
        models = best['result']['model']
        if isinstance(models, list):
            for i, m  in enumerate(models):
                grp = f.create_group('model%02d' % i)
                if isinstance(m, h5py.File):
                    K.clear_session()    
                    common.memory_2_model(m).save(grp)
                else:
                    m.save(grp)
        else:
            grp = f.create_group('model01')
            if isinstance(m, h5py.File):
                K.clear_session()    
                common.memory_2_model(models).save(grp)
            else:
                models.save(grp)
        c = best.copy()
        c['result'] = c['result'].copy()
        del c['result']['model']
        c['book_time'] = c['book_time'].strftime('%Y-%M-%d %H:%M:%S')
        c['refresh_time'] = c['refresh_time'].strftime('%Y-%M-%d %H:%M:%S')
        s = json.dumps(c)
        dt = h5py.special_dtype(vlen=bytes)
        dset = f.create_dataset("hyperas", (100,), dtype=dt)
        dset.attrs['run'] = s

In [14]:
deploy_model(best, 'bet_model_01.h5')

## Optimizing profits

There is no need in optimizing the prediction of which team wins,
instead we need to optimize the profit

In [13]:
from keras import backend as K
K.clear_session()    

In [29]:
def profit_model(x_train, y_train, x_val, y_val):
    from IPython.display import clear_output
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.layers import BatchNormalization
    from keras_tqdm import TQDMNotebookCallback
    from keras import regularizers
    import ipynb.fs.defs.common as common

    # next we can build the model exactly like we would normally do it
    model = Sequential()
    model.add(Dense({{choice([10, 20, 30])}}, input_dim=x_train.shape[1],
                    kernel_regularizer=regularizers.l1(0.001)))

    normalize = {{choice([False, True])}}
    
    if normalize:
        model.add(BatchNormalization())

    nr_hidden_layers = {{choice([0, 1, 2])}}
    
    for i in range(nr_hidden_layers):
        model.add(Dense({{choice([10, 20, 30])}},
                        kernel_regularizer=regularizers.l1(0.001)))
        if normalize:
            model.add(BatchNormalization())

    model.add(Dense(3, 
                    kernel_regularizer=regularizers.l1(0.001),
                    activation = 'softmax'
                   ))
       
    model.compile(loss='categorical_crossentropy', optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

    model.fit(x_train, y_train.clip(0,1),
              batch_size={{choice([64, 256, 512])}},
              epochs={{choice([50, 100, 200])}},
              verbose=0,
              validation_data=(x_val, y_val.clip(0,1)),
              callbacks=[TQDMNotebookCallback(show_inner=False)]
             )
    
    clear_output()
    
    y_pred = model.predict(x_val)
    odds = np.abs(y_val)
    profit = ( y_pred > (1 / odds + 0.05)) * (odds * y_val.clip(0,1) - 1)
    acc = profit.sum().sum()
    
    print (acc)

    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

In [34]:
profit_trials = Trials()
profit_run, profitmodel = optim.minimize(model=profit_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=5,
                                      trials=profit_trials,
                                     notebook_name="hyperas")

3.59                                                                                                                                                                 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:23<00:00,  5.31s/it, best loss: -33.629999999999995]


In [35]:
x_train, y_train, x_val, y_val = data()

In [36]:
evaluate(profitmodel, x_val, y_val)

Unnamed: 0,Profit,Bets,Margins,Total
odds-home,8.84,14,0.631429,33.63
odds-draw,6.79,30,0.226333,45.0
odds-away,18.0,1,18.0,0.747333


For some platforms, we see that the profit is not the same as the profit, returned by our model.
So profitmodel here, is wrong.

Let's select it from the trials

In [37]:
def select_best_trial(trials):
    import numpy as np
    best_loss = np.inf
    best_trial = None
    for i, r in enumerate(trials):
        if r['result']['loss'] < best_loss:
            best_loss = r['result']['loss']
            best_trial = r
    return best_trial, best_loss

In [40]:
best, metric = select_best_trial(profit_trials)

In [42]:
evaluate(best['result']['model'], x_val, y_val)

Unnamed: 0,Profit,Bets,Margins,Total
odds-home,8.84,14,0.631429,33.63
odds-draw,6.79,30,0.226333,45.0
odds-away,18.0,1,18.0,0.747333


In [17]:
deploy_model(best, 'profit_model_01.h5')

# Optimizing cross profit

But it is not fair to optimize something in the future.

In [55]:
def cross_profit_model(x_train, y_train, x_val, y_val):
    from keras import backend as K

    from IPython.display import clear_output
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.layers import BatchNormalization
    from keras_tqdm import TQDMNotebookCallback
    from keras import regularizers
    import ipynb.fs.defs.common as common
    
    from sklearn.model_selection import KFold
    kfold = KFold(n_splits=3, random_state=42)

    total_acc = 0
    models = []
    accs = []
    
    for i, (trainidx, testidx) in enumerate(list(kfold.split(x_train))):
        K.clear_session()
        
        xx_train = x_train.iloc[trainidx]
        x_test = x_train.iloc[testidx]
        
        yy_train = y_train.iloc[trainidx]
        y_test = y_train.iloc[testidx]

        # next we can build the model exactly like we would normally do it
        model = Sequential()
        model.add(Dense({{choice([10, 20, 30])}}, input_dim=x_train.shape[1],
                        kernel_regularizer=regularizers.l1(0.001)))

        normalize = {{choice([False, True])}}

        if normalize:
            model.add(BatchNormalization())

        nr_hidden_layers = {{choice([0, 1, 2])}}

        for i in range(nr_hidden_layers):
            model.add(Dense({{choice([10, 20, 30])}},
                            kernel_regularizer=regularizers.l1(0.001)))
            if normalize:
                model.add(BatchNormalization())

        model.add(Dense(3, 
                        kernel_regularizer=regularizers.l1(0.001),
                        activation = 'softmax'
                       ))

        model.compile(loss='categorical_crossentropy', optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

        model.fit(xx_train, yy_train.clip(0,1),
                  batch_size={{choice([64, 256, 512])}},
                  epochs={{choice([50, 100, 200])}},
                  verbose=0,
                  validation_data=(x_test, y_test.clip(0,1)),
                  callbacks=[TQDMNotebookCallback(show_inner=False)]
                 )
        
        clear_output()    

        models.append(common.model_2_memory(model))

        y_pred = model.predict(x_test)
        odds = np.abs(y_test)
        profit = ( y_pred > (1 / odds + 0.05)) * (odds * y_test.clip(0,1) - 1)
        acc = profit.sum().sum()
        accs.append(acc)
        
        print ('acc: %5.2f' % acc, accs)
        
        total_acc += acc

    return {'loss': -total_acc, 'status': STATUS_OK, 'model': models}

In [56]:
cross_trials = Trials()
best_run3, best_model3 = optim.minimize(model=cross_profit_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=50,
                                      trials=cross_trials,
                                     notebook_name="hyperas")

acc: 11.03                                                                                                                                                           
[-13.17, -12.32, 11.030000000000001]                                                                                                                                 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [03:56<00:00,  5.49s/it, best loss: 0.5899999999999963]


In [57]:
x_train, y_train, x_val, y_val = data()

In [58]:
best, loss = select_best_trial(cross_trials)

In [59]:
evaluate(common.memory_2_model(best['result']['model'][2]), x_val, y_val)

Unnamed: 0,Profit,Bets,Margins,Total
odds-home,-3.28,41,-0.08,-11.47
odds-draw,0.61,21,0.029048,79.0
odds-away,-8.8,17,-0.517647,-0.14519


In [60]:
deploy_model(best, 'cross_profit_model_B1_15.h5')

## Restore the models

In [61]:
def restore_model(fn):
    import h5py
    import json
    import keras.models
    
    with h5py.File(fn, 'r') as f:
        result = json.loads(f['hyperas'].attrs['run'])
        models = []
        for i in range(100):
            name = 'model%02d' % i
            if name in f.keys():
                model = keras.models.load_model(f[name])
                models.append(model)
                pass
            else:
                break
        result['result']['model'] = models

    return result

In [166]:
result = restore_model('cross_profit_model_B1_15.h5.h5')