In [1]:
# Seed every random number source used by the notebook with 0 before anything
# else runs, so that repeated runs start from the same state.
import os, sys
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting
# it here only reaches child processes, not this already-running kernel.
# Export it before launching Jupyter if hash determinism is required.
os.environ['PYTHONHASHSEED']=str(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)
import tensorflow as tf
# TensorFlow 1.x graph-level seed (the TF2 spelling is tf.compat.v1.set_random_seed).
tf.set_random_seed(0)

# ## uncomment for reproducibility ##
# from keras import backend as K
# session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# K.set_session(sess)

import pandas as pd
import matplotlib.pyplot as plt
import pickle
import itertools
import time
import itertools
from IPython.display import display, clear_output

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SimpleRNN
from keras.initializers import glorot_uniform

from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import Callback

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


%config InlineBackend.figure_format = 'retina'
%matplotlib inline
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [2]:
# Version tag shared by the model folder, the helper-module folder and the
# saved grid file.
VERSION = '07'

# Pick the project root: Google Drive when running on Colab, otherwise the
# parent directory of the notebook.
try:
    import google.colab
    from google.colab import drive
except ImportError:
    # google.colab is not importable here, so this is a local run.
    root_path = '../'
    print('Working locally')
else:
    # Only the import is guarded: a failing mount must surface as an error
    # instead of silently switching to the local path.
    drive.mount('/content/gdrive')
    root_path = 'gdrive/My Drive/Colab Notebooks/deep_mouse/'
    print('Working on google colab')

# Folder that holds the models of this VERSION; created on first use.
directory = f'{root_path}models/{VERSION}'
os.makedirs(directory, exist_ok=True)

# Make the versioned helper modules (load_data, create_model, ...) importable.
# Both root_path values end with '/', so no extra separator is inserted.
sys.path.append(f'{root_path}rsc/{VERSION}')
from load_data import load_data
from pre_process_data import pre_process_data
from create_model import create_model
from helper import play_bell, LossHistory, add_grid_and_save
from train_model import train_model
from run_training import run_training
from parameter_space import Parameter_space


Working locally


## Hyperparameter settings

In [3]:
def create_grid(params=None):
    """Build the settings grid for one training run.

    Each entry maps a setting name to a two-item list ``[dtype, value]``.
    The dtype tag is either a string (``'str'``, ``'O'``) or a Python type
    (``int``, ``float``); it is presumably consumed when the grid is saved by
    the helper modules -- TODO confirm.

    The run index ``test_index`` continues from the largest index stored in
    the pickled grid file of the current ``VERSION`` (read via the notebook
    globals ``root_path`` and ``VERSION``), or starts at 0 when that file
    does not exist.

    Parameters
    ----------
    params : dict, optional
        Overrides, ``{setting_name: value}``. Each value replaces the
        default value of the matching entry. ``None`` means no overrides.

    Returns
    -------
    dict
        The grid described above. ``grid['params'][1]`` holds a copy of the
        overrides that were applied.

    Raises
    ------
    KeyError
        If ``params`` contains a name that is not a grid setting.
    """
    # Work on a private copy: a shared ``{}`` default (or the caller's dict)
    # would otherwise be stored inside the grid and leak between calls.
    params = {} if params is None else dict(params)

    grid_file_path = '{}/model_data/grid_{}.pkl'.format(root_path, VERSION)

    if os.path.isfile(grid_file_path):
        # NOTE(review): unpickling can execute arbitrary code; only load grid
        # files produced by this project.
        grid_df = pd.read_pickle(grid_file_path)
        test_index = grid_df['test_index'].max() + 1
    else:
        test_index = 0

    # The built-in int/float replace np.int/np.float: those were plain aliases
    # of the built-ins and no longer exist in NumPy >= 1.24.
    current_grid = {
        'version'                : ['str'     , VERSION],
        'params'                 : ['O'       , params],
        'test_index'             : [int       , test_index],
        'root_path'              : ['str'     , root_path],

        # Load and preprocess
        'batch_size_data'        : [int       , 200],
        'x_std'                  : [float     , 3.398],
        'y_std'                  : [float     , 2.926],
        'sigma_cut'              : [float     , 0.1],
        'training_size'          : [int       , 200],

        # Dropout rate. The literal used to list this key twice; the single
        # entry keeps the effective value and the original key position.
        'dropout'                : [float     , 0],

        # Metrics
        'best_model_metric'      : ['str'     , 'val_acc'],
        'training_metric'        : ['str'     , 'accuracy'],

        # Random seeds
        'seed_skf'               : [int       , 0],
        'seed_numpy'             : [int       , 0],
        'seed_tensorflow'        : [int       , 0],
        'seed_random'            : [int       , 0],
        'seed_sklearn'           : [int       , 0],
        'seed_model'             : [int       , 0],

        # Model properties
        'input_shape'            : ['O'       , None],
        'LSTM_size'              : [int       , 200],

        # Training parameters
        'learning_rate'          : [float     , 1.3e-4],
        'batch_size'             : [int       , 32],
        'epochs'                 : [int       , 200],
        'test_size'              : [float     , 0.15],
        'skf_n_splits'           : [int       , 5],

        # Outputs
        'best_model_paths'       : ['O'       , []],
        'best_model_accuracies'  : ['O'       , []],
        'fit_outs'               : ['O'       , []]}

    for key, value in params.items():
        if key not in current_grid:
            # Same exception type as before, but with an actionable message.
            raise KeyError(
                'Unknown grid parameter {!r}; valid names are {}'.format(
                    key, sorted(current_grid)))
        current_grid[key][1] = value
    return current_grid

### Run training

In [4]:
# Default grid (no overrides); it is handed to the data helpers below.
grid = create_grid()

# load_data returns the train/test split; both splits then go through the
# same pre_process_data call with the same grid.
X_train, X_test, y_train, y_test = load_data(grid)
X_train, y_train = pre_process_data(X_train, y_train, grid)
X_test, y_test = pre_process_data(X_test, y_test, grid)

In [5]:
import time
def test_params(next_point_to_probe):
    with open (f'{root_path}log.txt', 'a') as f:
        f.write(f'{time.ctime()}  -  starting {next_point_to_probe}\n')
    print(f'Testing {next_point_to_probe}')
    grid = create_grid(next_point_to_probe)
    grid = run_training(X_train, y_train, grid, verbose=True)
    
    val_acc_history = [fit_out.history['val_acc'] for fit_out in grid['fit_outs'][1]]
    val_accuracy_estimator = np.mean(val_acc_history, 0)[130:180].mean()
    with open (f'{root_path}log.txt', 'a') as f:
        f.write(f'{time.ctime()}  -  finished {next_point_to_probe}\n')
    
    return val_accuracy_estimator

In [6]:
# %%time


# Search space: each bound is (low, high, kind) as passed to Parameter_space.
ps = Parameter_space(bounds={
    'LSTM_size' : (100, 300, 'int'),
    'dropout' : (0, 0.3, 'linear')
})

# NOTE(review): this call runs before the previously saved results are
# registered below -- confirm that ordering is intended.
ps.bayesopt(test_params, kappa=0.25)


# Re-register the results of earlier runs stored in the grid file, so the
# search object knows about them.
grid_file_path = '{}/model_data/grid_{}.pkl'.format(root_path, VERSION)
if os.path.isfile(grid_file_path):
    grid_df = pd.read_pickle(grid_file_path)


    for fo, param in zip(grid_df.fit_outs, grid_df.params):
        # Same score as test_params: val_acc averaged over the fit results,
        # then over epochs 130-179.
        somma = [x.history['val_acc'] for x in fo]    
        s = np.mean(somma, 0)[130:180].mean()
        ps.register_point(param, s)

# Probe 20 further points: ask for a candidate, train it, register its score.
# time_seed=True presumably seeds the draw from the clock -- confirm in
# Parameter_space; if so, these points differ between runs.
for _ in range(20):
    next_point_to_probe = ps.get_random_next(time_seed=True)
    target = test_params(next_point_to_probe)
    ps.register_point(point=next_point_to_probe, target=target)

Testing {'LSTM_size': 267, 'dropout': 0.15183611922415335}
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

CV validation 1 of 5
Instructions for updating:
Use tf.cast instead.
[####################] acc: 0.688 eta: 00 hours, 00 min, 00 sec
CV validation 2 of 5
[####################] acc: 0.906 eta: 00 hours, 00 min, 00 sec
CV validation 3 of 5
[####################] acc: 0.844 eta: 00 hours, 00 min, 00 sec
CV validation 4 of 5
[####################] acc: 0.750 eta: 00 hours, 00 min, 00 sec
CV validation 5 of 5
[####################] acc: 0.656 eta: 00 hours, 00 min, 00 sec  --  Model trained in 00 hours, 50 min, 31 sec
Testing {'LSTM_size': 191, 'dropout': 0.23387354485692247}

CV validation 1 of 5
[#########-----------] acc: 0.656 eta: 00 hours, 04 min, 24 sec

KeyboardInterrupt: 