In [58]:
import os, sys
os.environ['PYTHONHASHSEED']=str(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)
import tensorflow as tf
tf.set_random_seed(0)

# ## uncomment for reproducibility ##
# from keras import backend as K
# session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# K.set_session(sess)

import pandas as pd
import matplotlib.pyplot as plt
import pickle
import itertools
import time
import itertools
from IPython.display import display, clear_output

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SimpleRNN
from keras.initializers import glorot_uniform

from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint


from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


sys.path.append("../rsc") # Adds pyLi directory to python modules path.
# import lithium as li
# import other as ot
from load_data import load_data
from pre_process_data import pre_process_data
from create_model import create_model
from helper import play_bell, LossHistory
from train_model import train_model

%config InlineBackend.figure_format = 'retina'
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [59]:
# Identifier of this experiment series; it is embedded in every model and grid path.
VERSION = '08'

# Folder that receives the models saved for this version.
directory= f'../models/{VERSION}'
# exist_ok=True makes the call idempotent and removes the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

## Hyperparameter settings

In [60]:
def create_grid(params=None):
    """Build the hyperparameter grid for one training run.

    Every setting is stored as a two-element list ``[dtype, value]``. The
    defaults defined below are overridden, key by key, with ``params``.
    ``test_index`` is one more than the largest ``test_index`` found in the
    pickled grid table of the current ``VERSION``, or 0 when that file does
    not exist.

    Parameters
    ----------
    params : dict, optional
        Overrides as ``{setting_name: value}``. Each key must be one of the
        settings defined in this function. The dict itself is also stored
        under the ``'params'`` setting.

    Returns
    -------
    dict
        ``{setting_name: [dtype, value]}``.

    Raises
    ------
    KeyError
        If ``params`` contains a key that is not a known setting.
    """
    # A fresh dict per call: a mutable default would be shared by every grid
    # created without overrides.
    if params is None:
        params = {}

    grid_path = '../model_data/grid_{}.pkl'.format(VERSION)
    if os.path.isfile(grid_path):
        # NOTE(review): unpickling can execute arbitrary code; only load grid files you trust.
        grid_df = pd.read_pickle(grid_path)
        test_index = grid_df['test_index'].max() + 1
    else:
        test_index = 0

    # The builtin int/float replace np.int/np.float: those were plain aliases
    # of the builtins and no longer exist in NumPy >= 1.24.
    current_grid = {
        'version'                : ['str'     , VERSION],
        'params'                 : ['O'       , params],
        'test_index'             : [int       , test_index],

        # Load and preprocess
        'batch_size_data'        : [int       , 200],
        'x_std'                  : [float     , 3.398],
        'y_std'                  : [float     , 2.926],
        'sigma_cut'              : [float     , 0.1],
        'training_size'          : [int       , 200],

        # Regularisation. This key used to be listed twice in the literal (the
        # second entry silently won); it is defined once here, at the position
        # of its first occurrence so the key order of the grid is unchanged.
        'dropout'                : [float     , 0.0],

        # Metrics
        'best_model_metric'      : ['str'     , 'val_acc'],
        'training_metric'        : ['str'     , 'accuracy'],

        # Random seeds
        'seed_skf'               : [int       , 0],
        'seed_numpy'             : [int       , 0],
        'seed_tensorflow'        : [int       , 0],
        'seed_random'            : [int       , 0],
        'seed_sklearn'           : [int       , 0],
        'seed_model'             : [int       , 0],

        # Model properties
        'input_shape'            : ['O'       , None],
        'LSTM_size'              : [int       , 200],

        # Training parameters
        'learning_rate'          : [float     , 1.3e-4],
        'batch_size'             : [int       , 32],
        'epochs'                 : [int       , 200],
        'test_size'              : [float     , 0.15],
        'skf_n_splits'           : [int       , 5],

        # Outputs
        'best_model_paths'       : ['O'       , []],
        'best_model_accuracies'  : ['O'       , []],
        'fit_outs'               : ['O'       , []]}

    for key, value in params.items():
        if key not in current_grid:
            # Same exception type as the plain lookup raised, but with context.
            raise KeyError('unknown grid parameter: {!r}'.format(key))
        current_grid[key][1] = value
    return current_grid

def add_grid_and_save(grid):
    """Append one grid as a new row of its version's results table and pickle it.

    The table lives at ``../model_data/grid_<version>.pkl``, where ``<version>``
    is ``grid['version'][1]``. An existing table is loaded and extended; when
    there is none, a new one-row table is created.

    Parameters
    ----------
    grid : dict
        ``{setting_name: [dtype, value]}``; only the value of each entry is stored.
    """
    grid_path = '../model_data/grid_{}.pkl'.format(grid['version'][1])
    # One record -> one row; list/dict/tuple values stay intact inside their cell.
    new_row = pd.DataFrame([{key: entry[1] for key, entry in grid.items()}])

    if os.path.isfile(grid_path):
        # NOTE(review): unpickling can execute arbitrary code; only load grid files you trust.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        grid_df = pd.concat([pd.read_pickle(grid_path), new_row], ignore_index=True)
    else:
        grid_df = new_row

    # Create the output folder on first use so the save cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(grid_path), exist_ok=True)
    grid_df.to_pickle(grid_path)

### Run training

In [61]:
# Default hyperparameter grid (no overrides).
grid = create_grid()

# Load the train/test arrays, then pass each partition through the same
# pre-processing helper, configured by the grid.
# NOTE(review): X_train/y_train and X_test/y_test are rebound to their
# pre-processed versions, so re-running only the last two lines would
# pre-process already pre-processed data. Re-run the whole cell instead.
X_train, X_test, y_train, y_test = load_data(grid)
X_train, y_train = pre_process_data(X_train, y_train, grid)
X_test, y_test = pre_process_data(X_test, y_test, grid)

In [62]:
# Dimensions of the pre-processed training array (see the cell output).
X_train.shape

(421, 199, 2)

In [63]:
def test_params(training_points):
    grid = create_grid({'training_size':training_points})
    grid = run_training(X_train[:training_points, ...], y_train[:training_points], grid, verbose=True)
    return np.mean(grid['best_model_accuracies'][1])

In [64]:
def run_training(X, y, grid, verbose=True):
    """Train one model per cross-validation split and record the results.

    The numpy, TensorFlow and ``random`` generators are re-seeded from the
    grid. For each of the ``grid['skf_n_splits'][1]`` splits, a model is
    built with ``create_model`` and its checkpoint path is appended to
    ``grid['best_model_paths'][1]``. ``train_model`` is then run, and the
    best validation accuracy of the latest entry in ``grid['fit_outs'][1]``
    is appended to ``grid['best_model_accuracies'][1]``. Finally the grid is
    stored with ``add_grid_and_save``.

    Parameters
    ----------
    X, y :
        Training inputs and targets, passed unchanged to ``train_model``.
        ``X.shape[1]`` becomes the first dimension of the model input shape;
        the second dimension is fixed at 2.
    grid : dict
        ``{setting_name: [dtype, value]}``; it is updated in place.
    verbose : bool, default True
        When False, the per-split header and the final timing line are not
        printed. Output produced inside ``train_model`` is not affected.

    Returns
    -------
    dict
        The grid returned by the last ``train_model`` call, or the input grid
        when there are no splits.
    """
    initial_time = time.time()
    np.random.seed(grid['seed_numpy'][1])
    tf.set_random_seed(grid['seed_tensorflow'][1])
    random.seed(grid['seed_random'][1])

    grid['input_shape'][1] = (X.shape[1], 2)

    n_splits = grid['skf_n_splits'][1]
    for index_split in range(n_splits):  # one freshly built model per CV split
        model = create_model(grid)
        if verbose:
            print(f"\nCV validation {index_split+1} of {n_splits}")
        best_model_path = '../models/{}/best_model_{}_{}.pkl'.format(grid['version'][1], grid['test_index'][1], index_split)
        grid['best_model_paths'][1].append(best_model_path)
        grid = train_model(X, y, model, grid)

        history = grid['fit_outs'][1][-1].history
        # Older Keras releases log 'val_acc'; newer ones log 'val_accuracy'
        # when the metric is requested as 'accuracy'.
        val_key = 'val_acc' if 'val_acc' in history else 'val_accuracy'
        grid['best_model_accuracies'][1].append(np.max(history[val_key]))

    add_grid_and_save(grid)

    if verbose:
        # time.gmtime wraps the hour field, so runs of 24 h or more show hours modulo 24.
        total_time = time.strftime("%H hours, %M min, %S sec", time.gmtime((time.time() - initial_time)))
        print('  --  Model trained in {}'.format(total_time))
    return grid

In [65]:
%%time

# Locations of this version's grid table: the local copy and the copy in a
# synchronised drive folder.
# NOTE(review): drive_path is an absolute, machine-specific path; it only
# resolves on the machine it was written for.
local_path = '../model_data/grid_{}.pkl'.format(VERSION)
drive_path = 'G:/Il mio Drive/Colab Notebooks/deep_mouse/model_data/grid_{}.pkl'.format(VERSION)

# NOTE(review): grid_df is loaded here but not used in the rest of this cell.
if os.path.isfile(local_path):
    grid_df = pd.read_pickle(local_path)

# Run the full cross-validated training once per training-set size.
# `target` keeps only the mean accuracy of the last size in the list.
for training_points in [100]:
    target = test_params(training_points)



CV validation 1 of 5
[####################] acc: 0.938 eta: 00 hours, 00 min, 00 sec
CV validation 2 of 5
[####################] acc: 0.938 eta: 00 hours, 00 min, 00 sec
CV validation 3 of 5
[####################] acc: 0.719 eta: 00 hours, 00 min, 00 sec
CV validation 4 of 5
[####################] acc: 0.969 eta: 00 hours, 00 min, 00 sec
CV validation 5 of 5
[####################] acc: 0.938 eta: 00 hours, 00 min, 00 sec  --  Model trained in 00 hours, 12 min, 00 sec
Wall time: 12min


In [78]:
# Load the two grid tables for this version. drive_path and local_path are
# defined in the training cell, so that cell must have been run first.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
colab_df = pd.read_pickle(drive_path)
local_df = pd.read_pickle(local_path)

# Stack the rows of both tables (drive rows first), renumber the index and
# store the result next to the local grid file.
concat_path = '../model_data/grid_mix_{}.pkl'.format(VERSION)
df = pd.concat([colab_df, local_df], axis=0, ignore_index=True)
df.to_pickle(concat_path)