In [1]:
import os
import h5py
import numpy as np
from functions.preprocess import input_shaping, split_index
from functions.metrics import compute_rmse, compute_pearson
from functions.channel_mapping import channel_mapping
from tensorflow.keras import initializers
from tensorflow.keras.models import Sequential, Model
from keras.layers.recurrent import LSTM
from tensorflow.keras.layers import Dense, Activation, Lambda , Input , Flatten ,Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from tensorflow import random
import time as timer
from keras.models import load_model

from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Embedding, SpatialDropout1D
from keras.layers import LSTM, SimpleRNN, GRU
from keras.regularizers import l2
from keras.constraints import maxnorm
from keras.datasets import imdb

from functions.QRNN import QRNN

# from torch.autograd import Variable
# import torch
# from torchinfo import summary
import tensorflow as tf
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# tf.compat.v1.enable_eager_execution()

In [2]:
# --- Run configuration -------------------------------------------------------
print("Starting simulation")
run_start = timer.time()  # wall-clock start of the run
seed = 2020
feature_list = ['sua_rate', 'mua_rate']
feature = feature_list[1] # select which spike feature: SUA=0, MUA=1

# specify filename to be processed (choose from the list available at https://zenodo.org/record/583331)
file_name = 'indy_20160627_01'          # file name
# file_name = 'indy_20170131_02'          # file name
kinematic_folder = 'kinematic_data/'    # kinematic data folder
feature_folder = 'spike_data/features/' # spike features folder
result_folder = 'results/'              # results folder

wdw_time = 0.05 # window size in seconds
lag = -32 # lag in samples between kinematic and feature data (negative: features lag behind kinematics)
delta_time = 0.004 # sampling interval in seconds
wdw_samp = int(round(wdw_time/delta_time))
ol_samp = wdw_samp-1

# --- Load spike features from the HDF5 file ------------------------------------
# Round before converting to int: a bare int() truncates, so a float product such as
# 1004.9999999999999 would otherwise select the wrong "<N>ms" file name.
feature_file = feature_folder+file_name+'_spike_features_'+str(int(round(wdw_time*1e3)))+'ms.h5'
print("Loading input features from file: "+feature_file)
# NOTE(review): absolute, machine-specific path; it is only referenced by the commented-out
# channel-mapping code below. Make it configurable (or drop it) before sharing the notebook.
channel_mapping_file = r'F:/dropbox/Dropbox (Imperial NGNI)/NGNI Share/Workspace/Zheng/Research_Topics/signal processing plantform/prediction/decoding/raw_data/indy_20170127_03.nwb'

with h5py.File(feature_file,'r') as f:
    input_feature = f[feature][()]  # read the whole dataset into memory
    # with h5py.File(channel_mapping_file, "r") as f:
        # channel_loc =f['/general/extracellular_ephys/electrode_map'][()]
        # input_feature = channel_mapping(input_feature,channel_loc)
    # flatten everything after the first axis so each row is one feature vector
    input_feature = input_feature.reshape(input_feature.shape[0],-1)

# --- Load kinematic data from the HDF5 file ------------------------------------
kinematic_file = kinematic_folder+file_name+'_kinematic_data.h5'
print("Loading kinematic data from file: "+kinematic_file)
with h5py.File(kinematic_file,'r') as f:
    cursor_vel = f['cursor_vel'][()] # in mm/s
#%%
# Decoder hyperparameters (inline notes list the values used for SUA vs. MUA features)
units = 150 # SUA: 200, MUA: 150
epochs = 6
batch_size = 64
dropout = 0.
lrate = 0.0035 # SUA: 0.002, MUA: 0.0035
print(
    "Hyperparameters >> units={}, epochs={}, batch_size={}, dropout={:.1f}, lrate={:.4f}".format(
        units, epochs, batch_size, dropout, lrate
    )
)

# Settings gathered into the `params` dictionary below
num_layers = 1 # number of layers
optimizer = 'RMSprop' # optimizer
timesteps = 1 # number of timesteps (lag + current)
input_dim = input_feature.shape[1] # input dimension
print(input_dim)
output_dim = cursor_vel.shape[1] # output dimension
print(output_dim)
verbose = 0

# The model is loaded from and saved to the same path
load_name = save_name = result_folder+file_name+'model.h5'

params = dict(
    num_layers=num_layers, units=units, epochs=epochs, batch_size=batch_size,
    dropout=dropout, lrate=lrate, timesteps=timesteps, input_dim=input_dim,
    output_dim=output_dim, seed=seed, fit_gen=False, optimizer=optimizer,
    stateful=True, shuffle=True, verbose=verbose, load=False,
    load_name=load_name, save=False, save_name=save_name, retrain=True,
)

num_fold = 10 # number of folds

# Performance scores (loss, RMSE, CC, timings) start as NaN so unfilled entries stay visible
loss_train = np.full((num_fold, epochs), np.nan)
loss_valid = loss_train.copy()
rmse_valid = np.full((num_fold, output_dim), np.nan)
rmse_test, cc_valid, cc_test = (rmse_valid.copy() for _ in range(3))
time_train = np.full(num_fold, np.nan)
time_test = time_train.copy()

print("Formatting input feature data")
tstep, stride = timesteps, 1 # timestep (lag + current) samples; number of samples to be skipped
X_in = input_shaping(input_feature, tstep, stride)

print("Formatting output (kinematic) data")
# Trim rows from the start of the kinematics so Y_out has as many rows as X_in
diff_samp = cursor_vel.shape[0] - X_in.shape[0]
Y_out = cursor_vel[diff_samp:] # in mm/s (remove it for new corrected velocity)

print("Splitting input dataset into training, validation, and testing subdataset")
all_train_idx, all_valid_idx, all_test_idx = split_index(Y_out, num_fold)

def _zscore(data, mean, std, eps):
    """Standardise `data` with precomputed statistics; `eps` keeps the divisor non-zero."""
    return (data - mean) / (std + eps)


def _apply_lag(X, Y, lag):
    """Return X and Y trimmed along axis 0 so that they are offset by `lag` samples.

    lag < 0: drop the last |lag| rows of X and the first |lag| rows of Y.
    lag > 0: drop the first `lag` rows of X and the last `lag` rows of Y.
    lag == 0: both arrays are returned unchanged.
    """
    if lag < 0:
        return X[:lag, :], Y[-lag:, :]
    if lag > 0:
        return X[lag:, :], Y[:-lag, :]
    return X, Y


# Only one fold's arrays are used after this cell, and it has always been the last one:
# the former `for i in range(num_fold)` loop overwrote X_train/Y_train/... on every pass
# without consuming them. Selecting that fold directly yields the same arrays without
# standardising the folds that were thrown away.
i = num_fold - 1
train_idx = all_train_idx[i]
valid_idx = all_valid_idx[i]
test_idx = all_test_idx[i]

# specify training dataset
X_train = X_in[train_idx,:]
Y_train = Y_out[train_idx,:]

# specify validation dataset
X_valid = X_in[valid_idx,:]
Y_valid = Y_out[valid_idx,:]

# specify testing dataset
X_test = X_in[test_idx,:]
Y_test = Y_out[test_idx,:]

epsilon = 1e-4  # added to the std so constant columns do not divide by zero

# Standardize (z-score) the inputs using training-set statistics only
X_train_mean = np.nanmean(X_train, axis=0)
X_train_std = np.nanstd(X_train, axis=0)
X_train = _zscore(X_train, X_train_mean, X_train_std, epsilon)
X_valid = _zscore(X_valid, X_train_mean, X_train_std, epsilon)
X_test = _zscore(X_test, X_train_mean, X_train_std, epsilon)

# Standardize (z-score) the outputs as well, again with training-set statistics only
Y_train_mean = np.nanmean(Y_train, axis=0)
Y_train_std = np.nanstd(Y_train, axis=0)
Y_train = _zscore(Y_train, Y_train_mean, Y_train_std, epsilon)
Y_valid = _zscore(Y_valid, Y_train_mean, Y_train_std, epsilon)
Y_test = _zscore(Y_test, Y_train_mean, Y_train_std, epsilon)

# Re-align data to take lag into account
X_train, Y_train = _apply_lag(X_train, Y_train, lag)
X_valid, Y_valid = _apply_lag(X_valid, Y_valid, lag)
X_test, Y_test = _apply_lag(X_test, Y_test, lag)

# set seed to get reproducible results
np.random.seed(seed)
random.set_seed(seed)  # `random` is tensorflow.random here, not the stdlib module
print("Instantiating and training model...")
# X_train = np.moveaxis(X_train,1,-1)
# X_valid = np.moveaxis(X_valid,1,-1)
# X_test = np.moveaxis(X_test,1,-1)
print(X_train.shape)
print(Y_train.shape)

Starting simulation
Loading input features from file: spike_data/features/indy_20160627_01_spike_features_50ms.h5
Loading kinematic data from file: kinematic_data/indy_20160627_01_kinematic_data.h5
Hyperparameters >> units=150, epochs=6, batch_size=64, dropout=0.0, lrate=0.0035
100
2
Formatting input feature data
Formatting output (kinematic) data
Splitting input dataset into training, validation, and testing subdataset
Instantiating and training model...
(672552, 1, 100)
(672552, 2)


In [3]:
# Build a single-layer QRNN regressor and train it for one epoch on the prepared fold.
batch_size = 32  # NOTE: overrides the batch_size=64 set (and printed) in the configuration cell
print('Build model...')

# Re-seed in the same cell as the stochastic work (weight initialisation, dropout, shuffling)
# so that re-running only this cell reproduces the same model. On a full top-to-bottom run
# this repeats the seeding done at the end of the previous cell and changes nothing.
np.random.seed(seed)
random.set_seed(seed)  # `random` is tensorflow.random here, not the stdlib module

model = Sequential()
tf.config.run_functions_eagerly(True)
model.add(QRNN(32, window_size=1, dropout=0.2,
               kernel_regularizer=l2(1e-4), bias_regularizer=l2(1e-4),
               kernel_constraint=maxnorm(10), bias_constraint=maxnorm(10)))
# One output unit per kinematic dimension (output_dim == 2 for this dataset)
model.add(Dense(output_dim))
model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mse'])
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=1)
# summary() comes after fit(): no input shape is given above, so the model is only built
# once it has seen data.
model.summary()

Build model...




Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
qrnn (QRNN)                  (None, 32)                9696      
_________________________________________________________________
dense (Dense)                (None, 2)                 66        
Total params: 9,762
Trainable params: 9,762
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Export the trained model: first as a Keras HDF5 file, then as a TensorFlow Lite flatbuffer.
tf.config.run_functions_eagerly(True)
model.save('qrnn_pls.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# NOTE(review): private-module import placed mid-cell. Presumably the QRNN layer relies on
# NumPy-style tensor behaviour during conversion -- confirm, and consider moving the import
# to the import cell at the top of the notebook.
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
# Optional full-integer (uint8) quantisation, currently disabled. Enabling it requires a
# `representative_dataset` generator, which is not defined in the cells shown here.
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.representative_dataset = representative_dataset
# # Ensure that if any ops can't be quantized, the converter throws an error
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# # Set the input and output tensors to uint8 (APIs added in r2.3)
# converter.inference_input_type = tf.uint8
# converter.inference_output_type = tf.uint8
tf.config.run_functions_eagerly(True)  # repeated; the same flag is already set at the top of this cell
tflite_model = converter.convert()
# Write the serialised TFLite model to disk
with open('qrnn.tflite', 'wb') as f:
  f.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\alexo\AppData\Local\Temp\tmp0gmukq5c\assets
