In [1]:
import os
import h5py
import numpy as np
from functions.preprocess import input_shaping, split_index
from functions.metrics import compute_rmse, compute_pearson
from functions.channel_mapping import channel_mapping
from tensorflow.keras import initializers
from tensorflow.keras.models import Sequential, Model
from keras.layers.recurrent import LSTM
from tensorflow.keras.layers import Dense, Activation, Lambda , Input , Flatten ,Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from tensorflow import random
import time as timer
from keras.models import load_model

# from torch.autograd import Variable
# import torch
# from torchinfo import summary
import tensorflow as tf

In [2]:
# List the devices TensorFlow can see, to confirm whether a GPU is available.
visible_devices = tf.config.get_visible_devices()
print(visible_devices)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
def pearson_r(y_true, y_pred):
    """Keras metric: negated Pearson correlation per output column over a batch.

    Both tensors are mean-centred and L2-normalised along the sample axis
    (axis 0), then passed to ``tf.keras.losses.cosine_similarity``. The cosine
    of two centred vectors is their Pearson correlation coefficient.

    The previous version reduced along axis 1, i.e. across the output
    dimension of each individual sample. With two outputs every centred row is
    ``(a, -a)``, so the result was always +/-1 and carried no correlation
    information.

    Args:
        y_true: target tensor, indexed as (sample, output).
        y_pred: prediction tensor with the same layout as ``y_true``.

    Returns:
        One value per output column. The Keras cosine-similarity loss is
        negated (a perfect positive correlation gives -1), so LOWER is better.
        The sign is deliberately left as before so that callbacks monitoring
        this metric in "min" mode keep selecting the best epoch.
    """
    sample_axis = 0  # correlate across samples, separately for each output column
    true_centered = y_true - tf.reduce_mean(y_true, axis=sample_axis, keepdims=True)
    pred_centered = y_pred - tf.reduce_mean(y_pred, axis=sample_axis, keepdims=True)
    true_unit = tf.nn.l2_normalize(true_centered, axis=sample_axis)
    pred_unit = tf.nn.l2_normalize(pred_centered, axis=sample_axis)
    return tf.keras.losses.cosine_similarity(true_unit, pred_unit, axis=sample_axis)

In [7]:
print("Starting simulation")
run_start = timer.time()  # reference point for the total run time reported after training
seed = 2020

# Spike feature to decode from: index 0 selects SUA, index 1 selects MUA.
feature_list = ['sua_rate', 'mua_rate']
feature = feature_list[1]

# Recording session to process (available sessions: https://zenodo.org/record/583331)
file_name = 'indy_20160915_01'
# file_name = 'indy_20170131_02'
kinematic_folder = 'kinematic_data/'      # kinematic data folder
feature_folder = 'spike_data/features/'   # spike features folder
result_folder = 'results/'                # results folder

wdw_time = 0.128     # window size in seconds
lag = -32            # lag between kinematic and feature data (negative: features lag behind kinematics)
delta_time = 0.004   # sampling interval in seconds
wdw_samp = int(round(wdw_time/delta_time))
ol_samp = wdw_samp-1

# Load the selected spike feature from its HDF5 file and flatten every sample to one row.
feature_file = '{}{}_spike_features_{}ms.h5'.format(feature_folder, file_name, int(wdw_time*1e3))
print("Loading input features from file: "+feature_file)
# NOTE(review): only referenced by the commented-out channel-mapping code below.
channel_mapping_file = r'F:/dropbox/Dropbox (Imperial NGNI)/NGNI Share/Workspace/Zheng/Research_Topics/signal processing plantform/prediction/decoding/raw_data/indy_20170127_03.nwb'

with h5py.File(feature_file, 'r') as f:
    input_feature = f[feature][()]
    # with h5py.File(channel_mapping_file, "r") as f:
        # channel_loc =f['/general/extracellular_ephys/electrode_map'][()]
        # input_feature = channel_mapping(input_feature,channel_loc)
input_feature = input_feature.reshape(input_feature.shape[0], -1)

# Load the cursor velocity (regression target) from its HDF5 file.
kinematic_file = '{}{}_kinematic_data.h5'.format(kinematic_folder, file_name)
print("Loading kinematic data from file: "+kinematic_file)
with h5py.File(kinematic_file, 'r') as f:
    cursor_vel = f['cursor_vel'][()]  # in mm/s
#%%
# LSTM hyperparameters.
# NOTE(review): the training loop later in this cell builds its model from
# `unit_list` with an Adam optimizer, so `units`, `lrate`, `dropout` and
# `optimizer` below are only printed / collected in `params` here.
units = 150      # SUA: 200, MUA: 150
epochs = 6
batch_size = 64
dropout = 0.
lrate = 0.0035   # SUA: 0.002, MUA: 0.0035
hyper_msg = "Hyperparameters >> units={}, epochs={}, batch_size={}, dropout={:.1f}, lrate={:.4f}"
print(hyper_msg.format(units, epochs, batch_size, dropout, lrate))

# Remaining model / run settings.
num_layers = 1                       # number of layers
optimizer = 'RMSprop'                # optimizer
timesteps = 1                        # number of timesteps (lag + current)
input_dim = input_feature.shape[1]   # input dimension
print(input_dim)
output_dim = cursor_vel.shape[1]     # output dimension
print(output_dim)
verbose = 0

load_name = result_folder+file_name+'model.h5'
save_name = result_folder+file_name+'model.h5'

# Everything above gathered in a single dictionary.
params = dict(
    num_layers=num_layers, units=units, epochs=epochs, batch_size=batch_size,
    dropout=dropout, lrate=lrate, timesteps=timesteps, input_dim=input_dim,
    output_dim=output_dim, seed=seed, fit_gen=False, optimizer=optimizer,
    stateful=True, shuffle=True, verbose=verbose, load=False,
    load_name=load_name, save=False, save_name=save_name, retrain=True,
)

num_fold = 10  # number of folds

# Per-fold score and timing buffers, NaN until a fold writes its result.
loss_train = np.full((num_fold, epochs), np.nan)
loss_valid = np.full((num_fold, epochs), np.nan)
rmse_valid = np.full((num_fold, output_dim), np.nan)
rmse_test = np.full((num_fold, output_dim), np.nan)
cc_valid = np.full((num_fold, output_dim), np.nan)
cc_test = np.full((num_fold, output_dim), np.nan)
time_train = np.full(num_fold, np.nan)
time_test = np.full(num_fold, np.nan)

print("Formatting input feature data")
tstep = timesteps   # timestep (lag + current) samples
stride = 1          # number of samples to be skipped
X_in = input_shaping(input_feature, tstep, stride)

print("Formatting output (kinematic) data")
# Drop leading kinematic samples so the targets have as many rows as X_in.
diff_samp = cursor_vel.shape[0]-X_in.shape[0]
Y_out = cursor_vel[diff_samp:, :]  # in mm/s (remove it for new corrected velocity)

print("Splitting input dataset into training, validation, and testing subdataset")
all_train_idx, all_valid_idx, all_test_idx = split_index(Y_out, num_fold)
def _align_lag(X, Y, lag):
    """Trim X and Y by |lag| rows at opposite ends so both keep the same length.

    lag < 0: drop the last |lag| rows of X and the first |lag| rows of Y.
    lag > 0: drop the first lag rows of X and the last lag rows of Y.
    lag == 0: return both unchanged.
    """
    if lag < 0:
        return X[:lag], Y[-lag:]
    if lag > 0:
        return X[lag:], Y[:-lag]
    return X, Y


unit_list = [5,10,15,20]  # LSTM layer widths to evaluate
epsilon = 1e-4            # keeps the z-score division finite for constant columns

# Loop-invariant output folder. It used to be rebuilt inside the fold loop,
# before `batch_size` was overwritten there, so its name changed after the
# first fold.
# NOTE(review): the name embeds the `optimizer` / `lrate` settings, not the
# Adam optimizer the model below is compiled with, and nothing in this cell
# writes into the folder - confirm it is still needed.
model_name = 'lstm'
save_path = result_folder+file_name+'_'+model_name+optimizer+'_batch_size_{}_'.format(batch_size)+'_lr_{}_'.format(lrate)+'_'+feature+'_'+str(int(wdw_time*1e3))+'/'
os.makedirs(save_path, exist_ok=True)

for j in unit_list:
    for i in range(num_fold):
        train_idx = all_train_idx[i]
        valid_idx = all_valid_idx[i]
        test_idx = all_test_idx[i]

        # specify training dataset
        X_train = X_in[train_idx,:]
        Y_train = Y_out[train_idx,:]

        # specify validation dataset
        X_valid = X_in[valid_idx,:]
        Y_valid = Y_out[valid_idx,:]

        # specify testing dataset
        X_test = X_in[test_idx,:]
        Y_test = Y_out[test_idx,:]

        # Standardize (z-score) the inputs with statistics of the training split only
        X_train_mean = np.nanmean(X_train, axis=0)
        X_train_std = np.nanstd(X_train, axis=0)
        X_train = (X_train - X_train_mean)/(X_train_std+epsilon)
        X_valid = (X_valid - X_train_mean)/(X_train_std+epsilon)
        X_test = (X_test - X_train_mean)/(X_train_std+epsilon)

        # Standardize (z-score) the outputs the same way; the RMSE values
        # reported below are therefore in standardized units, not mm/s
        Y_train_mean = np.nanmean(Y_train, axis=0)
        Y_train_std = np.nanstd(Y_train, axis=0)
        Y_train = (Y_train - Y_train_mean)/(Y_train_std+epsilon)
        Y_valid = (Y_valid - Y_train_mean)/(Y_train_std+epsilon)
        Y_test = (Y_test - Y_train_mean)/(Y_train_std+epsilon)

        # Re-align data to take lag into account
        X_train, Y_train = _align_lag(X_train, Y_train, lag)
        X_valid, Y_valid = _align_lag(X_valid, Y_valid, lag)
        X_test, Y_test = _align_lag(X_test, Y_test, lag)

        # set seed to get reproducible results
        np.random.seed(seed)
        random.set_seed(seed)
        print("Instantiating and training model...with {} lstm units".format(j))

        # LSTM -> Dense(5) -> Dropout -> ReLU -> Dense(2). The input shape
        # comes from the data configuration instead of a literal (1,100).
        rnn = Sequential()
        rnn.add(LSTM(j, input_shape = (timesteps, input_dim)))
        rnn.add(Flatten())
        rnn.add(Dense(5))
        rnn.add(Dropout(0.2))
        rnn.add(Activation('relu'))
        rnn.add(Dense(2))
        opt = Adam(learning_rate=1e-3, decay=1e-3 / 200)
        rnn.compile(loss='mse', optimizer=opt, metrics=['mse',pearson_r])

        # pearson_r is built on the Keras cosine-similarity loss, which is
        # negated (more similar => more negative), so lower is better. 'min'
        # is what the automatic mode already picked; it is now explicit.
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_pearson_r', mode='min',
                                                    patience=4, restore_best_weights=True)

        # Time the training; time_train was previously never filled, which left
        # it all-NaN in the results file and made np.nanmean() below warn.
        fit_start = timer.time()
        rnn.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), epochs=epochs,
                batch_size=batch_size, verbose=False, shuffle=True, callbacks=[callback])
        time_train[i] = timer.time() - fit_start

        Y_valid_predict = rnn.predict(X_valid)
        start = timer.time()
        Y_test_predict = rnn.predict(X_test)
        end = timer.time()
        print("Model testing took {:.2f} seconds".format(end - start))
        time_test[i] = end - start

        # Compute performance metrics
        rmse_vld = compute_rmse(Y_valid, Y_valid_predict)
        rmse_tst = compute_rmse(Y_test, Y_test_predict)
        cc_vld = compute_pearson(Y_valid, Y_valid_predict)
        cc_tst = compute_pearson(Y_test, Y_test_predict)
        rmse_valid[i,:] = rmse_vld
        rmse_test[i,:] = rmse_tst
        cc_valid[i,:] = cc_vld
        cc_test[i,:] = cc_tst

        print("Fold-{} | Validation RMSE: {:.4f}".format(i, np.nanmean(rmse_vld)))
        print("Fold-{} | Validation CC: {:.4f}".format(i, np.nanmean(cc_vld)))
        print("Fold-{} | Testing RMSE: {:.4f}".format(i, np.nanmean(rmse_tst)))
        print("Fold-{} | Testing CC: {:.4f}".format(i, np.nanmean(cc_tst)))

    # Summarise all folds for this layer width.
    run_end = timer.time()
    mean_rmse_valid = np.nanmean(rmse_valid, axis=0)
    mean_rmse_test = np.nanmean(rmse_test, axis=0)
    mean_cc_valid = np.nanmean(cc_valid, axis=0)
    mean_cc_test = np.nanmean(cc_test, axis=0)
    mean_time = np.nanmean(time_train, axis=0)
    print("----------------------------------------------------------------------")
    print("Validation Mean RMSE: %.5f " %(np.mean(mean_rmse_valid)))
    print("Validation Mean CC: %.5f " %(np.mean(mean_cc_valid)))
    print("Testing Mean RMSE: %.5f " %(np.mean(mean_rmse_test)))
    print("Testing Mean CC: %.5f " %(np.mean(mean_cc_test)))
    print("----------------------------------------------------------------------")

    result_filename = result_folder+file_name+'_10_lstm5_6e'+str(j)+'_'+feature+'_'+str(int(wdw_time*1e3))+'ms.h5'
    print ("Storing results into file: "+result_filename)
    with h5py.File(result_filename,'w') as f:
        # Y_true / Y_predict hold the LAST fold only; the score arrays cover all folds.
        f['Y_true'] = Y_test
        f['Y_predict'] = Y_test_predict
        f['rmse_valid'] = rmse_valid
        f['rmse_test'] = rmse_test
        f['cc_valid'] = cc_valid
        f['cc_test'] = cc_test
        f['time_train'] = time_train
        f['time_test'] = time_test

    # run_start is taken once before the sweep, so this is cumulative over widths.
    run_time = run_end - run_start
    print ("Finished whole processes within %.2f seconds" % run_time)

    # Inspect and export the model trained on the last fold for this width.
    rnn.summary()
    micro_valid = np.ones((1, timesteps, input_dim))  # all-ones probe, batch of one
    output_micro = rnn.predict(micro_valid)
    print(output_micro)
    file_name_save = 'lstm'+str(j)+'.h5'
    rnn.save(file_name_save)

Starting simulation
Loading input features from file: spike_data/features/indy_20160915_01_spike_features_128ms.h5
Loading kinematic data from file: kinematic_data/indy_20160915_01_kinematic_data.h5
Hyperparameters >> units=150, epochs=6, batch_size=64, dropout=0.0, lrate=0.0035
100
2
Formatting input feature data
Formatting output (kinematic) data
Splitting input dataset into training, validation, and testing subdataset
Instantiating and training model...with 5 lstm units
Model testing took 0.73 seconds
Fold-0 | Validation RMSE: 0.6276
Fold-0 | Validation CC: 0.6981
Fold-0 | Testing RMSE: 0.5986
Fold-0 | Testing CC: 0.8082
Instantiating and training model...with 5 lstm units
Model testing took 0.73 seconds
Fold-1 | Validation RMSE: 0.6105
Fold-1 | Validation CC: 0.8015
Fold-1 | Testing RMSE: 0.5364
Fold-1 | Testing CC: 0.7808
Instantiating and training model...with 5 lstm units
Model testing took 0.78 seconds
Fold-2 | Validation RMSE: 0.5311
Fold-2 | Validation CC: 0.7789
Fold-2 | Tes

  mean_time =  np.nanmean(time_train, axis=0)


Instantiating and training model...with 10 lstm units
Model testing took 0.76 seconds
Fold-0 | Validation RMSE: 0.6261
Fold-0 | Validation CC: 0.7093
Fold-0 | Testing RMSE: 0.5687
Fold-0 | Testing CC: 0.8293
Instantiating and training model...with 10 lstm units
Model testing took 0.74 seconds
Fold-1 | Validation RMSE: 0.5801
Fold-1 | Validation CC: 0.8219
Fold-1 | Testing RMSE: 0.5352
Fold-1 | Testing CC: 0.7810
Instantiating and training model...with 10 lstm units
Model testing took 0.72 seconds
Fold-2 | Validation RMSE: 0.5337
Fold-2 | Validation CC: 0.7756
Fold-2 | Testing RMSE: 0.5860
Fold-2 | Testing CC: 0.7649
Instantiating and training model...with 10 lstm units
Model testing took 0.72 seconds
Fold-3 | Validation RMSE: 0.6057
Fold-3 | Validation CC: 0.7684
Fold-3 | Testing RMSE: 0.6352
Fold-3 | Testing CC: 0.8393
Instantiating and training model...with 10 lstm units
Model testing took 0.73 seconds
Fold-4 | Validation RMSE: 0.6475
Fold-4 | Validation CC: 0.8481
Fold-4 | Testing R

In [31]:
# Inspect whichever model is currently bound to `rnn` and run a smoke-test
# prediction on an all-ones input.
rnn.summary()
# Take the probe shape from the model itself (batch of one) rather than
# hard-coding (1,1,100), so the cell keeps working if the feature dimension or
# the number of timesteps changes.
micro_valid = np.ones((1,) + tuple(rnn.input_shape[1:]))
output_micro = rnn.predict(micro_valid)
print(output_micro)

Model: "sequential_35"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_35 (LSTM)               (None, 150)               150600    
_________________________________________________________________
flatten_35 (Flatten)         (None, 150)               0         
_________________________________________________________________
dense_70 (Dense)             (None, 5)                 755       
_________________________________________________________________
dropout_35 (Dropout)         (None, 5)                 0         
_________________________________________________________________
activation_35 (Activation)   (None, 5)                 0         
_________________________________________________________________
dense_71 (Dense)             (None, 2)                 12        
Total params: 151,367
Trainable params: 151,367
Non-trainable params: 0
_______________________________________________

In [33]:
# Export the model currently bound to `rnn` as Keras HDF5 and as TFLite.
# The file names follow the model's actual LSTM width instead of a hard-coded
# "150": after a clean Run All, `rnn` is the last model of the width sweep, so
# a fixed name would mislabel it. A 150-unit model still yields 'lstm150.*'.
n_units = rnn.layers[0].units  # first layer of the Sequential model is the LSTM
keras_path = 'lstm{}.h5'.format(n_units)
tflite_path = 'lstm{}.tflite'.format(n_units)

rnn.save(keras_path)
converter = tf.lite.TFLiteConverter.from_keras_model(rnn)
# Optional full-integer quantisation, currently disabled:
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.representative_dataset = representative_dataset
# # Ensure that if any ops can't be quantized, the converter throws an error
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# # Set the input and output tensors to uint8 (APIs added in r2.3)
# converter.inference_input_type = tf.uint8
# converter.inference_output_type = tf.uint8
tflite_model = converter.convert()
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\alexo\AppData\Local\Temp\tmplsm32mp4\assets


INFO:tensorflow:Assets written to: C:\Users\alexo\AppData\Local\Temp\tmplsm32mp4\assets
