## 1. Import Packages

Below, we import both standard packages and functions from the accompanying .py files.

In [1]:
#Import standard packages
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

#Import function to get the covariate matrix that includes spike history from previous bins
from preprocessing_funcs import get_spikes_with_history

#Import metrics
from metrics import get_R2
from metrics import get_rho

#Import decoder functions
from decoders import WienerCascadeDecoder
from decoders import WienerFilterDecoder
from decoders import DenseNNDecoder
from decoders import SimpleRNNDecoder
from decoders import GRUDecoder
from decoders import LSTMDecoder
from decoders import XGBoostDecoder

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 3: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5103)


## 2. Load Data

The data that we load is in the format described below. We have another example script, "neural_preprocessing.py", that may be helpful for putting the data in this format.

Neural data should be a matrix of size "number of time bins" x "number of neurons", where each entry is the firing rate of a given neuron in a given time bin

The output you are decoding should be a matrix of size "number of time bins" x "number of features you are decoding"

In [2]:
# Directory holding the preprocessed data file (machine-specific absolute path; edit for your setup)
# Previously used locations, kept for reference:
# folder='/Users/jig289/Dropbox/MATLAB/Projects/In_Progress/BMI/Processed_Data/'
# folder='/Users/jig289/Dropbox/Grad_School/Research/Projects/In_Progress/Decoding/DataFiles/'
folder = '/home/jglaser2/Data/DecData/'

# The pickle holds four objects, unpacked here in the order they were stored
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source
data_path = folder + 's1_test_data.pickle'
with open(data_path, 'rb') as data_file:
    neural_data, vels_binned, pos_binned, acc_binned = pickle.load(data_file)

## 3. Preprocess Data

### 3A. User Inputs
The user can define what time period to use spikes from (with respect to the output).

In [3]:
# Window of neural data, relative to each output bin, that is used for decoding
bins_before = 13  # Number of bins of neural data prior to the output bin
bins_current = 1  # Whether to use the bin of neural data concurrent with the output (1 = use it)
bins_after = 0  # Number of bins of neural data after the output bin (the concurrent bin is set by bins_current)

### 3B. Format Covariates

#### Format Input Covariates

In [4]:
# Recurrent neural networks (SimpleRNN, GRU, LSTM) use the 3-dimensional covariate array,
# which includes the spike history from the surrounding bins for every example
X = get_spikes_with_history(neural_data, bins_before, bins_after, bins_current)

# Wiener Filter, Wiener Cascade, XGBoost and the Dense Neural Network use a "flat" matrix:
# the last two axes of X are merged, so each "neuron / time" pair is a single feature
num_flat_features = X.shape[1] * X.shape[2]
X_flat = X.reshape(X.shape[0], num_flat_features)

#### Format Output Covariates

In [5]:
# Decoding target: the binned velocities loaded from the data file
y = vels_binned

### 3C. Process Covariates
We normalize (z-score) the inputs and zero-center the outputs.

In [6]:
#Z-scoring function that works with Nans:
def zscore_nan(X, axis):
    """Z-score X along `axis`, ignoring NaNs when computing the statistics.

    Parameters
    ----------
    X : numpy array
        Data to normalize; may contain NaNs.
    axis : int or tuple of ints
        Axis (or axes) along which the mean and standard deviation are computed.

    Returns
    -------
    X_zscore : numpy array
        Array with the same shape as X. NaN entries of X stay NaN; a slice whose
        standard deviation is 0 produces NaN/inf, following numpy division rules.
    """
    # keepdims=True leaves the reduced axis in place with length 1, so the statistics
    # broadcast against X for any axis. Without it only axis=0 lines up; other axes
    # either raise or (for square inputs) silently broadcast along the wrong dimension.
    X_mean = np.nanmean(X, axis=axis, keepdims=True)
    X_std = np.nanstd(X, axis=axis, keepdims=True)
    X_zscore = (X - X_mean) / X_std
    return X_zscore

In [7]:
# Z-score the inputs (both the 3-dimensional and the flat version); statistics are taken along axis 0
# NOTE(review): the statistics are computed over all rows, before the train/test split further down,
# so the test portion contributes to the normalization -- confirm this is intended
X = zscore_nan(X, axis=0)
X_flat = zscore_nan(X_flat, axis=0)

# Zero-center the outputs by removing the mean along axis 0; the mean is kept in y_mean
y_mean = np.mean(y, axis=0)
y = y - y_mean

### 3D. Split into training/testing sets

#### User Options

In [8]:
# Fraction of the data [start, end] that goes into the training and the testing set:
# here the first half is used for training and the second half for testing
training_range = [0, 0.5]
testing_range = [0.5, 1]

#### Split Data

In [9]:
num_examples = X.shape[0]

# Note that each range has a buffer of "bins_before" bins at the beginning, and "bins_after" bins at the end
# This makes it so that the different sets don't include overlapping neural data
# The built-in int() is used for the conversion: the np.int alias is deprecated and absent from newer NumPy
training_start = int(np.round(training_range[0] * num_examples)) + bins_before
training_stop = int(np.round(training_range[1] * num_examples)) - bins_after
testing_start = int(np.round(testing_range[0] * num_examples)) + bins_before
testing_stop = int(np.round(testing_range[1] * num_examples)) - bins_after

# Row indices of the examples that belong to each set
training_set = np.arange(training_start, training_stop)
testing_set = np.arange(testing_start, testing_stop)

# Get training data (3-dimensional inputs, flat inputs, outputs)
X_train = X[training_set, :, :]
X_flat_train = X_flat[training_set, :]
y_train = y[training_set, :]

# Get testing data (3-dimensional inputs, flat inputs, outputs)
X_test = X[testing_set, :, :]
X_flat_test = X_flat[testing_set, :]
y_test = y[testing_set, :]

## 4. Train Decoders

### 4A. Wiener Filter (Linear Regression)

In [10]:
# Wiener Filter decoder, constructed without arguments and fit on the flat training covariates
model_wf = WienerFilterDecoder()
model_wf.fit(X_flat_train, y_train)

### 4B. Wiener Cascade (Linear Nonlinear Model)

In [11]:
# Wiener Cascade decoder, fit on the flat training covariates
# (degree presumably sets the order of the nonlinearity -- confirm in decoders.py)
model_wc = WienerCascadeDecoder(degree=5)
model_wc.fit(X_flat_train, y_train)

### 4C. XGBoost (Extreme Gradient Boosting)

In [12]:
# XGBoost decoder with max_depth=2 and num_round=1000, fit on the flat training covariates
model_xgb = XGBoostDecoder(max_depth=2, num_round=1000)
model_xgb.fit(X_flat_train, y_train)

### 4D. Dense Neural Network

In [13]:
# Dense neural network decoder, fit on the flat training covariates
# (units=[400,400] presumably means two hidden layers of 400 units -- confirm in decoders.py)
model_dnn = DenseNNDecoder(units=[400, 400], dropout=0, num_epochs=15)
model_dnn.fit(X_flat_train, y_train)

### 4E. Simple RNN

In [14]:
# Simple RNN decoder (400 units, no dropout, 10 epochs), fit on the 3-dimensional training covariates
model_rnn = SimpleRNNDecoder(units=400, dropout=0, num_epochs=10)
model_rnn.fit(X_train, y_train)

### 4F. GRU (Gated Recurrent Unit)

In [15]:
# GRU decoder (400 units, dropout of .25, 5 epochs), fit on the 3-dimensional training covariates
model_gru = GRUDecoder(units=400, dropout=.25, num_epochs=5)
model_gru.fit(X_train, y_train)

### 4G. LSTM (Long Short Term Memory)

In [16]:
# LSTM decoder (400 units, no dropout, 15 epochs), fit on the 3-dimensional training covariates
model_lstm = LSTMDecoder(units=400, dropout=0, num_epochs=15)
model_lstm.fit(X_train, y_train)

## 5. Save Model Fits

In [18]:
# with open('s1_models.pickle','wb') as f:
#     pickle.dump([model_wf,model_wc,model_xgb,model_dnn,model_rnn,model_gru,model_lstm],f)

## 6. Get Fits on Test Data

In [17]:
num_splits = 10  # Number of contiguous chunks the test set is divided into
num_bs = 200  # Number of bootstrap resamples
num_test_examples = y_test.shape[0]
num_outputs = y_test.shape[1]

# Whole number of examples per chunk. Floor division keeps this an integer (array shapes and
# indices must be integers) and guarantees that num_splits*num_examples_per_split never exceeds
# the size of the test set; leftover examples at the end of the test set are not used.
num_examples_per_split = num_test_examples // num_splits
if num_examples_per_split < 1:
    raise ValueError("The test set has fewer examples than num_splits")

# Every decoder, paired with the test covariates in the format it was fit on (flat or 3-dimensional)
decoders = [
    ('wf', model_wf, X_flat_test),
    ('wc', model_wc, X_flat_test),
    ('xgb', model_xgb, X_flat_test),
    ('dnn', model_dnn, X_flat_test),
    ('rnn', model_rnn, X_test),
    ('gru', model_gru, X_test),
    ('lstm', model_lstm, X_test),
]

# Row indices (into the test set) of each chunk
split_idxs = [np.arange(num_examples_per_split * i, num_examples_per_split * (i + 1))
              for i in range(num_splits)]

# True outputs arranged as [chunk, example within chunk, output feature]
y_test_bs = np.empty([num_splits, num_examples_per_split, num_outputs])
for i, idx in enumerate(split_idxs):
    y_test_bs[i, :, :] = y_test[idx, :]

# Predictions of every decoder, computed chunk by chunk, in the same layout as y_test_bs
y_preds = {}
for name, model, X_in in decoders:
    y_preds[name] = np.empty([num_splits, num_examples_per_split, num_outputs])
    for i, idx in enumerate(split_idxs):
        y_preds[name][i, :, :] = model.predict(X_in[idx])

# Bootstrap: draw num_splits chunks with replacement, concatenate them, and compute R2
# (averaged across the output features) of every decoder on the resampled data.
# The same chunks are drawn for the true outputs and for all decoders.
# NOTE: the random generator is not seeded here, so results vary slightly between runs.
mean_R2 = dict((name, np.empty(num_bs)) for name in y_preds)
flat_shape = [num_splits * num_examples_per_split, num_outputs]
for b in range(num_bs):
    # The number of chunks drawn follows num_splits (instead of a hard-coded 10),
    # so the reshape below stays valid when num_splits is changed
    random_idxs = np.random.randint(num_splits, size=num_splits)
    y_test_temp = np.reshape(y_test_bs[random_idxs, :, :], flat_shape)
    for name in y_preds:
        y_predicted = np.reshape(y_preds[name][random_idxs, :, :], flat_shape)
        mean_R2[name][b] = np.mean(get_R2(y_test_temp, y_predicted))

# Expose the results under the per-decoder variable names used by the cells that follow
y_preds_wf, y_preds_wc, y_preds_xgb = y_preds['wf'], y_preds['wc'], y_preds['xgb']
y_preds_dnn, y_preds_rnn = y_preds['dnn'], y_preds['rnn']
y_preds_gru, y_preds_lstm = y_preds['gru'], y_preds['lstm']

mean_R2_wf, mean_R2_wc, mean_R2_xgb = mean_R2['wf'], mean_R2['wc'], mean_R2['xgb']
mean_R2_dnn, mean_R2_rnn = mean_R2['dnn'], mean_R2['rnn']
mean_R2_gru, mean_R2_lstm = mean_R2['gru'], mean_R2['lstm']


In [18]:
# Wiener Filter: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_wf.mean())
print("bounds:", np.percentile(mean_R2_wf, [16, 84]))

('mean:', 0.67861755565417081)
('bounds:', array([ 0.66948934,  0.68716287]))


In [19]:
# Wiener Cascade: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_wc.mean())
print("bounds:", np.percentile(mean_R2_wc, [16, 84]))

('mean:', 0.69527895170777088)
('bounds:', array([ 0.68810632,  0.70236483]))


In [20]:
# XGBoost: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_xgb.mean())
print("bounds:", np.percentile(mean_R2_xgb, [16, 84]))

('mean:', 0.72125369903000536)
('bounds:', array([ 0.71432758,  0.72825621]))


In [21]:
# Dense neural network: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_dnn.mean())
print("bounds:", np.percentile(mean_R2_dnn, [16, 84]))

('mean:', 0.6680679745050645)
('bounds:', array([ 0.6588902 ,  0.67780598]))


In [22]:
# Simple RNN: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_rnn.mean())
print("bounds:", np.percentile(mean_R2_rnn, [16, 84]))

('mean:', 0.68366245562982553)
('bounds:', array([ 0.67360827,  0.69535458]))


In [23]:
# GRU: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_gru.mean())
print("bounds:", np.percentile(mean_R2_gru, [16, 84]))

('mean:', 0.75288456656838609)
('bounds:', array([ 0.74520345,  0.76139786]))


In [24]:
# LSTM: mean of the bootstrapped R2 values and their 16th/84th percentiles
print("mean:", mean_R2_lstm.mean())
print("bounds:", np.percentile(mean_R2_lstm, [16, 84]))

('mean:', 0.78468672154845309)
('bounds:', array([ 0.77954228,  0.790292  ]))


In [34]:
# Output directory (machine-specific absolute path; edit for your setup)
save_folder = '/home/jglaser2/Data/Decoding_Results/'

# Store the bootstrapped R2 arrays as a single list, in the order wf, wc, xgb, dnn, rnn, gru, lstm
bootstrap_results = [mean_R2_wf, mean_R2_wc, mean_R2_xgb, mean_R2_dnn,
                     mean_R2_rnn, mean_R2_gru, mean_R2_lstm]
with open(save_folder + 's1_results.pickle', 'wb') as results_file:
    pickle.dump(bootstrap_results, results_file)