## Import Packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

#Import function to get the covariate matrix that includes spike history from previous bins
from preprocessing_funcs import get_spikes_with_history

#Import decoder functions
import Decoder_funcs
from Decoder_funcs import get_vaf

Using Theano backend.
Using gpu device 3: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5103)


## Import Data

**Neural data should be a matrix of size "number of time bins" x "number of neurons", where each entry is the firing rate of a given neuron in a given time bin**

**The output you are decoding should be a matrix of size "number of time bins" x "number of features you are decoding"**

In [3]:
#Directory that holds the pickled example data (edit this for your own machine)
# folder='/Users/jig289/Dropbox/MATLAB/Projects/In_Progress/BMI/Processed_Data/'
folder='/home/jglaser2/Data/DecData/'

#NOTE(review): pickle.load can execute arbitrary code, so only load files from a trusted source
with open(folder+'test_data.pickle','rb') as data_file:
    loaded=pickle.load(data_file)

#The pickle holds two items: the neural data and the binned velocities
neural_data,vels_binned=loaded

## Define Covariates ##

### User Options

In [4]:
#Which time bins of neural data, relative to the output being decoded, are used as covariates
bins_before=13 #How many bins of neural data prior to the output are used for decoding
bins_current=1 #Whether to use concurrent time bin of neural data
bins_after=0 #How many bins of neural data after the output are used for decoding

#Total number of bins of neural data used for each output.
#(This previously added bins_before twice and left out bins_current.)
bins_surrounding=bins_before+bins_current+bins_after

### Get Covariates

In [5]:
#Build the covariate array that includes spike history from previous bins
X = get_spikes_with_history(neural_data, bins_before, bins_after, bins_current)

#"Flat" format for XGB and linear: keep the first axis, merge all remaining axes into one
X_flat = X.reshape(len(X), -1)

#The output to decode is the binned velocities
y = vels_binned

### Process Covariates

In [6]:
# zscore = lambda x: (x - scipy.stats.nanmean(x)) / scipy.stats.nanstd(x)
def zscore_nan(X,axis):
    """Z-score X along `axis`, ignoring NaNs when computing the mean and standard deviation.

    Parameters
    ----------
    X : array_like
        Data to normalize; may contain NaNs.
    axis : int, tuple of int, or None
        Axis (or axes) over which the mean and standard deviation are computed.

    Returns
    -------
    ndarray
        Array with the same shape as X. NaN entries remain NaN. Slices whose
        standard deviation is zero are divided by zero and come out as NaN/inf.
    """
    #keepdims=True leaves the reduced axis in place with length 1, so the statistics
    #broadcast against X for any axis (without it only axis=0 lines up correctly)
    mean=np.nanmean(X,axis=axis,keepdims=True)
    std=np.nanstd(X,axis=axis,keepdims=True)
    X_zscore=(X - mean) / std
    return X_zscore


In [7]:
#Normalize: z-score both covariate arrays along the first axis, ignoring NaNs
#NOTE(review): the statistics are computed over all time bins, i.e. before the
#training/testing/validation split below
X, X_flat = (zscore_nan(arr, axis=0) for arr in (X, X_flat))

#The output y is left unnormalized
# y=stats.zscore(y,axis=0)

## Split into training/testing/validation sets

### User Options

In [8]:
#Portion of the data, given as [start, end] fractions, that goes into each set
training_range = [0, 0.7]
testing_range = [0.7, 0.85]
valid_range = [0.85, 1]

### Split Data

In [9]:
num_examples=X.shape[0]

def _range_to_indices(frac_range):
    """Return the row indices covered by a [start, end] fractional range of the examples.

    Each range has a buffer of "bins_before" bins at the beginning and "bins_after"
    bins at the end, so that the different sets don't include overlapping neural data.
    """
    #Built-in int replaces np.int, an alias deprecated in NumPy 1.20 and removed in NumPy 1.24
    start=int(np.round(frac_range[0]*num_examples))+bins_before
    end=int(np.round(frac_range[1]*num_examples))-bins_after
    return np.arange(start,end)

training_set=_range_to_indices(training_range)
testing_set=_range_to_indices(testing_range)
valid_set=_range_to_indices(valid_range)

#Divide covariates and outputs into the three sets
X_train=X[training_set,:,:]
X_flat_train=X_flat[training_set,:]
y_train=y[training_set,:]

X_test=X[testing_set,:,:]
X_flat_test=X_flat[testing_set,:]
y_test=y[testing_set,:]

X_valid=X[valid_set,:,:]
X_flat_valid=X_flat[valid_set,:]
y_valid=y[valid_set,:]

## Run Decoders

### Wiener Filter (Linear Regression)

In [24]:
#Build the linear regression model from the training set
model_regr = Decoder_funcs.lin_reg_model(X_flat_train, y_train)
#Get predictions on the validation set
y_valid_pred_lin = model_regr.predict(X_flat_valid)

### XGBoost (Extreme Gradient Boosting)

In [10]:
#Build the XGBoost model from the training set
model_xgb = Decoder_funcs.xgb_model(X_flat_train, y_train, max_depth=3, num_round=200)
#Get predictions on the validation set
y_valid_pred_xgb = Decoder_funcs.xgb_predict(model_xgb, X_flat_valid)

### Simple RNN

In [18]:
#Build the simple RNN model from the training set (uses the non-flattened covariates)
model_rnn = Decoder_funcs.SimpleRNN_model(X_train, y_train, units=400, dropout=0.25, num_epochs=10)
#Get predictions on the validation set
y_valid_pred_rnn = model_rnn.predict(X_valid)

### GRU (Gated Recurrent Unit)

In [None]:
#Build the GRU model from the training set (uses the non-flattened covariates)
model_gru = Decoder_funcs.GRU_model(X_train, y_train, units=400, dropout=0, num_epochs=10)
#Get predictions on the validation set
y_valid_pred_gru = model_gru.predict(X_valid)

### LSTM (Long Short Term Memory)

In [13]:
#Build the LSTM model from the training set (uses the non-flattened covariates)
model_lstm = Decoder_funcs.LSTM_model(X_train, y_train, units=400, dropout=.25, num_epochs=10)
#Get predictions on the validation set
y_valid_pred_lstm = model_lstm.predict(X_valid)

## Get Metrics

In [25]:
#Score the Wiener filter predictions against the true validation outputs
vafs_lin = get_vaf(y_valid, y_valid_pred_lin)
vafs_lin

[0.75176754708627846, 0.75100935332779484]

In [12]:
#Score the XGBoost predictions against the true validation outputs
vafs_xgb = get_vaf(y_valid, y_valid_pred_xgb)
vafs_xgb

[0.759985513205789, 0.75694730773341012]

In [19]:
#Score the simple RNN predictions against the true validation outputs
vafs_rnn = get_vaf(y_valid, y_valid_pred_rnn)
vafs_rnn

[0.81989414021421436, 0.7690404096532123]

In [None]:
#Score the GRU predictions against the true validation outputs.
#(y_test_pred_gru is never computed in this notebook; the GRU cell produces
#y_valid_pred_gru, matching the other decoders which are all scored on the validation set.)
vafs_gru=get_vaf(y_valid,y_valid_pred_gru)
vafs_gru

In [14]:
#Score the LSTM predictions against the true validation outputs
vafs_lstm = get_vaf(y_valid, y_valid_pred_lstm)
vafs_lstm

[0.87430652141381116, 0.8300965085769989]

## Make Plots

In [None]:
#As an example, plot the first 1000 validation-set values of the x velocity (column index 0),
#both true and predicted with the Wiener filter.
#(The validation set is used because y_test_pred_lin is never computed in this notebook;
#the Wiener filter cell produces y_valid_pred_lin.)
fig_x_lin,ax_x_lin=plt.subplots()
ax_x_lin.plot(y_valid[0:1000,0],'b',label='True')
ax_x_lin.plot(y_valid_pred_lin[0:1000,0],'r',label='Predicted (Wiener filter)')
ax_x_lin.set_xlabel('Time bin')
ax_x_lin.set_ylabel('x velocity')
ax_x_lin.legend();
#Save figure
# fig_x_lin.savefig('x_velocity_decoding.eps')

In [11]:
from sklearn import linear_model

In [93]:
def wiener_casc(X_flat_train,y_train,X_flat_test,y_test=None,deg=3):
    """Wiener cascade: per output, a linear regression followed by a polynomial fit.

    For each output column, a linear regression is fit on the training set, then a
    polynomial of degree `deg` is fit that maps the linear predictions on the
    training set to the true training outputs. Test predictions are the polynomial
    applied to the linear predictions on the test set.

    Parameters
    ----------
    X_flat_train : 2-D array
        Training covariates, one row per example.
    y_train : 2-D array
        Training outputs, one column per output.
    X_flat_test : 2-D array
        Covariates to predict from.
    y_test : optional
        Unused; kept so existing calls keep working. It used to supply only the
        shape of the returned array, which is now taken from X_flat_test and y_train.
    deg : int
        Degree of the polynomial nonlinearity.

    Returns
    -------
    ndarray
        Predictions with one row per row of X_flat_test and one column per output.
    """
    num_outputs=y_train.shape[1]
    y_test_pred_ln=np.empty((np.shape(X_flat_test)[0],num_outputs))
    for i in range(num_outputs):
        #Linear stage
        regr = linear_model.LinearRegression()
        regr.fit(X_flat_train, y_train[:,i]) #Train
        y_train_pred_lin=regr.predict(X_flat_train)
        #Nonlinear stage: polynomial from linear predictions to true outputs
        p=np.polyfit(y_train_pred_lin,y_train[:,i],deg)
        #Predictions on test set
        y_test_pred_lin=regr.predict(X_flat_test)
        y_test_pred_ln[:,i]=np.polyval(p,y_test_pred_lin)
    return y_test_pred_ln

In [99]:
#Wiener cascade predictions on the validation set, using a degree-3 polynomial
y_valid_pred_wc = wiener_casc(X_flat_train, y_train, X_flat_valid, y_valid, deg=3)

In [101]:
#Score the Wiener cascade predictions against the true validation outputs
vafs_wc = get_vaf(y_valid, y_valid_pred_wc)
vafs_wc

[0.7584608604036065, 0.76774970824995847]