## Import Packages

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

#Import function to get the covariate matrix that includes spike history from previous bins
from preprocessing_funcs import get_spikes_with_history

#Import decoder functions
from Decoder_funcs import lin_reg_model
from Decoder_funcs import get_vaf

## Import Data

**Neural data should be a matrix of size "number of time bins" x "number of neurons", where each entry is the firing rate of a given neuron in a given time bin**

**The output you are decoding should be a matrix of size "number of time bins" x "number of features you are decoding"**

In [21]:
# folder='/Users/jig289/Dropbox/MATLAB/Projects/In_Progress/BMI/Processed_Data/'

# Load the example dataset from the current working directory.
# The pickled object is unpacked into two names: the neural data and the binned velocities.
# NOTE(review): pickle.load can execute arbitrary code - only open files from a trusted source.
with open('hc_test_data.pickle','rb') as f:
    neural_data,vels_binned=pickle.load(f)

## Define Covariates ##

### User Options

In [31]:
# Decoding window, expressed in time bins relative to the output bin
bins_before = 5  # number of neural-data bins preceding the output that are used for decoding
bins_after = 5  # number of neural-data bins from the output bin onward (inclusive) that are used for decoding
bins_surrounding = bins_after + bins_before  # total number of bins in the window

### Get Covariates

In [32]:
#Function to get the covariate matrix that includes spike history from previous bins
X=get_spikes_with_history(neural_data,bins_before,bins_after)

#Remove neurons with too few spikes
#Sum over axes 0 and 1 (ignoring NaNs), leaving one total per entry of the last axis (neurons)
X_sum=np.nansum(np.nansum(X,axis=0),axis=0)
#Dividing by X.shape[1] presumably undoes the repetition of each bin across the history window - TODO confirm
# rmv_temp=np.where(X_sum/X.shape[1]<20)
rmv_temp=np.where(X_sum/X.shape[1]<100)
X=np.delete(X,rmv_temp,2)

#Put in "flat" format for XGB and linear
#(axes 1 and 2 are merged into a single feature axis)
X_flat=X.reshape(X.shape[0],(X.shape[1]*X.shape[2]))

#Set decoding output
y=vels_binned

### Process Covariates

In [33]:
# zscore = lambda x: (x - scipy.stats.nanmean(x)) / scipy.stats.nanstd(x)
def zscore_nan(X,axis):
    """Z-score X along `axis`, ignoring NaN entries when computing the statistics.

    Parameters
    ----------
    X : array_like
        Data to normalize; may contain NaNs.
    axis : int
        Axis along which the mean and standard deviation are computed.

    Returns
    -------
    ndarray
        Array of the same shape as X. NaN inputs stay NaN in the output.

    Notes
    -----
    scipy.stats.nanmean / nanstd no longer exist in SciPy, so the NumPy
    equivalents are used. ddof=1 reproduces the unbiased (N-1) estimator that
    scipy.stats.nanstd used by default. A slice with zero standard deviation
    still yields non-finite values, as before.
    """
    #keepdims=True keeps the reduced axis so the statistics broadcast against X for any axis
    X_mean=np.nanmean(X,axis=axis,keepdims=True)
    X_std=np.nanstd(X,axis=axis,ddof=1,keepdims=True)
    X_zscore=(X - X_mean) / X_std
    return X_zscore
    

In [34]:
#Normalize

# X=stats.zscore(X,axis=0)
# X_flat=stats.zscore(X_flat,axis=0)

#Z-score along axis 0 with zscore_nan (instead of stats.zscore above).
#Both names are rebound to their normalized versions, so the un-normalized arrays are no longer available afterwards.
X=zscore_nan(X,axis=0)
X_flat=zscore_nan(X_flat,axis=0)

#The output y is left un-normalized (the line below is disabled)
# y=stats.zscore(y,axis=0)

In [35]:
#Remove time bins with no output (y value)
#A time bin is dropped when ANY output column is NaN, so this works for any number of decoded features
#(previously only columns 0 and 1 were checked)
rmv_time=np.where(np.isnan(y).any(axis=1))
#Delete the same rows from every array so X, X_flat and y stay aligned
X=np.delete(X,rmv_time,0)
X_flat=np.delete(X_flat,rmv_time,0)
y=np.delete(y,rmv_time,0)

## Split into training/testing/validation sets

### User Options

In [36]:
#Fractions of the data, given as [start, end], that go to the training/testing/validation sets
training_range = [0, 0.5]
testing_range = [0.5, 0.7]
valid_range = [0.7, 1]

### Split Data

In [37]:
num_examples=X.shape[0]

def _range_to_indices(frac_range,num_examples,bins_before,bins_after):
    """Convert a [start, end] fraction-of-data range into an array of time-bin indices.

    The start is moved forward by `bins_before` and the end moved back by
    `bins_after`, which leaves a buffer between neighbouring sets.
    """
    #Built-in int replaces np.int, which was removed from NumPy
    start=int(np.round(frac_range[0]*num_examples))+bins_before
    stop=int(np.round(frac_range[1]*num_examples))-bins_after
    return np.arange(start,stop)

#Note that each range has a buffer of "bins_before" bins at the beginning, and "bins_after" bins at the end
#This makes it so that the different sets don't include overlapping neural data
training_set=_range_to_indices(training_range,num_examples,bins_before,bins_after)
testing_set=_range_to_indices(testing_range,num_examples,bins_before,bins_after)
valid_set=_range_to_indices(valid_range,num_examples,bins_before,bins_after)

#Divide covariates
#(the buffers are already built into the index sets above, so plain indexing is enough here)
X_train=X[training_set,:,:]
X_flat_train=X_flat[training_set,:]
y_train=y[training_set,:]

X_test=X[testing_set,:,:]
X_flat_test=X_flat[testing_set,:]
y_test=y[testing_set,:]

X_valid=X[valid_set,:,:]
X_flat_valid=X_flat[valid_set,:]
y_valid=y[valid_set,:]

## Run Decoders

### Wiener Filter (Linear Regression)

In [38]:
#Get predictions
#lin_reg_model (imported from Decoder_funcs) receives the flat training covariates, the training
#outputs and the flat test covariates; its return value is used below as the test-set predictions
y_test_pred_lin=lin_reg_model(X_flat_train,y_train,X_flat_test)

### XGBoost (Extreme Gradient Boosting)

In [None]:
#Get predictions
#NOTE(review): xgb_model is not imported in the import cell; the only definition in this notebook
#is in a later cell, and that version returns squared correlations rather than predictions -
#confirm which implementation this call is meant to use before running top to bottom
y_test_pred_xgb=xgb_model(X_flat_train,y_train,X_flat_test,max_depth=3,num_round=300)

### Simple RNN

In [None]:
#Get predictions
#Uses the 3-D covariates (X_train / X_test) rather than the flattened ones
#NOTE(review): SimpleRNN_model is not imported in the import cell of this notebook - confirm where it is defined
y_test_pred_rnn=SimpleRNN_model(X_train,y_train,X_test,units=400,dropout=0,num_epochs=10)

### GRU (Gated Recurrent Unit)

In [None]:
#Get predictions
#Uses the 3-D covariates (X_train / X_test) rather than the flattened ones
#NOTE(review): GRU_model is not imported in the import cell of this notebook - confirm where it is defined
y_test_pred_gru=GRU_model(X_train,y_train,X_test,units=400,dropout=0,num_epochs=10)

### LSTM (Long Short Term Memory)

In [None]:
#Get predictions
#Stored as y_test_pred_lstm (not y_test_pred_gru) so the GRU predictions are not overwritten
#and the LSTM metrics cell finds the variable it reads
#NOTE(review): LSTM_model is not imported in the import cell of this notebook - confirm where it is defined
y_test_pred_lstm=LSTM_model(X_train,y_train,X_test,units=400,dropout=0,num_epochs=10)

## Get Metrics

In [39]:
#Score the Wiener filter predictions against the true test outputs with get_vaf (from Decoder_funcs)
#"VAF" is presumably "variance accounted for" - confirm in Decoder_funcs
vafs_lin=get_vaf(y_test,y_test_pred_lin)
#Display the result as the cell output
vafs_lin

[0.27092504169324583, 0.3055296443207548]

In [None]:
#Score the XGBoost predictions against the true test outputs
vafs_xgb=get_vaf(y_test,y_test_pred_xgb)

In [None]:
#Score the simple RNN predictions against the true test outputs
vafs_rnn=get_vaf(y_test,y_test_pred_rnn)

In [None]:
#Score the GRU predictions against the true test outputs
vafs_gru=get_vaf(y_test,y_test_pred_gru)

In [None]:
#Score the LSTM predictions against the true test outputs
vafs_lstm=get_vaf(y_test,y_test_pred_lstm)

## Make Plots

In [None]:
#Example figure: the first 1000 test values of the x velocity (column index 0),
#true values overlaid with the Wiener filter predictions
fig_x_lin = plt.figure()
plt.plot(y_test[:1000, 0], 'b')  #true values in blue
plt.plot(y_test_pred_lin[:1000, 0], 'r')  #predicted values in red
#Save figure
# fig_x_lin.savefig('x_velocity_decoding.eps')

**XGBoost**

In [None]:
def xgb_model(X_train=X_flat_train,y_train=y_train,X_test=X_flat_test,y_test=y_test,max_depth=3,num_round=300):
    """Train one XGBoost regressor per output column and score it on the test set.

    Parameters
    ----------
    X_train, y_train : arrays
        Training covariates (2-D) and training outputs (2-D, one column per output).
    X_test, y_test : arrays
        Test covariates and the true test outputs used for scoring.
    max_depth : int
        Maximum tree depth passed to XGBoost.
    num_round : int
        Number of boosting rounds.

    Returns
    -------
    ndarray
        One squared Pearson correlation (between true and predicted test
        values) per output column.

    Notes
    -----
    The default arguments are bound to the notebook globals at the moment this
    cell is run, so the data-splitting cells must be executed first, and later
    changes to those globals are not picked up unless this cell is re-run.
    NOTE(review): `xgb` is not imported anywhere in the visible notebook; it
    presumably refers to the xgboost package - confirm and import it.
    """
    #Set parameters
    param = {'objective': "reg:linear", #for linear output
        'eval_metric': "rmse", #regression metric (logloss is a classification metric)
        'max_depth': max_depth, #this is the only parameter we have set, it's one of the ways of regularizing
        'seed': 2925, #for reproducibility
        'silent': 1}
    param['nthread'] = -1 #with -1 it will use all available threads

    #The test covariates are shared by all outputs, so build this matrix once
    dtest = xgb.DMatrix(X_test)

    #One model per output column, however many columns y_train has
    num_outputs=y_train.shape[1]
    r2=np.zeros(num_outputs)
    for y_idx in range(num_outputs):

        dtrain = xgb.DMatrix(X_train, label=y_train[:,y_idx])
        bst = xgb.train(param, dtrain, num_round)
        # make prediction
        y_test_pred = bst.predict(dtest)
        #Squared correlation coefficient between true and predicted values
        r2[y_idx]=(np.corrcoef(y_test[:,y_idx],y_test_pred)[0,1])**2

    return r2


In [None]:
#Run xgb_model with its default arguments and keep the per-output squared correlations
r2_xgb=xgb_model()
#Print the mean across outputs, then display the individual values as the cell output
print(np.mean(r2_xgb))
r2_xgb