## 1. Import Packages

Below, we import both standard packages and functions from the accompanying .py files.

In [1]:
#Import standard packages
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

#Import metrics
from metrics import get_R2
from metrics import get_rho

#Import decoder functions
from decoders import KalmanFilterDecoder

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 3: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5103)


## 2. Load Data

The data that we load is in the format described below. We have another example script, "neural_preprocessing.py", that may be helpful for putting the data in this format.

Neural data should be a matrix of size "number of time bins" x "number of neurons", where each entry is the firing rate of a given neuron in a given time bin

The output you are decoding should be a matrix of size "number of time bins" x "number of features you are decoding"

In [76]:
#Directory holding the preprocessed data (earlier locations kept for reference)
# folder='/Users/jig289/Dropbox/MATLAB/Projects/In_Progress/BMI/Processed_Data/'
# folder='/Users/jig289/Dropbox/Grad_School/Research/Projects/In_Progress/Decoding/DataFiles/'
folder='/home/jglaser2/Data/DecData/'
data_path=folder+'hc_test_data.pickle'

#The pickle unpacks into two objects: the neural data and the binned positions
#NOTE: pickle.load can execute arbitrary code, so only load files you trust
with open(data_path,'rb') as data_file:
    neural_data,pos_binned=pickle.load(data_file)

## 3. Preprocess Data

### 3A. User Inputs
The user can define what time period to use spikes from (with respect to the output).

In [88]:
#Offset, in time bins, between the spikes and the output they are paired with
lag=4 #What time bin of spikes should be used relative to the output
#(lag=-1 means use the spikes 1 bin before the output;
# a positive value such as lag=4 means use the spikes 4 bins after the output)

#-2 for m1, 0 for s1
#(presumably the lags used for other datasets -- TODO confirm)

### 3B. Format Covariates

#### Format Input

In [89]:
#Drop neurons whose summed activity over all time bins is too low
nd_sum=np.nansum(neural_data,axis=0) #Per-neuron total over time, NaNs ignored
too_few_spikes=np.less(nd_sum,200) #For WF and WC
# (a threshold of 100 was the commented-out alternative)
rmv_nrn=np.where(too_few_spikes)
neural_data=np.delete(neural_data,rmv_nrn,axis=1)

#The Kalman filter input is just the (time bins x neurons) firing-rate matrix
X_kf=neural_data

#### Format Output

In [90]:
#The Kalman filter decodes position, velocity, and acceleration together.
#Only the position fit matters for this dataset, but including the
#derivatives as outputs helps performance.

def _diff_pad_last(x):
    """Return the first difference of x along axis 0, with the last difference
    repeated once so the result has the same number of rows as x."""
    d=np.diff(x,axis=0)
    return np.vstack((d,d[-1:,:]))

#Velocity is the bin-to-bin change in position; acceleration the change in velocity
vels_binned=_diff_pad_last(pos_binned)
acc_binned=_diff_pad_last(vels_binned)

#Columns are ordered: position, then velocity, then acceleration
y_kf=np.hstack((pos_binned,vels_binned,acc_binned))

### 3C. Process Covariates

#### Normalization and zero-centering

In [91]:
#Z-scoring function that works with Nans:
def zscore_nan(X,axis):
    """Z-score X along `axis`, ignoring NaNs when computing the mean and std.

    Parameters
    ----------
    X : array-like
        Data to normalize.
    axis : int
        Axis along which the mean and standard deviation are computed.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as X. NaN entries of X stay NaN. A slice with
        zero standard deviation yields NaN/inf (division by zero), as before.
    """
    #keepdims=True keeps the reduced axis as length 1, so the statistics
    #broadcast against X for any axis (without it only axis=0 lines up)
    X_mean=np.nanmean(X,axis=axis,keepdims=True)
    X_std=np.nanstd(X,axis=axis,keepdims=True)
    X_zscore=(X - X_mean) / X_std
    return X_zscore

In [92]:
#Inputs: z-score each column (neuron) across time, ignoring NaNs
X_kf=zscore_nan(X_kf,0)

#Outputs: subtract each column's NaN-ignoring mean (centered, not rescaled)
y_kf_mean=np.nanmean(y_kf,0)
y_kf=np.subtract(y_kf,y_kf_mean)

In [93]:
#Drop time bins whose output is missing (NaN in either of the first two output columns)
missing_output=np.logical_or(np.isnan(y_kf[:,0]),np.isnan(y_kf[:,1]))
rmv_time=np.where(missing_output)
#Delete the same rows from inputs and outputs so they stay aligned
X_kf=np.delete(X_kf,rmv_time,axis=0)
y_kf=np.delete(y_kf,rmv_time,axis=0)

#### Take lag into account

In [94]:
#Number of time bins before re-alignment
num_examples=X_kf.shape[0]

#Shift inputs and outputs against each other by |lag| bins, trimming the
#bins at the ends that no longer have a partner. lag==0 leaves both unchanged.
if lag<0:
    #Spikes come before the output: drop the last bins of X and the first bins of y
    X_kf=X_kf[:num_examples+lag,:]
    y_kf=y_kf[-lag:,:]
elif lag>0:
    #Spikes come after the output: drop the first bins of X and the last bins of y
    X_kf=X_kf[lag:num_examples,:]
    y_kf=y_kf[:num_examples-lag,:]

### 3D. Split into training/testing/validation sets
Note that parameters should be set using a separate validation set. 
Then, the goodness of fit should be tested on a testing set (separate from the training and validation sets).

### User Options

In [95]:
#Set what part of data should be part of the training/testing/validation sets
#Each range is [start, end] given as a fraction of the total number of time bins
#Only training and testing ranges are defined here (no validation range),
#and the bins after 0.8 are not assigned to either set
training_range=[0, 0.4]
testing_range=[0.4, .8]

#Alternative split (disabled)
# training_range=[0, 0.32]
# testing_range=[0.32, .4]

### Split Data: For KF

In [96]:
#Number of examples after taking into account bins removed for lag alignment
num_examples_kf=X_kf.shape[0]

def _range_to_indices(frac_range,num_examples):
    """Convert a [start, end] fractional range into an array of row indices.

    The fractions are scaled by num_examples and rounded. One bin is then
    trimmed from each end, so adjacent ranges don't include overlapping data.
    """
    #Builtin int is used instead of np.int, which was deprecated in NumPy 1.20
    #and removed in NumPy 1.24 (it was only ever an alias for int)
    start=int(np.round(frac_range[0]*num_examples))+1
    stop=int(np.round(frac_range[1]*num_examples))-1
    return np.arange(start,stop)

#Note that each range has a buffer of 1 bin at the beginning and end
#This makes it so that the different sets don't include overlapping data
training_set=_range_to_indices(training_range,num_examples_kf)
testing_set=_range_to_indices(testing_range,num_examples_kf)

#Get training data
X_kf_train=X_kf[training_set,:]
y_kf_train=y_kf[training_set,:]

#Get testing data
X_kf_test=X_kf[testing_set,:]
y_kf_test=y_kf[testing_set,:]

## 4. Train Decoders


### Kalman Filter

In [97]:
#Declare model (KalmanFilterDecoder is imported from the accompanying decoders.py)
model_kf=KalmanFilterDecoder()

#Fit model on the training split only
#Inputs are the z-scored firing rates; outputs are the zero-centered
#position/velocity/acceleration columns built above
model_kf.fit(X_kf_train,y_kf_train)

## 5. Get Fits on Test Data

In [98]:
# y_pred=model_kf.predict(X_kf_test,y_kf_test)
# r2=get_R2(y_kf_test,y_pred)
# print(r2[0:2])



[ 0.08081879  0.32772042]


In [103]:
# 2: -0.21890443  0.47029448
# 3: -0.14556268  0.46339536
# 4: -0.10358434  0.45245483

In [100]:
#Bootstrap the test-set R2:
# 1) cut the test set into num_splits contiguous chunks and predict each chunk once
# 2) num_bs times, resample whole chunks with replacement and score the resampled data
num_splits=10 #Number of contiguous chunks the test set is cut into
num_bs=200 #Number of bootstrap resamples
num_test_examples=y_kf_test.shape[0]
num_outputs=y_kf_test.shape[1]

#Integer floor division: array shapes and indices must be ints (np.round returns a float),
#and rounding up would make the last chunk run past the end of the test set
num_examples_per_split=num_test_examples//num_splits
if num_examples_per_split<1:
    raise ValueError('Test set has %d examples, too few for %d splits' % (num_test_examples,num_splits))

#Initialize
mean_R2_kf=np.empty(num_bs)
y_preds_kf=np.empty([num_splits,num_examples_per_split,num_outputs])
y_test_bs=np.empty([num_splits,num_examples_per_split,num_outputs])

for i in range(num_splits):
    idx=np.arange(num_examples_per_split*i,num_examples_per_split*(i+1))
    X_kf_test_temp=X_kf_test[idx,:]
    y_kf_test_temp=y_kf_test[idx,:]

    #True outputs for this chunk
    y_test_bs[i,:,:]=y_kf_test_temp

    #Kalman Filter
    #predict is given the true outputs as its second argument, as in the original call
    #(presumably to initialize the filter state -- confirm in decoders.py)
    y_preds_kf[i,:,:]=model_kf.predict(X_kf_test_temp,y_kf_test_temp)

#NOTE: the random generator is not seeded, so the bootstrap values differ between runs
for i in range(num_bs):

    #Draw num_splits chunk indices with replacement (was hard-coded to 10 draws)
    random_idxs=np.random.randint(0,num_splits,size=num_splits)

    #Stack the drawn chunks back into (examples x outputs) matrices
    y_test_temp=np.reshape(y_test_bs[random_idxs,:,:],[-1,num_outputs])
    y_predicted_kf=np.reshape(y_preds_kf[random_idxs,:,:],[-1,num_outputs])

    #Average R2 over the first two output columns (the position columns of y_kf)
    mean_R2_kf[i]=np.mean(get_R2(y_test_temp,y_predicted_kf)[0:2])


In [101]:
#Average of the bootstrapped position R2 values
mean_R2_kf.mean()

0.20727130425533935

In [102]:
#Write the bootstrapped R2 values to disk as a one-element list
save_folder='/home/jglaser2/Data/Decoding_Results/'
results_path=save_folder+'hc_results_kf.pickle'
with open(results_path,'wb') as results_file:
    pickle.dump([mean_R2_kf],results_file)