# Testing sensitivity of results to changes in hyperparameters

We run this test on a feedforward neural network, varying the number of units per layer and the amount of dropout.

## User Options

Define what folder you're saving to

In [1]:
# Folder the results pickle is written to. The file name is appended by plain
# string concatenation, so the value must end with a path separator
# (use '' to write into the current working directory).
save_folder = '/home/jglaser/Files/Neural_Decoding/Results/'

Define what folder you're loading from

In [2]:
# Folder the dataset pickles are read from. The file name is appended by plain
# string concatenation, so the value must end with a path separator
# (use '' to read from the current working directory).
load_folder = '/home/jglaser/Data/DecData/'

Define which dataset you're using

In [3]:
# Dataset to analyse; the cells below recognise 's1', 'm1', and 'hc'.
dataset = 'm1'

Define how much training data to use

In [4]:
# Amount of training data: 'full' or 'limited'.
# Only the training-set size depends on this choice.
data_amt = 'limited'

## 1. Import Packages

We import both standard packages, and functions from the accompanying .py files

In [5]:
#Import standard packages
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle
import time
import sys

#Add the main folder to the path, so we have access to the files there.
#Note that if your working directory is not the Paper_code folder, you may need to manually specify the path to the main folder. For example: sys.path.append('/home/jglaser/GitProj/Neural_Decoding')
sys.path.append('..') 

#Import function to get the covariate matrix that includes spike history from previous bins
from preprocessing_funcs import get_spikes_with_history

#Import metrics
from metrics import get_R2
from metrics import get_rho

#Import decoder functions
from decoders import WienerCascadeDecoder
from decoders import WienerFilterDecoder
from decoders import DenseNNDecoder
from decoders import SimpleRNNDecoder
from decoders import GRUDecoder
from decoders import LSTMDecoder
from decoders import XGBoostDecoder
from decoders import SVRDecoder

#Import Bayesian Optimization package
from bayes_opt import BayesianOptimization

  from pandas.core import datetools
Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX TITAN X (CNMeM is enabled with initial size: 45.0% of memory, cuDNN Mixed dnn version. The header is from one version, but we link with a different version (5103, 5110))
  from ._conv import register_converters as _register_converters


In [6]:
#Silence DeprecationWarning messages from this point onwards
import warnings

warnings.filterwarnings(action="ignore", category=DeprecationWarning)

## 2. Load Data

The data that we load is in the format described below. Another example script, "neural_preprocessing.py", may be helpful for putting your data into this format.

Neural data should be a matrix of size "number of time bins" x "number of neurons", where each entry is the firing rate of a given neuron in a given time bin

The output you are decoding should be a matrix of size "number of time bins" x "number of features you are decoding"

In [7]:
#Load the binned data for the selected dataset from
#    <load_folder><dataset>_test_data.pickle
#
#NOTE: pickle.load can execute arbitrary code, so only load files you trust.
#NOTE(review): the original cell kept a commented-out variant that passed
#encoding='latin1' to pickle.load -- presumably needed when reading these
#files under Python 3; confirm for your environment.

#Fail fast on an unrecognised choice instead of silently skipping the load
#(which would leave any data from an earlier run in the kernel untouched)
if dataset not in ('s1','m1','hc'):
    raise ValueError("Unknown dataset '%s'; expected 's1', 'm1', or 'hc'" % dataset)

with open(load_folder+dataset+'_test_data.pickle','rb') as f:
    loaded=pickle.load(f)

if dataset=='hc':
    #The hc file holds two arrays
    neural_data,pos_binned=loaded
else:
    #The s1 and m1 files hold four arrays
    neural_data,vels_binned,pos_binned,acc_binned=loaded

## 3. Preprocess Data

### 3A. User Inputs
The user can define what time period to use spikes from (with respect to the output).

In [8]:
#Per-dataset window of neural data used for decoding, as
#(bins_before, bins_current, bins_after):
#  bins_before  - how many bins of neural data prior to the output are used
#  bins_current - whether to use the concurrent time bin of neural data
#  bins_after   - how many bins of neural data after the output are used
#                 NOTE(review): the original comment read "after (and including)
#                 the output", yet the concurrent bin has its own flag -- confirm
#                 against get_spikes_with_history
bin_settings={
    's1':(6,1,6),
    'm1':(13,1,0),
    'hc':(4,1,5),
}

if dataset in bin_settings:
    bins_before,bins_current,bins_after=bin_settings[dataset]

### 3B. Format Covariates

#### Format Input Covariates

In [9]:
#HC dataset only: drop the neurons (columns of neural_data) with too few spikes
if dataset=='hc':
    min_total=100 #Columns whose total falls below this are removed
    nd_sum=np.nansum(neural_data,axis=0) #Per-column total, ignoring NaNs
    too_few=nd_sum<min_total
    rmv_nrn=np.where(too_few) #Indices of the columns to remove
    neural_data=np.delete(neural_data,rmv_nrn,axis=1)

In [10]:
#Covariate array that includes spike history from the surrounding bins.
#This 3-D form is the input format for the recurrent networks (SimpleRNN, GRU, LSTM).
X=get_spikes_with_history(neural_data,bins_before,bins_after,bins_current)

#"Flat" form for the Wiener Filter, Wiener Cascade, XGBoost, and Dense Neural Network:
#axes 1 and 2 of X are merged, so each "neuron / time" pair is a single feature
flat_width=X.shape[1]*X.shape[2]
X_flat=X.reshape((X.shape[0],flat_width))

#### Format Output Covariates

In [11]:
#Choose the variable being decoded: velocities for s1/m1, positions for hc
if dataset in ('s1','m1'):
    y=vels_binned
elif dataset=='hc':
    y=pos_binned

#### In HC dataset, remove time bins with no output (y value)

In [12]:
if dataset=='hc':
    #Time bins where either of the first two output columns is NaN
    missing_output=np.isnan(y[:,0]) | np.isnan(y[:,1])
    rmv_time=np.where(missing_output)

    #Drop those time bins from both input formats and from the output
    X=np.delete(X,rmv_time,axis=0)
    X_flat=np.delete(X_flat,rmv_time,axis=0)
    y=np.delete(y,rmv_time,axis=0)

### 3C. Define training/testing/validation sets
We use the same testing/validation sets used in Fig. 6. The training size varies depending on the user choice of 'full' or 'limited'

In [13]:
#Define the testing/validation/training ranges for the selected dataset.
#Set sizes are written as durations (see the per-line comments) and divided by
#the bin width dt to get a number of time bins.
#The built-in int() is used for the end index: np.int was only an alias for it,
#was deprecated in NumPy 1.20 and removed in NumPy 1.24.

#Width of one time bin
if dataset=='s1' or dataset=='m1':
    dt=.05
elif dataset=='hc':
    dt=.2

if dataset=='hc':
    #Size of sets
    test_size=int(450/dt) #7.5 min
    valid_size=test_size #validation size is the same as the test size
    if data_amt=='full':
        train_size=int(2250/dt) #37.5 minutes
    if data_amt=='limited':
        train_size=int(900/dt) #15 minutes

    #End of test set: 80% of the way through the time bins
    end_idx=int(X.shape[0]*.8)

elif dataset=='s1':
    #Size of sets
    test_size=int(300/dt) #5 min
    valid_size=test_size #validation size is the same as the test size
    if data_amt=='full':
        train_size=int(1200/dt) #20 min
    if data_amt=='limited':
        train_size=int(60/dt) #1 min

    #End of test set: 90% of the way through the time bins
    end_idx=int(X.shape[0]*.9)

elif dataset=='m1':
    #Size of sets
    test_size=int(300/dt) #5 min
    valid_size=test_size #validation size is the same as the test size
    if data_amt=='full':
        train_size=int(600/dt) #10 min
    if data_amt=='limited':
        train_size=int(60/dt) #1 min

    #End of test set: the last time bin
    end_idx=int(X.shape[0]*1)

#End of training set (the same formula applies to every dataset)
tr_end_idx=end_idx-test_size-valid_size

#Range of sets
testing_range=[end_idx-test_size,end_idx] #Testing set (length of test_size, goes up until end_idx)
valid_range=[end_idx-test_size-valid_size,end_idx-test_size] #Validation set (length of valid_size, goes up until beginning of test set)
training_range=[tr_end_idx-train_size,tr_end_idx] #Training set (length of train_size, goes up until beginning of validation set)


Extract the training, testing, and validation sets

In [14]:
#Each range is trimmed by "bins_before" bins at its beginning and "bins_after" bins at its end,
#so that the different sets don't include overlapping neural data

#Indices of the time bins belonging to each set
training_set=np.arange(training_range[0]+bins_before,training_range[1]-bins_after)
valid_set=np.arange(valid_range[0]+bins_before,valid_range[1]-bins_after)
testing_set=np.arange(testing_range[0]+bins_before,testing_range[1]-bins_after)

#Inputs in the 3-D format
X_train=X[training_set,:,:]
X_valid=X[valid_set,:,:]
X_test=X[testing_set,:,:]

#Inputs in the flat format
X_flat_train=X_flat[training_set,:]
X_flat_valid=X_flat[valid_set,:]
X_flat_test=X_flat[testing_set,:]

#Outputs
y_train=y[training_set,:]
y_valid=y[valid_set,:]
y_test=y[testing_set,:]

### 3D. Preprocess Data

In [15]:
#All normalization statistics are computed on the training set only,
#and then applied to the training, validation, and testing sets alike.
#NOTE(review): a feature whose training-set standard deviation is 0 would cause a division by zero here.

#Z-score "X" inputs
X_train_mean=np.nanmean(X_train,axis=0)
X_train_std=np.nanstd(X_train,axis=0)
X_train,X_valid,X_test=[(X_part-X_train_mean)/X_train_std for X_part in (X_train,X_valid,X_test)]

#Z-score "X_flat" inputs
X_flat_train_mean=np.nanmean(X_flat_train,axis=0)
X_flat_train_std=np.nanstd(X_flat_train,axis=0)
X_flat_train,X_flat_valid,X_flat_test=[(X_part-X_flat_train_mean)/X_flat_train_std for X_part in (X_flat_train,X_flat_valid,X_flat_test)]

#Zero-center outputs
y_train_mean=np.nanmean(y_train,axis=0)
y_train,y_valid,y_test=[y_part-y_train_mean for y_part in (y_train,y_valid,y_test)]

## 4. Run Decoder

Set range of hyperparameters we're testing in the feedforward neural network

In [16]:
#Number of training epochs, held fixed across the whole grid
n_epochs=10

#Number of units in each layer: 100, 200, ..., 1000
num_unit_set=list(range(100,1001,100))

#Amount of dropout
frac_dropout_set=[0,0.1,0.2,0.3,0.4,0.5]

Loop through hyperparameter combinations and get R2 values

In [17]:
#Matrix of R2 values for every hyperparameter combination:
#r2_vals[i,j] is the result for num_unit_set[i] units and frac_dropout_set[j] dropout
r2_vals=np.empty([len(num_unit_set),len(frac_dropout_set)])

#Fit and evaluate one model per hyperparameter combination
for i,num_units in enumerate(num_unit_set): #i indexes the number of units
    for j,frac_dropout in enumerate(frac_dropout_set): #j indexes the amount of dropout
        #Declare the decoder; the same number of units is given for both entries of "units"
        model_dnn=DenseNNDecoder(units=[num_units,num_units],dropout=frac_dropout,num_epochs=n_epochs)
        #Fit on the training set
        model_dnn.fit(X_flat_train,y_train)
        #Predict the test set
        y_test_predicted_dnn=model_dnn.predict(X_flat_test)
        #Store the mean of the R2 values returned for the test set
        r2_vals[i,j]=np.mean(get_R2(y_test,y_test_predicted_dnn))

        print(num_units,frac_dropout,r2_vals[i,j])



(100, 0, 0.696266015291161)
(100, 0.1, 0.7033545101618082)
(100, 0.2, 0.6956054052157566)
(100, 0.3, 0.6719583691717945)
(100, 0.4, 0.6796487774658623)
(100, 0.5, 0.6225763920657579)
(200, 0, 0.7090122002037368)
(200, 0.1, 0.724355016440842)
(200, 0.2, 0.6952499221356006)
(200, 0.3, 0.692374309345424)
(200, 0.4, 0.6479185130659582)
(200, 0.5, 0.6486283578332661)
(300, 0, 0.7146423304756662)
(300, 0.1, 0.69538091192814)
(300, 0.2, 0.6851423606387953)
(300, 0.3, 0.6827551365455837)
(300, 0.4, 0.6779294285707755)
(300, 0.5, 0.6403877897393295)
(400, 0, 0.7120193298523239)
(400, 0.1, 0.7071591935809334)
(400, 0.2, 0.6891463804390167)
(400, 0.3, 0.6938406292981216)
(400, 0.4, 0.6773059045749703)
(400, 0.5, 0.6260958065035958)
(500, 0, 0.7194149311187766)
(500, 0.1, 0.7200126553784618)
(500, 0.2, 0.7045135162258749)
(500, 0.3, 0.6594607153607601)
(500, 0.4, 0.66106637925329)
(500, 0.5, 0.6182934684155306)
(600, 0, 0.719556777048681)
(600, 0.1, 0.7020481176590975)
(600, 0.2, 0.713973464443985

## Save

In [18]:
#Write the R2 matrix together with the two hyperparameter axes, as one pickled list, to
#    <save_folder><dataset>_hyperparam_sensitivity<data_amt>.pickle
results_file=save_folder+dataset+'_hyperparam_sensitivity'+data_amt+'.pickle'
with open(results_file,'wb') as f:
    pickle.dump([r2_vals,num_unit_set,frac_dropout_set],f)

## Quick Check of Results

In [20]:
#Show the full R2 matrix: one row per entry of num_unit_set, one column per entry of frac_dropout_set
print(r2_vals)

[[0.69626602 0.70335451 0.69560541 0.67195837 0.67964878 0.62257639]
 [0.7090122  0.72435502 0.69524992 0.69237431 0.64791851 0.64862836]
 [0.71464233 0.69538091 0.68514236 0.68275514 0.67792943 0.64038779]
 [0.71201933 0.70715919 0.68914638 0.69384063 0.6773059  0.62609581]
 [0.71941493 0.72001266 0.70451352 0.65946072 0.66106638 0.61829347]
 [0.71955678 0.70204812 0.71397346 0.6861446  0.69250905 0.66806956]
 [0.71420902 0.69442423 0.68848977 0.68599257 0.68849479 0.64556903]
 [0.72023073 0.70588655 0.6897513  0.70438066 0.68621377 0.63520091]
 [0.72650823 0.73452566 0.66588558 0.71609785 0.71097092 0.65380788]
 [0.72137748 0.73321408 0.70471722 0.69882488 0.65289636 0.64326516]]
