Regressing GFP from Centerline and Neural data

In [None]:
import numpy as np
from matplotlib import animation, pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.linalg as linalg
import scipy.stats as stats
import scipy.interpolate as interp

from sklearn.linear_model import Ridge

import warnings
warnings.filterwarnings('ignore')
%pylab inline 

In [None]:
### Load the recording for one worm from the SharedData .npz archives.
### The three flags choose which optional predictor archives are read as well.
Worm = 'GFP'
Centerline = True
Neuron_Position = True
Close = True

worm_data = np.load('../SharedData/Worm'+Worm+'.npz')
print('The loaded Primary npz contains the variables:\n', np.sort(list(worm_data)))

### Name of the green-channel signal for this worm
if Worm == 'GFP':
    G_sig = 'GFP'
else:
    G_sig = 'GCaMP'

### Pull the arrays used by the rest of the notebook out of the archive
G_Raw, R_Raw, Time = (worm_data[key] for key in ('G_Raw', 'R_Raw', 'Time'))


### Optional: centerline data, transposed on load
if Centerline:
    CLdata = np.load('../SharedData/Worm_Angles/WormAngle'+Worm+'.npz')
    print('The loaded Centerline npz contains the variables:\n', np.sort(list(CLdata)))

    CL_PCs = CLdata['proj_neural_thetas'].T


### Optional: neuron position data; Close selects the "_Close" archive
if Neuron_Position:
    NP_path = '../SharedData/NPos_PCA/Worm'+Worm+('_NPosPCA_Close.npz' if Close else '_NPosPCA.npz')
    NPdata = np.load(NP_path)
    if Close:
        print("Close neurons with no tail was loaded")
    print('The loaded Neuron Position npz contains the variables:\n', np.sort(list(NPdata)))

    NP_PCs = NPdata['NP_PCs']

In [None]:
### Sanity check of the loaded array shapes before trimming.
### NP_PCs and CL_PCs only exist when Neuron_Position and Centerline are both True.
print(NP_PCs.shape, G_Raw.shape, CL_PCs.shape)

In [None]:
### Flag every time column in which any neuron is NaN in either channel, then
### keep only the span from the first to the last fully observed column.
badcols = np.isnan(G_Raw).any(axis=0) | np.isnan(R_Raw).any(axis=0)
if badcols.all():
    raise ValueError('Every time column contains a NaN in G_Raw or R_Raw; nothing to keep.')
goodcols = np.flatnonzero(~badcols)
begin_col = goodcols[0]
end_col = goodcols[-1] + 1

### Shave off times from before first and last full column
G_Raw = G_Raw[:,begin_col:end_col]
R_Raw = R_Raw[:,begin_col:end_col]

### Time is loaded as a 2-D array whose first column is used. Flatten it only
### when it is still 2-D, so re-running this cell does not raise an IndexError.
if np.ndim(Time) > 1:
    Time = Time[:, 0]
Time = Time[begin_col:end_col]

if Centerline: CL_PCs = CL_PCs[:,begin_col:end_col]
if Neuron_Position: NP_PCs = NP_PCs[:,begin_col:end_col]


[neuron_length, neuron_time] = np.shape(G_Raw)
print('Neurons:', neuron_length, '\nTime Points:', neuron_time, '\nFrom', Time[0], 's to', Time[-1], 's')


def _fill_nans_cubic(times, trace):
    """Fill the NaN samples of `trace` in place by cubic interpolation over `times`.

    Returns the boolean mask of the samples that were NaN. The first and last
    columns kept above are NaN-free, so the interpolant is only evaluated
    inside the range of the known samples.
    """
    missing = np.isnan(trace)
    if missing.any():
        cubic = interp.interp1d(times[~missing], trace[~missing], kind='cubic', assume_sorted=True)
        trace[missing] = cubic(times[missing])
    return missing


### Fill in the remaining interior NaNs, one neuron (row) at a time
for i in np.arange(len(G_Raw)):
    g_bad = _fill_nans_cubic(Time, G_Raw[i])
    r_bad = _fill_nans_cubic(Time, R_Raw[i])

    ### Visualize interpolated points
#     plt.scatter(Time[~r_bad], R_Raw[i,~r_bad], color='blue')
#     plt.plot(Time, R_Raw[i], color='blue', alpha=0.2)
#     plt.scatter(Time[r_bad], R_Raw[i,r_bad], color='red')
#     plt.show()
#     break

In [None]:
def MSE(G_Raw, Time, R_Raw = None, 
        num_timelag=0, num_timejump=0,
        CL_PCs = None, num_CL_comp = 1, CL_timelag = 0, CL_timejump = 0, 
        NP_PCs = None, num_NP_comp = 1, NP_timelag = 0, NP_timejump = 0,
        percent_holdout=20, reg_type='all', alpha_max = 100000):
    """Regress G_Raw on windowed predictors and score each neuron on a held-out tail.

    A design matrix M is built with one row per usable time point. Each row
    holds, in this column order: a window of every R_Raw row, a window of the
    first `num_CL_comp` rows of CL_PCs, and a window of the first `num_NP_comp`
    rows of NP_PCs. A window spans `timelag` samples before the time point
    through `timejump` samples after it. The first (100 - percent_holdout)% of
    the rows are used for training and the rest for testing. The training mean
    of the target is subtracted from both the training and the test target.

    Parameters
    ----------
    G_Raw : 2-D array, indexed as [neuron, time]
        Regression target.
    Time : 1-D array
        Time stamps, sliced the same way as the time axis of G_Raw.
    R_Raw : 2-D array or None
        Optional predictor with the same layout as G_Raw; windowed with
        `num_timelag` / `num_timejump`.
    CL_PCs, NP_PCs : 2-D array or None
        Optional predictors indexed as [component, time]; windowed with their
        own `*_timelag` / `*_timejump`, and only the first `num_*_comp` rows
        are used.
    percent_holdout : number
        Percentage of the usable time points kept as the test set.
    reg_type : str
        'ridge', 'standard', 'ridge_std' or 'all'. 'ridge' and 'standard'
        select one regression; 'ridge_std' and 'all' run both.
    alpha_max : number
        Accepted for compatibility; not used by this function.

    Returns
    -------
    dict with keys
        'ridge_mu', 'standard_mu' : coefficient arrays of shape
            (neurons, features), or None when that regression was not run.
            The ridge model is a scikit-learn Ridge with default settings
            apart from alpha; any intercept it fits is not part of 'ridge_mu'.
        'ridge_MSE', 'standard_MSE' : per-neuron mean squared error on the test
            rows, or None when that regression was not run.
        'ARD_mu', 'ARD_MSE', 'ARD_alpha' : always None (no ARD fit is performed).
        'M', 'Y_all', 'Time_all' : design matrix, target and times over the
            usable time points (before the train/test split).
        'time_index_split' : index of the first test row.
        'mean_TRAIN_Y' : per-neuron training mean removed from the target.

    Raises
    ------
    ValueError
        If no predictor is supplied, or the windows leave no usable time point.
    """
    [neuron_length, neuron_time] = np.shape(G_Raw)

    ### Window sizes. A predictor that is not supplied contributes no columns,
    ### so its lag/jump are zeroed to keep it from shrinking the usable range.
    if R_Raw is not None:
        primary_window = num_timelag + num_timejump + 1
    else:
        primary_window = 0
        num_timelag = 0
        num_timejump = 0

    if CL_PCs is not None:
        CL_window = CL_timelag + CL_timejump + 1
    else:
        num_CL_comp = 0
        CL_window = 0
        CL_timelag = 0
        CL_timejump = 0

    if NP_PCs is not None:
        NP_window = NP_timelag + NP_timejump + 1
    else:
        num_NP_comp = 0
        NP_window = 0
        NP_timelag = 0
        NP_timejump = 0

    max_timelag = max(num_timelag, CL_timelag, NP_timelag)
    max_timejump = max(num_timejump, CL_timejump, NP_timejump)

    full_window = primary_window*neuron_length + CL_window*num_CL_comp + NP_window*num_NP_comp
    if full_window == 0:
        raise ValueError('At least one of R_Raw, CL_PCs or NP_PCs must be supplied.')

    ### The row count must use the largest lag and the largest jump even when
    ### they come from different predictors, so that M lines up with Y_all.
    num_samples = neuron_time - max_timelag - max_timejump
    if num_samples <= 0:
        raise ValueError('The requested time windows leave no usable time points.')

    # Matrix of times x neural values in window, pc projections
    M = np.zeros((num_samples, full_window))
    for row, i in enumerate(range(max_timelag, neuron_time - max_timejump)):

        b = 0
        if R_Raw is not None:
            for j in range(neuron_length):
                M[row, b + j*primary_window:b + (j+1)*primary_window] = R_Raw[j, i-num_timelag:i+num_timejump+1]

        b = b + neuron_length*primary_window
        for k in range(num_CL_comp):
            M[row, b + k*CL_window:b + (k+1)*CL_window] = CL_PCs[k, i-CL_timelag:i+CL_timejump+1]

        b = b + num_CL_comp*CL_window
        for z in range(num_NP_comp):
            M[row, b + z*NP_window:b + (z+1)*NP_window] = NP_PCs[z, i-NP_timelag:i+NP_timejump+1]

    Y_all = G_Raw.T[max_timelag:neuron_time-max_timejump]
    Time_all = Time[max_timelag:neuron_time-max_timejump]

    #####################################################

    ### Split data into TRAIN and TEST set (chronological, test set last) ###
    time_index_split = int(len(Y_all)*(1 - percent_holdout/100.0))

    TRAIN_M = M[:time_index_split]
    TEST_M = M[time_index_split:]

    ### Remove the training mean from Y (the test target uses the same mean)
    mean_TRAIN_Y = np.mean(Y_all[:time_index_split], axis=0)
    TRAIN_Y_all = Y_all[:time_index_split] - mean_TRAIN_Y
    TEST_Y_all = Y_all[time_index_split:] - mean_TRAIN_Y

    ####################################################

    ### Fit on TRAIN data and score on TEST data ###
    ARD_mu = None; ARD_alpha = None; ARD_MSE = None
    ridge_mu = None; ridge_MSE = None
    standard_mu = None; standard_MSE = None

    ### Ridge Regression
    if reg_type in ['ridge', 'ridge_std', 'all']:
        # A single multi-target fit keeps one set of coefficients per neuron.
        # Refitting one estimator neuron by neuron would leave only the last
        # neuron's model, which would then be scored against every neuron.
        clf = Ridge(alpha=.1)
        clf.fit(TRAIN_M, TRAIN_Y_all)
        ridge_mu = np.atleast_2d(clf.coef_)
        ridge_MSE = np.mean((clf.predict(TEST_M) - TEST_Y_all)**2, axis=0)

    ### Standard Regression
    if reg_type in ['standard', 'ridge_std', 'all']:
        # Least squares without an intercept. lstsq gives the same solution as
        # the normal equations when they are well conditioned, and still
        # returns a (minimum-norm) solution when M.T @ M is singular.
        standard_mu = np.linalg.lstsq(TRAIN_M, TRAIN_Y_all, rcond=None)[0].T
        standard_MSE = np.mean((TEST_M @ standard_mu.T - TEST_Y_all)**2, axis=0)

    ####################################################

    return {'ridge_mu' : ridge_mu, 'standard_mu' : standard_mu, 'ARD_mu' : ARD_mu,
            'ridge_MSE' : ridge_MSE, 'standard_MSE' : standard_MSE, 'ARD_MSE' : ARD_MSE,
            'M' : M, 'Y_all' : Y_all, 'Time_all' : Time_all, 
            'time_index_split' : time_index_split, 'mean_TRAIN_Y' : mean_TRAIN_Y,
            'ARD_alpha' : ARD_alpha}

In [None]:
### Fit one model per predictor set, each with ridge and standard regression.
### Lags/jumps are left at 0; pass e.g. NP_timelag=10, NP_timejump=10 to add
### temporal context to a predictor.

# RFP traces only
outR = MSE(G_Raw, Time, R_Raw,
           reg_type='ridge_std')

# Neuron positions only
outNP = MSE(G_Raw, Time,
            NP_PCs = NP_PCs, num_NP_comp=1,
            reg_type='ridge_std')

# Centerline only (NP_PCs must not be passed here, otherwise this fit is
# identical to outNPCL)
outCL = MSE(G_Raw, Time,
            CL_PCs = CL_PCs, num_CL_comp=1,
            reg_type='ridge_std')

# Neuron positions and centerline together
outNPCL = MSE(G_Raw, Time,
              NP_PCs = NP_PCs, num_NP_comp=1,
              CL_PCs = CL_PCs, num_CL_comp=1,
              reg_type='ridge_std')

In [None]:
### Per-neuron held-out ridge MSE, one line per predictor set.
### The plotting order must match the legend order below.
f, ax = plt.subplots(figsize=(20,10))
for fit in (outR, outNP, outNPCL, outCL):
    ax.plot(fit['ridge_MSE'].T)

if Close:
    ax.set_title("Ridge MSE (no tail, close pairwise)", fontsize=20)
else:
    ax.set_title("Ridge MSE (with tail, pairwise)", fontsize=20)
ax.legend(['RFP', 'Neuron positions', 'Neuron pos & centerline', 'Centerline'], fontsize=20)
ax.set_xlabel('Neuron')
ax.set_ylabel('MSE');

In [None]:
### Side-by-side per-neuron held-out MSE: ridge (left) and standard (right).
f, ax = plt.subplots(1, 2, figsize=(20,10))

# NOTE(review): the Close titles say "no RFP" although the RFP fit is plotted
# and listed in the legend - confirm which one is intended.
if Close:
    panel_titles = ["Ridge MSE (no RFP, no tail, close pairwise)",
                    "Standard (no RFP, no tail, close pairwise)"]
else:
    panel_titles = ["Ridge MSE (with tail, pairwise)",
                    "Standard (with tail, pairwise)"]

# The order here must match the order in which the fits are plotted.
legend_labels = ['RFP', 'Neuron positions', 'Neuron pos & centerline', 'Centerline']

for panel, mse_key, panel_title in zip(ax, ('ridge_MSE', 'standard_MSE'), panel_titles):
    for fit in (outR, outNP, outNPCL, outCL):
        panel.plot(fit[mse_key].T)
    # Same font size in both branches so the two variants of the figure match
    panel.set_title(panel_title, fontsize=20)
    panel.legend(legend_labels, fontsize=14)
    panel.set_xlabel('Neuron')
    panel.set_ylabel('MSE')