In [None]:
import numpy as np
from matplotlib import animation, pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.linalg as linalg
import scipy.stats as stats
import scipy.interpolate as interp

import warnings
warnings.filterwarnings('ignore')
%pylab inline 

In [None]:
### Load the primary recording for one worm from the SharedData .npz archive
Worm = 'GFP'              # worm identifier used to build every file name below
Centerline = True         # also load the centerline archive
Neuron_Position = False   # also load the neuron-position PCA archive
Close = False             # with Neuron_Position: choose the "_Close" archive

worm_data = np.load('../SharedData/Worm' + Worm + '.npz')
print('The loaded Primary npz contains the variables:\n', np.sort(list(worm_data)))

# Label for the green channel, used later in plot titles
G_sig = 'GCaMP' if Worm != 'GFP' else 'GFP'

### Pull out the arrays the rest of the notebook works on
G_Raw, R_Raw, Time = (worm_data[key] for key in ('G_Raw', 'R_Raw', 'Time'))


### Optional: centerline data
if Centerline:
    CLdata = np.load('../SharedData/Worm_Angles/WormAngle' + Worm + '.npz')
    print('The loaded Centerline npz contains the variables:\n', np.sort(list(CLdata)))

    # Stored transposed relative to how later cells slice it (time on axis 1)
    CL_PCs = CLdata['proj_neural_thetas'].T


### Optional: neuron-position data
if Neuron_Position:
    NPdata = np.load('../SharedData/NPos_PCA/Worm' + Worm
                     + ('_NPosPCA_Close.npz' if Close else '_NPosPCA.npz'))
    if Close:
        print("Close neurons with no tail was loaded")
    print('The loaded Neuron Position npz contains the variables:\n', np.sort(list(NPdata)))

    NP_PCs = NPdata['NP_PCs']

In [None]:
### Flag every time column in which either channel holds a NaN, then locate
### the first and last columns that are NaN-free in both channels
badcols = np.isnan(G_Raw).any(axis=0) | np.isnan(R_Raw).any(axis=0)
clean_cols = np.arange(0, len(Time))[~badcols]
begin_col = clean_cols[0]
end_col = clean_cols[-1] + 1

### Trim every series to the half-open column range [begin_col, end_col)
G_Raw, R_Raw = G_Raw[:, begin_col:end_col], R_Raw[:, begin_col:end_col]
Time = Time[begin_col:end_col, 0]  # Time is indexed as 2-D here; keep column 0 as a vector

if Centerline:
    CL_PCs = CL_PCs[:, begin_col:end_col]
if Neuron_Position:
    NP_PCs = NP_PCs[:, begin_col:end_col]


neuron_length, neuron_time = np.shape(G_Raw)
print('Neurons:', neuron_length, '\nTime Points:', neuron_time, '\nFrom', Time[0], 's to', Time[-1], 's')

### Replace the NaNs that remain inside the trimmed range, one neuron at a time,
### using a cubic interpolant fitted to that neuron's valid samples
for i in np.arange(len(G_Raw)):
    g_row, r_row = G_Raw[i], R_Raw[i]  # row views: writes below land in G_Raw / R_Raw

    g_bad = np.isnan(g_row)
    if g_bad.any():
        g_interp = interp.interp1d(Time[~g_bad], g_row[~g_bad], kind='cubic', assume_sorted=True)
        g_row[g_bad] = g_interp(Time[g_bad])

    r_bad = np.isnan(r_row)
    if r_bad.any():
        r_interp = interp.interp1d(Time[~r_bad], r_row[~r_bad], kind='cubic', assume_sorted=True)
        r_row[r_bad] = r_interp(Time[r_bad])
    
    #print(i)
    
    ### Visualize interpolated points
#     plt.scatter(Time[~r_bad], R_Raw[i,~r_bad], color='blue')
#     plt.plot(Time, R_Raw[i], color='blue', alpha=0.2)
#     plt.scatter(Time[r_bad], R_Raw[i,r_bad], color='red')
#     plt.show()
#     break

In [None]:
# Overlay the RFP trace of every neuron on one wide figure
f = plt.figure(figsize=(20, 10))
for trace in R_Raw[:neuron_length]:
    plt.plot(trace)
plt.title("GFP RFP worm RFP")

In [None]:
# Per-neuron mean and variance of the RFP signal (axis 1 is time), then the
# average of each across neurons.
# Uses np.mean explicitly: the bare name `mean` only exists when `%pylab inline`
# has star-imported numpy into the notebook namespace.
m = np.mean(R_Raw, axis=1)
v = np.var(R_Raw, axis=1)
print("mean: %1.5f, variance: %1.5f" % (np.mean(m), np.mean(v)))

# Regressing against all RFP

#### Define ridge regression, maximizing marginal likelihood (see Bishop p.167)

In [None]:
### Define function to do ridge regression for an individual neuron
def ridge_regress(X, Y, 
                  XTX = None, eigs = None, 
                  converge_required = 0.001, a = 1.0, b = 1.0,
                  a_max = 100000000,
                  printer = False, verbose = False):
    """Ridge regression with iteratively re-estimated hyper-parameters.

    Alternates between computing the weights for the current (alpha, beta)
    and re-estimating alpha and beta from those weights (see Bishop p.167),
    until alpha changes by no more than `converge_required` or reaches `a_max`.

    Parameters
    ----------
    X : array, shape (N, M)
        Design matrix.
    Y : array, shape (N,)
        Target values.
    XTX : array, shape (M, M), optional
        Pre-computed ``X.T @ X``; computed here when None.
    eigs : array, shape (M,), optional
        Pre-computed eigenvalues of `XTX`; computed here when None. Only the
        real part is used.
    converge_required : float
        Stop once ``|alpha_new - alpha_old|`` is no larger than this.
    a, b : float
        Initial alpha (weight precision) and beta (noise precision).
    a_max : float
        Stop once alpha reaches this value.
    printer : bool
        Print alpha, beta and squared error after every iteration.
    verbose : bool
        Print diagnostic messages and the iteration count.

    Returns
    -------
    mu : array, shape (M,)
        Regression weights from the last iteration.
    conf_int : array, shape (M,)
        ``sqrt(diag(Sigma))`` from the last iteration (std of the weights).
    error : float
        Sum of squared residuals of `mu` on (X, Y).
    converged : bool
        Whether the final alpha change was below `converge_required`.

    Notes
    -----
    `mu`, `conf_int` and `error` are computed with the alpha/beta in effect at
    the start of the final iteration, i.e. before the last hyper-parameter
    update.
    """
    ### Initialize
    a_old = -1.0
    N = len(X)

    # `is None`, not `== None`: comparing an ndarray with == is element-wise and
    # cannot be used as an `if` condition.
    if XTX is None:
        XTX = np.dot(X.T, X)

    if eigs is None:
        # XTX is symmetric, so eigvalsh applies and always returns real values
        eigs = np.linalg.eigvalsh(XTX)
    # Caller-supplied eigenvalues may carry a complex dtype; keep the real part
    eigs = np.real(np.asarray(eigs))

    identity = np.eye(len(XTX))

    ### Loop until alpha converges.
    # The body always runs at least once so that Sigma/mu/error are defined
    # even when the starting alpha already satisfies the stopping rule.
    iterations = 0
    while True:

        if np.isinf(a) and verbose:
            print("These features are not helpful in prediction for ridge regression")

        # Sigma = (b*XTX + a*I)^-1
        Sigma = np.linalg.inv(b*XTX + a*identity)

        # mu = b*Sigma*X.T*Y
        mu = b*np.dot(np.dot(Sigma, X.T), Y)

        # gamma = sum_i b*lambda_i / (a + b*lambda_i)
        gamma = np.sum((b*eigs)/(a + b*eigs))
        a_new = gamma/np.dot(mu.T, mu)

        error = np.sum((Y - np.dot(X, mu))**2)
        b_new = (N - gamma)/error

        # debugging
        if verbose and np.isinf(a):
            print("\tOld: %1.5f, New: %1.5f" %(a_old, a))

        a_old = a
        a = a_new
        b = b_new

        iterations += 1

        if printer:
            print(iterations, "    alpha = ", a, " beta = ", b, " Squared-Error = ", error) 

        if not (abs(a_old - a) > converge_required and a < a_max):
            break

    converged = abs(a_old-a)< converge_required
    if verbose:
        if converged: 
            print("\tIterations: %d" % (iterations))
        else: 
            print("\tIterations: %d, a_old: %1.5f , Converged within: %1.5f" % (iterations, a_old, abs(a_old-a)))
    conf_int = np.sqrt(np.diag(Sigma)) # See Bishop p.167

    ### Return regression weights 'mu', std of weights 'conf_int', squared error of regression 'error'
    ## Converged: boolean whether or not the iterations converged to convergence required. 
    return mu, conf_int, error, converged

#### Define (Group) ARD for selecting only relevant weights

In [None]:
def GroupARD(X, Y, in_group = 1, 
             alpha_max = 100000000, max_iterations = 1000, printer=False):
    """Automatic relevance determination with one alpha per group of columns.

    Every `in_group` consecutive columns of `X` share one precision alpha.
    Each iteration computes the weights for the current alphas and beta,
    re-estimates them, and removes every column whose alpha exceeds
    `alpha_max`. Iteration stops when, in an iteration without removals, the
    squared change of the alpha vector is below 1e-5, or after
    `max_iterations` iterations.

    Parameters
    ----------
    X : array, shape (N, M)
        Design matrix.
    Y : array, shape (N,)
        Target values.
    in_group : int
        Number of consecutive columns sharing one alpha.
    alpha_max : float
        Columns whose alpha exceeds this are removed from the model.
    max_iterations : int
        Upper bound on the number of iterations.
    printer : bool
        Print per-iteration diagnostics and a final alpha/mu table.

    Returns
    -------
    mu : array, shape (M,)
        Weights, with 0 at the positions of removed columns.
    a : array, shape (M,)
        Alphas, with -1 at the positions of removed columns.
    """
    N = len(X)
    M = len(X[0])

    a = np.repeat(1.0, M) # alphas
    b = 1.0               # Beta = 1/sig^2 

    X1 = X                # design matrix restricted to the surviving columns

    deletions = []        # removed column indices per batch, most recent batch first
    prev_a = a            # alphas at the end of the previous iteration
    mu = np.zeros(M)      # defined up front so max_iterations=0 still returns

    for ard_iteration in range(max_iterations):

        # Sigma = (b*XTX + A)^-1
        Sigma = np.linalg.inv(b*np.dot(X1.T,X1) + np.diag(a))

        # mu = b*Sigma*X.T*Y
        mu = b*np.dot(np.dot(Sigma, X1.T), Y)

        gamma = 1.0 - a*np.diag(Sigma)
        group_starts = range(0, len(gamma), in_group)
        group_gamma = np.array([np.sum(gamma[i:i+in_group]) for i in group_starts])
        mu_squared = mu**2
        group_mu = np.array([np.sum(mu_squared[i:i+in_group]) for i in group_starts])
        a_new = group_gamma/group_mu

        error = np.sum((Y - np.dot(X1, mu))**2)
        b = (N - np.sum(gamma))/error

        # Expand one alpha per group back to one alpha per column; the slice
        # keeps the length right when the last group is only partially filled.
        a = np.repeat(a_new, in_group)[:len(gamma)]

        if printer : print("\nIteration: ", ard_iteration, " beta = ", b, " Squared-Error = ", error)  

        over = np.flatnonzero(a > alpha_max).tolist()
        if over:
            if printer : print("Deletions: ", len(over))
            deletions = [over] + deletions
            X1 = np.delete(X1,over,axis=1)
            a = np.delete(a,over)
            # Prune mu too, so it stays aligned with X1 and `a` if the loop
            # ends on this iteration.
            mu = np.delete(mu,over)
        else:
            a_converge = np.sum((a - prev_a)**2)
            if printer : print("Alpha distance = ", a_converge, "   Max alpha = ", np.max(a))
            if a_converge < .00001:
                break

        prev_a = a


    # Recover mu: re-insert placeholders for removed columns. Batches are
    # replayed most-recent-first and indices within a batch are ascending, so
    # each insert lands at the column's position at the time it was removed.
    for i in deletions:
        for j in i:
            a = np.insert(a,j,-1)
            mu = np.insert(mu,j,0)

    if printer: 
        df = pd.DataFrame(list(zip(a, mu)), columns = ['alpha', 'mu'])
        print("\n", df)
        print("\nDeletions:", np.sum([len(i) for i in deletions])/in_group, 'out of', M/in_group)

    return mu, a

# Calculate MSE for different regressions

In [None]:
def MSE(G_Raw, Time, R_Raw, 
        use_R_Raw = True, num_timelag=0, num_timejump=0,
        CL_PCs = None, num_CL_comp = 1, CL_timelag = 0, CL_timejump = 0, 
        NP_PCs = None, num_NP_comp = 1, NP_timelag = 0, NP_timejump = 0,
        percent_holdout=20, reg_type='all', alpha_max = 100000,
        verbose = False):
    """Fit regressions of G_Raw on windowed regressors and score them on a holdout.

    A design matrix `M` is built with one row per usable time point. Each row
    concatenates, in this order: a window of every R_Raw neuron (if
    `use_R_Raw`), a window of the first `num_CL_comp` rows of `CL_PCs` (if
    given) and a window of the first `num_NP_comp` rows of `NP_PCs` (if given).
    A window for time i spans ``[i - timelag, i + timejump]``. The first
    ``1 - percent_holdout/100`` fraction of rows is the training set, the rest
    the test set. Targets are centered with the training mean; `M` is not
    centered.

    Parameters
    ----------
    G_Raw, R_Raw : array, shape (neurons, time)
        Target and regressor signals.
    Time : array, shape (time,)
        Time stamps, trimmed the same way as the targets.
    use_R_Raw : bool
        Include R_Raw windows in the design matrix.
    num_timelag, num_timejump : int
        Samples before / after the current time in the R_Raw window.
    CL_PCs, NP_PCs : array, shape (components, time), optional
        Extra regressors; ignored when None.
    num_CL_comp, num_NP_comp : int
        Number of leading rows of CL_PCs / NP_PCs to use.
    CL_timelag, CL_timejump, NP_timelag, NP_timejump : int
        Window extents for the extra regressors.
    percent_holdout : float
        Percentage of rows (taken from the end) used as test set.
    reg_type : str
        'ard', 'ridge', 'standard', 'ridge_std' (ridge + standard) or 'all'.
        Any other value runs only the single-neuron baseline.
    alpha_max : float
        Passed to GroupARD.
    verbose : bool
        Passed to ridge_regress; also prints the neuron index per fit.

    Returns
    -------
    dict with keys 'ridge_mu', 'standard_mu', 'ARD_mu', 'ARD_alpha' (weights,
    one row per neuron, or None when that regression was not run),
    'single_MSE', 'ridge_MSE', 'standard_MSE', 'ARD_MSE' (per-neuron test MSE
    or None), 'M', 'Y_all', 'Time_all', 'time_index_split', 'mean_TRAIN_Y'.

    Notes
    -----
    'single_MSE' is the test MSE of an ordinary linear regression of each
    neuron's G_Raw on its own R_Raw (with intercept), fitted and scored on the
    same trimmed time points and the same split as the other regressions.
    """
    [neuron_length, neuron_time] = np.shape(G_Raw)

    ### Window sizes; a disabled regressor contributes no columns and no trimming ###
    if use_R_Raw:
        primary_window = num_timelag + num_timejump + 1
    else:
        primary_window = 0; num_timelag = 0; num_timejump = 0

    if CL_PCs is not None: 
        CL_window = CL_timelag + CL_timejump + 1
    else:
        num_CL_comp = 0
        CL_window = 0; CL_timelag = 0; CL_timejump = 0

    if NP_PCs is not None: 
        NP_window = NP_timelag + NP_timejump + 1
    else:
        num_NP_comp = 0
        NP_window = 0; NP_timelag = 0; NP_timejump = 0      

    max_timelag = max(num_timelag, CL_timelag, NP_timelag)
    max_timejump = max(num_timejump, CL_timejump, NP_timejump)

    primary_cols = primary_window*neuron_length
    CL_cols = CL_window*num_CL_comp
    NP_cols = NP_window*num_NP_comp
    full_window = primary_cols + CL_cols + NP_cols

    # Usable time points: every window must fit inside [0, neuron_time).
    # The row count is derived from the largest lag and the largest jump
    # separately, because they may belong to different regressors.
    first_t = max_timelag
    last_t = neuron_time - max_timejump

    ### Design matrix: times x (neural values in window, pc projections) ###
    M = np.zeros((last_t - first_t, full_window))
    for i in range(first_t, last_t):
        row = M[i - first_t]

        col = 0
        if use_R_Raw:
            # Row-major ravel lays the windows out neuron after neuron
            row[col:col + primary_cols] = R_Raw[:neuron_length, i-num_timelag:i+num_timejump+1].ravel()
        col += primary_cols

        if num_CL_comp:
            row[col:col + CL_cols] = CL_PCs[:num_CL_comp, i-CL_timelag:i+CL_timejump+1].ravel()
        col += CL_cols

        if num_NP_comp:
            row[col:col + NP_cols] = NP_PCs[:num_NP_comp, i-NP_timelag:i+NP_timejump+1].ravel()

    Y_all = G_Raw.T[first_t:last_t]
    Time_all = Time[first_t:last_t]
    R_all = R_Raw[:, first_t:last_t]  # R_Raw on the same time points as Y_all

    #####################################################

    ### Split data into TRAIN and TEST set ###

    ### Define the size of the holdout set
    time_index_split = int(len(Y_all)*(1 - percent_holdout/100.0))

    ### Define TRAINing data and remove the training mean from Y
    TRAIN_M = M[:time_index_split]
    TRAIN_Time = Time_all[:time_index_split]
    mean_TRAIN_Y = np.mean(Y_all[:time_index_split], axis=0)
    TRAIN_Y_all = Y_all[:time_index_split] - mean_TRAIN_Y

    ### Define TESTing data (centered with the TRAIN mean)
    TEST_Y_all = Y_all[time_index_split:] - mean_TRAIN_Y
    TEST_M = M[time_index_split:]
    TEST_Time = Time_all[time_index_split:]

    def _holdout_mse(weights):
        # Per-neuron mean squared test error for a (neurons x features) weight matrix
        return np.mean((TEST_M @ weights.T - TEST_Y_all)**2, axis=0)

    ####################################################

    ### Run regressions on TRAIN data ###

    ARD_mu = None; ARD_alpha = None; ridge_mu = None; standard_mu = None

    ### ARD
    if reg_type in ['ard', 'all']:

        ### Run GroupARD() on all neurons to get all weights
        ARD_mu = np.zeros((neuron_length, full_window))
        ARD_alpha = np.zeros((neuron_length, full_window))

        for i in range(len(ARD_mu)):
            ARD_mu[i], ARD_alpha[i] = GroupARD(TRAIN_M, TRAIN_Y_all[:,i], in_group=1, alpha_max = alpha_max)

    ### Ridge Regression
    if reg_type in ['ridge', 'ridge_std', 'all']:

        ### Run ridge_regress() on all neurons to get all weights
        ridge_mu = np.zeros((neuron_length, full_window))
        # Shared by every neuron, so computed once. XTX is symmetric, hence eigvalsh.
        XTX = TRAIN_M.T @ TRAIN_M
        eigs = np.linalg.eigvalsh(XTX)

        number_converged = 0
        for i in range(len(ridge_mu)):
            if verbose:
                print("Neuron %d" % i)
            ridge = ridge_regress(TRAIN_M, TRAIN_Y_all[:,i], XTX = XTX, eigs = eigs, verbose = verbose)
            ridge_mu[i] = ridge[0]
            if ridge[3]:
                number_converged = number_converged + 1
        # debugging
        print('Number of neurons that converged: %d, total neurons: %d, percentage: %2.1f' %
              (number_converged, len(ridge_mu), number_converged*100/len(ridge_mu)))

    ### Standard Regression (normal equations, all neurons at once)
    if reg_type in ['standard', 'ridge_std', 'all']:
        standard_mu = np.linalg.solve(TRAIN_M.T @ TRAIN_M , TRAIN_M.T @ TRAIN_Y_all).T

    ####################################################

    ### Compute MSE on TEST data ###
    ridge_MSE = None; standard_MSE = None; ARD_MSE = None

    ### Standard regression on single neuron (baseline)
    single_MSE = np.zeros((neuron_length))
    for i in range(neuron_length):
        slope, intercept = stats.linregress(R_all[i,:time_index_split], Y_all[:time_index_split,i])[:2]
        single_MSE[i] = np.average((slope*R_all[i,time_index_split:] + intercept - Y_all[time_index_split:,i])**2)

    ### Ridge regression on all neurons
    if reg_type in ['ridge', 'ridge_std', 'all']:
        ridge_MSE = _holdout_mse(ridge_mu)

    ### Standard regression on all neurons
    if reg_type in ['standard', 'ridge_std', 'all']:
        standard_MSE = _holdout_mse(standard_mu)

    ### Group ARD on all neurons
    if reg_type in ['ard', 'all']:
        ARD_MSE = _holdout_mse(ARD_mu)

    ####################################################

    return {'ridge_mu' : ridge_mu, 'standard_mu' : standard_mu, 'ARD_mu' : ARD_mu,
            'single_MSE' : single_MSE, 'ridge_MSE' : ridge_MSE, 'standard_MSE' : standard_MSE, 'ARD_MSE' : ARD_MSE,
            'M' : M, 'Y_all' : Y_all, 'Time_all' : Time_all, 
            'time_index_split' : time_index_split, 'mean_TRAIN_Y' : mean_TRAIN_Y,
            'ARD_alpha' : ARD_alpha}

In [None]:
### Run the ridge-only regression, then bind each entry of the result to its own name
out = MSE(G_Raw, Time, R_Raw,
          #use_R_Raw = False,
          #NP_PCs = NP_PCs, num_NP_comp=1,#NP_timelag=10, NP_timejump=10,
          #CL_PCs = CL_PCs, num_CL_comp=1,
          reg_type='ridge')

# Weights (None for the regressions that were not requested)
ridge_mu = out['ridge_mu']
standard_mu = out['standard_mu']
ARD_mu = out['ARD_mu']
ARD_alpha = out['ARD_alpha']

# Per-neuron test errors
single_MSE = out['single_MSE']
ridge_MSE = out['ridge_MSE']
standard_MSE = out['standard_MSE']
ARD_MSE = out['ARD_MSE']

# Design matrix, targets and split bookkeeping
M = out['M']
Y_all = out['Y_all']
Time_all = out['Time_all']
time_index_split = out['time_index_split']
mean_TRAIN_Y = out['mean_TRAIN_Y']

In [None]:
# Ridge weights and held-out MSE for one chosen neuron
i = 29  # index of the neuron to inspect
plt.plot(ridge_mu[i, :])
print(ridge_MSE[i])

In [None]:
### Reload the worm archive, this time for the neuron positions
Worm = 'GFP'
worm_data = np.load('../SharedData/Worm' + Worm + '.npz')
print('The loaded npz contains the variables:\n', np.sort(list(worm_data)))

G_sig = 'GCaMP' if Worm != 'GFP' else 'GFP'

### Positions are permuted so that axes read (neuron, dimension, time);
### Time is reduced to its first column
NPos = np.transpose(worm_data['NPos'], (1, 2, 0))
Time = worm_data['Time'][:, 0]

original_neurons, original_dim, original_time = NPos.shape
print('Shape of Neuron Position Matrix:', NPos.shape)

In [None]:
from scipy.interpolate import interp1d

### Patch unusable position samples by linear interpolation over time.
### A sample counts as unusable when it is NaN or exactly 1.0
### (NOTE(review): 1.0 looks like a placeholder value in the data - confirm).
all_bad = np.zeros(len(Time), dtype=bool)
for i in range(len(NPos)):
    for j in range(3):

        coord = NPos[i][j]  # view: writes below modify NPos in place
        bad = np.isnan(coord) | (coord == 1.0)
        all_bad |= bad

        # bounds_error=False: points outside the valid range become NaN
        # instead of raising; those time points are trimmed further down
        interp_pos = interp1d(Time[~bad], coord[~bad], kind='linear',
                              assume_sorted=True, bounds_error=False)
        coord[bad] = interp_pos(Time[bad])

        ### Visualize interpolated points
#         plt.scatter(Time[~bad], coord[~bad], color='blue')
#         plt.plot(Time, coord, color='blue', alpha=0.2)
#         plt.scatter(Time[bad], coord[bad], color='red')
#         plt.show()
#         break

### Keep the span between the first and last time point at which no
### coordinate of any neuron was flagged
good_idx = np.where(~all_bad)[0]
begin = good_idx[0]
end = good_idx[-1] + 1
NPos = NPos[:, :, begin:end]
Time = Time[begin:end]

### Rescale the third coordinate (index 2) by a constant factor
volt_to_pixel = 30
NPos[:,2,:] = NPos[:,2,:]*volt_to_pixel

### Get sizes
num_neuron, dim, num_time = NPos.shape

In [None]:
NPos = np.transpose(NPos, (2,0,1)) # Reorder: time, neuron, dimension
# NOTE: re-running this cell permutes the axes again; run it once per load.

### Pairwise Euclidean distance between all neurons at every time point.
### Each time step is computed with one broadcast difference instead of a
### Python loop over neuron pairs (which also evaluated every norm twice).
dists_far = np.zeros((num_time, num_neuron, num_neuron))
for i in range(num_time):
    # offsets[j, k] = position of neuron j minus position of neuron k
    offsets = NPos[i][:, np.newaxis, :] - NPos[i][np.newaxis, :, :]
    dists_far[i] = np.sqrt(np.sum(offsets**2, axis=-1))

In [None]:
# Collapse the time axis: mean and variance of each pairwise distance
dists = dists_far.mean(axis=0)
dists_var = dists_far.var(axis=0)

In [None]:
### Ridge weight of every regressor neuron against its average distance to the
### target neuron; the target neuron itself is drawn in red
neuron_n = [3,8,10,28]  # List of neurons to be observed

# squeeze=False keeps the axes array indexable even for a single neuron
f, ax = plt.subplots(len(neuron_n), 1, figsize=(8, 4*len(neuron_n)), squeeze=False)
ax = ax[:, 0]
for i, j in enumerate(neuron_n):
    ax[i].scatter(dists[j], ridge_mu[j], color='blue')
    ax[i].scatter(dists[j,j], ridge_mu[j,j], color='red')

    # Annotations are placed in axes-fraction coordinates
    ax[i].text(0.7, 0.8, "pMSE = " + str(int(ridge_MSE[j])), fontsize=14, transform=ax[i].transAxes)
    ax[i].text(0.7, 0.73, "n = " + str(j), fontsize=14, transform=ax[i].transAxes)
    ax[i].set_xlabel('Average Pairwise Distance')
    ax[i].set_ylabel('Regression Weight of Neuron')

In [None]:
### Sweep the symmetric R_Raw window (lag = jump = i) and record, per neuron,
### the ridge test MSE relative to the single-neuron baseline.
### MSE's positional order is (G_Raw, Time, R_Raw); the previous calls passed
### (R_Raw, G_Raw, Time), which handed Time in as the regressor matrix.
results = []
errors = []
for i in np.arange(10):
    print(i)
#     temp = MSE(G_Raw, Time, R_Raw, NP_PCs = NP_PCs, num_NP_comp=i, NP_timelag=5, NP_timejump=5)
#     temp = MSE(G_Raw, Time, R_Raw, CL_PCs = CL_PCs, num_CL_comp=6, CL_timelag=i, CL_timejump=i)
    temp = MSE(G_Raw, Time, R_Raw, num_timelag=i, num_timejump=i)
    results.append(temp)
    errors.append(temp['ridge_MSE']/temp['single_MSE'])

print(pd.DataFrame(np.array(errors).T))

In [None]:
# Cumulative histograms of the error ratios for the first three window sizes
for k, colour in enumerate(('red', 'green', 'blue')):
    plt.hist(errors[k], bins=60, range=(0, 1.5), color=colour, alpha=0.5, cumulative=True)
plt.show()

In [None]:
# Mean and median error ratio for each window size
for i, ratio in enumerate(errors):
    print(i, np.mean(ratio), np.median(ratio))

In [None]:
# Mean and median error ratio for each window size
for i, ratio in enumerate(errors):
    print(i, np.mean(ratio), np.median(ratio))

In [None]:
# Mean and median error ratio for each window size
for i, ratio in enumerate(errors):
    print(i, np.mean(ratio), np.median(ratio))

In [None]:
# Ridge weights fitted for neuron 0 (other regressions left commented out)
first_neuron_weights = ridge_mu[0]
# plt.plot(standard_mu[0], color = 'blue')
plt.plot(first_neuron_weights, color='green')
# plt.plot(ARD_mu[0], color = 'red')
plt.show()

In [None]:
# Scratch check: integers 0..6
np.arange(0, 7)

In [None]:
# Scratch check: averaging a 2-D array without an axis averages all elements
a = np.array([[2, 2],
              [4, 4]])
np.average(a, axis=None)

## Testing if Neuron Position and Centerline make a difference to MSE

Set `Close = True` in the data-loading cell to use the "close neurons, no tail" position file.

In [None]:
### Ridge + standard regression with four regressor sets, always including RFP:
### RFP only, + neuron positions, + neuron positions & centerline, + centerline

outNone = MSE(G_Raw, Time, R_Raw, reg_type='ridge_std')

np_args = dict(NP_PCs=NP_PCs, num_NP_comp=1)  #NP_timelag=10, NP_timejump=10,
outNP = MSE(G_Raw, Time, R_Raw, reg_type='ridge_std', **np_args)

cl_args = dict(CL_PCs=CL_PCs, num_CL_comp=1)
outNPCL = MSE(G_Raw, Time, R_Raw, reg_type='ridge_std', **np_args, **cl_args)

outCL = MSE(G_Raw, Time, R_Raw, reg_type='ridge_std', **cl_args)

In [None]:
### Per-neuron test MSE of the four regressor sets: ridge (left), standard (right)
f, ax = plt.subplots(1, 2, figsize=(20, 10))

runs = [outNone, outNP, outNPCL, outCL]
# Legend entries follow the order of `runs` ("positons" typo corrected)
run_labels = ['None', 'Neuron positions', 'Neuron pos & centerline', 'Centerline']

if Close:
    titles = ["Ridge MSE (no tail, close pairwise)", "Standard (no tail, close pairwise)"]
else:
    titles = ["Ridge MSE (with tail, pairwise)", "Standard (with tail, pairwise)"]

for axis, key, title in zip(ax, ('ridge_MSE', 'standard_MSE'), titles):
    for run in runs:
        axis.plot(run[key].T)
    axis.set_title(title)
    axis.legend(run_labels)
    axis.set_xlabel('Neuron')
    axis.set_ylabel('MSE')

In [None]:
# Sweep the neuron-position window (lag = jump = i) with RFP and one centerline
# component always included; row i holds the per-neuron test MSE for window i
ridge_MSE = np.zeros((10, neuron_length))
standard_MSE = np.empty_like(ridge_MSE)
for i in np.arange(10):
    out = MSE(G_Raw, Time, R_Raw, reg_type='ridge_std',
              NP_PCs=NP_PCs, num_NP_comp=1, NP_timelag=i, NP_timejump=i,
              CL_PCs=CL_PCs, num_CL_comp=1)
    ridge_MSE[i], standard_MSE[i] = out['ridge_MSE'], out['standard_MSE']

In [None]:
# One line per window size: ridge test MSE (left) and standard test MSE (right)
f, ax = plt.subplots(1, 2, figsize=(20, 10))
legend_names = ["%d timelag" % lag for lag in np.arange(10)]
for i in np.arange(10):
    ax[0].plot(ridge_MSE[i, :])
    ax[1].plot(standard_MSE[i, :])

# Titles depend on which neuron-position file was loaded
ridge_title, standard_title = (
    ("Ridge MSE exp pairwise distances\nno tail with centerlines",
     "Standard MSE exp pairwise distances\nno tail with centerlines")
    if Close else
    ("Ridge MSE pairwise distances\nwith tail with centerlines",
     "Standard MSE pairwise distances\nwith tail with centerlines")
)
ax[0].set_title(ridge_title)
ax[1].set_title(standard_title)

for axis in ax:
    axis.legend(legend_names)
    axis.set_xlabel('Neuron')
ax[0].set_ylabel('MSE')
ax[1].set_ylabel('MSE')

# Regress w/o RFP


In [None]:
# Reference run: RFP is the only regressor (no positions, no centerline)
outNone = MSE(G_Raw, Time, R_Raw,
              use_R_Raw=True,
              #NP_PCs = NP_PCs, num_NP_comp=1, #NP_timelag=10, NP_timejump=10,
              #CL_PCs = CL_PCs, num_CL_comp=1,
              reg_type='ridge_std')

In [None]:
# Same regressor sets as before, but with RFP excluded from the design matrix
no_rfp = dict(reg_type='ridge_std', use_R_Raw=False)

np_args = dict(NP_PCs=NP_PCs, num_NP_comp=1)  #NP_timelag=10, NP_timejump=10,
outNP = MSE(G_Raw, Time, R_Raw, **np_args, **no_rfp)

cl_args = dict(CL_PCs=CL_PCs, num_CL_comp=1)
outNPCL = MSE(G_Raw, Time, R_Raw, **np_args, **cl_args, **no_rfp)

outCL = MSE(G_Raw, Time, R_Raw, **cl_args, **no_rfp)

In [None]:
### Per-neuron test MSE: the RFP-only reference run against the runs that use
### positions and/or centerline without RFP. Ridge (left), standard (right).
f, ax = plt.subplots(1, 2, figsize=(20, 10))

runs = [outNone, outNP, outNPCL, outCL]
# Legend entries follow the order of `runs` ("positons" typo corrected)
run_labels = ['RFP', 'Neuron positions', 'Neuron pos & centerline', 'Centerline']

if Close:
    titles = ["Ridge MSE No RFP (no tail, close pairwise)", "Standard (no tail, close pairwise)"]
else:
    titles = ["Ridge MSE No RFP (with tail, pairwise)", "Standard (with tail, pairwise)"]

for axis, key, title in zip(ax, ('ridge_MSE', 'standard_MSE'), titles):
    for run in runs:
        axis.plot(run[key].T)
    axis.set_title(title)
    axis.legend(run_labels)
    axis.set_xlabel('Neuron')
    axis.set_ylabel('MSE')

## Extra Functions!

* Look at distribution of NaNs in original data

In [None]:
### Boolean mask over time columns: True where either channel has a NaN
g_nan = np.isnan(G_Raw)
r_nan = np.isnan(R_Raw)
badcols = g_nan.any(axis=0) | r_nan.any(axis=0)
bad_idx = np.nonzero(badcols)[0]

### NaN Distribution Information
pd.set_option('display.max_rows', None)  # show the full tables below

print(len(bad_idx), 'columns/times containing NaNs were found')

# One row per affected time column: its time stamp and the NaN count per channel
print('\nNaN Count by Time Column, Out of', len(G_Raw), 'Neurons')
per_time = np.column_stack((Time[badcols],
                            np.sum(g_nan[:, badcols], axis=0),
                            np.sum(r_nan[:, badcols], axis=0)))
print(pd.DataFrame(per_time, index=bad_idx, columns=['Time', 'G_Raw', 'R_Raw']))

# One row per neuron: the NaN count per channel over the whole recording
print('\nNaN Count by Neruon, Out of', len(G_Raw[0]), 'Time Points')
per_neuron = np.column_stack((np.sum(g_nan, axis=1), np.sum(r_nan, axis=1)))
print(pd.DataFrame(per_neuron, columns=['G_Raw', 'R_Raw']))

* Remove all columns containing any NaN, instead of interpolating

In [None]:
### Keep only the time columns that are NaN-free in both channels
badcols = np.isnan(G_Raw).any(axis=0) | np.isnan(R_Raw).any(axis=0)
keep = ~badcols

G_Raw, R_Raw, Time = G_Raw[:, keep], R_Raw[:, keep], Time[keep]

neuron_length, neuron_time = np.shape(G_Raw)

* Make a heat map of GFP/GCaMP & RFP

In [None]:
### Side-by-side heat maps (neuron x frame) of the green channel and RFP
f, ax = plt.subplots(1, 2, figsize=(16, 10*2))
for axis, image, title in zip(ax, (G_Raw, R_Raw), (G_sig, r'RFP')):
    axis.imshow(image, aspect=10, cmap="Paired")
    axis.set_title(title)
    axis.set_xlabel(r'Frame #')
    axis.set_ylabel(r'Neuron #')

* Examine Raw Fluorescence statistics within worm

In [None]:
### Histograms, across neurons, of four per-neuron summary statistics of the raw
### signal: green channel in the top row, RFP in the bottom row
p = 8 # fluorescence percentile

f, ax = plt.subplots(2,4, figsize=(20,10))

# (title suffix, statistic over the time axis), one entry per column
summaries = [
    (', Minimum Raw', lambda x: np.min(x, axis=1)),
    (', ' + str(p) + 'th Percentile Raw', lambda x: np.percentile(x, p, axis=1)),
    (', Median Raw', lambda x: np.median(x, axis=1)),
    (', Maximum Raw', lambda x: np.max(x, axis=1)),
]
channels = [(G_Raw, G_sig, 'green'), (R_Raw, 'RFP', 'red')]

for row, (signal, label, colour) in enumerate(channels):
    for column, (suffix, summarise) in enumerate(summaries):
        ax[row][column].hist(summarise(signal), color=colour)
        ax[row][column].set_title(label + suffix, fontsize = 16)

plt.tight_layout()
plt.savefig('../Data/Worm'+Worm+'Raw_Fluorescence_Dist.pdf')

* Visualize raw activity (optionally rescaled) of particular neurons

In [None]:
### Visualize Raw Activity of specific neruon (optionally rescaled to 0-1)
def rescale(a):
    """Linearly map the values of `a` so its minimum becomes 0 and its maximum 1.

    The minimum and range are computed once rather than once per element.
    Returns a list with one rescaled entry per element of `a`. A constant
    input has zero range, so the division yields NaN.
    """
    values = np.asarray(a)
    low = np.min(values)
    span = np.max(values) - low
    return list((values - low)/span)

n = 12  # neuron to display
f = plt.figure(figsize=(20,10))

# Both channels rescaled to 0-1 so they can share one axis
for signal, colour in ((G_Raw, 'green'), (R_Raw, 'red')):
    plt.plot(Time, rescale(signal[n]), color = colour)

# plt.plot(Time, G_Raw[n], color = 'green')
# plt.plot(Time, R_Raw[n], color = 'red')

plt.xlim([0,Time[-1]])

plt.show()

# Summary table: minimum, 8th percentile, median and maximum per channel
print('Neuron: ', n)
print('            Min       %  Median     Max')
for name, signal in (('G_Raw', G_Raw), ('R_Raw', R_Raw)):
    trace = signal[n]
    summary = (np.min(trace), np.percentile(trace, 8), np.median(trace), np.max(trace))
    print(name + ': ', *('%7.1f' % value for value in summary))

* Do basic linear regression of GFP/GCaMP against RFP

In [None]:
### Calculate regression info of GFP/GCaMP against RFP, one fit per neuron
slopes, r_values, intercepts = (np.zeros(neuron_length) for _ in range(3))

for i in range(neuron_length):
    # linregress returns (slope, intercept, r_value, p_value, std_err)
    slope, intercept, r_value, p_value, std_err = stats.linregress(R_Raw[i], G_Raw[i])
    slopes[i], intercepts[i], r_values[i] = slope, intercept, r_value

* Create Coefficient of Variation figure for RFP vs GFP with $r^2$ as the colorbar

In [None]:
### Display Coefficient of Variation for RFP vs GFP w/ r^2 as colorbar
def onpick3(event):
    """Pick-event callback: print the indices of the picked scatter points."""
    ind = event.ind
    print('onpick3 scatter:', ind)

cv_R = stats.variation(R_Raw, axis=1)
cv_G = stats.variation(G_Raw, axis=1)
# The colorbar is labelled r^2, so colour by the squared correlation
# (r_values holds r from the per-neuron fits).
r_squared = r_values**2
grid = np.arange(0.01, 0.6, .01)

f = plt.figure(figsize=(10,10))
plt.scatter(cv_R, cv_G, c = r_squared, cmap='hot', picker=True)
# NOTE(review): this second scatter draws raw variances on the same
# coefficient-of-variation axes; kept as in the original - confirm it is wanted.
plt.scatter(np.var(R_Raw, axis=1), np.var(G_Raw, axis=1), c = r_squared, cmap='hot', picker=True)
plt.plot(grid, grid)  # identity line
plt.xlim([0,.6])
plt.ylim([0,.6])
plt.xlabel('Cof. of Var. RFP Raw')
plt.ylabel('Cof. of Var. GFP Raw')
plt.colorbar(label=r'$r^2$')

# Linear fit of GFP CoV against RFP CoV, drawn as a dashed green line
slope, intercept, r_value, p_value, std_err = stats.linregress(cv_R, cv_G)
print(slope, intercept, r_value**2, p_value, std_err)
plt.plot(grid, intercept + slope*grid, 'g--')

f.canvas.mpl_connect('pick_event', onpick3)
plt.show()

#plt.savefig('Coeffcient_of_Variation.pdf')

* Visualize fit of weights to neural activity

In [None]:
### Choose the weight matrix and the neurons to display
weights = ridge_mu
neuron_n = [52,55,12,42]  # List of neurons to be observed

# Only the held-out part of the recording is drawn
test_time = Time_all[time_index_split:]

f, ax = plt.subplots(len(neuron_n), 2, figsize=(16, 4*len(neuron_n)))
for i, j in enumerate(neuron_n):
    centered_fit = M @ weights[j]  # prediction of the mean-removed signal
    actual = Y_all.T[j]

    # Left: actual signal and prediction with the training mean added back
    ax[i][0].plot(test_time, actual[time_index_split:])
    ax[i][0].plot(test_time, (centered_fit + mean_TRAIN_Y[j])[time_index_split:])
    #ax[i][0].plot(test_time, (M @ ARD_mu[j] + mean_TRAIN_Y[j])[time_index_split:])

    # Right: prediction error
    ax[i][1].plot(test_time, (centered_fit - actual + mean_TRAIN_Y[j])[time_index_split:])

    ax[i][0].set_ylabel(r'Neuron ' + str(j))

# Column headers on the top row only
for axis, title in zip(ax[0], [r'Actual Activity & Estimated Activity', r'Error']):
    axis.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
### Choose the weight matrix and the neurons to display
weights = ridge_mu
neuron_n = [12,42]  # List of neurons to be observed

# Only the held-out part of the recording is drawn
test_time = Time_all[time_index_split:]

f, ax = plt.subplots(len(neuron_n), 1, figsize=(12, 4*len(neuron_n)))
for i, j in enumerate(neuron_n):
    panel = ax[i]
    predicted = M @ weights[j] + mean_TRAIN_Y[j]  # training mean added back

    panel.plot(test_time, Y_all.T[j, time_index_split:], label='Actual Activity')
    panel.plot(test_time, predicted[time_index_split:], label='Predicted Activity')

    panel.set_title(r'Neuron #' + str(j))
    panel.set_xlabel(r'Time (s)')
    panel.set_ylabel(r'Raw GFP Fluorescence')
    panel.legend()

plt.tight_layout()
plt.show()

In [None]:
# Scratch check: integers 0..4
np.arange(5)

In [None]:
# Scratch check: a slice whose start equals its stop selects no rows
a[1:1, :]

In [None]:
# Scratch check: np.maximum is a binary ufunc whose third positional argument
# is the output array, so np.maximum(0, 2, 3) raises a TypeError. Reduce over a
# sequence to take the maximum of more than two values.
np.maximum.reduce([0, 2, 3])