Compute the matrix of pairwise distances between neurons over time and run PCA on it

In [None]:
import numpy as np
from matplotlib import animation, pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.linalg as linalg
import scipy.stats as stats
import scipy.interpolate as interp
from scipy.interpolate import interp1d # For some reason using interp continually gives me errors

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')
# NOTE(review): per the IPython docs, %pylab star-imports numpy and pylab into
# the global namespace, which rebinds names imported above (e.g. `interp`
# becomes numpy.interp) -- presumably the cause of the `interp` errors
# mentioned on the interp1d import. Confirm before relying on those aliases.
%pylab inline

In [None]:
# Notebook-wide switches

# tail = True  -> drop the neurons identified as the worm tail before the analysis
tail = False

# close = True -> run the decomposition on exp(-lambda * pairwise distance)
#                 instead of the raw pairwise distances
close = False

In [None]:
### Load the worm recording from the SharedData .npz archive
Worm = 'GFP'
archive_path = '../SharedData/Worm' + Worm + '.npz'
worm_data = np.load(archive_path)
print('The loaded npz contains the variables:\n', np.sort(list(worm_data)))

# Signal label: 'GFP' when the GFP worm is loaded, 'GCaMP' otherwise
if Worm == 'GFP':
    G_sig = 'GFP'
else:
    G_sig = 'GCaMP'

### Pull out the variables used below
# Permute the axes so NPos is indexed as (neuron, dimension, time)
NPos = worm_data['NPos'].transpose(1, 2, 0)
# Keep only the first column of the stored Time array
Time = worm_data['Time'][:, 0]

# Remember the sizes before any trimming
original_neurons, original_dim, original_time = NPos.shape
print('Shape of Neuron Position Matrix:', NPos.shape)

In [None]:
### Replace missing samples by linear interpolation over time
# A sample counts as missing when it is NaN or exactly 1.0.
all_bad = np.zeros(len(Time), dtype=bool)  # frames missing in at least one trace
for neuron in NPos:
    for axis in range(3):
        trace = neuron[axis]  # view into NPos: writes below modify NPos in place

        bad = np.isnan(trace) | (trace == 1.0)
        all_bad |= bad

        good = ~bad
        interp_pos = interp1d(Time[good], trace[good], kind='linear',
                              assume_sorted=True, bounds_error=False)
        # With bounds_error=False, missing samples outside the span of the good
        # samples are not extrapolated; those frames fall outside the
        # [begin, end) window selected below.
        trace[bad] = interp_pos(Time[bad])

        ### Optional: visualize the interpolated points
#         plt.scatter(Time[good], trace[good], color='blue')
#         plt.plot(Time, trace, color='blue', alpha=0.2)
#         plt.scatter(Time[bad], trace[bad], color='red')
#         plt.show()
#         break

### Trim to the window between the first and last frame where every trace is good
good_frames = np.where(~all_bad)[0]
begin = good_frames[0]
end = good_frames[-1] + 1
Time = Time[begin:end]
NPos = NPos[:, :, begin:end]

### Convert the z-coordinate from volts to pixels
volt_to_pixel = 30
NPos[:, 2, :] *= volt_to_pixel

### Record the sizes after trimming
num_neuron, dim, num_time = NPos.shape

In [None]:
# Sanity check: shape of NPos (neuron, dimension, time) after interpolation and trimming
print(NPos.shape)

In [None]:
if tail:
    # Per-worm settings: a neuron is flagged as tail when, in the first frame,
    # its x exceeds 'x_min' AND its y exceeds 'y_min'. 'times' lists the frames
    # plotted for a visual check of the selection.
    tail_settings = {
        'S1': {'x_min': 470, 'y_min': 350, 'times': [0, 1000, 2000, 2770]},
        'GFP': {'x_min': 310, 'y_min': 380, 'times': [0, 1000, 2000]},
    }
    if Worm not in tail_settings:
        # Previously an unknown worm fell through and failed later with a
        # NameError on `indx`; fail early with an actionable message instead.
        raise ValueError("No tail thresholds defined for Worm = %r" % (Worm,))
    settings = tail_settings[Worm]

    # Boolean mask over neurons (True = tail) and the matching index list
    tail_bool = (NPos[:, 0, 0] > settings['x_min']) & (NPos[:, 1, 0] > settings['y_min'])
    tail_ind = np.where(tail_bool)[0].tolist()

    # Only plot frames that still exist after the time axis was trimmed
    times = [t for t in settings['times'] if t < NPos.shape[2]]

    # Visualize the neural positions, tail neurons highlighted in red
    print(NPos.shape)
    # squeeze=False keeps `ax` indexable even when a single frame is plotted
    f, ax = plt.subplots(len(times), 1, figsize=(10, 40), squeeze=False)
    ax = ax[:, 0]
    for i, v in enumerate(times):
        ax[i].scatter(NPos[:, 0, v], NPos[:, 1, v])
        ax[i].scatter(NPos[tail_ind, 0, v], NPos[tail_ind, 1, v], c='r')
    plt.show()

In [None]:
if tail:
    # Drop the neurons flagged as tail, then reorder to (time, neuron, dimension)
    NPostail = NPos[~tail_bool, :, :]
    NPos = np.transpose(NPostail, (2, 0, 1))
    print("Removed tail, NPos shape is: ", NPos.shape)
else:
    NPos = np.transpose(NPos, (2, 0, 1))  # Reorder: time, neuron, dimension
    # NPostail only exists when the tail was removed; report NPos itself here
    print(NPos.shape)

# Refresh the neuron count: it is smaller than before when the tail was removed
num_neuron = NPos.shape[1]
# Number of unordered neuron pairs; integer division because np.zeros needs an int shape
num_pairs = num_neuron * (num_neuron - 1) // 2
dists_far = np.zeros((num_time, num_pairs))  # pairwise distances, one row per frame


In [None]:
### Fill in pairwise distance matrix across time
# Index pairs (j, k) with k < j in the order (1,0), (2,0), (2,1), (3,0), ...
# This is the column order of the former nested j/k loops, so the layout of
# dists_far is unchanged.
pair_j, pair_k = np.tril_indices(NPos.shape[1], k=-1)
for i in range(num_time):
    frame = NPos[i]  # (neuron, dimension) positions at frame i
    # Euclidean distance of every pair at once, replacing one
    # np.linalg.norm call per pair per frame
    dists_far[i, :pair_j.size] = np.linalg.norm(frame[pair_j] - frame[pair_k], axis=1)

In [None]:
# Optionally build the "close" representation: exp(-lambda * pairwise distance)
if close:
    l = 2  # lambda, the decay rate applied to the distances
    dists_close = np.exp(-l * dists_far)
    print("Created e^(-lambda* pairwise distance) matrix with size (%d, %d)" % dists_close.shape)

In [None]:
### Decompose the (mean-centred) distance array with SVD
# Pick the representation selected by the `close` switch
dists = dists_close if close else dists_far

d = dists.T  # n x t: one row per neuron pair, one column per frame
# Subtract each row's mean over time
d0 = d - d.mean(axis=1, keepdims=True)
# NOTE(review): the default full_matrices=True makes U square (n x n);
# consider full_matrices=False if memory becomes a problem.
U, s, Vt = np.linalg.svd(d0)

In [None]:
### Keep the leading components and save them as a compressed .npz
top_components = 100

# Zero-pad on both sides so the columns line up with the untrimmed time axis
pad_before = np.zeros((top_components, begin))
pad_after = np.zeros((top_components, original_time - end))
NP_PCs = np.concatenate((pad_before, Vt[:top_components], pad_after), axis=1)

# The output name carries a '_Close' suffix when the exp(-lambda d) variant was used
out_name = 'Worm' + Worm + '_NPosPCA'
if close:
    out_name = out_name + '_Close'

np.savez_compressed(out_name,
                    NP_PCs=NP_PCs,
                    s=s[:top_components],
                    n=top_components,
                    begin=begin,
                    end=end)

In [None]:
# Sanity check: after zero-padding, the second axis of NP_PCs is expected to equal original_time
print(NP_PCs.shape, original_time)

In [None]:
### Plot the cumulative fraction of variance captured by the leading PCs

total_variance = np.sum(s**2)
# Entry k is the fraction captured by the first k components (k = 0 gives 0)
cumulative_fraction = [np.sum(s[:k]**2) / total_variance for k in range(10)]

# Per-component (non-cumulative) alternative:
# plt.scatter(range(10), s[:10]**2 / total_variance)
plt.scatter(range(10), cumulative_fraction)
plt.show()

In [None]:
### Visualize the projection of the data on the PCs

# d0 is (pairs x time) and Vt[0] has one entry per frame, so the product
# holds one value per neuron pair.
first_projection = np.matmul(d0, Vt[0])
plt.plot(first_projection, color='blue')
# plt.plot(d0.T @ U[1])
# plt.plot(d0.T @ U[1000])
plt.show()