In [16]:
import sys
#sys.path.append("/home/tom/python/dPCA/python")
sys.path.append("/Users/nick/Dropbox/dynamic_brain/dPCA/python")
from dPCA import dPCA

import numpy as np
import pandas as pd
import sklearn.decomposition as deco
import scipy.ndimage as img
import os
#import sys

# import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
#plt.style.use('ggplot')

from allensdk.core.brain_observatory_cache import BrainObservatoryCache
# from allensdk.brain_observatory.drifting_gratings import DriftingGratings


Get the drifting-gratings data for each experiment container and run PCA, factor analysis and dPCA on it

In [18]:
# NOTE: extract_data_dg, pc_analysis, factor_analysis and dpc_analysis are
# defined in the cell below (In [17]); that cell has to be executed before
# this one on a fresh kernel.

# Location of the Brain Observatory manifest. The commented-out line is an
# alternative mount point (presumably for another machine).
#manifest_path = os.path.join('/media/tom/Brain2016/','BrainObservatory','manifest.json')
manifest_path = os.path.join('/Volumes/Brain2016/','BrainObservatory','manifest.json')
boc = BrainObservatoryCache(manifest_file=manifest_path)
# Forwarded to extract_data_dg as its `selectcells` flag.
select = False
# Upper bound on the number of components requested from every method.
n_max = 10

# One row per experiment container. The three extra columns start out as None
# and receive, per row, the 'ev' entry returned by each analysis function.
df_expts = pd.DataFrame(boc.get_experiment_containers())
df_expts['fa'] = None
df_expts['pca'] = None
df_expts['dpca'] = None

# `i` is used as a row label with .at, so this relies on df_expts having the
# default 0..n-1 index produced by the DataFrame constructor above.
for i, expt_container_id in enumerate(df_expts['id']):
    _, _, matAvg, ndAvg = extract_data_dg(boc, expt_container_id, selectcells=select)
    # Never ask for more components than matAvg has columns.
    n_components = min([n_max, matAvg.shape[1]])
    result = pc_analysis(matAvg, n_components)
    df_expts.at[i, 'pca'] = result['ev']
    result = factor_analysis(matAvg, n_components)
    df_expts.at[i, 'fa'] = result['ev']
    result = dpc_analysis(ndAvg, matAvg, n_components)
    df_expts.at[i, 'dpca'] = result['ev']
    
# Persist the per-container results next to the notebook.
df_expts.to_json('results_dg_rand_1.json')

In [17]:
def extract_data_dg(boc, expt_container_id, selectcells=False):
    """Collect per-sweep and condition-averaged dF/F for the drifting-gratings stimulus.

    The 'three_session_A' experiment of the given container is loaded, and for
    every row of its 'drifting_gratings' stimulus table a window of
    ``tlength`` (= 30 + 60) samples starting at the row's ``start`` value is cut
    out of each cell's dF/F trace and multiplied by 100.

    Parameters
    ----------
    boc : BrainObservatoryCache
        Cache used to look up experiments, cell specimens and session data.
    expt_container_id : int
        Experiment container whose 'three_session_A' session is analysed.
    selectcells : bool, optional
        If False (default) ``get_dff_traces`` is called with ``None``
        (presumably meaning "all cells" -- confirm against the AllenSDK docs).
        If True, the ids listed in
        ``subset_cells/exp-<session_id>_cell_id_subset.dat`` are filtered to
        those with ``p_dg < 0.05`` and a random sample of the same size is
        drawn from the session's cell specimen ids; the random sample is what
        is actually used.

    Returns
    -------
    all_mean : pandas.DataFrame
        Per (orientation, temporal_frequency) mean of every column, with rows
        where ``temporal_frequency == 0`` removed and a running counter ``i``.
    matAll : numpy.ndarray
        Shape ``(n_sweeps * tlength, N)``: all sweeps of a cell concatenated in
        time, one column per cell.
    matAvg : numpy.ndarray
        Shape ``(n_conditions * tlength, N)``: the same layout built from the
        condition means.
    ndAvg : numpy.ndarray
        Shape ``(N, n_temporal_frequencies, n_orientations, tlength)``.

    Raises
    ------
    IndexError
        If the container has no 'three_session_A' experiment, or (with
        ``selectcells``) a listed cell is missing from the cell specimen table.
    """
    cell_ids_example = None
    
    # Window length per sweep: the names suggest 60 samples of stimulus plus
    # 30 samples of inter-sweep interval -- TODO confirm the units.
    interlength = 30
    sweeplength = 60
    tlength = interlength + sweeplength
    typeStim = 'drifting_gratings'
    typeSession = 'three_session_A'
    
    # First experiment of this container whose session type matches.
    expt_session_frame = pd.DataFrame(boc.get_ophys_experiments(experiment_container_ids=[expt_container_id]))
    session_id = expt_session_frame[expt_session_frame.session_type==typeSession].id.values[0]  
    data_set = boc.get_ophys_experiment_data(ophys_experiment_id = session_id)  
    stim_table = data_set.get_stimulus_table(typeStim)
    
    if selectcells:
        #path = '/media/Storage/Dropbox/dynamic_brain/chaoqing/subset_cells/exp-' + str(session_id) + '_cell_id_subset.dat'
        path = 'subset_cells/exp-' + str(session_id) + '_cell_id_subset.dat'
        good_cell = np.loadtxt(path)
        cell_specimens_df = pd.DataFrame(boc.get_cell_specimens())
        cell_ids_example = [cell for cell in good_cell if cell_specimens_df[cell_specimens_df.cell_specimen_id==cell].p_dg.values[0]<0.05]
        index = np.argsort(data_set.get_cell_specimen_indices(cell_ids_example))
        cell_ids_example = np.array(cell_ids_example)[index]
        # NOTE(review): the ordered selection above is immediately replaced by
        # a size-matched random sample, so only its length is used -- confirm
        # that this control is intended. The sample is drawn from cell specimen
        # *ids* because get_dff_traces takes ids and get_cell_specimen_indices
        # cannot be called without an argument.
        cell_ids_example = np.sort(np.random.choice(data_set.get_cell_specimen_ids(), len(cell_ids_example), replace=False))
        

    time, dff = data_set.get_dff_traces(cell_ids_example)
    N = dff.shape[0]
    # One row per stimulus-table row, one column per cell (named '0'..'N-1');
    # each entry holds that cell's window for that sweep.
    sweep_response = pd.DataFrame(index=stim_table.index.values, columns=np.arange(N).astype(str))
    offsets = np.arange(tlength)
    for i in range(stim_table.shape[0]):
        # Sample indices of this sweep's window, computed once for all cells.
        frames = int(stim_table.at[i,'start']) + offsets
        scaled = 100*dff[:, frames]
        for j in range(N):
            sweep_response.at[i,str(j)] = scaled[j]


    # Responses first, stimulus parameters after: columns 0..N-1 are the cells.
    df_all = pd.concat([sweep_response, stim_table], axis=1)
    matAll = np.column_stack([np.concatenate([df_all.iat[i,j] for i in range(df_all.shape[0])]) 
                           for j in range(N)])

    # Mean over the sweeps of each (orientation, temporal_frequency) group.
    all_mean = df_all.groupby(['orientation','temporal_frequency']).apply(lambda x: np.sum(x, axis=0)/len(x))
    all_mean = all_mean[all_mean.temporal_frequency != 0] #cut static grating 
    all_mean['i'] = range(all_mean.shape[0])
    matAvg = np.column_stack([np.concatenate([all_mean.iat[i,j] for i in range(all_mean.shape[0])]) 
                           for j in range(N)])

    # Stacking order, outermost to innermost: cell, temporal frequency,
    # orientation; the last axis is time within the window.
    df_multi = all_mean.set_index(['orientation','temporal_frequency']) #cell, tf, ori, time
    ndAvg = np.stack([np.stack([np.stack([df_multi.at[(i1,i2),str(j)] 
                                          for i1 in np.unique(df_multi.index.get_level_values(0))],0)
                                for i2 in np.unique(df_multi.index.get_level_values(1))],0)
                      for j in range(N)],0)

    
#     df_all.to_csv('data/'+str(expt_container_id)+'_df_all.csv')
    return all_mean, matAll, matAvg, ndAvg

def factor_analysis(mat, n_components):
    """Fit factor-analysis models with 1..n_components factors and score each.

    ``mat`` is column-centred (on a copy; the input is not modified). For every
    model size ``k`` a fresh ``FactorAnalysis(k)`` is fitted, the data are
    mapped to the latent space with ``transform`` and mapped back by
    multiplying with ``components_``. The score for size ``k`` is
    ``1 - residual sum of squares / total sum of squares``.

    Parameters
    ----------
    mat : numpy.ndarray
        2-D array, observations in rows.
    n_components : int
        Largest number of factors to fit; must be at least 1.

    Returns
    -------
    dict
        ``'ev'``: array of length ``n_components`` with the score per model
        size; ``'obj'``, ``'trans'`` and ``'enc'``: the fitted estimator, its
        latent representation of ``mat`` and its ``components_`` for the
        largest model only.

    Raises
    ------
    ValueError
        If ``n_components`` is smaller than 1 (no model would be fitted, so
        there would be nothing to return).
    """
    if n_components < 1:
        raise ValueError('n_components must be at least 1, got %r' % (n_components,))

    mat = mat - mat.mean(axis=0, keepdims=True)
    totVar = np.sum(np.square(mat))
    expVar = np.zeros(n_components)
    for ni in range(n_components):
        # Models of different size are fitted independently of each other.
        dec = deco.FactorAnalysis(ni+1).fit(mat)
        matReduced = dec.transform(mat)
        matRcons = np.dot(matReduced,dec.components_)
        expVar[ni] = 1 - np.sum(np.square(mat - matRcons))/totVar
    return {'obj':dec, 'ev':expVar, 'trans':matReduced, 'enc':dec.components_}

def pc_analysis(mat, n_components):
    """Run an un-whitened PCA on ``mat`` and report cumulative explained variance.

    Returns a dict with the fitted estimator (``'obj'``), the running sum of
    its ``explained_variance_ratio_`` (``'ev'``), the projection of ``mat``
    onto the components (``'trans'``) and the component matrix (``'enc'``).
    """
    estimator = deco.PCA(n_components, whiten=False)
    fitted = estimator.fit(mat)
    cumulative_ratio = np.cumsum(fitted.explained_variance_ratio_)
    projected = fitted.transform(mat)
    return {
        'obj': fitted,
        'ev': cumulative_ratio,
        'trans': projected,
        'enc': fitted.components_,
    }

def dpc_analysis(mat, matAvg, ncomps):
    """Fit dPCA to ``mat`` and report cumulative explained variance.

    A ``dPCA`` model with labels ``'fot'`` is fitted to ``mat``. The per-
    marginalization matrices in ``dec.P`` and ``dec.D`` are concatenated
    column-wise, every column is scored by how much of the (row-centred,
    flattened) data it reconstructs on its own, and the ``ncomps`` best
    columns are kept.

    Parameters
    ----------
    mat : numpy.ndarray
        N-dimensional data array whose first axis is flattened against all
        remaining axes for the variance computation. It is not modified by the
        centring done here.
    matAvg : numpy.ndarray
        Not used; kept so that existing calls keep working.
    ncomps : int
        Number of components requested per marginalization from dPCA and
        number of columns kept overall.

    Returns
    -------
    dict
        ``'obj'``: the fitted dPCA object; ``'ev'``: list with the cumulative
        fraction of variance reconstructed by the first 1, 2, ... kept
        columns; ``'trans'``: the data projected on the kept columns of ``W``
        (samples in rows); ``'enc'``: the kept columns of ``W`` (built from
        ``dec.D``).
    """
    combinedParams = {'f': ['f', 'ft'] , 'o': ['o', 'ot'], 'fo': ['fo', 'fot']}
    dec = dPCA(labels='fot', n_components=ncomps, join=combinedParams).fit(mat)

    # Walk both dicts with one explicit key list: hstack needs a real sequence
    # (not a dict view), and this guarantees the columns of V and W line up.
    margs = list(dec.P)
    V = np.hstack([dec.P[m] for m in margs])
    W = np.hstack([dec.D[m] for m in margs])

    # flipping axes such that all encoders have more positive values
    toFlip = np.sum(V, axis=0) < 0
    W[:, toFlip] = -W[:, toFlip]
    V[:, toFlip] = -V[:, toFlip]

    # Centre a copy: reshape may return a view, and the caller's array must
    # not be changed.
    X = mat.reshape((mat.shape[0],-1))
    X = X - np.mean(X, axis=1, keepdims=True)
    totalVar = np.sum(np.square(X))

    # Score every column on its own, then keep the ncomps best ones.
    Z = np.dot(W.T,X)
    explVar = [1 - np.sum(np.square(X - np.outer(V[:,i],Z[i,:])))/totalVar for i in range(W.shape[1])]
    order = np.argsort(explVar)[::-1]

    W = W[:,order[:ncomps]]
    V = V[:,order[:ncomps]]
    Z = np.dot(W.T,X)
    cumVar = [1 - np.sum(np.square(X - np.dot(V[:,:i+1],Z[:i+1,:])))/totalVar for i in range(W.shape[1])]
    return {'obj':dec, 'ev':cumVar, 'trans':Z.T, 'enc':W}