# part0: imports

In [1]:
import os, sys, pathlib
from pprint import pprint 
from importlib import reload
import logging
from typing import Callable
import warnings
import pandas as pd
import numpy as np
import xarray as xr
from sklearn.decomposition import PCA
import scipy.linalg as linalg

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib
from matplotlib.ticker import MaxNLocator


import pyaldata as pyal

%matplotlib inline

# Resolve the notebook directory *before* entering the try block: the
# `finally` clause needs `nbPath`, and binding it inside `try` would leave it
# undefined (NameError in `finally`, masking the real error) if the lookup failed.
nbPath = pathlib.Path.cwd()
RepoPath = nbPath.parent

try:
    # Temporarily work from the repository root (the notebook's parent folder)
    # while importing the project modules.
    # NOTE(review): presumably `tools` and `params` are only importable with the
    # repo root as working directory — confirm.
    os.chdir(RepoPath)

    from tools import utilityTools as utility
    from tools import dataTools as dt
    import params
    defs = params.random_walk_defs
    
    set_rc =  params.set_rc_params
    root = params.root

finally:
    # Always return to the notebook directory, even if an import failed.
    os.chdir(nbPath)

# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


#### Print the summary of all the datasets

check out the results [here](https://github.com/AtMostafa/notebook/blob/main/2021-cca-project/dataset-summary.md).

In [4]:
# Folder that holds one sub-folder of recordings per animal.
datadir = root / 'random_walk'
_AllAnimalList = ['Chewie', 'Mihili', "MrT"]

# Flat list of every file `utility.find_file` reports for the 'mat' pattern,
# collected animal by animal in the order of `_AllAnimalList`.
_AllAnimalFiles = [
    mat_file
    for animal in _AllAnimalList
    for mat_file in utility.find_file(datadir / animal, 'mat')
]

# Sessions to leave out of the analysis because their targets are incorrect.
incorrect_target_files = [
    "Chewie_RT_FF_2013-10-28.mat",
    "Chewie_RT_VR_2013-12-12.mat",
    "MrT_RT_VR_2013-09-06.mat",
    "MrT_RT_VR_2013-09-04.mat",
]


['/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_VR_2013-12-13.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_VR_2013-10-11.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-12-09.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-12-18.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-12-17.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_VR_2013-12-12.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-10-28.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-12-10.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_VR_2013-10-10.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_CS_2016-10-21.mat', '/home/jcc319/multi_animal_alignment/data/random_walk/Chewie/Chewie_RT_FF_2013-10-29.mat'

In [5]:
# Load a single example session to inspect the raw trial-data layout.
# The path is derived from `datadir` rather than a hard-coded, user-specific
# absolute location, so the cell also runs on machines with a different data root.
example_file = datadir / 'Chewie' / 'Chewie_RT_CS_2016-10-21.mat'
raw_df = pyal.mat2dataframe(str(example_file), shift_idx_fields=True)
raw_df.head()


Unnamed: 0,monkey,date,task,target_center,trial_id,result,bin_size,perturbation,epoch,idx_trial_start,idx_trial_end,idx_go_cue,pos,vel,acc,force,M1_spikes,M1_unit_guide,PMd_spikes,PMd_unit_guide
0,Chewie,10-21-2016,RT,"[[-3.697366714477539, 1.5944132804870605], [1....",2,R,0.01,CS,BL,9,336,"[19, 115, 216, 273]","[[-1.6233934029665136, -29.806833037805866], [...","[[9.502502328629195, -4.945520265504228], [9.3...","[[-14.140230801614209, 21.55262261762753], [-2...","[[-0.7399081463540637, 0.3927892477849265], [-...","[[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,...","[[1, 1], [1, 2], [3, 1], [5, 1], [5, 2], [7, 1...","[[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [1, 3], [4, 1], [4, 2], [5, 1..."
1,Chewie,10-21-2016,RT,"[[7.688087463378906, -1.52504301071167], [-2.4...",3,R,0.01,CS,BL,9,392,"[19, 108, 198, 313]","[[-0.4807024337198271, -31.617166918356368], [...","[[0.5519463229445638, 3.3281165520009637], [1....","[[75.58603471382368, -25.976122087577867], [60...","[[-0.8368291181572973, 0.224127084643247], [-0...","[[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [3, 1], [5, 1], [5, 2], [7, 1...","[[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [1, 3], [4, 1], [4, 2], [5, 1..."
2,Chewie,10-21-2016,RT,"[[3.5938963890075684, -0.6197371482849121], [4...",4,R,0.01,CS,BL,9,354,"[19, 115, 212, 286]","[[0.14076750167513552, -31.49352615706792], [0...","[[8.638733219602209, -0.0003180894720955575], ...","[[-24.050014222513006, 51.04746734448047], [-4...","[[-0.9655852790529066, 0.43527411755384016], [...","[[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [3, 1], [5, 1], [5, 2], [7, 1...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,...","[[1, 1], [1, 2], [1, 3], [4, 1], [4, 2], [5, 1..."
3,Chewie,10-21-2016,RT,"[[-6.889117240905762, -2.445563316345215], [-3...",5,R,0.01,CS,BL,9,368,"[19, 107, 177, 253]","[[0.870940673116614, -31.41725949904925], [0.9...","[[5.21897587142075, -6.338590565622717], [4.49...","[[-71.05251288246204, 24.490942527687388], [-6...","[[-0.892147276176141, 0.4266238142730099], [-0...","[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,...","[[1, 1], [1, 2], [3, 1], [5, 1], [5, 2], [7, 1...","[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [1, 3], [4, 1], [4, 2], [5, 1..."
4,Chewie,10-21-2016,RT,"[[-3.336542844772339, 4.224082946777344], [2.7...",6,R,0.01,CS,BL,9,428,"[19, 153, 238, 339]","[[-0.476625434088497, -30.572590053413172], [-...","[[9.122018466739428, -8.087344962504512], [8.9...","[[-10.639360988263086, 29.877631075622283], [-...","[[-0.7295447313649992, 0.3667958069673153], [-...","[[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,...","[[1, 1], [1, 2], [3, 1], [5, 1], [5, 2], [7, 1...","[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [1, 2], [1, 3], [4, 1], [4, 2], [5, 1..."


In [10]:
# Build one summary row per session: which spike columns it has, how many
# trials and neurons exist in the raw file, and how many remain after
# `defs.prep_general`.
rows = []
for file in _AllAnimalFiles:
    file_path = pathlib.Path(file)
    if file_path.name in incorrect_target_files:
        # Session excluded because its targets are incorrect.
        continue
    raw_df = pyal.mat2dataframe(file, shift_idx_fields=True)
    df = defs.prep_general(raw_df)

    # Spike columns are looked up once per frame instead of once per dict entry.
    raw_spike_cols = [col for col in raw_df.columns if '_spikes' in col]
    prep_spike_cols = [col for col in df.columns if '_spikes' in col]

    # Neuron count = second dimension of the first trial's spike array.
    # `.iloc[0]` selects the first row by position, so it keeps working when
    # preprocessing drops the trial labelled 0 without resetting the index
    # (a label-based `[0]` would raise KeyError in that case).
    raw_counts = [raw_df[col].iloc[0].shape[1] for col in raw_spike_cols]

    rows.append({
        'animal': file_path.parent.name,   # name of the folder containing the file
        'file': file_path.name,
        'areas': raw_spike_cols,
        'trials_all': len(raw_df),
        'trials_left': len(df),
        'neurons_all': raw_counts,
        # One count per '*_spikes' column present after preprocessing.
        # NOTE(review): in the recorded run the two-area session lists three
        # values, presumably M1, PMd and the MCx aggregate — confirm.
        'neurons_left': [df[col].iloc[0].shape[1] for col in prep_spike_cols],
        'MCx_neurons_all': sum(raw_counts),
        'MCx_neurons_left': df['MCx_spikes'].iloc[0].shape[1],
    })

summary = pd.DataFrame(rows)
summary


Unnamed: 0,animal,file,areas,trials_all,trials_left,neurons_all,neurons_left,MCx_neurons_all,MCx_neurons_left
0,Chewie,Chewie_RT_VR_2013-12-13.mat,[M1_spikes],486,157,[67],[54],67,54
1,Chewie,Chewie_RT_VR_2013-10-11.mat,[M1_spikes],509,134,[88],[65],88,65
2,Chewie,Chewie_RT_FF_2013-12-09.mat,[M1_spikes],529,148,[65],[48],65,48
3,Chewie,Chewie_RT_FF_2013-12-18.mat,[M1_spikes],588,154,[62],[53],62,53
4,Chewie,Chewie_RT_FF_2013-12-17.mat,[M1_spikes],634,157,[57],[45],57,45
5,Chewie,Chewie_RT_FF_2013-12-10.mat,[M1_spikes],628,159,[63],[42],63,42
6,Chewie,Chewie_RT_VR_2013-10-10.mat,[M1_spikes],577,147,[74],[31],74,31
7,Chewie,Chewie_RT_CS_2016-10-21.mat,"[M1_spikes, PMd_spikes]",291,288,"[84, 211]","[84, 196, 280]",295,280
8,Chewie,Chewie_RT_FF_2013-10-29.mat,[M1_spikes],592,144,[67],[37],67,37
9,Chewie,Chewie_RT_VR_2013-10-09.mat,[M1_spikes],423,133,[74],[65],74,65


In [11]:
# Per-animal mean and standard error of the mean of the number of MCx
# neurons remaining after preprocessing.
(
    summary
    .groupby('animal')['MCx_neurons_left']
    .agg(['mean', 'sem'])
)

Unnamed: 0_level_0,mean,sem
animal,Unnamed: 1_level_1,Unnamed: 2_level_1
Chewie,72.0,23.370922
Mihili,126.5,9.200543
MrT,49.25,7.993487


Categories I want to separate:

- sessions with 2 areas, similar number of neurons each, as many monkeys as possible!
- 3 sessions for each area (M1, PMd, S1), with the largest number of neurons

In [4]:
# Reduce every entry to the text after its last path separator (the bare
# file name). This rebinds `_AllAnimalFiles` in place: after this cell the
# list no longer carries directory components, so cells that read the parent
# folder name from each path must run before it.
_AllAnimalFiles = [
    full_path.rsplit(os.sep, 1)[-1]
    for full_path in _AllAnimalFiles
]

In [6]:
# Display the current contents of the file list for inspection.
_AllAnimalFiles

[]

In [5]:
# Inspect the 'areas' column of the summary table (the per-session list of
# '*_spikes' column names).
# NOTE(review): the recorded run raised AttributeError here, i.e. `summary`
# had no 'areas' column at that point — presumably it was built from an
# empty file list; confirm.
summary.areas

AttributeError: 'DataFrame' object has no attribute 'areas'

The code above relies on the host PC having exactly the same files as my laptop.  
So I replace the values below for reliability.

In [25]:
# Map area -> animal -> array of session file names that contain that area's
# spike column, taken from the `summary` table.
GoodDataList_RW = {'M1': {}, 'PMd': {}}
#-----------------------------------

for area in ['M1', 'PMd']:
    # Boolean Series aligned with `summary`: True where the session lists
    # '<area>_spikes' among its spike columns. Building a Series (rather than
    # a plain Python list) keeps the `&` below a Series-with-Series operation;
    # recent pandas versions deprecate logical ops between a Series and a
    # dtype-less list. The mask depends only on the area, so it is computed
    # once per area instead of once per animal.
    has_area = (
        summary['areas']
        .apply(lambda cols: area + '_spikes' in cols)
        .astype(bool)
    )
    for animal in ['Chewie', 'Mihili', 'MrT']:
        is_animal = summary['animal'] == animal
        GoodDataList_RW[area][animal] = summary.loc[has_area & is_animal, 'file'].values

GoodDataList_RW

{'M1': {'Chewie': array(['Chewie_RT_VR_2013-12-13.mat', 'Chewie_RT_VR_2013-10-11.mat',
         'Chewie_RT_FF_2013-12-09.mat', 'Chewie_RT_FF_2013-12-18.mat',
         'Chewie_RT_FF_2013-12-17.mat', 'Chewie_RT_VR_2013-12-12.mat',
         'Chewie_RT_FF_2013-10-28.mat', 'Chewie_RT_FF_2013-12-10.mat',
         'Chewie_RT_VR_2013-10-10.mat', 'Chewie_RT_CS_2016-10-21.mat',
         'Chewie_RT_FF_2013-10-29.mat', 'Chewie_RT_VR_2013-10-09.mat'],
        dtype=object),
  'Mihili': array(['Mihili_RT_VR_2014-01-15.mat', 'Mihili_RT_FF_2014-02-14.mat',
         'Mihili_RT_FF_2014-02-24.mat', 'Mihili_RT_FF_2014-02-21.mat',
         'Mihili_RT_VR_2014-01-16.mat', 'Mihili_RT_VR_2014-01-14.mat'],
        dtype=object),
  'MrT': array(['MrT_RT_FF_2013-08-22.mat', 'MrT_RT_VR_2013-09-04.mat',
         'MrT_RT_VR_2013-09-06.mat', 'MrT_RT_VR_2013-09-10.mat',
         'MrT_RT_FF_2013-08-30.mat', 'MrT_RT_FF_2013-08-20.mat'],
        dtype=object)},
 'PMd': {'Chewie': array(['Chewie_RT_CS_2016-10-21.mat'], dt

Adding the aggregate of *M1* and *PMd* as **MCx**

In [26]:
# Merge the M1 and PMd session lists of each animal into one 'MCx' entry.
MCx = {}
for area in ['M1', 'PMd']:
    for animal, files in GoodDataList_RW[area].items():
        MCx.setdefault(animal, []).extend(files)

# Drop sessions listed under both areas. `dict.fromkeys` keeps the first
# occurrence of each file name in its original position, so the result is the
# same on every run; `list(set(...))` would return the names in an order that
# can change between interpreter sessions. De-duplication is done once per
# animal, after all areas have been collected.
MCx = {animal: list(dict.fromkeys(files)) for animal, files in MCx.items()}

GoodDataList_RW['MCx'] = MCx