# part0: imports

In [9]:
import os, sys, pathlib
from pprint import pprint 
from importlib import reload
import logging
from typing import Callable
import warnings
import pandas as pd
import numpy as np
import xarray as xr
from sklearn.decomposition import PCA
import scipy.linalg as linalg

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib
from matplotlib.ticker import MaxNLocator


import pyaldata as pyal

%matplotlib inline

try:
    nbPath = pathlib.Path.cwd()
    RepoPath = nbPath.parent
    os.chdir(RepoPath)

    from tools import utilityTools as utility
    from tools import dataTools as dt
    import params
    defs = params.random_walk_defs
    
    set_rc =  params.set_rc_params
    root = params.root

finally:
    os.chdir(nbPath)

warnings.filterwarnings('ignore')

#### Print the summary of all the datassets

check out the results [here](https://github.com/AtMostafa/notebook/blob/main/2021-cca-project/dataset-summary.md).

In [10]:
datadir = root/'random_walk'
_AllAnimalList = ['Chewie', 'Mihili', "MrT"]

_AllAnimalFiles=[]
for animal in _AllAnimalList:
    _AllAnimalFiles.extend(utility.find_file(datadir / animal, 'mat'))
# print(_AllAnimalFiles)


In [11]:
rows = []
for file in _AllAnimalFiles:
    raw_df = pyal.mat2dataframe(file, shift_idx_fields=True)
    df = defs.prep_general(raw_df)

    dic = {
        'animal': file.split(os.sep)[-2],
        'file': file.split(os.sep)[-1],
        'areas': [x for x in list(raw_df.columns) if '_spikes' in x],
        'trials_all': len(raw_df),
        'trials_left': len(df),
        'neurons_all': [raw_df[x][0].shape[0] for x in list(raw_df.columns) if '_spikes' in x],
        'neurons_left': [df[x][0].shape[0] for x in list(df.columns) if '_spikes' in x]
    }
    rows.append(dic)

summary = pd.DataFrame(rows)
summary


Unnamed: 0,animal,file,areas,trials_all,trials_left,neurons_all,neurons_left
0,Chewie,Chewie_RT_VR_2013-12-13.mat,[M1_spikes],486,157,[366],[122]
1,Chewie,Chewie_RT_VR_2013-10-11.mat,[M1_spikes],509,134,[534],[178]
2,Chewie,Chewie_RT_FF_2013-12-09.mat,[M1_spikes],529,148,[316],[151]
3,Chewie,Chewie_RT_FF_2013-12-18.mat,[M1_spikes],588,154,[297],[99]
4,Chewie,Chewie_RT_FF_2013-12-17.mat,[M1_spikes],634,157,[595],[198]
5,Chewie,Chewie_RT_VR_2013-12-12.mat,[M1_spikes],553,164,[345],[115]
6,Chewie,Chewie_RT_FF_2013-10-28.mat,[M1_spikes],530,135,[387],[129]
7,Chewie,Chewie_RT_FF_2013-12-10.mat,[M1_spikes],628,159,[319],[106]
8,Chewie,Chewie_RT_VR_2013-10-10.mat,[M1_spikes],577,147,[334],[111]
9,Chewie,Chewie_RT_CS_2016-10-21.mat,"[M1_spikes, PMd_spikes]",291,288,"[347, 347]","[115, 115, 115]"


In [32]:
raw_df = pyal.mat2dataframe(_AllAnimalFiles[0], shift_idx_fields=True)
raw_df.head()

Unnamed: 0,monkey,date,task,target_center,trial_id,result,bin_size,perturbation,perturbation_info,epoch,idx_trial_start,idx_go_cue,idx_trial_end,pos,vel,acc,force,M1_spikes,M1_unit_guide
0,Chewie,12-13-2013,RT,"[[6.837241172790527, 8.959444046020508], [-1.1...",1,R,0.01,VR,0.52,BL,9,"[19, 102, 202, 275]",355,"[[5.442727287317725, -34.53324075887817], [5.4...","[[0.5936249226750506, 2.3695806261322594], [0....","[[-45.55166384606839, 3.155726126529143], [-40...","[[-0.419910379339606, -0.20573448234655436], [...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [3, 1..."
1,Chewie,12-13-2013,RT,"[[-1.099478840827942, 4.949634552001953], [3.2...",2,R,0.01,VR,0.52,BL,9,"[19, 94, 193, 279]",329,"[[7.150144087244177, -30.07332136452093], [7.1...","[[-2.1868142899825243, -6.277170989189995], [-...","[[-19.056730327350383, 2.317259234457083], [-2...","[[-0.240245946959191, -0.5210908728563152], [-...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [3, 1..."
2,Chewie,12-13-2013,RT,"[[2.4766507148742676, 3.414874315261841], [7.3...",3,R,0.01,VR,0.52,BL,9,"[19, 105, 190, 285]",345,"[[10.62163852196618, -27.132327682368725], [10...","[[-2.0678070329226634, -11.473831692165849], [...","[[-28.73019529611891, -6.314640473534496], [-4...","[[-0.2036002095909978, -0.6750728584776086], [...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [3, 1..."
3,Chewie,12-13-2013,RT,"[[0.0, 0.04888395592570305], [0.0, 4.018441200...",5,R,0.01,VR,0.52,BL,9,"[19, 95, 174, 248]",309,"[[-2.9030662764410575, -35.96134310799778], [-...","[[3.9853114746731992, 6.335575658141003], [4.2...","[[31.62734996546982, -32.1547799005031], [22.3...","[[-0.5048118881513025, -0.1718568604114036], [...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [3, 1..."
4,Chewie,12-13-2013,RT,"[[3.652657985687256, 1.049389362335205], [-6.7...",7,R,0.01,VR,0.52,BL,9,"[19, 116, 182, 247]",295,"[[2.9404847628016206, -32.38023284736009], [2....","[[-1.4271695596564575, 0.6422790996897157], [-...","[[-44.925787834115035, -26.214239352377636], [...","[[-0.3546302390789349, -0.023329020534123612],...","[[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [3, 1..."


Categories I want to separate:

- sessions with 2 areas, similar number of neurons each, as many monkeys as possible!
- 3 sessions for each area (M1, PMd, S1), most number of neurons

In [12]:
_AllAnimalFiles= [path.split(os.sep)[-1] for path in _AllAnimalFiles]

In [21]:
summary.areas


0                 [M1_spikes]
1                 [M1_spikes]
2                 [M1_spikes]
3                 [M1_spikes]
4                 [M1_spikes]
5                 [M1_spikes]
6                 [M1_spikes]
7                 [M1_spikes]
8                 [M1_spikes]
9     [M1_spikes, PMd_spikes]
10                [M1_spikes]
11                [M1_spikes]
12    [M1_spikes, PMd_spikes]
13    [M1_spikes, PMd_spikes]
14    [M1_spikes, PMd_spikes]
15    [M1_spikes, PMd_spikes]
16    [M1_spikes, PMd_spikes]
17    [M1_spikes, PMd_spikes]
18    [M1_spikes, PMd_spikes]
19    [M1_spikes, PMd_spikes]
20    [M1_spikes, PMd_spikes]
21    [M1_spikes, PMd_spikes]
22    [M1_spikes, PMd_spikes]
23    [M1_spikes, PMd_spikes]
Name: areas, dtype: object

The code above relies on the host PC having the exact fiels as my laptop.  
So I replace the values below for reliability.

In [25]:
GoodDataList = {'M1':{}, 'PMd':{}}
#-----------------------------------

for area in ['M1', 'PMd']:
    for animal in ['Chewie', 'Mihili', 'MrT']:
        area_idx = [area+'_spikes' in x for x in summary.areas]
        files = summary[area_idx & (summary.animal == animal)].file.values
        GoodDataList[area][animal] = files

GoodDataList

{'M1': {'Chewie': array(['Chewie_RT_VR_2013-12-13.mat', 'Chewie_RT_VR_2013-10-11.mat',
         'Chewie_RT_FF_2013-12-09.mat', 'Chewie_RT_FF_2013-12-18.mat',
         'Chewie_RT_FF_2013-12-17.mat', 'Chewie_RT_VR_2013-12-12.mat',
         'Chewie_RT_FF_2013-10-28.mat', 'Chewie_RT_FF_2013-12-10.mat',
         'Chewie_RT_VR_2013-10-10.mat', 'Chewie_RT_CS_2016-10-21.mat',
         'Chewie_RT_FF_2013-10-29.mat', 'Chewie_RT_VR_2013-10-09.mat'],
        dtype=object),
  'Mihili': array(['Mihili_RT_VR_2014-01-15.mat', 'Mihili_RT_FF_2014-02-14.mat',
         'Mihili_RT_FF_2014-02-24.mat', 'Mihili_RT_FF_2014-02-21.mat',
         'Mihili_RT_VR_2014-01-16.mat', 'Mihili_RT_VR_2014-01-14.mat'],
        dtype=object),
  'MrT': array(['MrT_RT_FF_2013-08-22.mat', 'MrT_RT_VR_2013-09-04.mat',
         'MrT_RT_VR_2013-09-06.mat', 'MrT_RT_VR_2013-09-10.mat',
         'MrT_RT_FF_2013-08-30.mat', 'MrT_RT_FF_2013-08-20.mat'],
        dtype=object)},
 'PMd': {'Chewie': array(['Chewie_RT_CS_2016-10-21.mat'], dt

Adding the aggregate of *M1* and *PMd* as **MCx**

In [26]:
MCx = {}
for area in ['M1', 'PMd']:
    for animal in GoodDataList[area]:
        if animal not in MCx:
            MCx[animal] = []
        MCx[animal].extend(GoodDataList[area][animal])
        MCx[animal] = list(set(MCx[animal]))

GoodDataList['MCx'] = MCx

In [9]:
# SingleSessionEx = {'Chewie2':['Chewie_CO_CS_2015-03-12.mat'],
#                    'Mihili':['Mihili_CO_FF_2014-02-17.mat'],
#                    'Jaco':['Jaco_CO_CS_2016-02-17.mat']
#                    }