In [1]:
import glob

In [2]:
# Per-area directory that is searched for neuron files.
# "pfc" and "v4" are left as empty strings here.
filepaths = dict(
    lip="D:/data/session_struct/lip/neurons/",
    pfc="",
    v4="",
)

In [3]:
# For every area, list the files in its directory whose name ends in "neu.h5".
path_list = {}
for area, path in filepaths.items():
    neu_path = path + "*neu.h5"
    # An empty directory string would turn the pattern into a relative one and
    # silently scan the current working directory, so such areas get no files.
    # glob returns matches in arbitrary order; sorting keeps the neuron order
    # (and everything built from it) reproducible across runs and machines.
    path_list[area] = sorted(glob.glob(neu_path)) if path else []

pipeline start
define functions to get populations processed in different useful ways

* La idea es que reciban como input una lista de diccionarios. Cada diccionario contiene los parámetros necesarios para alinear los spikes a un evento en particular.
* También pueden recibir como input los atributos que se desean eliminar del objeto neurona (se elimina la información, pero no el atributo per se).

In [4]:
import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm
from pathlib import Path
from typing import Dict, List
from ephysvibe.structures.neuron_data import NeuronData
from ephysvibe.structures.population_data import PopulationData
import pandas as pd

In [22]:
# Alignment requests handed to NeuronData.get_neu_align: one dict per event.
params = [
    dict(loc="in", event="sample_on", time_before=500, time_after=1000, select_block=1),
    dict(loc="in", event="test_on_1", time_before=500, time_after=600, select_block=1),
]

In [23]:
def read_and_compute(path, params, rf_loc):
    """Load one neuron file and return the result of aligning it.

    Args:
        path: location of the file, passed to ``NeuronData.from_python_hdf5``.
        params: alignment parameters, forwarded to ``get_neu_align``.
        rf_loc: forwarded to ``get_neu_align`` unchanged.

    Returns:
        Whatever ``get_neu_align`` returns for the loaded neuron. The call
        always asks for the ``sp_samples`` attribute to be deleted.
    """
    loaded = NeuronData.from_python_hdf5(path)
    return loaded.get_neu_align(
        params=params,
        delete_att=['sp_samples'],
        rf_loc=rf_loc,
    )

In [24]:
# Table passed as `rf_loc` to every read_and_compute call
# (presumably receptive-field locations for LIP, going by the file name).
rf_loc = pd.read_csv(
    "//envau_cifs.intlocal.univ-amu.fr/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys/activation_index/rf_loc_df_lip.csv"
)

In [25]:
# Load and align every LIP neuron file, using all available workers.
population = Parallel(n_jobs=-1)(
    delayed(read_and_compute)(neu_file, params, rf_loc)
    for neu_file in tqdm(path_list['lip'])
)

100%|██████████| 530/530 [03:29<00:00,  2.53it/s]


In [34]:
# Wrap the aligned neurons in a PopulationData, write it to 'test.h5', then
# rebind `aa` to the copy read back from that file.
aa = PopulationData(population)
aa.to_python_hdf5('test.h5')
aa = PopulationData.from_python_hdf5('test.h5')

In [38]:
from ephysvibe.trials.spikes import firing_rate
from ephysvibe.trials import  select_trials

In [36]:
def check_number_of_trials(xdict, samples, min_ntr):
    """Tell whether every requested sample has at least ``min_ntr`` trials.

    Args:
        xdict: mapping from sample code to an array whose first axis is
            indexed by trial (only ``.shape[0]`` is read).
        samples: iterable of sample codes that must be present in ``xdict``.
        min_ntr: minimum number of rows required for each sample.

    Returns:
        ``True`` when every code in ``samples`` is present with at least
        ``min_ntr`` rows, ``False`` otherwise. An empty ``samples`` yields
        ``True``.
    """
    for key in samples:
        # A code missing from the mapping means no trials were recorded for
        # it; report "not enough trials" rather than failing with KeyError.
        if key not in xdict or xdict[key].shape[0] < min_ntr:
            return False
    return True

In [39]:
def color_data(fr_samples: Dict, min_ntr: int):
    """Pool the four samples into the two classes 'c1' and 'c5'.

    'c1' stacks samples '11' and '51'; 'c5' stacks samples '15' and '55'
    (rows concatenated along axis 0). Presumably the second digit of the
    sample code is the colour -- confirm against the task definition.

    Returns:
        ``{'c1': ..., 'c5': ...}``, or ``None`` when
        ``check_number_of_trials`` rejects any of the four samples.
    """
    if not check_number_of_trials(fr_samples, ['11', '15', '51', '55'], min_ntr):
        return None
    return {
        'c1': np.concatenate([fr_samples['11'], fr_samples['51']], axis=0),
        'c5': np.concatenate([fr_samples['15'], fr_samples['55']], axis=0),
    }
def orient_data(fr_samples: Dict, min_ntr: int):
    """Pool the four samples into the two classes 'o1' and 'o5'.

    'o1' stacks samples '11' and '15'; 'o5' stacks samples '51' and '55'
    (rows concatenated along axis 0). Presumably the first digit of the
    sample code is the orientation -- confirm against the task definition.

    Returns:
        ``{'o1': ..., 'o5': ...}``, or ``None`` when
        ``check_number_of_trials`` rejects any of the four samples.
    """
    if not check_number_of_trials(fr_samples, ['11', '15', '51', '55'], min_ntr):
        return None
    return {
        'o1': np.concatenate([fr_samples['11'], fr_samples['15']], axis=0),
        'o5': np.concatenate([fr_samples['51'], fr_samples['55']], axis=0),
    }

def sampleid_data(fr_samples: Dict, min_ntr: int):
    """Return ``fr_samples`` itself if samples '11', '15', '51', '55' all pass.

    The mapping is handed back unchanged (not copied); ``None`` is returned
    when ``check_number_of_trials`` rejects any of the four samples.
    """
    required = ['11', '15', '51', '55']
    return fr_samples if check_number_of_trials(fr_samples, required, min_ntr) else None

def neutral_data(fr_samples: Dict, min_ntr: int):
    """Split the data into sample '0' versus all other samples.

    'n' is the array stored under '0'; 'nn' stacks samples '11', '15', '51'
    and '55', in that order, along axis 0.

    Returns:
        ``{'n': ..., 'nn': ...}``, or ``None`` when
        ``check_number_of_trials`` rejects any of the five samples.
    """
    required = ['0', '11', '15', '51', '55']
    if not check_number_of_trials(fr_samples, required, min_ntr):
        return None
    pooled = np.concatenate([fr_samples[code] for code in required[1:]], axis=0)
    return {'n': fr_samples['0'], 'nn': pooled}

In [40]:
def preproc_for_decoding(neu:NeuronData,params:Dict,to_decode:str,min_ntr:int,start_sample:int,end_sample:int,start_test:int,end_test:int,avgwin:int=100):
    """Build the per-class firing-rate arrays of one neuron for decoding.

    The sample-aligned and test-aligned activity are each smoothed with
    ``firing_rate.moving_average(win=avgwin, step=1)``, cropped along axis 1,
    joined along axis 1 and split by sample with
    ``select_trials.get_sp_by_sample``. The result is then regrouped into the
    classes required by ``to_decode``.

    Args:
        neu: neuron object; every attribute is read through ``getattr`` using
            the names stored in ``params``, plus ``neu.sample_id``.
        params: attribute names to read from ``neu``. Keys used:
            'time_before_son', 'time_before_t1on', 'sampleon', 't1on',
            'maskson'.
        to_decode: one of 'color', 'orient', 'sampleid', 'neutral'.
        min_ntr: minimum number of trials required per sample.
        start_sample, end_sample: crop limits for the sample-aligned data,
            expressed as offsets added to the 'time_before_son' attribute.
        start_test, end_test: crop limits for the test-aligned data,
            expressed as offsets added to the 'time_before_t1on' attribute.
        avgwin: window passed to ``firing_rate.moving_average``.

    Returns:
        The dict produced by the matching ``*_data`` helper, or ``None`` when
        that helper finds too few trials.

    Raises:
        ValueError: if ``to_decode`` is not a supported option.
    """
    builders = {
        'color': color_data,
        'orient': orient_data,
        'sampleid': sampleid_data,
        'neutral': neutral_data,
    }
    # Reject unknown options before doing any work; previously they fell
    # through the if/elif chain and ended in an UnboundLocalError.
    if to_decode not in builders:
        raise ValueError(
            f"to_decode must be one of {sorted(builders)}, got {to_decode!r}"
        )

    # Column limits of each window: stored pre-event offset + requested offset.
    time_before_son = getattr(neu, params['time_before_son'])
    time_before_t1on = getattr(neu, params['time_before_t1on'])
    idx_start_sample = time_before_son + start_sample
    idx_end_sample = time_before_son + end_sample
    idx_start_test = time_before_t1on + start_test
    idx_end_test = time_before_t1on + end_test
    sampleon = getattr(neu, params['sampleon'])
    t1on = getattr(neu, params['t1on'])

    # Smooth over the full recording first, then crop the window of interest.
    fr_son = firing_rate.moving_average(
        sampleon, win=avgwin, step=1
    )[:, idx_start_sample:idx_end_sample]
    fr_t1on = firing_rate.moving_average(
        t1on, win=avgwin, step=1
    )[:, idx_start_test:idx_end_test]

    # Sample window followed by test window along axis 1.
    fr = np.concatenate([fr_son, fr_t1on], axis=1)
    fr_samples = select_trials.get_sp_by_sample(
        fr, neu.sample_id[getattr(neu, params['maskson'])]
    )

    return builders[to_decode](fr_samples, min_ntr)

In [54]:
# Decoding configuration
to_decode = 'color'
niterations = 10
ntr_train = 30
ntr_test = 10
min_ntr = 25
# Window limits, added to the stored pre-event offsets in preproc_for_decoding
start_sample = -200
end_sample = 850
start_test = -400
end_test = 450
# Total number of time points: sample window followed by test window
trial_duration = (end_sample - start_sample) + (end_test - start_test)
path = ""
# Names of the NeuronData attributes that preproc_for_decoding reads
params = dict(
    time_before_son='time_before_son_in',
    time_before_t1on='time_before_t1on_in',
    sampleon='sp_son_in',
    t1on='sp_t1on_in',
    maskson='mask_son_in',
)
# Preprocessing: reload the saved population
popu = PopulationData.from_python_hdf5('test.h5')
# Split by condition; neurons with fewer than min_ntr trials in a sample give None
list_data = popu.execute_function(
    preproc_for_decoding,
    params,
    to_decode,
    min_ntr,
    start_sample,
    end_sample,
    start_test,
    end_test,
    avgwin=100,
    ret_df=False,
)
# Keep only the neurons that passed the trial-count check
list_data = [cell_data for cell_data in list_data if cell_data is not None]

100%|██████████| 530/530 [00:31<00:00, 16.81it/s]


In [87]:
def pick_train_test_trials(idx_trials, train_ratio):
    """Shuffle ``idx_trials`` and cut the result into a train and a test part.

    Args:
        idx_trials: sequence of trial indices.
        train_ratio: fraction of the entries that goes to the train part; the
            cut point is ``int(len(idx_trials) * train_ratio)``.

    Returns:
        ``(train, test)``: the leading and the remaining entries of one
        ``np.random.permutation`` of ``idx_trials`` (global NumPy RNG).
    """
    cut = int(len(idx_trials) * train_ratio)
    shuffled = np.random.permutation(idx_trials)
    return shuffled[:cut], shuffled[cut:]

In [90]:
def compute_decoding(model,list_data,trial_duration,ntr_train,ntr_test,topred):
    """Run one iteration of time-by-time (cross-temporal) decoding.

    For every neuron and every class, trials are shuffled and split into
    disjoint train and test pools. ``ntr_train`` and ``ntr_test`` trials are
    then drawn with replacement from the respective pool. ``model`` is fitted
    at each time point and evaluated at every time point.

    Args:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refitted in place at every training time point.
        list_data: one dict per neuron mapping each class in ``topred`` to an
            array indexed as ``[trial, time]`` with ``trial_duration`` columns.
        trial_duration: number of time points.
        ntr_train: trials drawn per class for training (must be > 0).
        ntr_test: trials drawn per class for testing (must be > 0).
        topred: class keys to decode; class ``i`` gets label ``float(i)``.

    Returns:
        Array of shape ``(trial_duration, trial_duration)`` where entry
        ``[t_train, t_test]`` is the NUMBER of correctly predicted test
        trials (divide by ``ntr_test * len(topred)`` to get accuracy).

    Raises:
        ValueError: if ``ntr_train`` or ``ntr_test`` is not positive, or if a
            neuron has too few trials in a class to fill both pools.
    """
    if ntr_train <= 0 or ntr_test <= 0:
        raise ValueError("ntr_train and ntr_test must both be positive")
    # Share of each class's trials reserved for the training pool. The former
    # expression, 1 - ntr_test / ntr_train, is 0 when both sizes are equal
    # (empty training pool) and negative when ntr_test > ntr_train.
    train_fraction = ntr_train / (ntr_train + ntr_test)
    ntopred = len(topred)
    num_cells = len(list_data)
    # Matrices indexed as [time, trial, neuron], one slice per time point
    data_train = np.empty([trial_duration, ntr_train * ntopred, num_cells])
    data_test = np.empty([trial_duration, ntr_test * ntopred, num_cells])
    perf = np.empty([trial_duration, trial_duration])
    # Labels follow the concatenation order used below: all trials of class 0,
    # then all trials of class 1, and so on.
    labels = np.arange(ntopred, dtype=float)
    y_train = np.repeat(labels, ntr_train)
    y_test = np.repeat(labels, ntr_test)

    # Iterate through neurons to randomly pick trials
    for icell, cell in enumerate(list_data):
        trials_train, trials_test = [], []
        for ipred in topred:
            trials = cell[ipred]
            idx_trials = np.arange(len(trials))
            train_pool, test_pool = pick_train_test_trials(idx_trials, train_fraction)
            if len(train_pool) == 0 or len(test_pool) == 0:
                raise ValueError(
                    f"neuron {icell}, class {ipred!r}: {len(trials)} trial(s) "
                    "are not enough to build separate train and test pools"
                )
            # Resample with replacement so every neuron contributes the same
            # number of trials regardless of how many it recorded.
            train = np.random.choice(train_pool, ntr_train, replace=True)
            test = np.random.choice(test_pool, ntr_test, replace=True)
            trials_train.append(trials[train])
            trials_test.append(trials[test])

        # [trial, time] -> [time, trial] to fill this neuron's slice
        data_train[:, :, icell] = np.concatenate(trials_train, axis=0).T
        data_test[:, :, icell] = np.concatenate(trials_test, axis=0).T

    # Train at each time point, test at every time point
    for time_train in range(trial_duration):
        model.fit(data_train[time_train], y_train)
        for time_test in range(trial_duration):
            y_predict = model.predict(data_test[time_test])
            perf[time_train, time_test] = (y_predict == y_test).sum()

    return perf

In [57]:
from sklearn.svm import SVC

In [58]:
# Classes to decode (keys of the per-neuron dicts in list_data)
topred = ['c1', 'c5']
# Linear classifier shared by all iterations
model = SVC(
    kernel="linear",
    C=0.8,
    decision_function_shape="ovr",
    gamma="auto",
    degree=1,
)
# Each iteration draws new random trials and trains/tests at every pair of time points
all_perf = Parallel(n_jobs=20)(
    delayed(compute_decoding)(model, list_data, trial_duration, ntr_train, ntr_test, topred)
    for _iteration in tqdm(range(niterations))
)

100%|██████████| 10/10 [00:00<00:00, 63.88it/s]


TypeError: 'list' object cannot be interpreted as an integer

In [91]:
# Single serial run of one decoding iteration; the cell displays the returned matrix.
compute_decoding(
    model=model,
    list_data=list_data,
    trial_duration=trial_duration,
    ntr_train=ntr_train,
    ntr_test=ntr_test,
    topred=topred,
)

KeyboardInterrupt: 