In [1]:
from ephysvibe.structures.population_data import PopulationData
from ephysvibe.structures.neuron_data import NeuronData
from ephysvibe.structures.results import Results
from ephysvibe.trials.spikes import firing_rate
from ephysvibe.trials import select_trials
from ephysvibe.stats import smetrics
# from preproc_tools import get_fr_by_sample
import numpy as np
import pandas as pd
import compute_population_distance
from joblib import Parallel, delayed
from tqdm import tqdm

### Start preprocessing

In [2]:
# ---- Recording / population selection ----
area = "lip"
rf_loc = "in"
# Allowed values: "in" / "out".
# NOTE(review): neither `rf_loc` nor `inout` is read by the cells below; the
# get_fr_by_sample call hard-codes the "*_in" attribute names. Confirm which
# of the two is supposed to drive that suffix.
inout = "out"
select_block = 1
select_n_neu = 100  # forwarded to compute_population_distance.get_distance

# ---- Neuron / trial inclusion criteria ----
n_test = 1
min_trials = 25
min_sp_sec = 1  # forwarded as `n_sp_sec` to get_fr_by_sample
nonmatch = True  # if True: includes nonmatch trials

# ---- Firing-rate processing ----
avgwin = 100  # forwarded as the moving-average window (`win`)
norm, zscore = False, True

# ---- Sample-aligned window ----
time_before_sample = 500
start_sample, end_sample = -200, 450 + 400
time_after_sample = end_sample + 200

# ---- Test-aligned window ----
time_before_test = 500
start_test, end_test = -400, n_test * 450 + 200
time_after_test = end_test + 200

# ---- Storage dtypes ----
dtype_sp, dtype_mask = np.int8, bool

In [3]:
# Single root of the OpenEphys data tree. Every path below is derived from it,
# so retargeting the notebook to another mount point is a one-line change.
OPENEPHYS_ROOT = "/envau/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys"
# Areas for which per-area files exist; dict insertion order follows this tuple.
AREAS = ("lip", "pfc", "v4")

# Per-area directories under new_structure/session_struct/<area>/neurons/.
filepaths = {
    area_name: f"{OPENEPHYS_ROOT}/new_structure/session_struct/{area_name}/neurons/"
    for area_name in AREAS
}
outputpath = f"{OPENEPHYS_ROOT}/population_lat/data/neurons_no_selectivity_inout_out/"
selectivity_path = f"{OPENEPHYS_ROOT}/selectivity/all_trials/2024_09_13_11_38_12"
# Per-area CSVs; their "nid" column is later used to restrict which neurons are processed.
nidpath = {
    area_name: f"{selectivity_path}/{area_name}_no_neutral_inout_selectivity.csv"
    for area_name in AREAS
}

# Intentionally empty: the previously used preprocessed files are kept for reference.
allspath = {
    # "lip": "/envau/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys/population_lat/data/all_neurons_corrected_in/2024-08-21_16-50-55/lip_preprocdata.h5",
    # "pfc": "/envau/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys/population_lat/data/all_neurons_corrected_in/2024-08-21_16-50-55/pfc_preprocdata.h5",
    # "v4": "/envau/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys/population_lat/data/all_neurons_corrected_in/2024-08-21_16-50-55/v4_preprocdata.h5",
}

# Per-area CSV files under activation_index/.
rf_loc_path = {
    area_name: f"{OPENEPHYS_ROOT}/activation_index/rf_loc_df_{area_name}.csv"
    for area_name in AREAS
}

In [4]:
# Load the population file of the configured area (area name is lower-cased
# before being inserted into the path).
population_file = (
    "/envau/work/invibe/USERS/IBOS/data/Riesling/TSCM/OpenEphys/population/"
    f"{area.lower()}/2024_08_28_12_23_36/population.h5"
)
popu = PopulationData.from_python_hdf5(population_file)

In [5]:
def get_fr_by_sample(
    neu: "NeuronData",
    time_before_son: str,
    time_before_t1on: str,
    sp_son: str,
    sp_t1on: str,
    mask_son: str,
    start_sample: int,
    end_sample: int,
    start_test: int,
    end_test: int,
    n_test: int,
    min_trials: int,
    min_neu: bool = False,
    nonmatch: bool = True,
    avgwin: int = 50,
    n_sp_sec: float = 5,
    norm: bool = False,
    zscore: bool = False,
    include_nid=None,
    min_total_trials: int = 20,
):
    """Build the smoothed, sample-grouped activity of one neuron.

    The sample-aligned and test-aligned activity are smoothed with a moving
    average, cropped to the requested windows, concatenated along the time
    axis, optionally normalised / z-scored and finally grouped by sample id.

    Parameters
    ----------
    neu
        Neuron object. It must expose ``get_neuron_id()``, ``sample_id``,
        ``test_stimuli`` and the attributes named by the five string
        parameters below.
    time_before_son, time_before_t1on : str
        Names of the ``neu`` attributes holding the offset added to
        ``start_*`` / ``end_*`` to obtain column indices in the sample- and
        test-aligned arrays.
    sp_son, sp_t1on : str
        Names of the ``neu`` attributes holding the sample-aligned and
        test-aligned activity (trials x time).
    mask_son : str
        Name of the ``neu`` attribute used to index ``neu.sample_id`` and
        ``neu.test_stimuli``.
    start_sample, end_sample, start_test, end_test : int
        Window limits relative to the respective alignment offset.
    n_test : int
        1-based index of the test presentation used for trial selection.
    min_trials : int
        Minimum number of trials required per checked sample id.
    min_neu : bool
        If True only sample id 0 is checked against ``min_trials``; otherwise
        every id in ``[0, 11, 15, 55, 51]`` is checked.
    nonmatch : bool
        If True keep every trial with at least ``n_test`` non-NaN test
        stimuli; if False keep only trials whose ``n_test``-th test stimulus
        equals the sample id. Trials with sample id 0 are kept in both modes.
    avgwin : int
        Window passed to ``firing_rate.moving_average`` (``step`` is 1).
    n_sp_sec : float
        The neuron is rejected unless ``nanmean(activity) * 1000`` exceeds
        this value (presumably spikes/s with 1 ms bins -- TODO confirm).
    norm : bool
        If truthy, divide the activity by its global maximum.
    zscore : bool
        If truthy, z-score each time bin across trials (``ddof=1``); bins
        with zero standard deviation are left un-scaled.
    include_nid : container or None
        If given, neurons whose id is not contained in it are skipped.
    min_total_trials : int
        Minimum number of selected trials (all samples pooled) required
        before any firing rate is computed. Defaults to 20.

    Returns
    -------
    The value returned by ``select_trials.get_sp_by_sample`` (the notebook
    treats it as a dict -- TODO confirm), or None when the neuron is
    rejected by any of the criteria above.
    """
    if include_nid is not None and neu.get_neuron_id() not in include_nid:
        return None

    son_offset = getattr(neu, time_before_son)
    t1on_offset = getattr(neu, time_before_t1on)
    idx_start_sample = int(son_offset + start_sample)
    idx_end_sample = int(son_offset + end_sample)
    idx_start_test = int(t1on_offset + start_test)
    idx_end_test = int(t1on_offset + end_test)
    sampleon = getattr(neu, sp_son)
    t1on = getattr(neu, sp_t1on)
    # Kept under a separate name so the `mask_son` parameter is not rebound
    # from an attribute name to an array.
    trial_mask = getattr(neu, mask_son)

    # Trial metadata restricted by the mask (indexed once, reused below).
    sample_id_masked = neu.sample_id[trial_mask]
    test_stimuli = neu.test_stimuli[trial_mask]

    # Trials with sample id 0 are always retained.
    mask_neu = sample_id_masked == 0
    if nonmatch:
        # Keep trials that have at least `n_test` non-NaN test stimuli.
        n_presented = np.sum(~np.isnan(test_stimuli), axis=1)
        mask_selected = np.logical_or(n_presented > (n_test - 1), mask_neu)
    else:
        # Keep trials whose `n_test`-th test stimulus equals the sample.
        mask_match = test_stimuli[:, n_test - 1] == sample_id_masked
        mask_selected = np.logical_or(mask_match, mask_neu)
    if np.sum(mask_selected) < min_total_trials:
        return None

    # Smooth along time, then crop each alignment to its window.
    avg_sample_on = firing_rate.moving_average(
        sampleon[mask_selected], win=avgwin, step=1
    )[:, idx_start_sample:idx_end_sample]
    avg_test1_on = firing_rate.moving_average(
        t1on[mask_selected], win=avgwin, step=1
    )[:, idx_start_test:idx_end_test]
    # Concatenate sample- and test-aligned data along the time axis.
    sp = np.concatenate((avg_sample_on, avg_test1_on), axis=1)

    # Reject low-activity neurons. Written as `not (a > b)` so that a NaN mean
    # also leads to rejection.
    if not (np.nanmean(sp) * 1000 > n_sp_sec):
        return None

    # Require enough trials for every checked sample id.
    sample_id = sample_id_masked[mask_selected]
    samples = [0, 11, 15, 55, 51]
    required_samples = [0] if min_neu else samples
    for s_id in required_samples:
        if np.count_nonzero(sample_id == s_id) < min_trials:
            return None

    if norm:
        sp = sp / np.max(sp)
    if zscore:
        sp_std = np.std(sp, ddof=1, axis=0)
        # Avoid division by zero for constant time bins.
        sp_std = np.where(sp_std == 0, 1, sp_std)
        sp = (sp - np.mean(sp, axis=0).reshape(1, -1)) / sp_std.reshape(1, -1)

    # Group trials by sample; a None result is propagated unchanged.
    return select_trials.get_sp_by_sample(sp, sample_id, samples=samples)


In [6]:
# Optional whitelist of neuron ids, read from the "nid" column of the
# per-area CSV when `nidpath` is non-empty.
include_nid = None
if nidpath:
    df_sel = pd.read_csv(nidpath[area])
    include_nid = df_sel["nid"].values

# Names of the NeuronData attributes to read.
# NOTE(review): the "_in" suffix is hard-coded here while the config cell
# defines both rf_loc='in' and inout="out" -- confirm which one should apply.
fr_attr_names = dict(
    time_before_son="time_before_son_in",
    time_before_t1on="time_before_t1on_in",
    sp_son="sp_son_in",
    sp_t1on="sp_t1on_in",
    mask_son="mask_son_in",
)
# Windows, inclusion criteria and processing options taken from the config cell.
fr_options = dict(
    start_sample=start_sample,
    end_sample=end_sample,
    start_test=start_test,
    end_test=end_test,
    n_test=n_test,
    min_trials=min_trials,
    min_neu=False,
    nonmatch=nonmatch,
    avgwin=avgwin,
    n_sp_sec=min_sp_sec,
    norm=norm,
    zscore=zscore,
    include_nid=include_nid,
)
all_fr_samples = popu.execute_function(
    get_fr_by_sample,
    **fr_attr_names,
    **fr_options,
    n_jobs=-1,
    ret_df=False,
)

100%|██████████| 530/530 [00:05<00:00, 104.38it/s]


In [7]:
# Seeded generator handed to compute_population_distance.get_distance.
seed = 1997
rng = np.random.default_rng(seed)

In [28]:
# get_fr_by_sample returns None for rejected neurons; keep only dict results.
fr_dicts_only = [fr for fr in all_fr_samples if isinstance(fr, dict)]

print("start iterations")
n_iterations = 3
# One get_distance call per iteration; all calls share `rng`, so successive
# iterations consume successive draws from the same generator.
distance_data = [
    compute_population_distance.get_distance(
        fr_dicts_only,
        rng=rng,
        min_trials=min_trials,
        select_n_neu=select_n_neu,
    )
    for _ in tqdm(range(n_iterations))
]

# Stack the per-iteration arrays into (n_iterations, ...) float32 arrays.
all_dist_n_nn = np.array(
    [iteration["dist_n_nn"] for iteration in distance_data], dtype=np.float32
)
all_dist_fake_n_nn = np.array(
    [iteration["dist_fake_n_nn"] for iteration in distance_data], dtype=np.float32
)
res = {
    "dist_n_nn": all_dist_n_nn,
    "dist_fake_n_nn": all_dist_fake_n_nn,
    # Taken from the last iteration, as before.
    "n_neurons": distance_data[-1]["n_neurons"],
}

start iterations


100%|██████████| 3/3 [00:00<00:00,  5.98it/s]


In [29]:
# Wrap the distance dict in a Results container.
# NOTE(review): this rebinds `res` from the dict to the Results object, so
# re-running this cell alone would pass the Results object as `distance`.
# NOTE(review): "path" looks like a placeholder for the second argument -- confirm.
res = Results("population_distance.py", "path", distance=res)

In [30]:
# Display the `distance` payload that was passed to Results(...).
res.distance

{'dist_n_nn': array([[3.05688574, 3.05538381, 3.0133116 , ..., 3.97156234, 3.94105815,
         0.        ],
        [3.08522708, 3.06176452, 3.04466589, ..., 3.42104746, 3.32144957,
         0.        ],
        [2.77379562, 2.79691084, 2.77716203, ..., 3.92940362, 3.93720955,
         0.        ]]),
 'dist_fake_n_nn': array([[2.36878985, 2.34233951, 2.30079229, ..., 2.75387935, 2.76445617,
         0.        ],
        [2.80951245, 2.78525248, 2.79086419, ..., 2.6543645 , 2.68030157,
         0.        ],
        [2.88773587, 2.93637232, 2.90210092, ..., 2.82692642, 2.84770021,
         0.        ]]),
 'n_neurons': 159}

In [16]:
# Save the Results object to an HDF5 file.
# NOTE(review): the file name is relative (presumably resolved against the
# kernel's working directory) and `outputpath` from the paths cell is not
# used here -- confirm the intended destination.
res.to_python_hdf5("population_distance.h5")

In [12]:
# Inspect the raw per-iteration results (one dict per get_distance call).
distance_data

[{'dist_n_nn': array([2.80766575, 2.84264495, 2.82615031, ..., 3.61830623, 3.55321372,
         0.        ]),
  'dist_fake_n_nn': array([2.82238942, 2.80456765, 2.76073598, ..., 3.16693348, 3.16531354,
         0.        ]),
  'n_neurons': 159},
 {'dist_n_nn': array([3.06786346, 3.10201721, 3.14300502, ..., 3.62410808, 3.62080308,
         0.        ]),
  'dist_fake_n_nn': array([2.84460701, 2.80514465, 2.81603807, ..., 2.66797091, 2.69807412,
         0.        ]),
  'n_neurons': 159},
 {'dist_n_nn': array([2.28650596, 2.31684979, 2.36854456, ..., 3.76092063, 3.75352271,
         0.        ]),
  'dist_fake_n_nn': array([2.64899118, 2.71516816, 2.69240093, ..., 2.97217454, 2.97899085,
         0.        ]),
  'n_neurons': 159}]