# Preprocessing pipeline for reliability bootstrapping of block-design simulation campaigns
1. Extract & cut spike trains
2. Compute filtered spike signals & mean-centering
3. Compute firing rates

In [1]:
import sys

# Location of the project's local library folder. The path is relative, so it
# is resolved against the working directory the kernel was started in.
# NOTE(review): the `extract` / `preprocess` modules imported further down are
# presumably found through this entry - confirm.
LIBRARY_DIR = '../../library'
sys.path.append(LIBRARY_DIR)

### 1. Extract & cut spike trains

Extracts (excitatory) spike trains in a format compatible with the "toposample" pipeline

In [2]:
from extract import run_extraction

In [3]:
# Root folder shared by all simulation campaigns handled by this notebook
campaigns_root = '/gpfs/bbp.cscs.ch/data/scratch/proj9/bisimplices/simulations'

# Available block-design campaigns. Select the one to process by changing the
# index used for `campaign_name` below (instead of (un)commenting whole paths).
campaign_names = [
    'BlobStimReliability_O1v5-SONATA_OutConnsRemoved_BlockDesign_Struct',
    'BlobStimReliability_O1v5-SONATA_OutConnsRemoved_BlockDesign_Rnd',
    'BlobStimReliability_O1v5-SONATA_RecipConnsRemoved_BlockDesign_Struct',
    'BlobStimReliability_O1v5-SONATA_RecipConnsRemoved_BlockDesign_Rnd',
]
campaign_name = campaign_names[0]
campaign_path = f'{campaigns_root}/{campaign_name}'

# Extract the spike trains of the selected campaign. The call returns the
# simulation paths together with the working directory that the later
# preprocessing / rate-extraction cells operate on.
sim_paths, working_dir = run_extraction(campaign_path, working_dir_name='working_dir')

# Number of simulations in the campaign; later cells use it to enumerate the
# per-simulation file names.
num_sims = len(sim_paths)

  mask |= (ar1 == a)


INFO: 90 spike files written to "/gpfs/bbp.cscs.ch/data/scratch/proj9/bisimplices/bbp_workflow/e41d95a2-0e94-4e9a-9898-e0ea1497edf0/working_dir"


### 2. Compute filtered spike signals & mean-centering

Runs preprocessing (filtering, mean-centering) of (excitatory) spike trains [PARALLEL IMPLEMENTATION]

In [2]:
from preprocess import run_preprocessing, merge_into_h5_data_store

In [5]:
# Filter and mean-center the raw (excitatory) spike trains of all simulations.
# One raw spike file is expected per simulation index.
spike_file_names = [
    f'raw_spikes_exc_{idx}.npy'
    for idx in range(num_sims)
]

# NOTE(review): `sigma` is presumably the width of the filter kernel and
# `pool_size` the number of parallel workers - confirm against `preprocess`.
run_preprocessing(
    working_dir,
    spike_file_names,
    sigma=10.0,
    mean_centered=True,
    pool_size=10,
)

Finished preprocessing in 852.391s


In [None]:
# Combine the per-simulation preprocessed spike files into a single .h5 data
# store. The layout of the store depends on `split_by_gid`:
#   split_by_gid==False ... Datasets per sim
#   split_by_gid==True  ... Datasets per sim & GID
tmp_file_names = [
    f'spike_signals_exc_{idx}__tmp__.npz'
    for idx in range(num_sims)
]

# The returned value is kept as `h5_file`; the loading example at the end of
# the notebook opens it with h5py.
h5_file = merge_into_h5_data_store(
    working_dir,
    tmp_file_names,
    data_store_name='processed_data_store',
    split_by_gid=False,
)

### 3. Compute firing rates

Runs firing rate extraction based on mean inverse inter-spike interval of (excitatory) spike trains [PARALLEL IMPLEMENTATION]

In [3]:
from preprocess import run_rate_extraction, merge_rates_to_h5_data_store

In [None]:
# Extract firing rates from the raw (excitatory) spike trains of all
# simulations. The file list is rebuilt here so that this cell does not rely
# on the preprocessing cell having been run.
spike_file_names = [
    f'raw_spikes_exc_{idx}.npy'
    for idx in range(num_sims)
]

# NOTE(review): `pool_size` is presumably the number of parallel workers - confirm.
run_rate_extraction(
    working_dir,
    spike_file_names,
    pool_size=30,
)

In [5]:
# Add the per-simulation firing rate files to the .h5 data store.
tmp_file_names = [
    f'firing_rates_exc_{idx}__tmp__.npz'
    for idx in range(num_sims)
]

# Same data store name as used for the spike signals.
# NOTE(review): `do_overwrite=False` presumably protects data already present
# in the store - confirm against `preprocess`.
h5_file = merge_rates_to_h5_data_store(
    working_dir,
    tmp_file_names,
    data_store_name='processed_data_store',
    do_overwrite=False,
)

100%|██████████| 90/90 [00:01<00:00, 46.44it/s]


INFO: 90 files merged and added to "/gpfs/bbp.cscs.ch/data/scratch/proj9/bisimplices/bbp_workflow/6852a83f-3e7c-4705-b6cd-fe2a34bbfba2/working_dir/processed_data_store.h5"


__HOW TO LOAD PROCESSED SPIKE SIGNALS, META-INFO, AND RATES FROM .H5 DATA STORE:__
~~~
import h5py
import numpy as np

h5_store = h5py.File(h5_file, 'r')
print(f'Groups/Datasets: {list(h5_store.keys())}')
t_bins = np.array(h5_store['t_bins'])
gids = np.array(h5_store['gids'])
firing_rates = np.array(h5_store['firing_rates'])
mean_centered = np.array(h5_store['mean_centered']).tolist()
sigma = np.array(h5_store['sigma']).tolist()
print(f'Spike signals per sim: {list(h5_store["spike_signals_exc"].keys())}')
if split_by_gid == True:
    print(f'Spike signals within sim <SIM_IDX>: {list(h5_store["spike_signals_exc"]["sim_<SIM_IDX>"].keys())}')
    spike_signal = np.array(h5_store['spike_signals_exc'][f'sim_{<SIM_IDX>}'][f'gid_{<GID>}'])
else:
    spike_signals = np.array(h5_store['spike_signals_exc'][f'sim_{<SIM_IDX>}'])
h5_store.close()
~~~