# Preparation of SSCx analysis with topological sampling pipeline
## ℹ️ Andras's plastic SSCx circuit [proj96]
[https://github.com/BlueBrain/topological_sampling](https://github.com/BlueBrain/topological_sampling)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import pandas as pd
import scipy.sparse as sps
from bluepy import Simulation, Cell, Synapse

## Data preparation
- Neuron info (hex0 target): __neuron_info.pickle__
- Spikes (hex0 target; EXC only): __raw_spikes.npy__
- Stimulus train: __stim_stream.npy__
- Adjacency matrix (hex0 target; re-indexed): __connectivity.npz__

In [2]:
# --- Simulation selection and output folder ---
camp_id = '4073e95f-abb1-4b86-8c38-13cf9f00ce0b'
sim_id = '000'
sim_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj96/home/ecker/simulations/{camp_id}/{sim_id}'
# The stimulus stream file is located in the parent folder of the simulation folder
stim_config_file = os.path.join(os.path.split(sim_path)[0], 'input_spikes', 'stimulus_stream__start2000__end61501__rate2__seed12.txt')
save_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_ABCD_{sim_id}'
os.makedirs(save_path, exist_ok=True) # Single call instead of check-then-create

# --- Simulation/circuit access and hex0 target cells ---
blue_config = os.path.join(sim_path, 'BlueConfig')
sim = Simulation(blue_config)
c = sim.circuit
hex0 = c.cells.ids('hex0')

# --- Stimulus table ---
# Spike replay file as configured in the simulation config, joined with the simulation folder
spike_file = os.path.abspath(os.path.join(sim_path, sim.config['Stimulus_spikeReplay']['SpikeFile']))
assert os.path.exists(stim_config_file), 'ERROR: Stimulus config file not found!'
# Space-separated file with two columns: onset (used as index) and pattern
stim_tab = pd.read_table(stim_config_file, sep=' ', names=['onset', 'pattern'], index_col='onset')
# Filter initial transients; explicit copy so that the new column below is not written into a slice
stim_tab = stim_tab[stim_tab.index >= 2000].copy()
# Map patterns to indices: position of each pattern within the sorted list of unique patterns
pattern_list, pattern_ids = np.unique(stim_tab['pattern'].to_numpy(), return_inverse=True)
stim_tab['pid'] = pattern_ids
stim_cfg = {'stim_train': stim_tab['pid'].tolist(), 'time_windows': stim_tab.index.tolist()}
if len(stim_cfg['time_windows']) == len(stim_cfg['stim_train']):
    # Add end of last time bin, assuming it has the same length as the second-to-last one
    stim_cfg['time_windows'].append(stim_cfg['time_windows'][-1] + np.diff(stim_cfg['time_windows'][-2:])[0])


Have keys ({'cao_CR_GluSynapse', 'SYNAPSES__minis_single_vesicle'}) that are not defined for section: Conditions


In [23]:
# Neuron info
# Table with position (x/y/z), layer, m-type, and synapse class of all hex0 cells
neuron_info = c.cells.get(hex0, properties=[Cell.X, Cell.Y, Cell.Z, Cell.LAYER, Cell.MTYPE, Cell.SYNAPSE_CLASS])
# Written twice: as .pickle and as .h5 (table format)
neuron_info.to_pickle(os.path.join(save_path, 'neuron_info.pickle'))
# 'key' passed by keyword: positional use is deprecated in recent pandas versions
neuron_info.to_hdf(os.path.join(save_path, 'neuron_info.h5'), key='neuron_info', format='table')



In [24]:
# Excitatory spikes
# Select the index entries of all cells with synapse class 'EXC'
is_exc = neuron_info['synapse_class'] == 'EXC'
hex0_exc = neuron_info.index[is_exc.to_numpy()]
# Spikes of the selected cells; stored as 2-column array: [spike series index, spike series values]
# NOTE(review): presumably index = spike time and values = GID -- confirm against bluepy spike report
exc_spikes = sim.spikes.get(hex0_exc)
raw_spikes = np.array([exc_spikes.index.to_numpy(), exc_spikes.to_numpy()]).T
spikes_file = os.path.join(save_path, 'raw_spikes.npy')
np.save(spikes_file, raw_spikes)

  mask |= (ar1 == a)


In [25]:
# Stim train
# Sequence of pattern indices (one per stimulus presentation), written as .npy array
stim_stream = np.asarray(stim_cfg['stim_train'])
stim_stream_file = os.path.join(save_path, 'stim_stream.npy')
np.save(stim_stream_file, stim_stream)

In [12]:
# Adjacency matrix
# All connections with both pre- and post-synaptic cell within hex0 (one row per connection)
conns = np.array(list(c.connectome.iter_connections(pre=hex0, post=hex0)))
n_neurons = neuron_info.shape[0]
# Sparse 1 x (max. id + 1) lookup table: entry at column <neuron_info index value> holds the
# row position of that cell within neuron_info
reindex_table = sps.csr_matrix((np.arange(n_neurons, dtype=int), (np.zeros(n_neurons, dtype=int), neuron_info.index.to_numpy())))
# Re-index each column of conns from cell ids to row positions within neuron_info
conns_reindex = np.array([reindex_table[0, conns[:, d]].toarray().flatten() for d in range(conns.shape[1])]).T

# Boolean matrix with True at (first column, second column) of each re-indexed connection.
# The shape is given explicitly: if inferred from the largest index, the matrix would be too
# small (and possibly non-square) whenever the last cells have no connections.
adj_matrix = sps.csc_matrix((np.full(conns_reindex.shape[0], True), (conns_reindex[:, 0], conns_reindex[:, 1])), shape=(n_neurons, n_neurons))
sps.save_npz(os.path.join(save_path, 'connectivity.npz'), adj_matrix)

## Merging of multiple simulations
- All simulations to be merged must have the same neuron info and adjacency matrix
- Spikes and stimulus trains will be concatenated
- _OPTIONAL:_ Pattern filtering

In [3]:
camp_id = '4073e95f-abb1-4b86-8c38-13cf9f00ce0b'
# Input folders (one per simulation) to be merged, in merging order
merge_list = [
    f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_000',
    f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_001',
    f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_002',
]
# Inclusion filter list of pattern (0: A, 1: B, ..., 9: J); None to include all
pattern_filter = [0, 1, 2, 3]

# Suffix of the output folder name encoding the selected patterns, e.g. '_p-0-1-2-3'
# (empty if no filter is set)
patt_str = ''
if pattern_filter is not None:
    assert isinstance(pattern_filter, list), 'ERROR: Pattern filter inclusion list expected!'
    patt_str = '_p' + ''.join(f'-{p}' for p in pattern_filter)

# Output folder for the merged data (created if not yet existing)
save_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_merged{patt_str}'
if not os.path.exists(save_path):
    os.makedirs(save_path)


In [78]:
# Check neuron info & write to merged folder
# The neuron info of the first folder serves as reference; all others must be equal to it
neuron_info = pd.read_pickle(os.path.join(merge_list[0], 'neuron_info.pickle'))
for p in merge_list[1:]:
    assert neuron_info.equals(pd.read_pickle(os.path.join(p, 'neuron_info.pickle'))), f'ERROR: Neuron info mismatch at {p}!'
# Written twice: as .pickle and as .h5 (table format)
neuron_info.to_pickle(os.path.join(save_path, 'neuron_info.pickle'))
# 'key' passed by keyword: positional use is deprecated in recent pandas versions
neuron_info.to_hdf(os.path.join(save_path, 'neuron_info.h5'), key='neuron_info', format='table')



In [10]:
# Check adjacency matrix & write to merged folder
# The matrix of the first folder serves as reference; all others must have the same shape
# and the same non-zero entries (comparing non-zero entries alone would not detect matrices
# of different sizes)
adj_matrix = sps.load_npz(os.path.join(merge_list[0], 'connectivity.npz'))
ref_nonzero = adj_matrix.nonzero() # Computed once, re-used for all comparisons
for p in merge_list[1:]:
    other_matrix = sps.load_npz(os.path.join(p, 'connectivity.npz'))
    assert adj_matrix.shape == other_matrix.shape and np.array_equal(ref_nonzero, other_matrix.nonzero()), f'ERROR: Adjacency matrices mismatch at {p}!'
sps.save_npz(os.path.join(save_path, 'connectivity.npz'), adj_matrix.tocsc())

In [80]:
# Merge & reindex stim trains
# Load the stim train of each folder; if a pattern filter is set, all presentations of
# patterns not in the filter are marked with -1. The per-folder trains (incl. -1 markers)
# are kept in stim_trains.
stim_trains = []
for p in merge_list:
    st = np.load(os.path.join(p, 'stim_stream.npy'))
    if pattern_filter is not None:
        st[~np.isin(st, pattern_filter)] = -1
    stim_trains.append(st)
# Concatenate all trains and drop the presentations marked as excluded
stim_trains_merged = np.hstack(stim_trains)
stim_trains_merged = stim_trains_merged[stim_trains_merged != -1]
if pattern_filter is None:
    # No filter (i.e., all patterns included): nothing to re-index, pattern indices are kept as is.
    # (Iterating over pattern_filter would raise a TypeError in this case.)
    stim_trains_reidx = stim_trains_merged.copy()
else:
    # Re-index patterns by their position within the filter list
    stim_trains_reidx = np.full_like(stim_trains_merged, -1)
    for pidx, p in enumerate(pattern_filter):
        stim_trains_reidx[stim_trains_merged == p] = pidx
np.save(os.path.join(save_path, 'stim_stream.npy'), stim_trains_reidx)

In [81]:
# Cut & merge spikes
start = 2000 # First stim [Assumed to be the same for all sims!!]
isi = 500 # Inter-stimulus interval [Assumed to be the same for all sims!!]

# For each folder, walk through consecutive windows of length isi (beginning at start), one per
# entry of the folder's stim train. Spikes of windows not marked as excluded (-1) are cut out
# and shifted in time, so that all kept windows follow each other without gaps.
raw_spikes = []
t_offset = 0 # Start time of the next kept window within the merged spike train
for sim_idx, sim_dir in enumerate(merge_list):
    sim_spikes = np.load(os.path.join(sim_dir, 'raw_spikes.npy')) # Load spikes
    for win_idx, pattern_id in enumerate(stim_trains[sim_idx]):
        if pattern_id == -1:
            continue # Excluded presentation: window is skipped
        win_start = start + win_idx * isi
        win_end = win_start + isi
        in_window = (sim_spikes[:, 0] >= win_start) & (sim_spikes[:, 0] < win_end)
        window_spikes = sim_spikes[in_window, :] # Cut spikes (copy)
        window_spikes[:, 0] = window_spikes[:, 0] - win_start + t_offset # Correct spike times
        t_offset += win_end - win_start
        raw_spikes.append(window_spikes)
raw_spikes = np.vstack(raw_spikes) # Merge
np.save(os.path.join(save_path, 'raw_spikes.npy'), raw_spikes)


In [82]:
# Summary of the merged data: number of presentations per pattern and spike time range
# Patterns to count: the filter list or, if no filter is set (None), all patterns that occur
counted_patterns = np.unique(stim_trains_merged).tolist() if pattern_filter is None else pattern_filter
# Counts converted to plain int, so that they are printed as numbers independent of the numpy version
patt_counts = {p: int(np.sum(stim_trains_merged == p)) for p in counted_patterns}
print(f'Merged {len(merge_list)} files ({len(stim_trains_merged) * isi}ms in total) with {len(stim_trains_merged)} pattern presentations: {patt_counts}')
print(f'Spike times from {raw_spikes[:, 0].min():.3f} to {raw_spikes[:, 0].max():.3f}ms')

Merged 3 files (72000ms in total) with 144 pattern presentations: {0: 36, 1: 36, 2: 36, 3: 36}
Spike times from 0.025 to 71999.975ms


## Re-convert dataframe from .h5 to .pickle
- Needed in case the pickled dataframe was written with an incompatible pickle protocol
- The .pickle file should be re-saved with the same Python/pandas version as used in the toposample analysis (e.g., using the same venv)

In [1]:
import pandas as pd
import os

camp_id = '4073e95f-abb1-4b86-8c38-13cf9f00ce0b'
save_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_merged_p-0-1-2-3'

# Source (.h5) and target (.pickle) file have the same base name within save_path
h5_file = os.path.join(save_path, 'neuron_info.h5')
pickle_file = os.path.splitext(h5_file)[0] + '.pickle'

neuron_info = pd.read_hdf(h5_file)
if os.path.exists(pickle_file):
    # Keep an already existing pickle file as backup, with '_BAK_' inserted before the extension
    pickle_root, pickle_ext = os.path.splitext(pickle_file)
    os.rename(pickle_file, pickle_root + '_BAK_' + pickle_ext)
neuron_info.to_pickle(pickle_file)
print(f'File written to {pickle_file}!')

File written to /gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/toposample_input_merged_p-0-1-2-3/neuron_info.pickle!


## Prepare working dir
- Creates a copy of the empty working dir
- Copies input files

In [2]:
import os
import shutil

# Folder containing the working dirs, and name of the empty working dir to be copied
topo_path = '/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling'
empty_dir = 'working_dir_EMPTY_'

# Name of the new working dir (replaces 'EMPTY' in the name of the empty working dir)
name = 'ABCD'

# NOTE(review): save_path is presumably expected to be still set from the section above;
#               otherwise, it can be set here:
# camp_id = '4073e95f-abb1-4b86-8c38-13cf9f00ce0b'
# save_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_merged_p-0-1-2-3'

In [5]:
# Copy the empty working dir to a new one, with 'EMPTY' in its name replaced by the chosen name
src_dir = os.path.join(topo_path, empty_dir)
tgt_dir = os.path.join(topo_path, empty_dir.replace('EMPTY', name))
print(shutil.copytree(src_dir, tgt_dir))

# Copy all input files from save_path into the input data folder of the new working dir
input_dir = os.path.join(tgt_dir, 'data', 'input_data')
for input_file in ('neuron_info.pickle', 'connectivity.npz', 'stim_stream.npy', 'raw_spikes.npy'):
    print(shutil.copy(os.path.join(save_path, input_file), input_dir))

/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling/working_dir_ABCD_
/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling/working_dir_ABCD_/data/input_data/neuron_info.pickle
/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling/working_dir_ABCD_/data/input_data/connectivity.npz
/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling/working_dir_ABCD_/data/input_data/stim_stream.npy
/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/4073e95f-abb1-4b86-8c38-13cf9f00ce0b/topological_sampling/working_dir_ABCD_/data/input_data/raw_spikes.npy


## Prepare m-type sections for random sampling

In [8]:
# Sorted list of all distinct m-types occurring in the neuron info table
mtype_column = neuron_info['mtype']
mtypes = np.unique(mtype_column)

In [None]:
import json
# Build one config entry per m-type, each selecting the cells with that value in the
# 'mtype' column and requesting nsamples samples, and print all entries as JSON string
nsamples = 25
dict_list = [
    {'name': mtype, 'value': {'column': 'mtype', 'value': mtype}, 'number': nsamples}
    for mtype in mtypes
]
cfg_str = json.dumps([[dict_list]], indent=2) # Use nested lists to set correct indent
print(f'Config string for {len(mtypes)} m-types:')
print(cfg_str)