# Preparation of SSCx analysis with topological sampling pipeline
[https://github.com/BlueBrain/topological_sampling](https://github.com/BlueBrain/topological_sampling)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import pandas as pd
import scipy.sparse as sps
from bluepy import Simulation, Cell, Synapse

## Data preparation
- Neuron info (hex0 target): __neuron_info.pickle__ (also written as __neuron_info.h5__, which is used for the re-conversion below)
- Spikes (hex0 target; EXC only): __raw_spikes.npy__
- Stimulus train: __stim_stream.npy__
- Adjacency matrix (hex0 target; re-indexed): __connectivity.npz__

In [2]:
# Simulation folder to prepare the toposample input for
# (the commented-out line is an alternative simulation folder)
sim_path = '/gpfs/bbp.cscs.ch/data/scratch/proj83/bbp_workflow/b0a99807-f1c4-41e0-a261-5fb4e718412d/000'
# sim_path = '/gpfs/bbp.cscs.ch/data/scratch/proj83/bbp_workflow/f553e919-2fac-460c-a58e-3facc388da90/000'

# All output files of this notebook go into a sub-folder of the simulation folder
save_path = os.path.join(sim_path, 'toposample_input')
os.makedirs(save_path, exist_ok=True)  # No-op if the folder already exists, so the cell can be re-run safely

# Load the simulation and its circuit, and select the cell IDs of the 'hex0' target
blue_config = os.path.join(sim_path, 'BlueConfig')
sim = Simulation(blue_config)
c = sim.circuit
hex0 = c.cells.ids('hex0')

# The spike replay file named in the BlueConfig is joined onto sim_path (os.path.join keeps
# an absolute entry as is); a .json config with the same base name must exist next to it
spike_file = os.path.abspath(os.path.join(sim_path, sim.config['Stimulus_spikeReplay']['SpikeFile']))
spike_config_file = os.path.splitext(spike_file)[0] + '.json'
assert os.path.exists(spike_config_file), 'ERROR: Spike config file not found!'
with open(spike_config_file, 'r') as f:
    spike_config = json.load(f)

In [5]:
# Neuron info
# Table with position (x/y/z), layer, m-type and synapse class of all hex0 cells
neuron_info = c.cells.get(hex0, properties=[Cell.X, Cell.Y, Cell.Z, Cell.LAYER, Cell.MTYPE, Cell.SYNAPSE_CLASS])
neuron_info.to_pickle(os.path.join(save_path, 'neuron_info.pickle'))
# Same table as .h5 file; 'key' is passed by keyword since positional use is deprecated in recent pandas
neuron_info.to_hdf(os.path.join(save_path, 'neuron_info.h5'), key='neuron_info', format='table')

In [4]:
# Excitatory spikes
# Keep only the hex0 cells whose synapse class is 'EXC'
exc_rows = neuron_info.loc[neuron_info['synapse_class'] == 'EXC']
hex0_exc = exc_rows.index

# Spikes of the selected cells; index and values are stacked into a two-column array (index first)
# NOTE(review): presumably index = spike times and values = cell IDs -- confirm against bluepy docs
spikes_exc = sim.spikes.get(hex0_exc)
raw_spikes = np.vstack((spikes_exc.index, spikes_exc.to_numpy())).T

spikes_file = os.path.join(save_path, 'raw_spikes.npy')
np.save(spikes_file, raw_spikes)

  mask |= (ar1 == a)


In [5]:
# Stim train
# Taken from the 'stim_train' entry under 'props' of the spike config and stored as numpy array
stim_file = os.path.join(save_path, 'stim_stream.npy')
stim_stream = np.array(spike_config['props']['stim_train'])
np.save(stim_file, stim_stream)

In [10]:
# Adjacency matrix
# All connections with both pre- and post-synaptic cell in hex0, one (pre, post) pair per row;
# dtype/reshape keep a valid two-column layout even if there are no connections at all
num_cells = neuron_info.shape[0]
conns = np.array(list(c.connectome.iter_connections(pre=hex0, post=hex0)), dtype=int).reshape(-1, 2)

# Re-index: replace each cell ID by its row position in neuron_info (get_indexer returns -1 if not found)
conns_reindex = np.array([neuron_info.index.get_indexer(conns[:, d]) for d in range(conns.shape[1])]).T
assert np.all(conns_reindex >= 0), 'ERROR: Connection with cell ID not contained in neuron info!'

# Boolean matrix with one row/column per neuron_info row. The shape is given explicitly, since
# otherwise it would be inferred from the largest indices and could turn out smaller than
# (num_cells x num_cells) if the last cells have no outgoing or incoming connections.
adj_matrix = sps.csc_matrix((np.full(conns_reindex.shape[0], True), (conns_reindex[:, 0], conns_reindex[:, 1])),
                            shape=(num_cells, num_cells))
sps.save_npz(os.path.join(save_path, 'connectivity.npz'), adj_matrix)

In [15]:
# Report the output folder together with the files it currently contains
written_files = os.listdir(save_path)
print(f'INFO: Toposample input files written to "{save_path}": {written_files}')

INFO: Toposample input files written to "/gpfs/bbp.cscs.ch/data/scratch/proj83/bbp_workflow/b0a99807-f1c4-41e0-a261-5fb4e718412d/000/toposample_input": ['neuron_info.pickle', 'neuron_info.h5', 'connectivity.npz', 'stim_stream.npy', 'raw_spikes.npy']


## Re-convert dataframe from .h5 to .pickle
- Use this in case the pickled dataframe was written with a pickle protocol that cannot be read by the toposample analysis
- This cell should be run with the same python/pandas version as used in the toposample analysis (e.g., using the same venv)

In [1]:
import pandas as pd
import os

# camp_id = '4073e95f-abb1-4b86-8c38-13cf9f00ce0b'
# save_path = f'/gpfs/bbp.cscs.ch/data/scratch/proj83/home/pokorny/analyses/proj96/{camp_id}/toposample_input_merged'
save_path = '/gpfs/bbp.cscs.ch/data/scratch/proj83/bbp_workflow/b0a99807-f1c4-41e0-a261-5fb4e718412d/000/toposample_input'

# Source (.h5) and target (.pickle) file share the same base name
h5_file = os.path.join(save_path, 'neuron_info.h5')
pickle_root = os.path.splitext(h5_file)[0]
pickle_file = pickle_root + '.pickle'
neuron_info = pd.read_hdf(h5_file)

# An already existing pickle file is kept as backup ('_BAK_' inserted before the extension)
if os.path.exists(pickle_file):
    backup_root, backup_ext = os.path.splitext(pickle_file)
    os.rename(pickle_file, backup_root + '_BAK_' + backup_ext)

# Write the dataframe with the pickle protocol of the currently running environment
neuron_info.to_pickle(pickle_file)
print(f'File written to {pickle_file}!')

File written to /gpfs/bbp.cscs.ch/data/scratch/proj83/bbp_workflow/b0a99807-f1c4-41e0-a261-5fb4e718412d/000/toposample_input/neuron_info.pickle!
