In [1]:
import xarray as xr 
import pandas as pd 
import numpy as np
import scipy.sparse as ssparse
from tqdm import tqdm
import sparse

Hidden dependencies: 
    
 - sparse: https://pypi.org/project/sparse/

In [2]:
spike_df_path = '/media/timsit/Partition 1/data/interim/active-m2-good-w-updated-wheel/spike_df.pkl'
spike_df = pd.read_pickle(spike_df_path)

In [3]:
neuron_df_path = '/media/timsit/Partition 1/data/interim/active-m2-good-w-updated-wheel/neuron_df.pkl'
neuron_df = pd.read_pickle(neuron_df_path)

In [4]:
query_subject_ref = 3
query_exp_ref = 21 
target_cell_id = neuron_df.loc[
    neuron_df['expRef'] == query_exp_ref
]['cellId']

subset_spike_df = spike_df.loc[
    spike_df['cellId'].isin(target_cell_id)
]

In [5]:
def bin_spikes_sparse(spike_df, sampling_rate=2000, sparse_type='sparse'):
    
    min_time = np.min(spike_df['spikeTime'])
    max_time = np.max(spike_df['spikeTime'])
    num_bins = int(max_time - min_time) * 2000
    num_neurons = len(np.unique(spike_df['cellId']))
    
    sparse_vector_list = list()
    for cell_id in tqdm(np.unique(spike_df['cellId'])):
        cell_spike_df = spike_df.loc[
        spike_df['cellId'] == cell_id]
        binned_vector, time_coords = np.histogram(cell_spike_df, num_bins, range=(min_time, max_time))
        if sparse_type == 'sparse':
            sparse_vector = sparse.COO.from_numpy(binned_vector)
        else:
            sparse_vector = ssparse.coo_matrix(binned_vector)
        
        sparse_vector_list.append(sparse_vector)
    
    if sparse_type == 'sparse':
        sparse_matrix = sparse.stack(sparse_vector_list)
    else:
        sparse_matrix = ssparse.vstack(sparse_vector_list)
    
    return sparse_matrix, time_coords

In [6]:
sparse_matrix, time_coords = bin_spikes_sparse(subset_spike_df)

100%|██████████| 415/415 [00:31<00:00, 13.04it/s]


In [48]:
np.shape(np.arange(np.shape(sparse_matrix)[1]))

(7990000,)

In [50]:
spike_xr

In [7]:
spike_ds = xr.Dataset(
    {'Spike': (['Cell', 'Time'], sparse_matrix)},
    coords={'Cell': ('Cell', np.arange(np.shape(sparse_matrix)[0])),
           'Bin': ('Time', np.arange(np.shape(sparse_matrix)[1])),
           'Time': ('Time', time_coords[:-1])}
)

In [84]:
neuron_df

Unnamed: 0,subjectRef,expRef,penRef,cellLoc,cellArea,cellPos,cluNum,cellId
0,1,1,1,TT,TTv,"[598, 589, 277]",1,0
1,1,1,1,TT,TTv,"[600, 583, 276]",2,1
2,1,1,1,TT,TTv,"[600, 585, 276]",3,2
3,1,1,1,TT,TTv,"[599, 587, 277]",4,3
4,1,1,1,TT,TTv,"[601, 582, 276]",5,4
...,...,...,...,...,...,...,...,...
14695,6,56,89,ILA,ILA6a,"[641, 343, 389]",9,14695
14696,6,56,89,PL,PL6a,"[649, 329, 390]",10,14696
14697,6,56,89,ILA,ILA6a,"[630, 363, 389]",11,14697
14698,6,56,89,ILA,ILA6a,"[632, 361, 389]",12,14698


In [8]:
# Add new data variable
cell_location = neuron_df.loc[
    neuron_df['expRef'] == query_exp_ref
]['cellLoc']
spike_ds.assign({'CellLocation': ('Cell', cell_location)})

In [9]:
spike_ds.attrs['Exp'] = query_exp_ref
spike_ds.attrs['Subject'] = query_subject_ref

In [83]:
spike_ds

In [3]:
min_time = np.min(spike_df['spikeTime'])
max_time = np.max(spike_df['spikeTime'])

In [4]:
cell_spike_df = spike_df.loc[
    spike_df['cellId'] == 0
]

In [5]:
sampling_rate = 2000
num_bins = int(max_time - min_time) * 2000
num_neurons = len(np.unique(spike_df['cellId']))

In [6]:
num_bins

15102000

In [7]:
binned_vector, _ = np.histogram(cell_spike_df, num_bins, range=(min_time, max_time))

In [13]:
sparse_vector = ssparse.csr_matrix(binned_vector)

In [14]:
sparse_vector

<1x15102000 sparse matrix of type '<class 'numpy.int64'>'
	with 10362 stored elements in Compressed Sparse Row format>

In [20]:
sparse_matrix = ssparse.vstack(sparse_vector_list)

In [21]:
np.shape(sparse_matrix)

(44, 15102000)

## Save the sparse array

### Built in sparse method

In [None]:
save_path = '/home/timsit/xarray-intro/data/sparse_spikes.npz'
sparse.save_npz(save_path, sparse_matrix)

In [15]:
import h5py
hf = h5py.File('sparse_spikes.h5', 'w')
dataset = hf.create_dataset('Spike', data=sparse_matrix)
dataset.attrs['Dimensions'] = ['Cell', 'Time']
dataset.attrs['Time'] = {'Time': time_coords[:-1], 'Bin': np.arange(np.shape(sparse_matrix)[1])}
dataset.attrs['Cell'] = {'Cell': np.arange(np.shape(sparse_matrix)[1])}
hf.close()

ModuleNotFoundError: No module named 'h5py'