In [1]:
"""
Applying transformations to large files in batches:

BatchProcessor.multi_channel_apply lets you apply transformations to
batches of data where every batch has observations from every channel.

This example shows how to extract information from a large file by
processing it in batches.
"""

import logging
import os

import numpy as np

from yass.batch import BatchProcessor

  return f(*args, **kwds)


In [2]:
# configure logging at INFO level so the batch processor reports which
# function it is applying and how long it took
logging.basicConfig(level=logging.INFO)

In [3]:
# raw data file ('~' is expanded to the current user's home directory)
path_to_neuropixel_data = os.path.expanduser(
    '~/data/ucl-neuropixel/rawDataSample.bin'
)

In [4]:
# on each batch, we find the maximum value in every channel
def max_in_channel(batch):
    """Return the maximum value of every channel in the batch

    Parameters
    ----------
    batch: array-like
        Batch of data. The reduction runs along axis 0, so this assumes
        observations are on axis 0 and channels on axis 1 (consistent with
        the one-value-per-selected-channel results in this example)

    Returns
    -------
    numpy.ndarray
        Maximum of `batch` taken along axis 0, one value per channel
    """
    return np.max(batch, axis=0)

In [5]:
# create the batch processor for the raw data file
bp = BatchProcessor(
    path_to_neuropixel_data,
    dtype='int16',
    n_channels=385,
    data_format='wide',
    max_memory='10MB',
)

# apply a multi channel transformation: each batch is a temporal subset
# with observations from all the selected channels, and the size of the
# subset is calculated depending on max_memory. Results from every batch
# are returned in a list
res = bp.multi_channel_apply(
    max_in_channel,
    mode='memory',
    channels=[0, 1, 2],
)

INFO:yass.batch.batch:Applying function __main__.max_in_channel...
INFO:yass.batch.batch:__main__.max_in_channel took 0.019220829010009766 seconds


In [6]:
# res holds one element per batch, so its length is the number of
# batches that were processed
len(res)

2

In [7]:
# output for the first batch: one maximum per selected channel
res[0]

array([137, 119,  99], dtype=int16)

In [8]:
# stack results from every batch into a single array with one row
# per batch
arr = np.stack(res, axis=0)

In [9]:
# reduce over the batch axis to find the maximum value of every channel
# across the whole dataset
np.max(arr, axis=0)

array([137, 119,  99], dtype=int16)