This notebook is used to stream data from DANDI and then access it within the Brain-Score scope.

In [1]:
import fnmatch
import io
import json
import os
import re
import shutil

import h5py
import numpy as np
import pandas as pd
import xarray as xr
from ruamel.yaml import YAML

from brainio.assemblies import NeuronRecordingAssembly

In [4]:
# Open the two raw-PSTH HDF5 files read-only: the 'faceemovids' experiment recording and the
# recording stored under the 'normalizers' project directory (both pico, session 230908).
# NOTE(review): neither handle is closed anywhere in the visible notebook, and both paths are
# absolute lab-storage paths -- consider a configurable data directory.
h5_recording = h5py.File('/braintree/data2/active/users/sgouldin/projects/faceemovids/monkeys/pico/h5/230908.pico.rsvp.faceemovids.experiment_psth_raw.h5', 'r')
h5_norm      = h5py.File('/braintree/data2/active/users/sgouldin/projects/normalizers/monkeys/pico/h5/230908.pico.rsvp.normalizers.experiment_psth_raw.h5', 'r')

# Each image was shown for 500 ms.

In [None]:
def create_norm_assembly(data):
    """Build a NeuronRecordingAssembly from a raw PSTH container, plus a time-summed copy for QC.

    :param data: mapping-like object (e.g. an open h5py.File) providing ``data['psth']`` and
        ``data['meta']``; ``meta`` must expose the scalar datasets ``start_time_ms``,
        ``stop_time_ms`` and ``tb_ms``.
    :return: tuple ``(assembly, assembly_for_qc)``. ``assembly`` has dims
        (time_bin, neuroid, presentation); ``assembly_for_qc`` is ``assembly`` restricted to
        time-bin indices 7..16 and summed over ``time_bin``.
    :raises AssertionError: if the number of time bins implied by ``meta`` differs from
        ``psth.shape[2]``.
    """
    psth = np.asarray(data['psth'])  # Shaped images x repetitions x time_bins x channels
    meta = data['meta']

    # Read the scalar metadata once instead of re-indexing the datasets repeatedly.
    start_ms = meta['start_time_ms'][()]
    stop_ms = meta['stop_time_ms'][()]
    bin_width_ms = meta['tb_ms'][()]

    timebase = np.arange(start_ms, stop_ms, bin_width_ms)
    assert len(timebase) == psth.shape[2]

    bin_starts = [int(t) for t in timebase]
    bin_stops = [start + int(bin_width_ms) for start in bin_starts]

    assembly = xr.DataArray(psth,
                    coords={'repetition': ('repetition', list(range(psth.shape[1]))),
                            'time_bin_id': ('time_bin', list(range(psth.shape[2]))),
                            'time_bin_start': ('time_bin', bin_starts),
                            'time_bin_stop': ('time_bin', bin_stops)},
                    dims=['image', 'repetition', 'time_bin', 'neuroid'])

    # Collapse dimensions 'image' and 'repetitions' into a single 'presentation' dimension
    assembly = assembly.stack(presentation=('image', 'repetition')).reset_index('presentation')
    assembly = assembly.drop('image')
    assembly = NeuronRecordingAssembly(assembly)  # Convert to assembly

    # NOTE(review): bin indices 7..16 are hard-coded; which latencies they cover depends on
    # start_time_ms / tb_ms of the file -- confirm this is the intended QC window.
    assembly_for_qc = assembly.isel(time_bin = slice(7, 17)).sum('time_bin')

    # Fixed: the original returned the misspelled name `assemby`, raising NameError on every call.
    return assembly, assembly_for_qc

# Build the assemblies from the 'normalizers' recording; keep only the second return value
# (the copy summed over time-bin indices 7..16) and discard the full assembly.
_, da = create_norm_assembly(h5_norm)

In [25]:
def load_nwb(hasspike = None, haspsth = None,
             inventory_path = '/braintree/home/aliya277/dandi_brainscore/pico_inventory.xlsx',
             storage_dir = '/braintree/home/aliya277/inventory'):
    """Open the first non-normalizer session listed in the inventory that matches the filters.

    The inventory spreadsheet is scanned row by row. Rows whose 'ImageSet' is 'normalizers' or
    'normalizers-HVM' are skipped. For the first remaining row that satisfies the requested
    filters, the corresponding NWB file is opened read-only and its contents are returned.

    :param hasspike: if truthy, require the row's 'Has SpikeTime' column to equal 1.
    :param haspsth: if truthy, require the row's 'Has h5' column to equal 1.
        A falsy value (False or None) for either flag means "do not filter on that column".
    :param inventory_path: Excel inventory with the columns 'date', 'time', 'ImageSet',
        'Has SpikeTime' and 'Has h5'.
    :param storage_dir: root directory containing
        ``<imageset>/<imageset>.sub_pico/<session>.proc/<session>.proc.nwb``.
    :return: the object returned by ``NWBHDF5IO.read()``, or None if no row matches.

    NOTE(review): the NWBHDF5IO reader is deliberately left open, on the assumption that the
    returned file object reads its datasets lazily -- confirm and close it once the data is used.
    """
    # NWBHDF5IO is used below but is not imported anywhere in the notebook's import cell.
    from pynwb import NWBHDF5IO

    inventory = pd.read_excel(inventory_path)
    normalizer_sets = ('normalizers', 'normalizers-HVM')

    for _, row in inventory.iterrows():
        # Normalizer sessions are never returned, so skip them before doing any other work.
        if row['ImageSet'] in normalizer_sets:
            continue

        # Build the filter from whichever flags are set. The original only assigned `condition`
        # for three specific (hasspike, haspsth) combinations and raised UnboundLocalError for
        # every other one, including the default arguments.
        matches = True
        if hasspike:
            matches = matches and row['Has SpikeTime'] == 1
        if haspsth:
            matches = matches and row['Has h5'] == 1
        if not matches:
            continue

        date = f"20{row['date']}"
        # Times are stored as integers, so leading zeros are lost (e.g. 93358 -> '093358').
        time = str(row['time']).zfill(6)
        directory = f"exp_{row['ImageSet']}.sub_pico.{date}_{time}.proc"
        print(directory)

        name_parts = directory.split(".")
        imageset_name = name_parts[0]               # e.g. 'exp_<ImageSet>'
        subject_name = ".".join(name_parts[0:2])    # e.g. 'exp_<ImageSet>.sub_pico'
        nwbfilepath = os.path.join(storage_dir, imageset_name, subject_name,
                                   directory, f"{directory}.nwb")

        # Named `nwb_io` so the local does not shadow the imported `io` module.
        nwb_io = NWBHDF5IO(nwbfilepath, "r")
        return nwb_io.read()

    return None


    

# Load the first inventory session flagged as having both spike times and an h5 PSTH file.
nwbfile = load_nwb(hasspike = True, haspsth = True)

exp_ko_context_size.sub_pico.20220930_153358.proc


In [24]:
def load_psth_as_assembly(nwbfile, window_ms=(70, 170)):
    """Average the PSTH stored in an NWB file over a time window and wrap it as an assembly.

    :param nwbfile: object exposing ``session_description`` (a ', '-separated string),
        ``scratch['psth']`` (stimuli x reps x timebins x channels) and ``scratch['psth meta']``
        (``[start_time_ms, stop_time_ms, tb_ms]``).
    :param window_ms: ``(start, stop)`` in ms; time bins whose start satisfies
        ``start <= t < stop`` are averaged. Defaults to the previously hard-coded 70-170 ms.
    :return: NeuronRecordingAssembly with dims (presentation, neuroid). When the session
        description contains 'Visual...' / 'ON/OFF...' entries, their parsed values are stored
        in ``attrs['image_size_degree']`` and ``attrs['stim_on_time_ms']`` respectively.
    :raises AssertionError: if the time base implied by the PSTH meta data does not match the
        number of time bins in the PSTH.
    """
    # Adjusted from: brainio_contrib/mkgu_packaging/dicarlo/sanghavi/sanghavijozwik2020.py load_responses
    # and from sachiscripts/HVM_var6_subset.py load_responses

    on_off = None
    vis_deg = None
    for field in nwbfile.session_description.split(', '):
        # NOTE(review): eval() executes text read from the file. It is only acceptable for
        # trusted recordings; consider ast.literal_eval if the values are plain literals.
        if field.startswith('ON/OFF'):
            on_off = [eval(part) for part in field.split(":")[-1].split('/')]
        if field.startswith('Visual'):
            vis_deg = eval(field.split(':')[-1])

    psth = nwbfile.scratch['psth'][:]  # [stimuli x reps x timebins x channels]
    start_time_ms, stop_time_ms, tb_ms = nwbfile.scratch['psth meta'][:]

    # NOTE(review): squeeze() drops every singleton axis, so a file with a single stimulus,
    # repetition or channel would lose that axis and break the 4-D indexing below.
    psth = psth.squeeze()

    # Fixed: the original read the time base from an undefined `meta` and printed a `timebins`
    # variable that only existed as a leaked global from another cell.
    timebase = np.arange(start_time_ms, stop_time_ms, tb_ms)
    assert len(timebase) == psth.shape[2], \
        f"time base has {len(timebase)} bins but psth has {psth.shape[2]}"

    window_start, window_stop = window_ms
    t_cols = np.where((timebase >= window_start) & (timebase < window_stop))[0]
    rate = np.mean(psth[:, :, t_cols, :], axis=2)  # stimuli x reps x channels

    print(timebase)

    assembly = xr.DataArray(rate,
                            coords={'repetition': ('repetition', list(range(rate.shape[1]))),
                                    'stimulus_id': ('image', list(range(rate.shape[0]))),
                                    'id': ('image', list(range(rate.shape[0])))},
                            dims=['image', 'repetition', 'neuroid'])

    # Collapse dimensions 'image' and 'repetitions' into a single 'presentation' dimension
    assembly = assembly.stack(presentation=('image', 'repetition')).reset_index('presentation')
    assembly = assembly.drop('image')
    assembly = NeuronRecordingAssembly(assembly)
    assembly = assembly.transpose('presentation', 'neuroid')

    # Keep the parsed experiment info with the data instead of discarding it.
    if vis_deg is not None:
        assembly.attrs['image_size_degree'] = vis_deg
    if on_off is not None:
        assembly.attrs['stim_on_time_ms'] = on_off[0]

    return assembly


        
# Convert the PSTH stored in the loaded NWB file into a (presentation x neuroid) assembly.
psth_assembly = load_psth_as_assembly(nwbfile)


[-100.  -90.  -80.  -70.  -60.  -50.  -40.  -30.  -20.  -10.    0.   10.
   20.   30.   40.   50.   60.   70.   80.   90.  100.  110.  120.  130.
  140.  150.  160.  170.  180.  190.  200.  210.  220.  230.  240.  250.
  260.  270.  280.  290.  300.  310.  320.  330.  340.  350.  360.  370.
  380.  390.]


In [23]:
import xarray as xr
import numpy as np


class SessionNeuralData(object):
    """Validated wrapper around one session's spike-count array with per-stimulus lookup."""

    def __init__(
            self,
            da_presentation: xr.DataArray,
            presentation_dim='presentation',
            neuroid_dim='neuroid',
            stimulus_id_coord='stimulus_id',
            timestamp_coord='unix_timestamp',
    ):
        """
        A class which wraps the raw spike count data, which is supplied as an xr.DataArray with dims:
            value: (presentation_dim, neuroid_dim)
        It has two mandatory coords:
            stimulus_id_coord: (presentation_dim)
            timestamp_coord

        On success the dims and coords are renamed to the standard names 'presentation',
        'neuroid', 'stimulus_id' and 'unix_timestamp'.

        :param da_presentation:  xr.DataArray of spike counts, with dimensions (presentation_dim, neuroid_dim).
        :param presentation_dim: name of the presentation dimension in ``da_presentation``.
        :param neuroid_dim: name of the neuroid dimension in ``da_presentation``.
        :param stimulus_id_coord: name of the stimulus-id coordinate in ``da_presentation``.
        :param timestamp_coord: name of the timestamp coordinate in ``da_presentation``.
        :raises AssertionError: wrong type, wrong dims, or a missing mandatory coordinate.
        :raises ValueError: the data contains NaN, negative or non-integer entries.
        """
        dim_set = {
            presentation_dim,
            neuroid_dim,
        }

        mandatory_coords = {
            stimulus_id_coord,
            timestamp_coord,
        }

        assert isinstance(da_presentation, xr.DataArray), f"da_presentation:{da_presentation}, Required type: xr.DataArray"

        assert set(da_presentation.dims) == dim_set, f"da_presentation.dims:{da_presentation.dims}, Required dimensions: {dim_set}"
        for coord in mandatory_coords:
            assert coord in da_presentation.coords, f"da_presentation.coords:{da_presentation.coords}, Required coordinates: {mandatory_coords}"

        assert set(da_presentation[stimulus_id_coord].dims) == {presentation_dim}, f"da_presentation[{stimulus_id_coord}].dims:{da_presentation[stimulus_id_coord].dims}, Required dimensions: {presentation_dim}"

        # Perform basic checks. Counts are converted to plain ints so the error messages show a
        # number rather than the repr of a zero-dimensional DataArray.
        nan_entries = int(np.isnan(da_presentation).sum())
        negative_entries = int((da_presentation < 0).sum())
        noninteger_entries = int((np.mod(da_presentation, 1) != 0).sum())

        if nan_entries > 0:
            raise ValueError(f"da_presentation contains {nan_entries} NaN entries")
        if negative_entries > 0:
            raise ValueError(f"da_presentation contains {negative_entries} negative entries")
        if noninteger_entries > 0:
            raise ValueError(f"da_presentation contains {noninteger_entries} non-integer entries")

        # Rename dims to standard names
        presentation_dim_standard = 'presentation'
        neuroid_dim_standard = 'neuroid'
        stimulus_id_standard = 'stimulus_id'
        timestamp_coord_standard = 'unix_timestamp'

        da_presentation = da_presentation.rename(
            {
                presentation_dim: presentation_dim_standard,
                neuroid_dim: neuroid_dim_standard,
                stimulus_id_coord: stimulus_id_standard,
                timestamp_coord: timestamp_coord_standard,
            }
        )

        self.da_presentation = da_presentation
        self.presentation_dim = presentation_dim_standard
        self.neuroid_dim = neuroid_dim_standard
        self.stimulus_id_coord = stimulus_id_standard
        self.timestamp_coord = timestamp_coord_standard
        # Smallest timestamp value found in the session's timestamp coordinate.
        self.timestamp_start = float(np.min(da_presentation[self.timestamp_coord].values))

    @property
    def stimulus_id_to_da(self):
        """Dict mapping each stimulus id to its (presentation, neuroid) sub-array; built once, then cached."""
        cached = getattr(self, '_stimulus_id_to_da', None)
        if cached is None:
            # Build the full mapping before storing it: if groupby/transpose raises, no
            # empty or half-filled dict is left behind to be returned by later accesses.
            cached = {
                stimulus_id: group.transpose(self.presentation_dim, self.neuroid_dim)
                for stimulus_id, group in self.da_presentation.groupby(self.stimulus_id_coord)
            }
            self._stimulus_id_to_da = cached

        return cached

# NOTE: `da` is rebound here; an earlier cell used the same name for the normalizer QC assembly.
# Drop neuroids containing NaN, wrap the PSTH assembly and print the per-stimulus lookup.
# NOTE(review): the recorded output below complains about a 'time_bin' dimension, which the
# load_psth_as_assembly shown in this notebook does not produce -- it appears to come from
# stale kernel state; re-run from a fresh kernel.
da = psth_assembly
da = da.dropna('neuroid')
print(SessionNeuralData(da_presentation=da).stimulus_id_to_da)

AssertionError: da_presentation.dims:('presentation', 'neuroid', 'time_bin'), Required dimensions: {'presentation', 'neuroid'}

In [None]:
def load_responses(data_dir, stimuli):
    """Load the bold5000 PSTH, bin it into firing rates and drop noisy electrodes.

    :param data_dir: ``pathlib.Path``-like root directory (combined with ``/``); must contain
        ``database/`` with the PSTH ``.npy`` files and ``array-metadata/mapping.json`` next to it.
    :param stimuli: pandas DataFrame; every column is attached as a per-image coordinate.
    :return: NeuronRecordingAssembly with dims (presentation, neuroid, time_bin), restricted to
        the neuroids kept by ``filter_neuroids`` on the normalizer recording.
    :raises AssertionError: if the database directory is missing or the PSTH time axis does not
        match the expected time base.
    """
    metadata_dir = data_dir / 'array-metadata'
    data_dir = data_dir / 'database'
    assert os.path.isdir(data_dir)
    psth = np.load(data_dir / 'solo.rsvp.bold5000.experiment_psth.npy')  # Shaped images x repetitions x time_bins x channels

    # Compute firing rate for given time bins
    timebins = [[70, 170], [170, 270], [50, 100], [100, 150], [150, 200], [200, 250], [70, 270]]
    photodiode_delay = 30  # Delay recorded on photodiode is ~30ms
    timebase = np.arange(-100, 381, 10)  # PSTH from -100ms to 380ms relative to stimulus onset
    assert len(timebase) == psth.shape[2]
    rate = np.empty((len(timebins), psth.shape[0], psth.shape[1], psth.shape[3]))
    for idx, tb in enumerate(timebins):
        t_cols = np.where((timebase >= (tb[0] + photodiode_delay)) & (timebase < (tb[1] + photodiode_delay)))[0]
        rate[idx] = np.mean(psth[:, :, t_cols, :], axis=2)  # Shaped time bins x images x repetitions x channels

    assembly = xr.DataArray(rate,
                            coords={'repetition': ('repetition', list(range(rate.shape[2]))),
                                    'time_bin_id': ('time_bin', list(range(rate.shape[0]))),
                                    'time_bin_start': ('time_bin', [x[0] for x in timebins]),
                                    'time_bin_stop': ('time_bin', [x[1] for x in timebins])},
                            dims=['time_bin', 'image', 'repetition', 'neuroid'])

    # Add neuroid related meta data.
    # The mapping file is read inside a context manager so the handle is closed promptly.
    with open(metadata_dir / 'mapping.json') as mapping_file:
        neuroid_meta = pd.DataFrame(json.load(mapping_file))
    # DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent on all versions.
    for column_name, column_data in neuroid_meta.items():
        assembly = assembly.assign_coords(**{f'{column_name}': ('neuroid', list(column_data.values))})

    # Add stimulus related meta data
    for column_name, column_data in stimuli.items():
        assembly = assembly.assign_coords(**{f'{column_name}': ('image', list(column_data.values))})

    # Collapse dimensions 'image' and 'repetitions' into a single 'presentation' dimension
    assembly = assembly.stack(presentation=('image', 'repetition')).reset_index('presentation')
    assembly = assembly.drop('image')
    assembly = NeuronRecordingAssembly(assembly)

    # Filter noisy electrodes, using the normalizer recording averaged over 70-170 ms
    # (shifted by the photodiode delay).
    psth = np.load(data_dir / 'solo.rsvp.bold5000.normalizer_psth.npy')
    t_cols = np.where((timebase >= (70 + photodiode_delay)) & (timebase < (170 + photodiode_delay)))[0]
    rate = np.mean(psth[:, :, t_cols, :], axis=2)
    normalizer_assembly = xr.DataArray(rate,
                                       coords={'repetition': ('repetition', list(range(rate.shape[1]))),
                                               'image_id': ('image', list(range(rate.shape[0]))),
                                               'id': ('image', list(range(rate.shape[0])))},
                                       dims=['image', 'repetition', 'neuroid'])
    for column_name, column_data in neuroid_meta.items():
        normalizer_assembly = normalizer_assembly.assign_coords(
            **{f'{column_name}': ('neuroid', list(column_data.values))})
    normalizer_assembly = normalizer_assembly.stack(presentation=('image', 'repetition')).reset_index('presentation')
    normalizer_assembly = normalizer_assembly.drop('image')
    normalizer_assembly = normalizer_assembly.transpose('presentation', 'neuroid')
    normalizer_assembly = NeuronRecordingAssembly(normalizer_assembly)

    # NOTE(review): filter_neuroids is not defined or imported in the visible notebook; it must
    # be brought into scope before this function can run. The meaning of the 0.7 threshold is
    # defined by that helper -- confirm.
    filtered_assembly = filter_neuroids(normalizer_assembly, 0.7)
    assembly = assembly.sel(neuroid=np.isin(assembly.neuroid_id, filtered_assembly.neuroid_id))
    assembly = assembly.transpose('presentation', 'neuroid', 'time_bin')

    # Add other experiment and data processing related info
    assembly.attrs['image_size_degree'] = 8
    assembly.attrs['stim_on_time_ms'] = 100

    return assembly

In [9]:
# Adjusted from: brainio_contrib/mkgu_packaging/dicarlo/sanghavi/sanghavijozwik2020.py load_responses
#
# Builds the full time-resolved assembly (presentation x neuroid x time_bin) from the raw PSTH.
# NOTE(review): this cell reads the globals `start_time_ms`, `stop_time_ms`, `tb_ms`, `psth`,
# `vis_deg` and `on_off`, none of which is defined at module level in the visible notebook --
# it only runs on leftover kernel state and will fail after Restart & Run All.

# One entry per time bin: the bin's start time in ms.
timebins = np.arange(start_time_ms, stop_time_ms, tb_ms)
assembly = xr.DataArray(psth,
                        coords={'repetition': ('repetition', list(range(psth.shape[1]))),
                                'time_bin_id': ('time_bin', list(range(psth.shape[2]))),
                                'time_bin_start': ('time_bin', [x for x in timebins]),
                                'time_bin_stop': ('time_bin', [x+tb_ms for x in timebins])
                                },
                        dims=['image', 'repetition', 'time_bin', 'neuroid'])
 # Collapse dimensions 'image' and 'repetitions' into a single 'presentation' dimension
assembly = assembly.stack(presentation=('image', 'repetition')).reset_index('presentation')
assembly = assembly.drop('image')
assembly = NeuronRecordingAssembly(assembly)
assembly = assembly.transpose('presentation', 'neuroid', 'time_bin')

# Add other experiment and data processing related info
assembly.attrs['image_size_degree'] = vis_deg
assembly.attrs['stim_on_time_ms']   = on_off[0]

# Display the resulting assembly as the cell output.
assembly
