# Converting Local Data to NWB

This notebook steps through converting data to NWB format. 

Note that this notebook doesn't use the metadata files.

In [1]:
%config Completer.use_jedi = False

In [2]:
from pathlib import Path

from datetime import datetime
from dateutil.tz import tzlocal

import h5py
import joblib
import numpy as np
import pandas as pd

from pynwb import NWBFile, NWBHDF5IO, TimeSeries, ProcessingModule
from pynwb.file import Subject
from pynwb.behavior import Position, SpatialSeries
from pynwb.ecephys import ElectricalSeries, SpikeEventSeries

In [3]:
# Import local code module
import sys
sys.path.append('..')
from conv.utils import clean_strings, get_event_time
from conv.io import get_files

### Settings

In [4]:
# Define subject information
subj = 'example'
session = 'session_0'

In [5]:
# Define the base data folder
# NOTE: '...' appears to be a placeholder - replace with the actual location of the data
data_folder = Path('...')

# Define the full subject & session path, laid out as <data_folder>/<subj>/<session>
full_path = data_folder / subj / session

In [6]:
# Load behavior data, from the 'behavioral' sub-folder of the session directory
# NOTE(review): `load_task_obj` is not among the names imported in the visible cells
#   (only `clean_strings`, `get_event_time` & `get_files` are) - confirm where it is defined
task = load_task_obj(full_path / 'behavioral' / 'task.p')

In [7]:
# Get current date
current_date = datetime.now(tzlocal())

## Initialize an NWB File

Set up the file.

### Define Recording Metadata

In [8]:
# Define metadata for NWB file
session_description = 'XX'
experiment_description = 'Example conversion to NWB format.'
identifier = 'XX'
experimenter = 'XX'
lab = 'U01 Group'
institution = 'Columbia'
session_id = '001'

In [9]:
# Set the collection site from the prefix of the subject code, if it is a known one
data_collection = 'XX' if subj.startswith('XX') else 'unknown'

### Define Subject Information

In [10]:
# Set subject information
age = None
sex = None
description = None
species = 'human'
subject_id = None

In [11]:
# Create subject object
subject = Subject(age=age,
                  sex=sex,
                  description=description, 
                  species=species,
                  subject_id=subject_id)

### Collect together into NWB file

In [12]:
# Initialize an NWB file, collecting together the recording & subject metadata
# NOTE: `file_create_date` and `session_start_time` are both set to `current_date`,
#   which is the time at which this notebook is run, not the time of the recording
nwbfile = NWBFile(session_description=session_description,
                  identifier=identifier,
                  file_create_date=current_date,
                  session_start_time=current_date,
                  experimenter=experimenter,
                  lab=lab,
                  institution=institution,
                  data_collection=data_collection,
                  experiment_description=experiment_description,
                  subject=subject,
                  session_id=session_id)

## Recording Definition

### Device(s)

In [13]:
# Describe the recording device
device_name = 'RECORDING DEVICE'
device_desc = 'RECORDING DEVICE DESCRIPTION'
device_manu = 'RECORDING DEVICE MANUFACTURER'

# Register the device with the NWB file, keeping a handle to the created object
device = nwbfile.create_device(name=device_name,
                               description=device_desc,
                               manufacturer=device_manu)

In [14]:
# Check out the defined device
device

RECORDING DEVICE pynwb.device.Device at 0x140417075578864
Fields:
  description: RECORDING DEVICE DESCRIPTION
  manufacturer: RECORDING DEVICE MANUFACTURER

### Electrodes

In [15]:
# Describe the electrode group: where it is, what it is called & what it consists of
location = 'WHERE'
electrode_name = '{}-microwires-{}'.format('A', 'chnum')
description = "Behnke Fried/Micro Inner Wire Bundle....ADD DETAILS."

# Create the electrode group in the NWB file, linked to the recording device
electrode_group = nwbfile.create_electrode_group(
    name=electrode_name,
    description=description,
    location=location,
    device=device,
)

In [16]:
# Properties applied to each electrode that gets added to the file
#   position (x, y, z), impedance, location label, filtering description & reference
x_pos = y_pos = z_pos = 0.0
imp = np.nan
location = 'place'
filtering = '0, np.inf'
reference = None

In [17]:
# Add the electrodes to the NWB file, one row per electrode
#   every electrode shares the same properties & group, differing only in its id
n_electrodes = 5
for ind in range(n_electrodes):
    nwbfile.add_electrode(id=ind,
                          x=x_pos, y=y_pos, z=z_pos,
                          imp=imp,
                          location=location,
                          filtering=filtering,
                          group=electrode_group,
                          reference=reference)

## Stimuli

Add stimuli of interest to the NWB file.

In [20]:
# Add stimuli, each one being added to the NWB file as a separate stimulus object
# NOTE(review): `stimuli` is not defined in the visible cells - it has to be created first
# NOTE(review): `stim_description` is assigned but not used by the loop below
stim_description = 'DESCRIPTION.'
for stim in stimuli:
    nwbfile.add_stimulus(stim)

## Behavior Data

### Trial Data

In [22]:
# Define trial information, adding a custom column to the trials table
# NOTE: the two '...' arguments are presumably placeholders for the column name & description
nwbfile.add_trial_column('...', '...')

In [23]:
# Collect trial indices
trial_inds = ...

In [24]:
# Get all trial start & stop times, by indexing into the array of sample times
#   start indices: 0 for the first trial, then each trial index except the last one
#   stop indices: the sample just before each trial index
# NOTE(review): `times` is not defined in the visible cells - it has to be created first
# NOTE: the slicing & arithmetic assume `trial_inds` is a numpy array of integer indices
trial_start_times = times[np.hstack([np.array([0]), trial_inds[:-1]])]
trial_stop_times = times[trial_inds-1]

In [30]:
# Add event information to NWB file, as one row in the trials table per trial
n_trials = len(trial_inds)
for t_ind in range(n_trials):

    # Get trial start and end times
    t_start = trial_start_times[t_ind]
    t_end = trial_stop_times[t_ind]

    # Add trial information to file
    # NOTE: values for any custom trial columns go here, as additional keyword arguments
    #   (a bare `...` placeholder after a keyword argument is a syntax error, so it is kept as a comment)
    nwbfile.add_trial(start_time=t_start,
                      stop_time=t_end)

In [31]:
# Check the trial information as a dataframe
#nwbfile.trials.to_dataframe()

### Position Data

In [32]:
# Get location data, arranged as (n_samples, 2): one row per sample, columns are x & z
# NOTE: the spatial series indexes time along the first dimension, so that it lines up
#   with `timestamps` - stacking alone gives (2, n_samples), hence the transpose
loc_data = np.vstack([task.pos['x'], task.pos['z']]).T

In [33]:
# Set position data as a spatial series and add to NWB file
#   the series is created inside a `Position` container, which is stored as acquisition data
# NOTE(review): the description says 'XY position', but `loc_data` is built from the
#   'x' & 'z' position fields - confirm which label is correct
# NOTE(review): `times` is not defined in the visible cells - it has to be created first
position = Position(name='position')
position.create_spatial_series(name='xy_position',
                               data=loc_data,
                               timestamps=times,
                               reference_frame='middle',
                               description='XY position of the subject.')
nwbfile.add_acquisition(position)

#### Add position-derived measures as a ProcessingModule

In [34]:
# Create a time series for speed, sharing the same timestamps as the position data
# NOTE: only speed is created here - a linear position series would need to be added separately
speed = TimeSeries(name='speed',
                   data = np.array(task.pos['speed']),
                   unit = 'virtual units / second',
                   timestamps=times)

In [35]:
# Add derived spatial measures to NWB file as ProcessingModule
# NOTE: only the `speed` time series is passed in as a data interface here
position_things = ProcessingModule(name='position_measures',
                                   description='Derived measures related to position data.',
                                   data_interfaces=[speed])
nwbfile.add_processing_module(position_things)

position_measures pynwb.base.ProcessingModule at 0x140416986573600
Fields:
  data_interfaces: {
    linear_position <class 'pynwb.base.TimeSeries'>,
    speed <class 'pynwb.base.TimeSeries'>
  }
  description: Derived measures related to position data.

## Spiking Data

In [36]:
# Get a list of the available spike files
spike_files = get_files(full_path / 'split_files')

In [37]:
# Specify additional metadata columns for units
nwbfile.add_unit_column('channel', 'The recording channel of this unit.')
nwbfile.add_unit_column('location', 'The anatomical location of this unit.')

In [38]:
# Add each unit to the NWB file, with one spike file per unit
for ind, spike_file in enumerate(spike_files):

    # Get channel information from file name
    #   this is the text after the last underscore, ignoring everything from the first '.'
    channel = spike_file.split('.')[0].split('_')[-1]

    # Load spike file & get spike data
    # NOTE: currently loads HDF5 file - update as needed
    with h5py.File(full_path / 'split_files' / spike_file, 'r') as h5file:
        spike_data = h5file['spike_data_sorted']

        # Add unit data, while the file is still open so that the datasets can be read
        #   each custom unit column ('channel' & 'location') needs a value for every unit,
        #   otherwise adding the row fails due to the missing column
        # NOTE(review): `location` reuses the electrode location defined earlier,
        #   and `electrodes=[0]` links every unit to the first electrode - confirm per unit
        nwbfile.add_unit(id=ind,
                         electrodes=[0],
                         channel=channel,
                         location=location,
                         waveform_mean=np.mean(spike_data['spikeWaveforms'][:], 0),
                         spike_times=spike_data['spikeTimes'][:])

## Field Data

In [40]:
# Create a region of the electrode table, selecting the electrode (index 0) for the field data
electrode_table_region = nwbfile.create_electrode_table_region(region=[0],
                                                               description='xx')

In [41]:
# Get the list of available LFP files
lfp_files = get_files(full_path / 'micro_lfp', select='.p')

In [42]:
# Add each LFP trace as a new object, stored as acquisition data named 'field_data_<ind>'
# NOTE(review): `ephys_data` is never assigned in the visible code, as the load line below
#   is commented out - the file is opened as `pfile`, but nothing is read from it
for ind, lfp_file in enumerate(lfp_files):
    with open(full_path / 'micro_lfp' / lfp_file, 'rb') as pfile:
        
        # Load ephys data
        #ephys_data = load(...)
        
        # Create & add electrical series to store LFP data
        #   every trace is linked to the same electrode table region
        #   the sampling is described by a start time of 0 & a fixed rate of 500
        ephys_ts = ElectricalSeries('field_data_' + str(ind),
                                    ephys_data,
                                    electrode_table_region,
                                    starting_time=0.,
                                    rate=500.,
                                    resolution=np.inf,
                                    comments="...",
                                    description="LFP time series.")
        nwbfile.add_acquisition(ephys_ts)

### Save out local data file

In [44]:
# Write the assembled NWB file to disk, in the current working directory
#   opening in write mode creates the file, replacing any existing file of the same name
with NWBHDF5IO(path='nwb_local_data.nwb', mode='w') as io:
    io.write(nwbfile)