# Converting Local Data to NWB

This notebook steps through converting data to NWB format. 

Note that this notebook doesn't use the metadata files.

In [None]:
# Turn off Jedi-based tab completion in IPython, so the default completer is used
%config Completer.use_jedi = False

In [None]:
from pathlib import Path

import h5py
import joblib
import numpy as np
import pandas as pd

from pynwb import NWBFile, TimeSeries, ProcessingModule
from pynwb.file import Subject
from pynwb.behavior import Position, CompassDirection, SpatialSeries
from pynwb.ecephys import ElectricalSeries, SpikeEventSeries

In [None]:
# Import local code module
#   The parent directory is added to the import path so the local package can be found
import sys
sys.path.append('..')
# NOTE(review): the package name is unified to `convth` here (this line previously read
#   `conv.io`), to match the two sibling imports - confirm against the repository layout
# `make_session_name` is imported instead of `session_name`: it is the function this
#   notebook calls, and `session_name` is used as a plain variable name in later cells
from convth.io import get_files, load_task_object, make_session_name, save_nwbfile
from convth.utils import incrementer, get_current_date, convert_time_to_date
from convth.validate import validate_nwb

### Settings

In [None]:
# Run settings
#   reset_time: if True, the task time stamps are offset in the 'Setup' section
#   drop_before_task: checked in the 'Spiking Data' section, when adding units
reset_time = True
drop_before_task = True

In [None]:
# Define subject information
#   These labels are used to build the data path & the session name
subj = 'example'
session = 'session_0'

In [None]:
# Define the base data folder
#   NOTE: '...' is a placeholder - set this to the local base data directory
data_folder = Path('...')

# Define the full subject & session path, laid out as `data_folder / subject / session`
full_path = data_folder / subj / session

## Load Files

In [None]:
# Define session name, built from the subject & session labels
#   NOTE(review): `session_name` is assigned again in the 'Define Recording Metadata'
#   section, which replaces the value computed here - confirm which one is intended
session_name = make_session_name(subj, session)
session_name

In [None]:
# Load behavior data
#   The task object is read from the 'behavioral' subfolder of the session directory
task = load_task_object(full_path / 'behavioral' / 'task.p')

## Setup

In [None]:
# Initialize notes
#   This value is passed to the `notes` field of the NWB file, and stays `None`
#   unless the time reset step below replaces it
notes = None

In [None]:
# Get session start time
#   NOTE(review): the division by 1000 presumably converts milliseconds to seconds - confirm
session_date = convert_time_to_date(task.session['start_time'] / 1000)
session_date

In [None]:
# Reset task time stamps to start at the session start time
#   NOTE(review): the offset is read from `task.session['start']`, whereas the session date
#   is computed from `task.session['start_time']` - confirm which key is intended
#   NOTE(review): check that `update_task_time` is imported from the local module
if reset_time:
    task = update_task_time(task, 'offset', offset=task.session['start'])
    # Record the applied offset, which ends up in the `notes` field of the NWB file
    notes = 'The exact subtracted timestamp is: {}'.format(task.time_offset)

## Initialize a NWB File

Set up the file.

### Define Subject Information

In [None]:
# Set subject information
#   These values are passed to the `Subject` object created in the next cell
#   `age` & `sex` are left unset (None) in this example
age = None
sex = None
description = 'participant'
species = 'human'
subject_id = 'test'

In [None]:
# Create the subject object from the subject information
subject = Subject(
    age=age,
    sex=sex,
    description=description,
    species=species,
    subject_id=subject_id,
)

### Define Recording Metadata

In [None]:
# Define metadata for NWB file
#   Session & experiment descriptors
session_description = 'XX'
identifier = 'XX'
session_name = 'testfile'
experiment_description = 'Experiment information - example.'
stimulus_notes = 'Notes on stimulus presentation.'
keywords = ['keyword1', 'keyword2']

#   People & places
experimenter = ['Experimenter1', 'Experimenter2']
lab = 'Jacobs Lab'
institution = 'Columbia'

#   Provenance of the conversion code
source_script = 'https://github.com/JacobsSU/AnalyzeTH/'
source_script_file_name = '01-ConvertToNWB.ipynb'

In [None]:
# Define information collected from task object
#   Builds a label of the form 'Task: <label> build-<number> (<language>)'
experiment_description = (
    'Task: ' + task.experiment['version']['label']
    + ' build-' + task.experiment['version']['number']
    + ' ({})'.format(task.experiment['language'])
)

In [None]:
# Define collection site information, selected by the prefix of the subject label
data_collection = 'XX' if subj[0:2] == 'XX' else 'unknown'

### Collect together into NWB file

In [None]:
# Initialize a NWB file
#   Collects the session, experiment & subject metadata defined in the cells above
#   NOTE(review): `session_name` is passed as both `identifier` and `session_id`, and the
#   separately defined `identifier` variable is not used here - confirm this is intended
nwbfile = NWBFile(session_description=session_description,
                  identifier=session_name,
                  session_start_time=session_date,
                  file_create_date=get_current_date(),
                  experimenter=experimenter,
                  experiment_description=experiment_description,
                  session_id=session_name,
                  institution=institution,
                  keywords=keywords,
                  notes=notes,
                  source_script=source_script,
                  source_script_file_name=source_script_file_name,
                  data_collection=data_collection,
                  stimulus_notes=stimulus_notes,
                  lab=lab,
                  subject=subject)

## Recording Definition

### Device(s)

In [None]:
# Device information
#   NOTE: these are placeholder values - replace with the actual recording device details
device_name = 'RECORDING DEVICE'
device_desc = 'RECORDING DEVICE DESCRIPTION'
device_manu = 'RECORDING DEVICE MANUFACTURER'

# Create device object, which also registers it in the NWB file
#   Arguments are passed by keyword, so that each value lands in the intended field
#   regardless of the order in which the device parameters are declared
device = nwbfile.create_device(name=device_name,
                               description=device_desc,
                               manufacturer=device_manu)

In [None]:
# Check out the defined device (shown as the cell output)
device

### Electrodes

In [None]:
# Description applied to every electrode group (placeholder text - replace as needed)
bundle_description = 'BUNDLE DESCRIPTION'

In [None]:
# Add electrode bundles and electrode information
#   NOTE(review): `electrodes` is not defined in the cells above - it needs to be an object
#   that iterates as (name, location) pairs and has a `n_electrodes_per_bundle` attribute
for bundle_name, bundle_location in electrodes:
    
    # Create an electrode group for the current bundle
    electrode_group = nwbfile.create_electrode_group(name=bundle_name,
                                                     description=bundle_description,
                                                     location=bundle_location,
                                                     device=device)
    
    # Add electrodes to file for the current bundle
    #   Electrode ids restart at 0 within each bundle, so the unique id check is turned off
    for electrode_ind in range(electrodes.n_electrodes_per_bundle):
        nwbfile.add_electrode(location=electrode_group.location,
                              group=electrode_group,
                              id=electrode_ind, enforce_unique_id=False)


In [None]:
# Check the electrodes table, displayed as a dataframe
nwbfile.electrodes.to_dataframe()

## Stimuli

Add stimuli of interest to the NWB file.

In [None]:
# Add stimuli
#   NOTE(review): `stimuli` is not defined in the cells above, and `stim_description` is
#   not used by the loop - this cell looks like a template to be filled in
stim_description = 'DESCRIPTION.'
for stim in stimuli:
    nwbfile.add_stimulus(stim)

## Behavior Data

### Trial Data

In [None]:
# Define trial information
#   NOTE: the '...' arguments are placeholders for the column name & description
#   Add one `add_trial_column` call per custom trial column
nwbfile.add_trial_column('...', '...')

In [None]:
# Collect trial indices
#   NOTE: `...` is a placeholder - replace with the collection of trial indices
trial_inds = ...

In [None]:
# Add event information to NWB file
#   NOTE(review): `n_trials` is computed from `trial_inds`, but the loop is bounded by
#   `task.trial['trial']` - confirm which one should drive the iteration
n_trials = len(trial_inds)
for t_ind in range(len(task.trial['trial'])):

    # Add trial information to file
    #   TEMPLATE: fill in the start & stop times, and pass a value for each custom trial
    #   column as an additional keyword argument. The bare `...` argument that stood
    #   between the keywords was a syntax error, and is replaced by this note
    try:
        nwbfile.add_trial(start_time=...,
                          stop_time=...,
                          )
    except IndexError:
        # Presumably raised when per-trial data is indexed for an unfinished final trial
        print('Incomplete last trial - skipped adding.')

In [None]:
# Check the trial information as a dataframe, showing only the first rows
behav = nwbfile.trials.to_dataframe()
behav.head()

### Position Data

In [None]:
# Set position data as a spatial series and add to NWB file
#   The x & z values are read from `task.position`, the same attribute that provides the
#   time stamps here and the speed values in the derived measures
#   The values are stacked as columns, giving data of shape (n_samples, 2), as NWB expects
#   time on the first dimension - this assumes x & z are 1d arrays (TODO confirm)
position = Position(name='position')
position.create_spatial_series(name='player_position',
                               data=np.column_stack([task.position['x'], task.position['z']]),
                               unit='virtual units',
                               timestamps=task.position['time'],
                               reference_frame='XX',
                               description='Position of the subject.')
nwbfile.add_acquisition(position)

In [None]:
# Set head direction information as a compass direction and add to NWB file
#   `CompassDirection` comes from `pynwb.behavior`, and must be imported alongside `Position`
heading = CompassDirection(name='heading')
heading.create_spatial_series(name='direction',
                              data=task.head_direction['degrees'],
                              unit='degrees',
                              timestamps=task.head_direction['time'],
                              reference_frame='north',
                              description="The direction the subject's head is pointing.")
nwbfile.add_acquisition(heading)

#### Add position-derived measures as a ProcessingModule

In [None]:
# Create a time series for movement speed, using the time stamps of the position data
speed = TimeSeries(
    name='speed',
    description='The players movement speed, computed from the position data.',
    data=task.position['speed'],
    unit='virtual units / second',
    timestamps=task.position['time'],
)

In [None]:
# Add derived spatial measures to NWB file as ProcessingModule
#   The module currently holds only the speed time series
position_things = ProcessingModule(name='position_measures',
                                   description='Derived measures related to position data.',
                                   data_interfaces=[speed])
nwbfile.add_processing_module(position_things)

## Spiking Data

In [None]:
# Get a list of the available spike files, from the 'spikes' subfolder of the session
spike_files = get_files(full_path / 'spikes')

In [None]:
# Specify additional metadata columns for units
#   Each unit added below needs to provide a value for these columns
nwbfile.add_unit_column('channel', 'The recording channel of this unit.')
nwbfile.add_unit_column('location', 'The anatomical location of this unit.')

In [None]:
# Add each unit to the NWB file
#   TEMPLATE: values marked `...` need to be filled in for the local data
unit_ind = incrementer()
for spike_file in spike_files:

    # Load spike file & get spike data
    # NOTE: currently loads HDF5 file - update as needed
    with h5py.File(full_path / 'spikes' / spike_file, 'r') as h5file:
        spike_data = h5file['spike_data_sorted']

        # If task information has been offset, apply the same to spike times
        if task.time_reset:
            #spike_times = spike_times - task.time_offset
            ...

        # If set to drop before task, remove spikes before session start time
        if drop_before_task:
            #spike_times = spike_times[spike_times >= task.session['start']]
            ...

        # Add unit data, taking the unit id from the running counter
        #   NOTE(review): assumes `incrementer()` returns an iterator, so that `next`
        #   gives a new id for each unit - confirm against the local module
        nwbfile.add_unit(id=next(unit_ind),
                         electrodes=[0],
                         channel=...,
                         location=...,
                         spike_times=...)

## Field Data

In [None]:
# Create an electrode table region, selecting electrode index 0 from the electrodes table
#   NOTE: 'xx' is a placeholder for the description of the region
electrode_table_region = nwbfile.create_electrode_table_region([0], 'xx')

In [None]:
# Get the list of available LFP files, from the 'lfp' subfolder, selecting on '.p'
lfp_files = get_files(full_path / 'lfp', select='.p')

In [None]:
# Add each LFP trace as a new object
#   NOTE(review): `ephys_data` is not defined while the load step below is commented out,
#   and the opened file handle `pfile` is not yet used - fill in the loading code
#   NOTE(review): the sampling rate is hard coded as 500 - confirm against the recording
for ind, lfp_file in enumerate(lfp_files):
    with open(full_path / 'lfp' / lfp_file, 'rb') as pfile:
        
        # Load ephys data
        #ephys_data = load(...)
        
        # Create & add electrical series to store LFP data
        #   Each series is named by its index in the list of LFP files
        ephys_ts = ElectricalSeries('field_data_' + str(ind),
                                    ephys_data,
                                    electrode_table_region,
                                    starting_time=0.,
                                    rate=500.,
                                    resolution=np.inf,
                                    comments="...",
                                    description="LFP time series.")
        nwbfile.add_acquisition(ephys_ts)

## Data Checks

Check the NWB file for consistency.

In [None]:
# TODO: add consistency checks for the NWB file (this cell is currently a placeholder)
...

### Save out local data file

In [None]:
# # Save out an example NWB file
# save_nwbfile(nwbfile, 'nwb_local_data')

In [None]:
# # Validate the saved out NWB file
# validate_nwb('nwb_local_data', verbose=True)