# Converting Local Data to NWB

This notebook steps through converting data to NWB format. 

Note that this notebook doesn't use the metadata files.

In [None]:
# Set the IPython completer's `use_jedi` option to False
%config Completer.use_jedi = False

In [None]:
import numpy as np

from pynwb import NWBFile, TimeSeries, ProcessingModule
from pynwb.file import Subject
from pynwb.behavior import Position, CompassDirection
from pynwb.ecephys import ElectricalSeries

In [None]:
# Import local code module
import sys
sys.path.append('..')
from conv import Paths
from conv.io import (get_files, make_session_name,
                     load_config, load_task_object, open_h5file, save_nwbfile)
from conv.utils import incrementer, get_current_date, convert_time_to_date, validate_nwb

### Settings

In [None]:
# Run settings
#   reset_time: when True, task time stamps are offset to start at the session start time
#   drop_before_task: presumably whether to drop spikes from before the task start - TODO confirm
reset_time, drop_before_task = True, True

In [None]:
# Define experiment name
#   This label is one of the inputs to `make_session_name`
experiment = 'experiment'

In [None]:
# Define subject information: the subject label & the session label
#   Both are inputs to `make_session_name`
subject, session = 'example', 'session_0'

In [None]:
# Define project path
#   This value is passed to `Paths`; it is left as an empty string in this template
project_path = ''

In [None]:
# Define project paths object
#   `paths.spikes` is used in this notebook to locate the spike files
paths = Paths(project_path)

## Load Files

In [None]:
# Define session name
#   Built from the subject, experiment & session labels, and used to load the task &
#   metadata files, and as the identifier & session ID of the NWB file
session_name = make_session_name(subject, experiment, session)
session_name

In [None]:
# Load behavior data
#   The task object for this session is loaded from the example files folder
task = load_task_object(session_name, folder='example_files/')
# Stop here if the loaded task object is falsy (e.g. None or empty)
assert task

In [None]:
# Load the metadata file
#   `metadata` is indexed like a nested dictionary in this notebook,
#   with sections such as 'subject', 'study', 'device' & 'position'
metadata = load_config(session_name, folder='example_files/')
# Stop here if the loaded metadata is falsy (e.g. None or empty)
assert metadata

In [None]:
# Create an electrodes object
#   NOTE(review): `Electrodes` is not among the imports visible in this notebook -
#   confirm where it should be imported from
electrodes = Electrodes()

# Add dummy data to electrodes object
#   The two lists are passed together: bundle names, then bundle locations
bundle_names = ['b1', 'b2']
bundle_locations = ['place1', 'place2']
electrodes.add_bundles(bundle_names, bundle_locations)

In [None]:
# Get a list of the available spike files
#   Files are looked up in `paths.spikes`, with 'cells' as the second argument
#   (presumably a selection string for the file names - TODO confirm)
spike_files = get_files(paths.spikes, 'cells')
# Stop here if no spike files were found
assert len(spike_files)

## Setup

In [None]:
# Initialize notes
#   Stays None unless the time reset step fills it in; passed to the NWB file as `notes`
notes = None

In [None]:
# Get session start time
#   The start time is divided by 1000 before conversion
#   (presumably milliseconds to seconds - TODO confirm the units of `start_time`)
session_date = convert_time_to_date(task.session['start_time'] / 1000)
session_date

In [None]:
# Reset task time stamps to start at the session start time
#   The offset is the session start time, read with the same 'start_time' key that
#   the rest of this notebook uses for `task.session`
#   NOTE(review): `update_task_time` is not among the imports visible in this notebook -
#   confirm where it should be imported from
if reset_time:
    task = update_task_time(task, 'offset', offset=task.session['start_time'])
    # Record the applied offset in the notes that get stored in the NWB file
    notes = 'The exact subtracted timestamp is: {}'.format(task.info['time_offset'])

## Initialize a NWB File

Set up the file.

### Define Subject Information

In [None]:
# Create subject object
#   Age & sex values of 'XX' in the metadata are treated as unspecified, and passed as None
#   `subject` holds the subject label string until this cell rebinds it to the Subject
#   object; the isinstance check keeps the same label if the cell is run again
subject = Subject(age=metadata['subject']['age'] if metadata['subject']['age'] != 'XX' else None,
                  sex=metadata['subject']['sex'] if metadata['subject']['sex'] != 'XX' else None,
                  description=metadata['subject']['description'],
                  species=metadata['subject']['species'],
                  subject_id=subject if isinstance(subject, str) else subject.subject_id)

### Define Recording Metadata

In [None]:
# Describe the experiment from the task object: version label, build number & language
#   The label & number are joined as strings; only the language is formatted in
experiment_description = ''.join([
    'Task: ', task.experiment['version']['label'],
    ' build-', task.experiment['version']['number'],
    ' ({})'.format(task.experiment['language']),
])

# The source script is this notebook
source_file_name = 'notebooks/01-ConvertToNWB'

In [None]:
# Define collection site information
#   The site is keyed on the first two characters of the subject label
#   `subject` may hold either the label string or the Subject object, depending on
#   which cells have been run, so the label is resolved from whichever is present
subject_label = subject if isinstance(subject, str) else subject.subject_id
data_collection = 'XX' if subject_label[0:2] == 'XX' else 'unknown'

### Collect together into NWB file

In [None]:
# Initialize a NWB file
#   Fields read straight from the 'study' section of the metadata have the same
#   names as the NWBFile arguments, so they are unpacked as keyword arguments
nwbfile = NWBFile(
    identifier=session_name,
    session_id=session_name,
    session_start_time=session_date,
    file_create_date=get_current_date(),
    experiment_description=experiment_description,
    notes=notes,
    source_script_file_name=source_file_name,
    subject=subject,
    **{field: metadata['study'][field]
       for field in ('session_description', 'experimenter', 'institution', 'keywords',
                     'source_script', 'data_collection', 'stimulus_notes', 'lab')},
)

## Recording Definition

### Device(s)

In [None]:
# Create the recording device, described by the 'device' section of the metadata
device = nwbfile.create_device(
    name=metadata['device']['device_name'],
    description=metadata['device']['device_description'],
    manufacturer=metadata['device']['device_manufacturer'],
)

In [None]:
# Check out the defined device
#   As the last expression in the cell, the device object is displayed as the cell output
device

### Electrodes

In [None]:
# Add one electrode group per bundle, and then the electrodes of each bundle
#   Electrode IDs restart at 0 within each bundle, so they repeat across bundles,
#   and the unique ID check is switched off when adding them
for b_name, b_location in electrodes:

    # Create the electrode group for this bundle
    group = nwbfile.create_electrode_group(
        name=b_name,
        description=metadata['device']['bundle_description'],
        location=b_location,
        device=device,
    )

    # Add this bundle's electrodes to the file, sharing the group's location
    for e_ind in range(electrodes.n_electrodes_per_bundle):
        nwbfile.add_electrode(
            location=group.location,
            group=group,
            id=e_ind,
            enforce_unique_id=False,
        )

In [None]:
# Check the electrodes table
#   The table is converted to a dataframe for display
nwbfile.electrodes.to_dataframe()

## Stimuli

Add stimulus information here.

Depending on the task, this could include using:
- nwbfile.stimulus (with `add_stimulus`), which can contain stimuli such as images
- nwbfile.acquisition (with `add_acquisition`), which can contain information such as positions

In [None]:
# Add stimuli information to file, as NWB stimulus objects
#   In this case, `add_stimulus` expects to add a series of TimeSeries objects - could be images, etc
#   Template cell: the `...` placeholders must be replaced before this cell can run
stimuli = ... # Load or define stimuli (load might want to move to top)
for stim in stimuli:
    nwbfile.add_stimulus(stim)
# AND/OR
# Add stimulus position information
#   Note that this rebinds `stimuli` to a Position container, which is added as an acquisition
stimuli = Position(name='stimuli')
stimuli.create_spatial_series(name='stimulus_positions',
                              data=task.stimuli['...'],
                              unit='virtual units',
                              reference_frame='corner',
                              rate=0.,
                              description=metadata['stimulus']['position'])
nwbfile.add_acquisition(stimuli)

## Behaviour data

### Trial Data

In [None]:
# Define a trial column for each event listed in the 'events' section of the metadata
for event_name, event_description in metadata['events'].items():
    nwbfile.add_trial_column(name=event_name, description=event_description)

In [None]:
# Add event information to NWB file
#   Template cell: replace each `...` placeholder with the values for trial `t_ind`
for t_ind in range(len(task.trial['trial'])):

    # Add trial information to file
    #   Values for the custom trial columns go between `start_time` & `stop_time`, as
    #   additional keyword arguments (kept as a comment here so that the cell parses)
    nwbfile.add_trial(start_time=...,
                      # ...,
                      stop_time=...)

In [None]:
# Check the trial information as a dataframe
#   `behav` keeps the full trials table; only the first rows are displayed
behav = nwbfile.trials.to_dataframe()
behav.head()

### Position Data

In [None]:
# Define and add the boundary definitions
#   Three spatial series are stored in one Position container: the center of the
#   environment, and its ranges along x & z
#   Template cell: the `task.environment['...']` placeholders need the real field names
#   Each series takes the description stored under its own name in the 'position' metadata
#   NOTE(review): confirm the metadata file defines a 'z_range' entry under 'position'
boundaries = Position(name='boundaries')
boundaries.create_spatial_series(name='center',
                                 data=np.array([task.environment['...'],
                                                task.environment['...']]),
                                 unit='virtual units',
                                 reference_frame='corner',
                                 rate=0.,
                                 description=metadata['position']['center'])
boundaries.create_spatial_series(name='x_range',
                                 data=[task.environment['...'],
                                       task.environment['...']],
                                 unit='virtual units',
                                 reference_frame='corner',
                                 rate=0.,
                                 description=metadata['position']['x_range'])
boundaries.create_spatial_series(name='z_range',
                                 data=[task.environment['...'],
                                       task.environment['...']],
                                 unit='virtual units',
                                 reference_frame='corner',
                                 rate=0.,
                                 description=metadata['position']['z_range'])
nwbfile.add_acquisition(boundaries)

In [None]:
# Set position data as a spatial series and add to NWB file
#   x & z positions are read from `task.position`, the same attribute that provides
#   the time stamps here
#   NOTE(review): `np.vstack` stacks x & z as rows - confirm this orientation is what
#   readers of the file expect, relative to the time stamps
position = Position(name='position')
position.create_spatial_series(name='player_position',
                               data=np.vstack([task.position['x'], task.position['z']]),
                               unit='virtual units',
                               timestamps=task.position['time'],
                               reference_frame='XX',
                               description=metadata['position']['player_position'])
nwbfile.add_acquisition(position)

In [None]:
# Store head direction, in degrees, as a compass direction series and add it to the NWB file
heading = CompassDirection(name='heading')
heading.create_spatial_series(
    name='direction',
    data=task.head_direction['degrees'],
    timestamps=task.head_direction['time'],
    unit='degrees',
    reference_frame='north',
    description=metadata['position']['heading'],
)
nwbfile.add_acquisition(heading)

#### Add position derived measures as ProcessingModule

In [None]:
# Create a time series for speed, sharing the time stamps of the position data
speed = TimeSeries(
    name='speed',
    data=task.position['speed'],
    timestamps=task.position['time'],
    unit='virtual units / second',
    description=metadata['position']['speed'],
)

In [None]:
# Collect the derived spatial measures (here, speed) into a ProcessingModule,
#   and add the module to the NWB file
position_things = ProcessingModule(
    name='position_measures',
    description=metadata['position']['position_measures'],
    data_interfaces=[speed],
)
nwbfile.add_processing_module(position_things)

## Spiking Data

In [None]:
# Describe the spike sorting: which sorter was used, and who ran it
description = "Spike sorting solutions - done with {} by {}.".format(
    metadata['sorting']['sorter'],
    metadata['sorting']['done_by'],
)

In [None]:
# Initialize the units data, with given description
#   `Units` is imported here, as it is not among the imports at the top of the notebook
from pynwb.misc import Units
nwbfile.units = Units('units', description=description)

In [None]:
# Add unit metadata columns
#   One column is defined per entry in the 'units' section of the metadata
#   The loop variable has its own name so that it does not overwrite the sorting
#   `description` that is used to initialize the units table
for field, field_description in metadata['units'].items():
    nwbfile.add_unit_column(field, field_description)

In [None]:
# Add each unit to the NWB file
#   Template cell: the `...` placeholders (channel, location) need to be filled in
#   Unit IDs are drawn from a single incrementer, so they keep counting across files
ind = incrementer()
for spike_file in spike_files:

    # Get channel information from file name
    channel = ...

    # Load spike file & get spike data (example for HDF5 files)
    #   Each dataset is sliced with `[:]` inside the `with` block, presumably to read
    #   it into memory before the file is closed - TODO confirm
    with open_h5file(spike_file, paths.spikes) as h5file:

        spike_data = h5file['spike_data_sorted']
        spike_times = spike_data['spike_times'][:]
        spike_clusters = spike_data['spike_clusters'][:]
        spike_waveforms = spike_data['spike_waveforms'][:]

    # If task information has been offset, apply the same to spike times
    if task.status['time_reset']:
        spike_times = spike_times - task.info['time_offset']

    # Loop across clusters within the file, and add each unit
    #   `np.unique` returns the cluster labels sorted, so units are added in a fixed order
    for cluster in np.unique(spike_clusters):
        mask = spike_clusters == cluster

        # Get the spike times for the cluster
        #   Spikes from before the session start are dropped if the run setting asks for it
        unit_spike_times = spike_times[mask]
        if drop_before_task:
            unit_spike_times = unit_spike_times[unit_spike_times >= task.session['start_time']]

        # Get the average waveform, averaging across the spikes of this cluster
        unit_waveform_mean = np.mean(spike_waveforms[mask, :], 0)

        # Add unit data
        nwbfile.add_unit(id=next(ind),
                         electrodes=[0],
                         channel=channel,
                         location=...,
                         waveform_mean=unit_waveform_mean,
                         spike_times=unit_spike_times)

In [None]:
# Check the units table
#   The table is converted to a dataframe, and only the first rows are displayed
nwbfile.units.to_dataframe().head()

## Field Data

In [None]:
# Create a region of the electrode table, selecting its first row (index 0)
#   The 'xx' description is a placeholder
electrode_table_region = nwbfile.create_electrode_table_region(
    region=[0],
    description='xx',
)

In [None]:
# Get the list of available LFP files
#   Files are looked up in the 'lfp' folder under `full_path`, selecting on '.p'
#   NOTE(review): `full_path` is not defined anywhere in the visible notebook -
#   confirm what it should be (e.g. something derived from `paths`)
lfp_files = get_files(full_path / 'lfp', select='.p')

In [None]:
# Add each LFP trace as a new object
#   NOTE(review): as written this cell cannot run - `full_path` is not defined in the
#   visible notebook, and `ephys_data` is never assigned as the load line is commented out
for ind, lfp_file in enumerate(lfp_files):
    with open(full_path / 'lfp' / lfp_file, 'rb') as pfile:
        
        # Load ephys data
        #   Template placeholder: load the data from `pfile` and assign it to `ephys_data`
        #ephys_data = load(...)
        
        # Create & add electrical series to store LFP data
        #   Each series is named by its index in `lfp_files`; all of them use the same
        #   electrode table region, a starting time of 0 and a rate of 500
        ephys_ts = ElectricalSeries('field_data_' + str(ind),
                                    ephys_data,
                                    electrode_table_region,
                                    starting_time=0.,
                                    rate=500.,
                                    resolution=np.inf,
                                    comments="...",
                                    description="LFP time series.")
        nwbfile.add_acquisition(ephys_ts)

## Data Checks

Check NWBfile for consistency.

In [None]:
# Template placeholder: add consistency checks on `nwbfile` here
...

### Save out local data file

In [None]:
# # Save out an example NWB file
# save_nwbfile(nwbfile, 'example_files/')

In [None]:
# # Validate the saved out NWB file
# validate_nwb('example_files/nwb_local_data', verbose=True)