## Convert patchmaster .dat to .nwb format and add metadata and experimental details

### Import modules and setup filenames


In [1]:
# import modules
import numpy as np
import os
import pandas as pd
import pynwb
import roman
import sys
import uuid

from datetime import datetime
from dateutil.tz import tzlocal
from pynwb import NWBHDF5IO, NWBFile
from pynwb.file import Subject

## add path to parent directory to import conversion module
sys.path.append('..') 
from src.x_to_nwb import convert

In [2]:
# set files to look at for testing
# cell_id selects which row of the metadata spreadsheet (matched on its
# 'Cell ID' column) is used by the cells below
cell_id = 'ASH189' # neuron type + recording number, should be a unique identifier within this dataset

# TO DO - eventually want to be able to loop through all the cells in the metadata spreadsheet

### Define functions

In [3]:
def import_recording_metadata(fname, cell_id, start_row):
    '''
    import_recording_metadata: gets all the metadata from a specific recording
    with the input cell id

    fname: path to the metadata excel file; the 'Recordings-MetaData' sheet is read
    cell_id: value to look for in the 'Cell ID' column
    start_row: number of leading rows to skip before the two header rows

    returns a dataframe with the row(s) whose 'Cell ID' equals cell_id
    (empty if nothing matches)
    '''
    #import excel file - read from the fname argument rather than a notebook
    #global so the function works no matter what the caller named its variable
    df = pd.read_excel(fname, skiprows=range(start_row), header=[0,1], sheet_name='Recordings-MetaData')

    #clean up the column names: blank header cells come back as 'Unnamed...'
    #so strip those, then join the two header levels into one flat name
    colname_top = df.columns.get_level_values(level=0).str.replace('Unnamed.*','',regex=True)
    colname_bottom = df.columns.get_level_values(level=1).str.replace('Unnamed.*','',regex=True)
    df.columns = [colname_top, colname_bottom]
    df.columns = df.columns.map(''.join)

    #select the row containing the relevant experiment info
    exp_info = df.loc[df['Cell ID'] == cell_id]

    return exp_info

In [4]:
def import_subject_metadata(fname, exp_info):
    '''
    import_subject_metadata: collects subject info (id, genotype, cell type and
    worm dimensions) and returns it as a dict for input into the nwb file

    fname: path to the metadata excel file; its 'StrainsdB' sheet must have
           'Strain' and 'Genotype' columns
    exp_info: dataframe for one recording with 'Cell ID', 'Cell' and 'Strain'
              columns plus the columns whose names contain 'µm'

    returns a dict with keys 'subject_id', 'description' and 'genotype', where
    'description' is the string form of a dict of cell type and dimensions
    '''
    def first_value(column):
        #first entry of the named column in the recording dataframe
        return exp_info[column].values[0]

    #identify the cell
    subject_id = first_value('Cell ID')
    cell_type = first_value('Cell')

    #look up the genotype that goes with this strain ID
    strain = first_value('Strain')
    strains = pd.read_excel(fname, sheet_name='StrainsdB')
    genotype = strains.loc[strains['Strain'] == strain, 'Genotype'].values[0]

    #worm dimensions are paired with the labels by position
    #NOTE(review): assumes the 'µm' columns are ordered length, width, area - confirm
    dim_labels = ['length (µm)','width (µm)','area (µm)']
    dim_values = exp_info.filter(like='µm').values[0]

    #compile description, writing the spreadsheet's missing-data markers as 'nan'
    missing_markers = ['no data','nd']
    descript = {'cell type': cell_type}
    descript.update(zip(dim_labels, dim_values))
    descript = {label: ('nan' if value in missing_markers else value)
                for label, value in descript.items()}

    return {'subject_id': subject_id, 'description': str(descript), 'genotype': genotype}

In [5]:
def get_ingredients(fname, rec_data, sheet_name, col_name):
    '''
    get_ingredients: looks up the recipe of the solution used in a recording

    fname: path to the metadata excel file
    rec_data: dataframe for one recording; the solution name is the first
              entry of its col_name column
    sheet_name: sheet holding the recipes, read with two header rows (solution
                name on top, 'Ingredients' / 'Molarity' underneath)
    col_name: column of rec_data that names the solution

    returns a fixed description string when the solution is 'np', the solution
    name itself when the sheet has no entry for it, otherwise a dict mapping
    each ingredient to its molarity
    '''
    #load the recipe table and find which solution this recording used
    recipes = pd.read_excel(fname, header=[0,1], sheet_name=sheet_name)
    solution = rec_data[col_name].values[0]

    #'np' entries have no recipe to look up
    if solution == 'np':
        return 'no fast perfusion, gravity fed'

    #solutions missing from the sheet are passed through unchanged
    if solution not in recipes:
        return solution

    #pair up the ingredient and molarity columns for this solution
    recipe = recipes[solution]
    return dict(zip(recipe['Ingredients'], recipe['Molarity']))

In [6]:
def import_solution_metadata(fname, rec_data):
    '''
    import_solution_metadata: gathers the internal and external (control and
    experimental) solution details for one recording

    fname: path to the metadata excel file
    rec_data: dataframe for one recording with 'I-soln', 'E-soln-ctl' and
              'E-soln-exp' columns

    returns a dict keyed by solution role, each value being whatever
    get_ingredients returns for that solution
    '''
    #(output label, recipe sheet, recording column) for every solution role
    lookups = (
        ('Internal solution', 'I-SolutionsdB', 'I-soln'),
        ('External solution - control', 'E-SolutionsdB', 'E-soln-ctl'),
        ('External solution - experimental', 'E-SolutionsdB', 'E-soln-exp'),
    )

    return {label: get_ingredients(fname, rec_data, sheet, column)
            for label, sheet, column in lookups}

In [7]:
def get_dat_filename(date_raw):
    '''
    get_dat_filename: converts a date string in dd-mm-yy format, with the month
    stored as roman numerals, into a yy-mm-dd string with all integer values

    date_raw: dash-separated date string whose second field is a roman numeral
    returns the date as a 'yy-mm-dd' string
    '''
    fields = date_raw.split('-')

    #swap the roman-numeral month for its integer value
    month = roman.fromRoman(fields[1])
    fields[1] = f"{month}"

    #parse as day month year, then write back out as year-month-day
    parsed = datetime.strptime(' '.join(fields), "%d %m %y")
    return parsed.strftime("%y-%m-%d")

### Compile metadata

In [8]:
# load the metadata spreadsheet
# start_row=13 skips the first 13 rows of the recordings sheet before its header rows
fname_metadata = '..//test_data//ASH-metadata_12_III_29.xls'
rec_data = import_recording_metadata(fname_metadata, cell_id, start_row=13)

# get metadata details
subj_data = import_subject_metadata(fname_metadata, rec_data)
sol_data = import_solution_metadata(fname_metadata, rec_data)

# save all metadata fields
# NOTE(review): sol_data is computed above but is not included in this dict - confirm whether it should be
all_metadata = {'species': 'C. elegans',
                'genotype': subj_data['genotype'],
                'subject_id': subj_data['subject_id'],
                'subject_description': subj_data['description'],
                'experiment_description':'intracellular whole-cell patch clamp recordings in ASH neurons of C. elegans',
                'session_description': 'PLACEHOLDER',
                'lab': 'Goodman Lab',
                'institution': 'Stanford University',
                'protocol': 'similar to Goodman et al, 1998 Neuron, doi: 10.1016/s0896-6273(00)81014-4', # TO DO - include more details?
                'related_publications': 'Geffeney et al, Neuron 2011, doi: 10.1016/j.neuron.2011.06.038'
               }

# TO DO - add electrode details?
# TO DO - add field for mechanical stimulation for relevant files?
# TO DO - add more details to protocol? or to stimulus traces?
    # start with ct-ivq 'on cell'
    # followed by ct-ivq 'whole cell'
    # average traces from first IVq subtracted from averaged traces of second IVq to remove capacitance artifacts
    # ct protocol (capacity transient) is a series of +10 mV and -10 mV voltage pulses to measure capacitance and to estimate series resistance.

### Generate nwb file

In [9]:
# get file name of raw data
rec_date = get_dat_filename(rec_data['Recording Date'].values[0])

# check that file exists
# try the yy-mm-dd.dat spelling first, then fall back to yy_mm_dd.dat
# (rec_date keeps whichever spelling was tried last, for use in later cells)
fname_dat = f"..//test_data//{rec_date}.dat"
if not os.path.isfile(fname_dat):
    rec_date = rec_date.replace('-','_')
    fname_dat = f"..//test_data//{rec_date}.dat"

# neither spelling was found, so stop before calling the converter
if not os.path.isfile(fname_dat):
    raise ValueError(f"The file {fname_dat} does not exist")

# run converter - TO DO
# left as the last expression so the notebook displays its return value
convert(fname_dat, overwrite=True, existingNWBData=all_metadata)





Increment mode Alternate is not supported.
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Increment mode Alternate is not supported.
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Can not yet recreate stimset Miv_VC
Increment mode Alternate is not supported.
Can not yet recreate stimset Miv_VC




'..//test_data//08-10-30.nwb'

### Inspect the final nwb file

In [10]:
# load the existing nwb files
# NOTE(review): the '-1' suffix presumably matches how the converter names its output files - confirm
fname_nwb = f"..//test_data//{rec_date}-1.nwb"
# opened read-only; the handle is kept in io and is not closed in this cell,
# so call io.close() once nwbfile is no longer needed
io = NWBHDF5IO(fname_nwb, 'r')
nwbfile = io.read()
# print a summary of the file contents
print(nwbfile)

root pynwb.file.NWBFile at 0x140539949737296
Fields:
  acquisition: {
    index_0000 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0001 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0002 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0003 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0004 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0005 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0006 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0007 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0008 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0009 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0010 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0011 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0012 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0013 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0014 <class 'pynwb.icephys.VoltageClampSeries'>,
    index_0015 <class 'pynwb.icephys.VoltageC