## Whole brain calcium imaging data from C. elegans, Kato et al. 2015

Kato, S., Kaplan, H. S., Schrödel, T., Skora, S., Lindsay, T. H., Yemini, E., et al. (2015). Global Brain Dynamics Embed the Motor Command Sequence of Caenorhabditis elegans. Cell, 163(3), 656–669. http://doi.org/10.1016/j.cell.2015.09.034

In [1]:
!ls
from datetime import datetime
from dateutil.tz import tzlocal

import matplotlib.pyplot as plt
%matplotlib inline 
import pynwb
import math
from hdmf.backends.hdf5.h5_utils import H5DataIO

import h5py
import numpy as np
import scipy.io as sio

# When True, all three MAT files are converted; when False only 'WT_NoStim'
# is listed here.
generate_all = True

# Data originally obtained from: https://osf.io/y8a6e/
# Each entry is used both as the MAT file stem ('<entry>.mat') and as the
# top-level key looked up inside that file.
all_datarefs = ['WT_NoStim']
if generate_all:
    all_datarefs.extend(['AVA_HisCl', 'WT_Stim'])

# Convert each recording stored in the Kato et al. 2015 MAT files to NWB.
# For every data_ref in all_datarefs the file '<data_ref>.mat' is opened with
# h5py, and for every dataset index one NWB file named
# 'KatoEtAl2018.<data_ref>.<index>.nwb' is written containing:
#   - one TimeSeries per (trace type, recorded cell), and
#   - one 'States' TimeSeries holding the per-frame state vector.
for data_ref in all_datarefs:

    mat_file = '%s.mat'%data_ref

    # From readme:
    #
    # traces_raw= neural activity traces uncorrected
    # traces = neural activity traces corrected for bleaching
    # tracesDif = derivative of traces
    # IDs = identified neuron IDs
    # timeVectorSeconds = time vector in seconds
    # fps = frames per second
    # dataset = name of dataset
    # stimulus
    #     -identity = what was changed e.g. O2 (oxygen)
    #     -type = stimulus type e.g. binary steps
    #     -switchtimes =  time in seconds when stimulus changed from initial state to the other state
    #     -initialstate = the state that the stimulus starts with, refers to "conc"
    #     -conc = the concentrations of the stimulus
    #     -concunits - units of the "conc"
    #
    # States = vector of different state types (8 states for WT_NoStim, 4 states for WT_Stim and AVA_HisCl)
    #
    # 8 states for WT_NoStim:
    #     ‘FWD’ forward crawling; ‘SLOW’ forward slowing; ‘DT’ dorsal post reversal turn
    #     ‘VT’ ventral post reversal turn; ‘REV1’ reverse crawling;
    #     ‘REV2’ reverse crawling; ‘REVSUS’ sustained reverse crawling; ‘NOSTATE’ - ambiguous
    #
    # 4 states for WT_Stim and AVA_HisCl:
    #     ‘FWD’ forward crawling; ‘REV’ reverse crawling
    #     ‘REVSUS’ sustained reverse crawling; ‘TURN’ post reversal turn

    # The state legend depends only on which MAT file is being converted, so
    # pick it up front and fail early (before any file is opened) if the
    # data set is not one of the known ones.
    if data_ref=='WT_NoStim':
        #8 states for WT_NoStim: (taken from original README)
        state_info = "‘FWD’ forward crawling; ‘SLOW’ forward slowing; ‘DT’ dorsal post reversal turn"+\
                     "; ‘VT’ ventral post reversal turn; ‘REV1’ reverse crawling"+\
                     "; ‘REV2’ reverse crawling; ‘REVSUS’ sustained reverse crawling; ‘NOSTATE’ - ambiguous"
    elif data_ref=='WT_Stim' or data_ref=='AVA_HisCl':
        #4 states for WT_Stim and AVA_HisCl: (taken from original README)
        state_info = "‘FWD’ forward crawling; ‘REV’ reverse crawling"+\
                     "; ‘REVSUS’ sustained reverse crawling; ‘TURN’ post reversal turn"
    else:
        raise ValueError('No state description known for data set: %s'%data_ref)

    # Provenance note attached to every TimeSeries created from this MAT file
    comments='Extracted from MAT file: %s.mat'%data_ref

    # Number of recordings per MAT file is hard-coded: 7 for WT_Stim, 5 otherwise
    datasets = [0]
    if generate_all:
        datasets = range(7) if data_ref == 'WT_Stim' else range(5)

    # Context manager guarantees the MAT (HDF5) file is closed even if the
    # conversion of one of its recordings raises.
    with h5py.File(mat_file, 'r') as h5_file:
        mat_contents = h5_file[data_ref]

        print('Contents of MAT file %s: %s'%(mat_file, sorted(mat_contents.keys())))

        for dataset_index in datasets:

            # Both timestamps are simply "now"; the MAT file is not consulted
            # for a recording date here.
            start_time = datetime.now(tz=tzlocal())
            create_date = datetime.now(tz=tzlocal())

            experimenter = 'Zimmer lab members'

            print('\n========  loading dataset id %i'%dataset_index)

            # Each top-level entry is an array of HDF5 object references, one
            # row per recording; dereference the row for this recording.
            raw_h5ref = np.array(mat_contents['dataset'])
            raw = h5_file[raw_h5ref[dataset_index][0]]
            # The dataset name is stored as a column of character codes
            st = [r[0] for r in raw]
            dataset_id = ''.join(map(chr,st))

            main_ref = 'Kato et al. 2015 dataset: %s taken from file %s.mat'%(dataset_id,data_ref)
            nwbfile = pynwb.NWBFile(main_ref,
                          dataset_id,
                          start_time,
                          file_create_date=create_date,
                          notes='NWB file created with pynwb v%s'%pynwb.__version__,
                          experimenter=experimenter,
                          experiment_description='Whole-Brain Ca2+ Imaging of C. elegans',
                          institution='Research Institute of Molecular Pathology (IMP), Vienna',
                          related_publications='Kato, S., Kaplan, H. S., Schrödel, T., Skora, S., Lindsay, T. H., Yemini, E., et al. (2015). Global Brain Dynamics Embed the Motor Command Sequence of Caenorhabditis elegans. Cell, 163(3), 656–669. http://doi.org/10.1016/j.cell.2015.09.034',
                          lab='Zimmer lab, https://www.imp.ac.at/groups/manuel-zimmer')

            # fps is only printed for information; it is not stored in the NWB file
            print('\n========  loading fps %i'%dataset_index)
            fps_raw_h5ref = np.array(mat_contents['fps'])
            fps_raw = h5_file[fps_raw_h5ref[dataset_index][0]]
            print(fps_raw[0][0])


            print('\n========  loading IDs %i'%dataset_index)

            raw_h5ref = np.array(mat_contents['IDs'])
            print('len: %s'%len(raw_h5ref[dataset_index]))
            raw = h5_file[raw_h5ref[dataset_index][0]]
            print(raw)
            # Maps trace row index -> comma separated cell name(s), or 'Unknown Cell'
            ID_info = {}
            for ri in range(len(raw)):
                r = raw[ri]
                ref = h5_file[r[0]]
                ID_info[ri] = ''

                for ref_entry in ref:
                    if ref_entry != 0:
                        ii = h5_file[ref_entry[0]]

                        if len(ii.shape)==2:
                            # 2-D entry: a column of character codes spelling the cell name
                            st = [c[0] for c in ii]
                            s2 = ''.join(map(chr,st))
                            if len(ID_info[ri])>0: ID_info[ri]+=', '
                            ID_info[ri] += '%s'%s2  # TODO: fix for case more than one cell assigned to this recording location!!
                        else:
                            if len(ID_info[ri])>0: ID_info[ri]+=', '
                            ID_info[ri] += 'Unknown Cell'
                    else:
                        # A zero entry resets (not appends to) the label
                        ID_info[ri] = 'Unknown Cell'

            print(ID_info)

            print('\n========  loading timeVectorSeconds %i'%dataset_index)

            # Some MAT files spell this key 'timVectorSeconds' (see the
            # "Contents of MAT file" printout), so accept both spellings.
            if 'timeVectorSeconds' in  mat_contents:
                raw_h5ref = np.array(mat_contents['timeVectorSeconds'])
            else:
                raw_h5ref = np.array(mat_contents['timVectorSeconds'])
            raw = h5_file[raw_h5ref[dataset_index][0]]

            print('Data points %s: %s'%(raw.shape, raw[0]))
            timestamps = raw[0]

            trace_types = {'traces_raw':'Neural activity traces uncorrected',
                           'traces': 'Neural activity traces corrected for bleaching',
                           'tracesDif': 'Derivative of traces'}

            for trace_type in trace_types:
                print('\n========  loading %s %i'%(trace_type,dataset_index))

                raw_h5ref = np.array(mat_contents[trace_type])
                raw = h5_file[raw_h5ref[dataset_index][0]]

                # Read the whole trace matrix once. Dataset.value is deprecated
                # (removed in h5py 3.0) and re-read the full dataset on every
                # access; dataset[()] is the documented replacement.
                trace_matrix = raw[()]

                for i, cell_id in ID_info.items():
                    data = trace_matrix[i]
                    wrapped_data = H5DataIO(data=data, compression=True)
                    # Row index is part of the name, so names stay unique even
                    # when many rows share the label 'Unknown Cell'.
                    ref = '%s__%s__%s'%(trace_type, i, cell_id)
                    desc = '%s; ID: %s; cell: %s'%(trace_types[trace_type],i,cell_id)
                    ts_acq = pynwb.TimeSeries(ref, wrapped_data, 'none', timestamps=timestamps,comments=comments,
                                             description=desc)
                    nwbfile.add_acquisition(ts_acq)


            print('\n========  loading States %i'%dataset_index)

            raw_h5ref = np.array(mat_contents['States'])
            raw = h5_file[raw_h5ref[dataset_index][0]]
            states = np.array([s[0] for s in raw])
            print('Data points %s: %s'%(len(states), states))

            wrapped_data = H5DataIO(data=states, compression=True)

            ts_acq = pynwb.TimeSeries('States', wrapped_data, 'state', timestamps=timestamps,comments=comments,
                                     description='Vector of different state types: %s'%state_info)
            nwbfile.add_acquisition(ts_acq)


            # NOTE: the 'KatoEtAl2018' prefix is kept as is (although the paper
            # is from 2015) so that output file names stay stable.
            nwb_file_name = 'KatoEtAl2018.%s.%i.nwb'%(data_ref, dataset_index)
            # Context manager closes the NWB file even if write() raises
            with pynwb.NWBHDF5IO(nwb_file_name, mode='w') as io:
                io.write(nwbfile)
            print("Written NWB file to %s"%nwb_file_name)

    # Disabled plotting snippet, kept for reference (no-op string literal).
    '''
    plt.figure()
    id = '???'
    for i in range(15,25,1):
        id = h5_file[IDs[i][0]].value[0]
        print('ID: %s (%s)'%(id, type(id))) 
        if not type(id)==np.uint64:
            id = [int(a) for a in h5_file[id[0]].value]
            id = ''.join(chr(i) for i in id)
        else:
            id = '???'
        print('Plotting cell %i: %s'%(i,id))
        plt.plot(timeVectorSeconds.value[0],traces_raw.value[i], lw=.5, label='%i: %s'%(i,id))
        plt.legend()

    plt.show()    '''


AVA_HisCl.mat                KatoEtAl2018.WT_Stim.3.nwb
KatoEtAl2018.AVA_HisCl.0.nwb KatoEtAl2018.WT_Stim.4.nwb
KatoEtAl2018.AVA_HisCl.1.nwb KatoEtAl2018.WT_Stim.5.nwb
KatoEtAl2018.AVA_HisCl.2.nwb KatoEtAl2018.WT_Stim.6.nwb
KatoEtAl2018.AVA_HisCl.3.nwb PCA_Test.ipynb
KatoEtAl2018.AVA_HisCl.4.nwb Snippets-Copy1.ipynb
KatoEtAl2018.WT_NoStim.0.nwb Snippets.ipynb
KatoEtAl2018.WT_NoStim.1.nwb TestData.ipynb
KatoEtAl2018.WT_NoStim.2.nwb WT_NoStim.mat
KatoEtAl2018.WT_NoStim.3.nwb WT_NoStim_pre73.mat
KatoEtAl2018.WT_NoStim.4.nwb WT_Stim.mat
KatoEtAl2018.WT_Stim.0.nwb   readme_Kato2015.txt
KatoEtAl2018.WT_Stim.1.nwb   snip.py
KatoEtAl2018.WT_Stim.2.nwb   split.dat
Contents of MAT file WT_NoStim.mat: ['IDs', 'States', 'States_key', 'dataset', 'fps', 'timVectorSeconds', 'traces', 'tracesDif', 'traces_raw']


2.9046296296296297

len: 1
<HDF5 dataset "q": shape (109, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 






Data points 3137: [2. 2. 2. ... 1. 1. 2.]


  % (g.name, s.name))


Written NWB file to KatoEtAl2018.WT_NoStim.0.nwb


2.901851851851852

len: 1
<HDF5 dataset "Ib": shape (107, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 'Unknown Cell', 7: 'Unknown Cell', 8: 'Unknown Cell', 9: 'Unknown Cell', 10: 'Unknown Cell', 11: 'Unknown Cell', 12: 'Unknown Cell', 13: 'Unknown Cell', 14: 'Unknown Cell', 15: 'Unknown Cell', 16: 'Unknown Cell', 17: 'Unknown Cell', 18: 'Unknown Cell', 19: 'OLQVL', 20: 'OLQDL', 21: 'OLQVR', 22: 'Unknown Cell', 23: 'Unknown Cell', 24: 'OLQDR', 25: 'Unknown Cell', 26: 'RMED', 27: 'RMER', 28: 'RMEL', 29: 'Unknown Cell', 30: 'Unknown Cell', 31: 'Unknown Cell', 32: 'RID', 33: 'Unknown Cell', 34: 'Unknown Cell', 35: 'SMDVR, Unknown Cell', 36: 'Unknown Cell', 37: 'SMDVL', 38: 'Unknown Cell', 39: 'ALA', 40: 'AVAR', 41: 'AVAL', 42: 'Unknown Cell', 43: 'ASKR', 44: 'RMEV', 45: 'Unknown Cell', 46: 'AVER', 47: 'Unknown Cell', 48: 'AVEL', 49: 'SIBDR', 50: 'ASKL'




Data points 3021: [2. 2. 8. ... 7. 7. 7.]
Written NWB file to KatoEtAl2018.WT_NoStim.4.nwb
Contents of MAT file AVA_HisCl.mat: ['IDs', 'States', 'States_key', 'dataset', 'fps', 'timeVectorSeconds', 'traces', 'tracesDif', 'traces_raw']


3.0546296296296296

len: 1
<HDF5 dataset "q": shape (134, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 'Unknown Cell', 7: 'Unknown Cell', 8: 'Unknown Cell', 9: 'Unknown Cell', 10: 'Unknown Cell', 11: 'Unknown Cell', 12: 'Unknown Cell', 13: 'OLQDL', 14: 'Unknown Cell', 15: 'URAVL', 16: 'Unknown Cell', 17: 'OLQVL', 18: 'URADR', 19: 'Unknown Cell', 20: 'Unknown Cell', 21: 'Unknown Cell', 22: 'Unknown Cell', 23: 'URADL', 24: 'Unknown Cell', 25: 'Unknown Cell', 26: 'Unknown Cell', 27: 'RMED', 28: 'OLQDR', 29: 'RMEL', 30: 'OLQVR', 31: 'Unknown Cell', 32: 'RMER', 33: 'Unknown Cell', 34: 'Unknown Cell', 35: 'Unknown Cell', 36: 'Unknown Cell', 37: 'Unknown Cell', 38: 'SMDV




Data points 3425: [3. 3. 3. ... 1. 1. 1.]
Written NWB file to KatoEtAl2018.AVA_HisCl.2.nwb


2.8055555555555554

len: 1
<HDF5 dataset "3d": shape (138, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 'Unknown Cell', 7: 'Unknown Cell', 8: 'Unknown Cell', 9: 'Unknown Cell', 10: 'Unknown Cell', 11: 'Unknown Cell', 12: 'Unknown Cell', 13: 'Unknown Cell', 14: 'URYVR', 15: 'Unknown Cell', 16: 'Unknown Cell', 17: 'Unknown Cell', 18: 'URYVL', 19: 'Unknown Cell', 20: 'Unknown Cell', 21: 'URYDL', 22: 'OLQDL', 23: 'OLQDR, URYDR', 24: 'OLQVR', 25: 'OLQVL', 26: 'Unknown Cell', 27: 'Unknown Cell', 28: 'AVAR', 29: 'RMED', 30: 'RMEL', 31: 'Unknown Cell', 32: 'RMER', 33: 'Unknown Cell', 34: 'Unknown Cell', 35: 'Unknown Cell', 36: 'Unknown Cell', 37: 'Unknown Cell', 38: 'Unknown Cell', 39: 'Unknown Cell', 40: 'Unknown Cell', 41: 'Unknown Cell', 42: 'Unknown Cell', 43: 'RID', 44: 'Unknown Cell', 45: 'SMDVR', 46: 'Unkn




Data points 2198: [4. 4. 4. ... 1. 1. 1.]
Written NWB file to KatoEtAl2018.WT_Stim.0.nwb


2.801388888888889

len: 1
<HDF5 dataset "fc": shape (122, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 'Unknown Cell', 7: 'Unknown Cell', 8: 'Unknown Cell', 9: 'Unknown Cell', 10: 'Unknown Cell', 11: 'Unknown Cell', 12: 'Unknown Cell', 13: 'Unknown Cell', 14: 'Unknown Cell', 15: 'Unknown Cell', 16: 'Unknown Cell', 17: 'Unknown Cell', 18: 'Unknown Cell', 19: 'Unknown Cell', 20: 'OLQVL, URYVL', 21: 'Unknown Cell', 22: 'Unknown Cell', 23: 'Unknown Cell', 24: 'Unknown Cell', 25: 'OLQVR, URYVR', 26: 'IL2L', 27: 'Unknown Cell', 28: 'Unknown Cell', 29: 'OLQDR, URYDR', 30: 'Unknown Cell', 31: 'BAGL', 32: 'Unknown Cell', 33: 'Unknown Cell', 34: 'Unknown Cell', 35: 'BAGR', 36: 'RMER', 37: 'RMEL', 38: 'RMED', 39: 'Unknown Cell', 40: 'Unknown Cell', 41: 'RID', 42: 'AVAR', 43: 'Unknown Cell', 44: 'SMDVR', 45: 'Unknown C




Data points 2018: [2. 2. 2. ... 1. 1. 1.]
Written NWB file to KatoEtAl2018.WT_Stim.3.nwb


3.0569444444444445

len: 1
<HDF5 dataset "7g": shape (123, 1), type "|O">
{0: 'Unknown Cell', 1: 'Unknown Cell', 2: 'Unknown Cell', 3: 'Unknown Cell', 4: 'Unknown Cell', 5: 'Unknown Cell', 6: 'Unknown Cell', 7: 'Unknown Cell', 8: 'Unknown Cell', 9: 'Unknown Cell', 10: 'Unknown Cell', 11: 'Unknown Cell', 12: 'Unknown Cell', 13: 'Unknown Cell', 14: 'Unknown Cell', 15: 'Unknown Cell', 16: 'Unknown Cell', 17: 'URYDR', 18: 'URYVL', 19: 'URYVR', 20: 'URADL', 21: 'URADR', 22: 'URYDL', 23: 'Unknown Cell', 24: 'Unknown Cell', 25: 'Unknown Cell', 26: 'BAGL', 27: 'OLQVR', 28: 'Unknown Cell', 29: 'Unknown Cell', 30: 'OLQDL, URYDL', 31: 'OLQVL, URYVL', 32: 'OLQDR, URYDR', 33: 'BAGR', 34: 'RMER', 35: 'RMEL', 36: 'RMED', 37: 'Unknown Cell', 38: 'RMEV', 39: 'Unknown Cell', 40: 'Unknown Cell', 41: 'RID', 42: 'Unknown Cell', 43: 'AVAL', 44: 'Unknown Cell', 45: '---', 46: 'RMDVL', 47: '---', 48: '---, ---', 49: 




Data points 2019: [3. 3. 3. ... 4. 4. 4.]
Written NWB file to KatoEtAl2018.WT_Stim.6.nwb
