In [1]:
import sys

In [7]:
sys.path.insert(0,'../')
from unpack_ek60 import load_ek60_raw

In [8]:
import matplotlib.pyplot as plt
import h5py

In [15]:
import numpy as np

In [9]:
# Relative path of the raw file that is handed to load_ek60_raw below.
input_file_path = '../OOI-D20150901-T000000.raw'

In [12]:
%%time
first_ping_metadata, data_times, power_data_dict, frequencies, bin_size, \
    config_header, config_transducer = load_ek60_raw(input_file_path)

09:24:41  unpacking file: ../OOI-D20150901-T000000.raw
CPU times: user 9.6 s, sys: 3.64 s, total: 13.2 s
Wall time: 14 s


In [56]:
def save_metadata(val,group_info,data_name,fh):
    '''
    Check data type and save to hdf5

    val          data to be saved
    group_info   a string (group name, e.g., header) or
                 a list (group name and sequence number, e.g., [transducer, 1]).
    data_name    name of data set under group
    fh           handle of the file to be saved to

    Raises ValueError if group_info is neither a string nor a 2-element list.
    '''
    # Resolve the dataset path first so that both forms of group_info share
    # exactly the same type dispatch below.
    if type(group_info)==str:  # no sequence in group_info
        dataset_path = '%s/%s' % (group_info,data_name)
    elif type(group_info)==list and len(group_info)==2:  # have sequence in group_info
        dataset_path = '%s%02d/%s' % (group_info[0],group_info[1],data_name)
    else:
        # Fail loudly instead of silently dropping the value.
        raise ValueError('group_info must be a string or a [name, sequence] list, got %r'
                         % (group_info,))

    # The dispatch looks only at `val` (never at notebook globals), and uses
    # exact type checks so that other scalar-like objects keep going through
    # the generic branch.
    if type(val) in (str, bytes):
        # when data is a string
        fh.create_dataset(dataset_path, (1,), data=val, dtype=h5py.special_dtype(vlen=str))
    elif type(val) in (int, float):
        # when data is only 1 int or float object
        fh.create_dataset(dataset_path, (1,), data=val)
    else:
        # when data is numerical
        fh.create_dataset(dataset_path, data=val)

In [16]:
# Check if input dimension makes sense, if not abort
# Each row of sz_power_data holds the shape of one channel's power array;
# row 0 is reused by later cells to size the HDF5 datasets.
sz_power_data = np.empty(shape=(len(frequencies),2),dtype=int)
for cnt,f in enumerate(frequencies.values()):
    sz_power_data[cnt,:] = power_data_dict[str(f)].shape
# Compare every row with the first one. Counting unique values over the whole
# array would misreport square arrays and miss swapped dimensions.
if len(sz_power_data)>0 and (sz_power_data!=sz_power_data[0,:]).any():
    # Actually abort, as the comment above intends, rather than only printing.
    raise ValueError('Raw file has mismatched number of pings across channels')


In [18]:
# Destination HDF5 file. Mode 'x' creates the file and fails if it already
# exists, so re-running this cell will not overwrite an earlier file.
h5_file_path = '../OOI-D20150901-T000000.h5'
h5_file = h5py.File(h5_file_path, mode='x')

In [20]:
# Display the file handle's repr to confirm the file is open.
h5_file

<HDF5 file "OOI-D20150901-T000000.h5" (mode r+)>

In [21]:
# -- ping time: resizable
h5_file.create_dataset('ping_time', (sz_power_data[0,1],), \
                maxshape=(None,), data=data_times, chunks=True)

# -- power data: resizable
for f in frequencies.values():
    h5_file.create_dataset('power_data/%s' % str(f), sz_power_data[0,:], \
                maxshape=(sz_power_data[0,0],None), data=power_data_dict[str(f)], chunks=True)

In [None]:
# -- bin size: stored as its own dataset inside the 'metadata' group
h5_file.create_dataset('metadata/bin_size', data=bin_size)

In [41]:
# Variable-length string dtype for HDF5 datasets.
# NOTE(review): `dt` is not referenced by any other cell visible here -- confirm it is still needed.
dt = h5py.special_dtype(vlen=str)

In [26]:
# List every first-ping metadata field next to its value.
for field, value in first_ping_metadata.items():
    print(field, value)

zplsc_timestamp 20150901000000
zplsc_channel [1, 2, 3]
zplsc_transducer_depth [0.0, 0.0, 0.0]
zplsc_frequency [120000.0, 38000.0, 200000.0]
zplsc_transmit_power [250.0, 500.0, 150.0]
zplsc_pulse_length [0.001024, 0.001024, 0.001024]
zplsc_bandwidth [3026.3916, 2425.1497, 3088.4004]
zplsc_sample_interval [0.000256, 0.000256, 0.000256]
zplsc_sound_velocity [1493.8888, 1493.8888, 1493.8888]
zplsc_absorption_coeff [0.03744031, 0.009785269, 0.05268759]
zplsc_temperature [10.0, 10.0, 10.0]


In [27]:
# Write each first-ping metadata field as a dataset under the 'metadata' group.
for field, value in first_ping_metadata.items():
    save_metadata(value, 'metadata', field, h5_file)

In [28]:
# Show the top-level (name, object) pairs currently in the file.
for entry in h5_file.items():
    print(entry)

('metadata', <HDF5 group "/metadata" (12 members)>)
('ping_time', <HDF5 dataset "ping_time": shape (28717,), type "<f8">)
('power_data', <HDF5 group "/power_data" (3 members)>)


In [44]:
# Read back the first element of the stored timestamp dataset as a round-trip check.
h5_file['metadata/zplsc_timestamp'][0]

'20150901000000'

In [57]:
    # -- header: fixed sized
    # NOTE(review): this cell is indented as a whole, which is only valid
    # because the notebook front end strips common leading indentation.
    # NOTE(review): the group is named 'header4'; the listing printed after
    # this cell also shows 'header', 'header2' and 'header3', presumably from
    # earlier runs with other names -- confirm which name is intended.
    for key,value in config_header.items():
        save_metadata(value,'header4',key,h5_file)

In [58]:
# Show the top-level (name, object) pairs again, now including the header group(s).
for entry in h5_file.items():
    print(entry)

('header', <HDF5 group "/header" (5 members)>)
('header2', <HDF5 group "/header2" (5 members)>)
('header3', <HDF5 group "/header3" (5 members)>)
('header4', <HDF5 group "/header4" (5 members)>)
('metadata', <HDF5 group "/metadata" (12 members)>)
('ping_time', <HDF5 dataset "ping_time": shape (28717,), type "<f8">)
('power_data', <HDF5 group "/power_data" (3 members)>)


In [33]:
# List every configuration-header field next to its value.
for field, value in config_header.items():
    print(field, value)

survey_name b'OOI'
transect_name b''
sounder_name b'ER60'
version b'2.4.3'
transducer_count 3


In [46]:
# Inspect the Python type of a header value; the recorded output shows bytes.
type(config_header['survey_name'])

bytes

In [51]:
# Read back survey_name from the 'header2' group.
# NOTE(review): no cell visible here writes 'header2'; it presumably comes from an
# earlier run of the header cell, so this lookup may fail on a fresh run -- confirm.
h5_file['header2/survey_name'][0]

b'OOI'

In [55]:
# Read back survey_name from the 'header3' group.
# NOTE(review): no cell visible here writes 'header3'; it presumably comes from an
# earlier run of the header cell, so this lookup may fail on a fresh run -- confirm.
h5_file['header3/survey_name'][0]

'OOI'

In [64]:
# Read back the integer transducer_count stored under the 'header4' group.
h5_file['header4/transducer_count'][0]

3

In [None]:
# -- transducer configuration: one group per transducer, named 'transducerNN'
# where NN is the zero-padded index into config_transducer.
# save_metadata does the type dispatch and creates each dataset. Creating the
# same dataset again inline would fail because the name already exists, so
# each field is written exactly once here.
for tx in range(len(config_transducer)):
    for m,mval in config_transducer[tx].items():
        save_metadata(mval,['transducer',tx],m,h5_file)