In [1]:
import sys

In [2]:
sys.path.insert(0,'../')
from unpack_ek60 import raw2hdf5_initiate, load_ek60_raw, save_metadata, check_metadata

  from ._conv import register_converters as _register_converters


In [3]:
import matplotlib.pyplot as plt
import h5py

In [4]:
import numpy as np

In [5]:
# EK60 .raw files used below: the first is passed to raw2hdf5_initiate,
# the second is unpacked with load_ek60_raw and passed to raw2hdf5_concat.
input_file_path1 = '../OOI-D20150901-T000000.raw'
input_file_path2 = '../OOI-D20150902-T000000.raw'

In [38]:
# HDF5 file handed to raw2hdf5_initiate and raw2hdf5_concat, and opened directly for inspection
h5_file_path = '../test_concat5.h5'

In [39]:
%%time
# Unpack the first raw file into h5_file_path
# (presumably this creates the HDF5 file -- confirm in unpack_ek60)
raw2hdf5_initiate(input_file_path1,h5_file_path)

12:44:33  unpacking file: ../OOI-D20150901-T000000.raw
CPU times: user 11.3 s, sys: 4.38 s, total: 15.7 s
Wall time: 18.3 s


In [40]:
%%time
# Unpack the second raw file into in-memory variables
# (load_ek60_raw is only given the raw file path, no HDF5 path)
first_ping_metadata, data_times, power_data_dict, frequencies, bin_size, \
    config_header, config_transducer = load_ek60_raw(input_file_path2)

12:44:51  unpacking file: ../OOI-D20150902-T000000.raw
CPU times: user 9.22 s, sys: 2.98 s, total: 12.2 s
Wall time: 12.3 s


In [42]:
# Store bin_size under the 'bin_size' key of the metadata dict; later cells
# compare that key against the 'bin_size' entry of the HDF5 'metadata' group.
first_ping_metadata['bin_size'] = bin_size

In [43]:
def raw2hdf5_concat(raw_file_path,h5_file_path):
    '''
    Unpack an EK60 .raw file and concatenate it to an existing hdf5 file.

    All checks run before the first dataset is resized, so a rejected raw
    file leaves the hdf5 file unmodified.

    INPUT:
        raw_file_path   .raw file to be unpacked
        h5_file_path    existing hdf5 file to be concatenated to
    RAISES:
        ValueError      if the unpacked channels differ in shape, if the
                        metadata check fails, or if the datasets already in
                        the hdf5 file do not line up with the new data
    '''
    # Unpack raw into memory
    first_ping_metadata, data_times, power_data_dict, frequencies, bin_size, \
        config_header, config_transducer = load_ek60_raw(raw_file_path)

    # Check if input dimension makes sense, if not abort.
    # Comparing whole shape tuples avoids the pitfalls of counting unique
    # values (equal dimensions, or dimensions swapped between channels).
    new_shapes = set(power_data_dict[str(freq)].shape for freq in frequencies.values())
    if len(new_shapes) != 1:
        raise ValueError('Raw file has mismatched number of pings across channels')
    n_row_new, n_ping_new = new_shapes.pop()

    # Open existing file; the context manager closes it even when a check fails
    with h5py.File(h5_file_path, 'r+') as fh:

        # Check if all metadata fields match, if not abort.
        # Transducer groups are named transducer00, transducer01, ... with one
        # group per entry of config_transducer.
        # NOTE(review): assumes check_metadata returns a truthy value when the
        # group matches -- confirm against unpack_ek60.
        to_check = [('header',config_header),('metadata',first_ping_metadata)]
        to_check += [('transducer%02d' % idx, config_transducer[idx])
                     for idx in range(len(config_transducer))]
        if not all(check_metadata(name,meta,fh) for name,meta in to_check):
            raise ValueError('Metadata of raw file does not match the existing hdf5 file')

        # Validate every stored channel before resizing anything
        power_group = fh['power_data']
        channels = list(power_group.keys())
        if len(channels) == 0:
            raise ValueError('Existing hdf5 file has no power_data channels')
        n_ping_exist = fh['ping_time'].shape[0]
        for f in channels:
            if str(f) not in power_data_dict:
                raise ValueError('Channel '+str(f)+' of hdf5 file not found in raw file')
            # Each stored matrix must have the new data's row count and as
            # many columns as there are stored ping times.
            if power_group[f].shape != (n_row_new, n_ping_exist):
                raise ValueError('Existing power_data/'+str(f)+' has shape '
                                 +str(power_group[f].shape)+', expected '
                                 +str((n_row_new, n_ping_exist)))

        # Concatenating newly unpacked data into HDF5 file
        n_ping_total = n_ping_exist + n_ping_new
        for f in channels:
            power_group[f].resize((n_row_new, n_ping_total))
            power_group[f][:,n_ping_exist:] = power_data_dict[str(f)]
        fh['ping_time'].resize((n_ping_total,))
        fh['ping_time'][n_ping_exist:] = data_times

In [46]:
# Open the HDF5 file read/write for interactive inspection.
# NOTE(review): no matching fh.close() is visible at notebook level -- confirm
# the handle is closed before other code reopens the same file.
fh=h5py.File(h5_file_path, 'r+')

In [48]:
fh['power_data/120000.0']

<HDF5 dataset "120000.0": shape (1046, 57444), type "<f8">

In [44]:
# Unpack the second raw file and append it to the existing HDF5 file
raw2hdf5_concat(input_file_path2,h5_file_path)

12:45:51  unpacking file: ../OOI-D20150902-T000000.raw


In [28]:
# Print each top-level (name, object) pair of the open HDF5 file
for p in fh.items():
    print(p)

('header', <HDF5 group "/header" (5 members)>)
('metadata', <HDF5 group "/metadata" (12 members)>)
('ping_time', <HDF5 dataset "ping_time": shape (28717,), type "<f8">)
('power_data', <HDF5 group "/power_data" (3 members)>)
('transducer00', <HDF5 group "/transducer00" (21 members)>)
('transducer01', <HDF5 group "/transducer01" (21 members)>)
('transducer02', <HDF5 group "/transducer02" (21 members)>)


In [17]:
# Run check_metadata on every metadata group of the HDF5 file against the
# values unpacked from the raw file; the `and` chain stops at the first
# falsy result.
flag = (
    check_metadata('header',config_header,fh)
    and check_metadata('metadata',first_ping_metadata,fh)
    and check_metadata('transducer00',config_transducer[0],fh)
    and check_metadata('transducer01',config_transducer[1],fh)
    and check_metadata('transducer02',config_transducer[2],fh)
)


In [20]:
# Check if input dimension makes sense
# Collect the shape of each channel's unpacked array, one row per channel
sz_power_data = np.empty(shape=(len(frequencies),2),dtype=int)
for cnt,f in enumerate(frequencies.keys()):
    f_str = str(frequencies[f])
    sz_power_data[cnt,:] = power_data_dict[f_str].shape
# Compare every row with the first one. Counting unique values instead would
# misfire when both dimensions are equal and would miss swapped dimensions.
if sz_power_data.shape[0]==0 or (sz_power_data != sz_power_data[0]).any():
    print('Raw file has mismatched number of pings across channels')

In [21]:
sz_power_data

array([[ 1046, 28727],
       [ 1046, 28727],
       [ 1046, 28727]])

In [35]:
# Concatenating newly unpacked data into HDF5 file
# NOTE: every run of this cell appends power_data_dict again, so re-running it
# grows the datasets further.
for f in fh['power_data'].keys():
    sz_exist = fh['power_data/'+f].shape  # shape of existing power_data mtx
    # Widen the dataset by sz_power_data[0,1] columns, then fill the added columns
    fh['power_data/'+f].resize((sz_exist[0],sz_exist[1]+sz_power_data[0,1]))
    fh['power_data/'+f][:,sz_exist[1]:] = power_data_dict[str(f)]
# sz_exist here is the value left by the last loop iteration, so ping_time is
# sized and filled from the last channel's pre-append width.
fh['ping_time'].resize((sz_exist[1]+sz_power_data[0,1],))
fh['ping_time'][sz_exist[1]:] = data_times


In [36]:
fh['power_data/120000.0']

<HDF5 dataset "120000.0": shape (1046, 114898), type "<f8">

In [26]:
# For each channel stored in the HDF5 file, print the stored shape, the channel
# name, and the shape of the matching array unpacked from the raw file
for f in fh['power_data'].keys():
    sz_exist = fh['power_data/'+f].shape  # shape of existing Sv mtx
    print(sz_exist)
    print(f)
    print(power_data_dict[str(f)].shape)

(1046, 28717)
120000.0
(1046, 28727)
(1046, 28717)
200000.0
(1046, 28727)
(1046, 28717)
38000.0
(1046, 28727)


In [27]:
# Show the unpacked array for channel f; f is whatever value an earlier loop left behind
power_data_dict[str(f)]

array([[  -8.39591472,   11.6884303 ,   11.70018928, ...,   11.70018928,
          11.67667132,   11.70018928],
       [  -1.31700623,   19.56694972,   19.5787087 , ...,   19.56694972,
          19.55519073,   19.56694972],
       [  -1.59922185,   19.42584191,   19.43760089, ...,   19.43760089,
          19.43760089,   19.43760089],
       ...,
       [-104.31394889,  -85.07625073,  -93.31929866, ...,  -91.50841509,
        -103.97293835,  -95.98858807],
       [-106.61870979,  -91.4261022 ,  -86.69899055, ...,  -94.23649942,
        -104.87838013,  -92.6137596 ],
       [-114.83823975,  -96.82347595,  -85.92289759, ...,  -94.37760723,
        -100.33941223,  -91.67304087]])

In [None]:
# Print the shape of every array in power_data_dict
for p in power_data_dict.keys():
    print(power_data_dict[p].shape)

In [None]:
sz_power_data

In [None]:
# Print every (key, value) pair of first_ping_metadata
for p in first_ping_metadata.items():
    print(p)

In [None]:
# Print every top-level object of hf.
# NOTE(review): `hf` is not assigned in any visible cell (the handle opened
# above is named `fh`) -- confirm which name is intended.
for p in hf.values():
    print(p)

In [None]:
# Print every top-level name in hf
for p in hf.keys():
    print(p)

In [None]:
# Print the member names of the 'header' group
for p in hf['header'].keys():
    print(p)

In [None]:
hf['header']['sounder_name'][0]

In [None]:
type(config_header['sounder_name'])

In [None]:
str(config_header['sounder_name'], 'utf-8') == hf['header']['sounder_name'][0]

In [None]:
hf['header']['transducer_count'][0]

In [None]:
# For each entry of the 'header' group, print its name and whether its first
# stored element equals the corresponding config_header value.
for p in hf['header'].keys():
    print(p)
    # str/bytes entries: config_header[p] is decoded as UTF-8 before comparing
    if type(hf['header'][p][0])==str or type(hf['header'][p][0])==bytes:
        print(str(config_header[p], 'utf-8') == hf['header'][p][0])
    # Exact int/float entries are compared directly; any other type prints no result
    elif type(hf['header'][p][0])==int or type(hf['header'][p][0])==float:
        print(config_header[p] == hf['header'][p][0])

In [None]:
# Print the member names of the 'metadata' group
for p in hf['metadata'].keys():
    print(p)

In [None]:
# Repeat of the per-entry 'header' comparison: print each entry name and
# whether its first stored element equals the config_header value.
for p in hf['header'].keys():
    print(p)
    # str/bytes entries: config_header[p] is decoded as UTF-8 before comparing
    if type(hf['header'][p][0])==str or type(hf['header'][p][0])==bytes:
        print(str(config_header[p], 'utf-8') == hf['header'][p][0])
    # Exact int/float entries are compared directly; any other type prints no result
    elif type(hf['header'][p][0])==int or type(hf['header'][p][0])==float:
        print(config_header[p] == hf['header'][p][0])

In [None]:
# Generic comparison of one HDF5 group against one dict: for every entry of the
# group, print its name and whether the dict value agrees with what is stored.
group_name = 'header'
dict_name = config_header
for p in hf[group_name].keys():
    print(p)
    if isinstance(hf[group_name][p][0],(str,bytes)):
        # Text entry: a bytes value from the dict is decoded as UTF-8 first
        if type(dict_name[p])==bytes:
            print(str(dict_name[p], 'utf-8') == hf[group_name][p][0])
        else:
            print(dict_name[p] == hf[group_name][p][0])
    elif isinstance(hf[group_name][p][0],(np.generic,np.ndarray,int,float)):
        # Numeric entry: any() over the comparison with the full stored dataset
        print(any(dict_name[p]==hf[group_name][p][:]))

In [None]:
# Same generic comparison, applied to the 'metadata' group and first_ping_metadata
group_name = 'metadata'
dict_name = first_ping_metadata
for p in hf[group_name].keys():
    print(p)
    if isinstance(hf[group_name][p][0],(str,bytes)):
        # Text entry: a bytes value from the dict is decoded as UTF-8 first
        if type(dict_name[p])==bytes:
            print(str(dict_name[p], 'utf-8') == hf[group_name][p][0])
        else:
            print(dict_name[p] == hf[group_name][p][0])
    elif isinstance(hf[group_name][p][0],(np.generic,np.ndarray,int,float)):
        # Numeric entry: any() over the comparison with the full stored dataset
        print(any(dict_name[p]==hf[group_name][p][:]))

In [None]:
hf['transducer00']['channel_id']

In [None]:
type(config_transducer[0]['channel_id'])==bytes

In [None]:
# Same generic comparison, applied to the 'transducer00' group and config_transducer[0]
group_name = 'transducer00'
dict_name = config_transducer[0]
for p in hf[group_name].keys():
    print(p)
    if isinstance(hf[group_name][p][0],(str,bytes)):
        # Text entry: a bytes value from the dict is decoded as UTF-8 first
        if type(dict_name[p])==bytes:
            print(str(dict_name[p], 'utf-8') == hf[group_name][p][0])
        else:
            print(dict_name[p] == hf[group_name][p][0])
    elif isinstance(hf[group_name][p][0],(np.generic,np.ndarray,int,float)):
        # Numeric entry: any() over the comparison with the full stored dataset
        print(any(dict_name[p]==hf[group_name][p][:]))

In [None]:
config_transducer[0]

In [None]:
any(first_ping_metadata['bin_size']==hf['metadata']['bin_size'][:])

In [None]:
hf['metadata']['bin_size'][0]

In [None]:
type(hf['metadata'][p][0])

In [None]:
isinstance(hf['metadata'][p][0],(np.generic,np.ndarray,int,float))

In [None]:
isinstance(hf['header']['transducer_count'][0],(np.generic,np.ndarray,int,float))