Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor XML parser in parse_azfp.py to maintain consistency with other parsers #1135

Merged
merged 12 commits into from
Sep 18, 2023
106 changes: 44 additions & 62 deletions echopype/convert/parse_azfp.py
praneethratna marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import xml.dom.minidom
import re
import xml.etree.ElementTree as ET
from collections import defaultdict
from datetime import datetime as dt
from struct import unpack
Expand All @@ -11,45 +12,7 @@
from .parse_base import ParseBase

# Regex for AZFP raw-data file names such as ``17082117.01A``.  The dot is
# escaped so that only a literal "." is accepted before the ``01A`` extension.
FILENAME_DATETIME_AZFP = r"\w+\.01A"
# Maps XML tag names that hold a single integer value to the snake_case
# parameter names used for them.
XML_INT_PARAMS = {
"NumFreq": "num_freq",
"SerialNumber": "serial_number",
"BurstInterval": "burst_interval",
"PingsPerBurst": "pings_per_burst",
"AverageBurstPings": "average_burst_pings",
"SensorsFlag": "sensors_flag",
}
# XML tag names that hold a single floating point value; the tag name itself
# is used as the parameter name.
XML_FLOAT_PARAMS = [
# Temperature coeffs
"ka",
"kb",
"kc",
"A",
"B",
"C",
# Tilt coeffs
"X_a",
"X_b",
"X_c",
"X_d",
"Y_a",
"Y_b",
"Y_c",
"Y_d",
]
# Maps XML tag names that occur once per frequency channel to the parameter
# names used for the resulting per-channel lists.
XML_FREQ_PARAMS = {
"RangeSamples": "range_samples",
"RangeAveragingSamples": "range_averaging_samples",
"DigRate": "dig_rate",
"LockOutIndex": "lockout_index",
"Gain": "gain",
"PulseLen": "pulse_length",
"DS": "DS",
"EL": "EL",
"TVR": "TVR",
# Only the VTX0 tag is listed here; it is exposed under the name "VTX".
"VTX0": "VTX",
"BP": "BP",
}

HEADER_FIELDS = (
("profile_flag", "u2"),
("profile_number", "u2"),
Expand Down Expand Up @@ -118,34 +81,54 @@ def __init__(self, file, params, storage_options={}, dgram_zarr_vars={}):
self.xml_path = params

# Class attributes
self.parameters = dict()
self.parameters = defaultdict(list)
self.unpacked_data = defaultdict(list)
self.sonar_type = "AZFP"

def load_AZFP_xml(self):
"""Parse XML file to get params for reading AZFP data."""
"""Parses the AZFP XML file.
def _camel_to_snake(self, tag):
    """
    Convert a CamelCase name to snake_case.

    The name is split into words that start with a run of uppercase letters
    followed by optional lowercase letters.  The words are lower-cased and
    joined with underscores.  Characters that are not part of such a word
    (leading lowercase letters, digits, underscores) are dropped, so
    ``"VTX0"`` becomes ``"vtx"`` and ``"kHz"`` becomes ``"hz"``.

    Parameters
    ----------
    tag : str
        Name to convert, e.g. an XML tag or attribute name.

    Returns
    -------
    str
        The snake_case name, or ``tag`` unchanged when it contains no
        uppercase letter.
    """
    words = re.findall(r"[A-Z]+[a-z]*", tag)
    if not words:
        # Nothing to split on (already lowercase, or empty): keep as is.
        return tag
    # Joining a single word yields that word, so no special case is needed.
    return "_".join(word.lower() for word in words)

def get_value_by_tag_name(tag_name, element=0):
"""Return the text of the ``element``-th XML node named ``tag_name``.

``element`` is a zero-based index into the nodes matching ``tag_name``;
the text is read from the first child of the selected node.
"""
# ``px`` is not a parameter: it is looked up in an enclosing scope.
return px.getElementsByTagName(tag_name)[element].childNodes[0].data
def load_AZFP_xml(self):
    """
    Parse the AZFP XML file into ``self.parameters``.

    Every element of the XML tree is visited:

    - Each attribute value is stored, as a string, under
      ``<element>_<attribute>`` with both parts converted to snake_case.
    - Non-blank element text is converted to ``int`` when possible and to
      ``float`` otherwise.  It is stored under the snake_case tag name,
      except that tags of three characters or fewer and tags starting
      with ``VTX`` keep their original spelling (e.g. ``CPU``, ``kHz``,
      ``VTX0``).

    Values are accumulated in lists in document order; a parameter that
    ends up with exactly one value is collapsed to that scalar.

    Raises
    ------
    ValueError
        If an element's text is neither blank nor numeric.
    """
    xmlmap = fsspec.get_mapper(self.xml_path, **self.storage_options)
    # Close the file handle as soon as the tree has been read.
    with xmlmap.fs.open(xmlmap.root) as xml_file:
        root = ET.parse(xml_file).getroot()

    for child in root.iter():
        snake_case_tag = self._camel_to_snake(child.tag)

        for key, val in child.attrib.items():
            self.parameters[snake_case_tag + "_" + self._camel_to_snake(key)].append(val)

        # ``text`` is None for empty elements and whitespace-only for
        # elements that merely contain other elements; neither has a value.
        text = child.text
        if text is None or not text.strip():
            continue

        try:
            val = int(text)
        except ValueError:
            val = float(text)

        if len(child.tag) > 3 and not child.tag.startswith("VTX"):
            self.parameters[snake_case_tag].append(val)
        else:
            self.parameters[child.tag].append(val)

    # Handling the case where there is only one value for each parameter.
    # Only existing keys are reassigned, so the dict size does not change
    # while it is being iterated.
    for key, val in self.parameters.items():
        if len(val) == 1:
            self.parameters[key] = val[0]

def _compute_temperature(self, ping_num, is_valid):
"""
Expand Down Expand Up @@ -245,7 +228,6 @@ def _test_valid_params(params):
header_chunk = file.read(self.HEADER_SIZE)
if header_chunk:
header_unpacked = unpack(self.HEADER_FORMAT, header_chunk)

# Reading will stop if the file contains an unexpected flag
if self._split_header(file, header_unpacked):
# Appends the actual 'data values' to unpacked_data
Expand Down
42 changes: 34 additions & 8 deletions echopype/convert/set_groups_azfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,14 @@ def _create_unique_channel_name(self):
"""

serial_number = self.parser_obj.unpacked_data["serial_number"]
frequency_number = self.parser_obj.parameters["frequency_number"]

if serial_number.size == 1:
freq_as_str = self.freq_sorted.astype(int).astype(str)

# TODO: replace str(i+1) with Frequency Number from XML
channel_id = [
str(serial_number) + "-" + freq + "-" + str(i + 1)
str(serial_number) + "-" + freq + "-" + frequency_number[i]
for i, freq in enumerate(freq_as_str)
]

Expand Down Expand Up @@ -146,8 +147,7 @@ def set_sonar(self) -> xr.Dataset:
"sonar_model": self.sonar_model,
"sonar_serial_number": int(self.parser_obj.unpacked_data["serial_number"]),
"sonar_software_name": "AZFP",
# TODO: software version is hardwired. Read it from the XML file's AZFP_Version node
"sonar_software_version": "1.4",
"sonar_software_version": "based on AZFP Matlab version 1.4",
"sonar_type": "echosounder",
}
ds = ds.assign_attrs(sonar_attr_dict)
Expand Down Expand Up @@ -500,7 +500,7 @@ def set_vendor(self) -> xr.Dataset:
unpacked_data = self.parser_obj.unpacked_data
parameters = self.parser_obj.parameters
ping_time = self.parser_obj.ping_time
tdn = parameters["pulse_length"][self.freq_ind_sorted] / 1e6
tdn = parameters["pulse_len"][self.freq_ind_sorted] / 1e6
anc = np.array(unpacked_data["ancillary"]) # convert to np array for easy slicing

# Build variables in the output xarray Dataset
Expand Down Expand Up @@ -648,6 +648,17 @@ def set_vendor(self) -> xr.Dataset:
parameters["gain"][self.freq_ind_sorted],
{"long_name": "(From XML file) Gain correction"},
),
"instrument_type": parameters["instrument_type"][0],
"minor": parameters["minor"],
"major": parameters["major"],
"date": parameters["date"],
"program": parameters["program"],
"cpu": parameters["cpu"],
"serial_number": parameters["serial_number"],
"board_version": parameters["board_version"],
"file_version": parameters["file_version"],
"parameter_version": parameters["parameter_version"],
"configuration_version": parameters["configuration_version"],
"XML_digitization_rate": (
["channel"],
parameters["dig_rate"][self.freq_ind_sorted],
Expand All @@ -659,7 +670,7 @@ def set_vendor(self) -> xr.Dataset:
),
"XML_lockout_index": (
["channel"],
parameters["lockout_index"][self.freq_ind_sorted],
parameters["lock_out_index"][self.freq_ind_sorted],
{
"long_name": "(From XML file) The distance, rounded to the nearest "
"Bin Size after the pulse is transmitted that over which AZFP will "
Expand All @@ -680,10 +691,25 @@ def set_vendor(self) -> xr.Dataset:
"units": "dB re 1uPa/V at 1m",
},
),
"VTX": (
"VTX0": (
["channel"],
parameters["VTX"][self.freq_ind_sorted],
{"long_name": "Amplified voltage sent to the transducer"},
parameters["VTX0"][self.freq_ind_sorted],
{"long_name": "Amplified voltage 0 sent to the transducer"},
),
"VTX1": (
["channel"],
parameters["VTX1"][self.freq_ind_sorted],
{"long_name": "Amplified voltage 1 sent to the transducer"},
),
"VTX2": (
["channel"],
parameters["VTX2"][self.freq_ind_sorted],
{"long_name": "Amplified voltage 2 sent to the transducer"},
),
"VTX3": (
["channel"],
parameters["VTX3"][self.freq_ind_sorted],
{"long_name": "Amplified voltage 3 sent to the transducer"},
),
"Sv_offset": (["channel"], Sv_offset),
"number_of_samples_digitized_per_pings": (
Expand Down
52 changes: 52 additions & 0 deletions echopype/tests/convert/test_convert_azfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
from scipy.io import loadmat
from echopype import open_raw
import pytest
from echopype.convert.parse_azfp import ParseAZFP


@pytest.fixture
def azfp_path(test_path):
    """Provide the ``"AZFP"`` entry of the ``test_path`` fixture."""
    azfp_dir = test_path["AZFP"]
    return azfp_dir


def check_platform_required_scalar_vars(echodata):
# check convention-required variables in the Platform group
for var in [
Expand Down Expand Up @@ -172,3 +174,53 @@ def test_convert_azfp_01a_notemperature_notilt(azfp_path):
assert "tilt_y" in echodata["Platform"]
assert echodata["Platform"]["tilt_x"].isnull().all()
assert echodata["Platform"]["tilt_y"].isnull().all()


def test_load_parse_azfp_xml(azfp_path):
    """Check the keys and selected values produced by ``ParseAZFP.load_AZFP_xml``."""
    raw_file = azfp_path / '17082117.01A'
    xml_file = azfp_path / '17030815.XML'
    parser = ParseAZFP(str(raw_file), str(xml_file))
    parser.load_AZFP_xml()
    params = parser.parameters

    # The key set is checked first, before any lookup on ``params``.
    expected_params = ['instrument_type_string', 'instrument_type', 'major', 'minor', 'date',
                       'program_name', 'program', 'CPU', 'serial_number', 'board_version',
                       'file_version', 'parameter_version', 'configuration_version', 'eclock',
                       'digital_board_version', 'sensors_flag_pressure_sensor_installed',
                       'sensors_flag_paros_installed', 'sensors_flag', 'U0', 'Y1', 'Y2', 'Y3',
                       'C1', 'C2', 'C3', 'D1', 'D2', 'T1', 'T2', 'T3', 'T4', 'T5', 'X_a', 'X_b',
                       'X_c', 'X_d', 'Y_a', 'Y_b', 'Y_c', 'Y_d', 'period', 'ppm_offset',
                       'calibration', 'a0', 'a1', 'a2', 'a3', 'ka', 'kb', 'kc', 'A', 'B', 'C',
                       'num_freq', 'hz_units', 'kHz', 'TVR', 'num_vtx', 'VTX0', 'VTX1', 'VTX2',
                       'VTX3', 'BP', 'EL', 'DS', 'min_pulse_len', 'sound_speed',
                       'start_date_svalue', 'start_date', 'num_frequencies', 'num_phases',
                       'data_output_svalue', 'data_output', 'frequency_units', 'frequency',
                       'phase_number', 'phase_type_svalue', 'phase_type', 'duration_svalue',
                       'duration', 'ping_period_units', 'ping_period', 'burst_interval_units',
                       'burst_interval', 'pings_per_burst_units', 'pings_per_burst',
                       'average_burst_pings_units', 'average_burst_pings', 'frequency_number',
                       'acquire_frequency_units', 'acquire_frequency', 'pulse_len_units',
                       'pulse_len', 'dig_rate_units', 'dig_rate', 'range_samples_units',
                       'range_samples', 'range_averaging_samples_units',
                       'range_averaging_samples', 'lock_out_index_units', 'lock_out_index',
                       'gain_units', 'gain', 'storage_format_units', 'storage_format']
    assert set(params.keys()) == set(expected_params)

    assert list(set(params['instrument_type_string']))[0] == 'AZFP'
    assert isinstance(params['num_freq'], int)
    assert isinstance(params['pulse_len'], list)
    assert params['num_freq'] == 4
    assert len(params['frequency_number']) == 4
    assert params['frequency_number'] == ['1', '2', '3', '4']
    assert params['kHz'] == [125, 200, 455, 769]

    # Parameters expected to hold one value per frequency.
    per_frequency_values = {
        'acquire_frequency': [1, 1, 1, 1],
        'pulse_len': [300, 300, 300, 300],
        'dig_rate': [20000, 20000, 20000, 20000],
        'range_samples': [1752, 1752, 1764, 540],
        'range_averaging_samples': [4, 4, 4, 4],
        'lock_out_index': [0, 0, 0, 0],
        'gain': [1, 1, 1, 1],
        'storage_format': [1, 1, 1, 1],
    }
    assert all(len(params[name]) == 4 for name in per_frequency_values)
    for name, expected in per_frequency_values.items():
        assert params[name] == expected

Loading