From dbcdd18d65d5498a2fdc065616e0585803c5a895 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 25 Apr 2022 16:58:10 -0400 Subject: [PATCH 1/3] Move the "extras" data to be in their own profile netCDF file This assumes that the "extras" data occurs before or after a profile and for now I think that is OK. This reduces the complexity of the code quite a bit. We just make the "extras" data look like a profile dataset, optionally setting the depths to zero if there are no depths to capture the data as surface measurements. This also changes some of the extras configuration kwarg names so we can control the creation of the ASCII pseudogram and the inclusion of that data into the netCDF profile files, and the image creation. --- gutils/filters.py | 17 +--- gutils/nc.py | 67 ++++++++++----- gutils/slocum/__init__.py | 86 ++++++++++++------- gutils/templates/slocum_dac.json | 27 +----- .../slocum/ecometrics/config/deployment.json | 10 +++ .../slocum/ecometrics2/config/deployment.json | 19 ++-- .../slocum/ecometrics3/config/deployment.json | 19 ++-- .../slocum/ecometrics4/config/deployment.json | 19 ++-- 8 files changed, 141 insertions(+), 123 deletions(-) diff --git a/gutils/filters.py b/gutils/filters.py index 3c16343..386bdb8 100644 --- a/gutils/filters.py +++ b/gutils/filters.py @@ -2,7 +2,6 @@ # coding=utf-8 import os import pandas as pd -import numpy as np from gutils.yo import assign_profiles @@ -177,23 +176,9 @@ def process_dataset(file, tolerance=pd.Timedelta(minutes=10) ).set_index(extras.index) extras['profile'] = merge.profile.ffill() - - # To have consistent netCDF files, empty "extras" variables need to exist - # in for each valid profile that was calculated above into "filtered". 
- profile_list = set(filtered['profile'].unique()) - extras_list = set(extras['profile'].unique().astype('int32')) - profiles_to_add = profile_list.difference(extras_list) - if profiles_to_add: - first_t_in_profiles = filtered.groupby(by=["profile"]).min()['t'] - for profile_to_add in profiles_to_add: - empty_df = pd.DataFrame([[np.nan] * len(extras.columns)], columns=extras.columns) - empty_df['profile'] = profile_to_add - empty_df['pseudogram_time'] = first_t_in_profiles[profile_to_add] - empty_df.set_index('pseudogram_time', inplace=True) - extras = pd.concat([extras, empty_df], sort=True) - except BaseException as e: L.error(f"Could not merge 'extras' data, skipping: {e}") + extras = pd.DataFrame() except ValueError as e: L.exception('{} - Skipping'.format(e)) diff --git a/gutils/nc.py b/gutils/nc.py index 1f33c67..eae90c4 100644 --- a/gutils/nc.py +++ b/gutils/nc.py @@ -14,6 +14,7 @@ from datetime import datetime from collections import OrderedDict +import numpy as np import pandas as pd import netCDF4 as nc4 from compliance_checker.runner import ComplianceChecker, CheckSuite @@ -191,11 +192,7 @@ def get_creation_attributes(profile): } -def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH, - extras_df=None): - - if extras_df is None: - extras_df = pd.DataFrame() +def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH): try: # Path to hold file while we create it @@ -281,9 +278,6 @@ def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=Pro reduce_dims=True, mode='a') as ncd: - # Set an extras data - set_extra_data(ncd, extras_df) - # We only want to apply metadata from the `attrs` map if the variable is already in # the netCDF file or it is a scalar variable (no shape defined). This avoids # creating measured variables that were not measured in this profile. 
@@ -359,21 +353,22 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes # Create NetCDF Files for Each Profile written_files = [] - for df in [data, extras_df]: + reserved_columns = [ + 'trajectory', + 'profile', + 't', + 'x', + 'y', + 'z', + 'u_orig', + 'v_orig' + ] + for df in [data, extras_df]: # Optionally, remove any variables from the dataframe that do not have metadata assigned if subset is True: all_columns = set(df.columns) - reserved_columns = [ - 'trajectory', - 'profile', - 't', - 'x', - 'y', - 'z', - 'u_orig', - 'v_orig' - ] + removable_columns = all_columns - set(reserved_columns) orphans = removable_columns - set(attrs.get('variables', {}).keys()) L.debug( @@ -393,11 +388,39 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes profile_extras = pd.DataFrame() if not extras_df.empty: - profile_extras = extras_df.loc[extras_df.profile == pi] + + # Write the extras dimension to a new profile file + profile_extras = extras_df.loc[extras_df.profile == pi].copy() + if profile_extras.empty: + continue + + # Standardize the columns of the "extras" from the matched profile + profile_extras.loc[:, 't'] = profile_extras.index + profile_extras = profile_extras.reset_index(drop=True) + profile_extras.loc[:, 'x'] = profile.x.dropna().iloc[0] + profile_extras.loc[:, 'y'] = profile.y.dropna().iloc[0] + + # Fill in extras with empty data + for c in profile: + if c not in profile_extras: + profile_extras.loc[:, c] = np.nan + profile_extras.loc[:, c] = profile_extras[c].astype(profile[c].dtype) + + # Fill in regular profile with empty data + for c in profile_extras: + if c not in profile: + profile.loc[:, c] = np.nan + profile.loc[:, c] = profile[c].astype(profile_extras[c].dtype) + + try: + cr = create_profile_netcdf(attrs, profile_extras, output_path, mode, profile_id_type) + written.append(cr) + except BaseException: + L.exception('Error creating extra netCDF profile {}. 
Skipping.'.format(pi)) + continue try: - cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type, - extras_df=profile_extras) + cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type) written.append(cr) except BaseException: L.exception('Error creating netCDF for profile {}. Skipping.'.format(pi)) diff --git a/gutils/slocum/__init__.py b/gutils/slocum/__init__.py index 8371988..8501f14 100644 --- a/gutils/slocum/__init__.py +++ b/gutils/slocum/__init__.py @@ -76,14 +76,28 @@ def extras(self, data, **kwargs): using an extras time dimension. """ - ECOMETRICS_SENSORS = [ 'sci_echodroid_aggindex', 'sci_echodroid_ctrmass', 'sci_echodroid_eqarea', 'sci_echodroid_inertia', 'sci_echodroid_propocc', 'sci_echodroid_sa', 'sci_echodroid_sv'] - PSEUDOGRAM_VARS = ['pseudogram_time', 'pseudogram_depth', 'pseudogram_sv'] + ECOMETRICS_SENSORS = [ + 'sci_echodroid_aggindex', + 'sci_echodroid_ctrmass', + 'sci_echodroid_eqarea', + 'sci_echodroid_inertia', + 'sci_echodroid_propocc', + 'sci_echodroid_sa', + 'sci_echodroid_sv', + ] + + PSEUDOGRAM_VARS = [ + 'pseudogram_time', + 'pseudogram_depth', + 'pseudogram_sv', + ] # Default extra settings pseudograms_attrs = kwargs.get('pseudograms', {}) - enable_pseudograms = pseudograms_attrs.get('enable', False) + enable_nc = pseudograms_attrs.get('enable_nc', False) + enable_ascii = pseudograms_attrs.get('enable_ascii', False) - if enable_pseudograms: + if enable_nc and enable_ascii: # Two possible outcomes: # (1) If the pseudogram exists, align ecometrics data along @@ -123,13 +137,18 @@ def extras(self, data, **kwargs): # Create ECOMETRICS variable placeholders if have_pseudogram: # ecometrics data is inserted into time data as provided by the pseudogram + size = len(self._extras['pseudogram_time']) for sensor in ECOMETRICS_SENSORS: - self._extras[sensor] = np.full((len(self._extras['pseudogram_time'])), np.nan) + self._extras[sensor] = np.full(size, np.nan) + else: # with a missing pseudogram, we 
can use a shorter list of times # we have to create placeholders for PSEUDOGRAM and ECOMETRICS variables - for sensor in PSEUDOGRAM_VARS + ECOMETRICS_SENSORS: - self._extras[sensor] = np.full((len(ecometricsData)), np.nan) + size = len(ecometricsData) + for sensor in ECOMETRICS_SENSORS: + self._extras[sensor] = np.full(size, np.nan) + for sensor in PSEUDOGRAM_VARS: + self._extras[sensor] = np.full(size, np.nan) if have_pseudogram: for _, row in ecometricsData.iterrows(): @@ -172,15 +191,24 @@ def extras(self, data, **kwargs): self._extras.pseudogram_time, unit='s', origin='unix' ) - if have_pseudogram: - self._extras = self._extras.sort_values([ - 'pseudogram_time', - 'pseudogram_depth' - ]) - else: - self._extras = self._extras.sort_values(['pseudogram_time']) - - self._extras.set_index("pseudogram_time", inplace=True) + if not self._extras['pseudogram_depth'].any(): + self._extras.loc[:, 'pseudogram_depth'] = 0.0 + + self._extras = self._extras.sort_values([ + 'pseudogram_time', + 'pseudogram_depth' + ]) + + # Return a "standardized" dataframe with "t" as the index + # and a column named "z". + self._extras.rename( + columns={ + "pseudogram_time": "t", + "pseudogram_depth": "z" + }, + inplace=True + ) + self._extras.set_index("t", inplace=True) return self._extras, data @@ -518,8 +546,8 @@ def convert(self): ] pseudograms_attrs = self.extra_kwargs.get('pseudograms', {}) - have_pseudograms = pseudograms_attrs.get('enable', False) - if have_pseudograms: + enable_ascii = pseudograms_attrs.get('enable_ascii', False) + if enable_ascii: # Perform pseudograms if this ASCII file matches the deployment # name of things we know to have the data. 
There needs to be a # better way to figure this out, but we don't have any understanding @@ -534,25 +562,19 @@ def convert(self): # https://github.com/smerckel/dbdreader # Defaults - create_images = pseudograms_attrs.get('create_images', False) + enable_image = pseudograms_attrs.get('enable_image', False) echosounderRange = pseudograms_attrs.get('echosounderRange', 60.0) echosounderDirection = pseudograms_attrs.get('echosounderDirection', 'down') if echosounderDirection == 'up': echosounderRange = - (echosounderRange) - if create_images: - pargs = pargs + [ - '-y', sys.executable, - '-g', # Makes the pseudogram ASCII - '-i', # Makes the pseudogram images. This is slow! - '-r', f"{echosounderRange}" - ] - else: - pargs = pargs + [ - '-y', sys.executable, - '-g', # Makes the pseudogram ASCII - '-r', f"{echosounderRange}" - ] + pargs = pargs + [ + '-y', sys.executable, + '-g', # Makes the pseudogram ASCII + '-r', f"{echosounderRange}" + ] + if enable_image: + pargs.append('-i') # Makes the pseudogram images. This is slow! 
pargs.append(self.tmpdir) pargs.append(self.destination_directory) diff --git a/gutils/templates/slocum_dac.json b/gutils/templates/slocum_dac.json index aaca414..8d9dedd 100644 --- a/gutils/templates/slocum_dac.json +++ b/gutils/templates/slocum_dac.json @@ -1005,32 +1005,8 @@ "_FillValue": {"type": "float", "data": -9999.9} } }, - "pseudogram_time": { - "type": "double", - "attributes": { - "long_name": "Pseudogram Time", - "ioos_category": "Other", - "standard_name": "pseudogram_time", - "platform": "platform", - "observation_type": "measured", - "_FillValue": {"type": "double", "data": -1} - } - }, - "pseudogram_depth": { - "type": "double", - "attributes": { - "units": "m", - "long_name": "Pseudogram Depth", - "valid_min": 0.0, - "valid_max": 2000.0, - "ioos_category": "Other", - "standard_name": "pseudogram_depth", - "platform": "platform", - "observation_type": "measured", - "_FillValue": {"type": "double", "data": -9999.9} - } - }, "pseudogram_sv": { + "shape": ["time"], "type": "double", "attributes": { "units": "db", @@ -1041,7 +1017,6 @@ "standard_name": "pseudogram_sv", "platform": "platform", "observation_type": "measured", - "coordinates": "pseudogram_time pseudogram_depth", "_FillValue": {"type": "double", "data": -9999.9} } } diff --git a/gutils/tests/resources/slocum/ecometrics/config/deployment.json b/gutils/tests/resources/slocum/ecometrics/config/deployment.json index 2b45675..40ab93e 100644 --- a/gutils/tests/resources/slocum/ecometrics/config/deployment.json +++ b/gutils/tests/resources/slocum/ecometrics/config/deployment.json @@ -1,6 +1,16 @@ { "glider": "ecometrics", "trajectory_date": "20220212T0000", + "extra_kwargs": { + "pseudograms": { + "enable_nc": false, + "enable_ascii": false, + "enable_image": false, + "echosounderRange": 60.0, + "echosounderDirection": "up", + "echosounderRangeUnits": "meters" + } + }, "attributes": { "acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.", "comment": "", diff --git 
a/gutils/tests/resources/slocum/ecometrics2/config/deployment.json b/gutils/tests/resources/slocum/ecometrics2/config/deployment.json index 5d2baab..5797229 100644 --- a/gutils/tests/resources/slocum/ecometrics2/config/deployment.json +++ b/gutils/tests/resources/slocum/ecometrics2/config/deployment.json @@ -1,6 +1,16 @@ { "glider": "ecometrics", "trajectory_date": "20220212T0000", + "extra_kwargs": { + "pseudograms": { + "enable_nc": true, + "enable_ascii": true, + "enable_image": false, + "echosounderRange": 60.0, + "echosounderDirection": "down", + "echosounderRangeUnits": "meters" + } + }, "attributes": { "acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.", "comment": "", @@ -36,15 +46,6 @@ "title": "G507 Slocum Glider Dataset (Feb 2022)", "wmo_id": 4802989 }, - "extra_kwargs": { - "pseudograms": { - "enable": true, - "create_images": false, - "echosounderRange": 60.0, - "echosounderDirection": "down", - "echosounderRangeUnits": "meters" - } - }, "variables": { "platform": { "attributes": { diff --git a/gutils/tests/resources/slocum/ecometrics3/config/deployment.json b/gutils/tests/resources/slocum/ecometrics3/config/deployment.json index 2a15c89..17354aa 100644 --- a/gutils/tests/resources/slocum/ecometrics3/config/deployment.json +++ b/gutils/tests/resources/slocum/ecometrics3/config/deployment.json @@ -8,6 +8,16 @@ "filter_points": 5, "filter_distance": 1 }, + "extra_kwargs": { + "pseudograms": { + "enable_nc": false, + "enable_ascii": false, + "enable_image": false, + "echosounderRange": 60.0, + "echosounderDirection": "up", + "echosounderRangeUnits": "meters" + } + }, "attributes": { "acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.", "comment": "", @@ -43,15 +53,6 @@ "title": "G507 Slocum Glider Dataset (Feb 2022)", "wmo_id": 4802989 }, - "extra_kwargs": { - "pseudograms": { - "enable": false, - "create_images": true, - "echosounderRange": 60.0, - "echosounderDirection": "up", - "echosounderRangeUnits": 
"meters" - } - }, "variables": { "platform": { "attributes": { diff --git a/gutils/tests/resources/slocum/ecometrics4/config/deployment.json b/gutils/tests/resources/slocum/ecometrics4/config/deployment.json index b6df20d..98d4e41 100644 --- a/gutils/tests/resources/slocum/ecometrics4/config/deployment.json +++ b/gutils/tests/resources/slocum/ecometrics4/config/deployment.json @@ -8,6 +8,16 @@ "filter_points": 5, "filter_distance": 1 }, + "extra_kwargs": { + "pseudograms": { + "enable_nc": false, + "enable_ascii": true, + "enable_image": false, + "echosounderRange": 60.0, + "echosounderDirection": "up", + "echosounderRangeUnits": "meters" + } + }, "attributes": { "acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.", "comment": "", @@ -43,15 +53,6 @@ "title": "G507 Slocum Glider Dataset (Feb 2022)", "wmo_id": 4802989 }, - "extra_kwargs": { - "pseudograms": { - "enable": true, - "create_images": false, - "echosounderRange": 60.0, - "echosounderDirection": "up", - "echosounderRangeUnits": "meters" - } - }, "variables": { "platform": { "attributes": { From 18268cc16d4c3826a689e9d0762ca81baa408706 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 25 Apr 2022 21:57:01 -0400 Subject: [PATCH 2/3] Fix pseudogram test --- gutils/tests/test_slocum.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gutils/tests/test_slocum.py b/gutils/tests/test_slocum.py index 2c80648..e754a83 100644 --- a/gutils/tests/test_slocum.py +++ b/gutils/tests/test_slocum.py @@ -393,17 +393,19 @@ def test_pseudogram(self): output_files = sorted(os.listdir(self.netcdf_path)) output_files = [ os.path.join(self.netcdf_path, o) for o in output_files ] - assert len(output_files) == 17 + assert len(output_files) == 33 # First profile with nc4.Dataset(output_files[0]) as ncd: assert ncd.variables['profile_id'].ndim == 0 + # first time in the first profile assert ncd.variables['profile_id'][0] == 1639020410 # Last profile with nc4.Dataset(output_files[-1]) 
as ncd: assert ncd.variables['profile_id'].ndim == 0 - assert ncd.variables['profile_id'][0] == 1639069272 + # first time in the last ecodroid profile + assert ncd.variables['profile_id'][0] == 1639070632 # Check netCDF file for compliance ds = namedtuple('Arguments', ['file']) From 49af4a01f6f1ed7b13768873accf1c683c51f3b1 Mon Sep 17 00:00:00 2001 From: Kyle Wilcox Date: Mon, 25 Apr 2022 22:26:40 -0400 Subject: [PATCH 3/3] Counting is not my thing tonight --- gutils/tests/test_slocum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gutils/tests/test_slocum.py b/gutils/tests/test_slocum.py index e754a83..742b0e2 100644 --- a/gutils/tests/test_slocum.py +++ b/gutils/tests/test_slocum.py @@ -393,7 +393,7 @@ def test_pseudogram(self): output_files = sorted(os.listdir(self.netcdf_path)) output_files = [ os.path.join(self.netcdf_path, o) for o in output_files ] - assert len(output_files) == 33 + assert len(output_files) == 32 # First profile with nc4.Dataset(output_files[0]) as ncd: