Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move the "extras" data to be in their own profile netCDF file #18

Merged
merged 3 commits
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 1 addition & 16 deletions gutils/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# coding=utf-8
import os
import pandas as pd
import numpy as np

from gutils.yo import assign_profiles

Expand Down Expand Up @@ -177,23 +176,9 @@ def process_dataset(file,
tolerance=pd.Timedelta(minutes=10)
).set_index(extras.index)
extras['profile'] = merge.profile.ffill()

# To have consistent netCDF files, empty "extras" variables need to exist
# in for each valid profile that was calculated above into "filtered".
profile_list = set(filtered['profile'].unique())
extras_list = set(extras['profile'].unique().astype('int32'))
profiles_to_add = profile_list.difference(extras_list)
if profiles_to_add:
first_t_in_profiles = filtered.groupby(by=["profile"]).min()['t']
for profile_to_add in profiles_to_add:
empty_df = pd.DataFrame([[np.nan] * len(extras.columns)], columns=extras.columns)
empty_df['profile'] = profile_to_add
empty_df['pseudogram_time'] = first_t_in_profiles[profile_to_add]
empty_df.set_index('pseudogram_time', inplace=True)
extras = pd.concat([extras, empty_df], sort=True)

except BaseException as e:
L.error(f"Could not merge 'extras' data, skipping: {e}")
extras = pd.DataFrame()

except ValueError as e:
L.exception('{} - Skipping'.format(e))
Expand Down
67 changes: 45 additions & 22 deletions gutils/nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datetime import datetime
from collections import OrderedDict

import numpy as np
import pandas as pd
import netCDF4 as nc4
from compliance_checker.runner import ComplianceChecker, CheckSuite
Expand Down Expand Up @@ -191,11 +192,7 @@ def get_creation_attributes(profile):
}


def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH,
extras_df=None):

if extras_df is None:
extras_df = pd.DataFrame()
def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH):

try:
# Path to hold file while we create it
Expand Down Expand Up @@ -281,9 +278,6 @@ def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=Pro
reduce_dims=True,
mode='a') as ncd:

# Set an extras data
set_extra_data(ncd, extras_df)

# We only want to apply metadata from the `attrs` map if the variable is already in
# the netCDF file or it is a scalar variable (no shape defined). This avoids
# creating measured variables that were not measured in this profile.
Expand Down Expand Up @@ -359,21 +353,22 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes
# Create NetCDF Files for Each Profile
written_files = []

for df in [data, extras_df]:
reserved_columns = [
'trajectory',
'profile',
't',
'x',
'y',
'z',
'u_orig',
'v_orig'
]

for df in [data, extras_df]:
# Optionally, remove any variables from the dataframe that do not have metadata assigned
if subset is True:
all_columns = set(df.columns)
reserved_columns = [
'trajectory',
'profile',
't',
'x',
'y',
'z',
'u_orig',
'v_orig'
]

removable_columns = all_columns - set(reserved_columns)
orphans = removable_columns - set(attrs.get('variables', {}).keys())
L.debug(
Expand All @@ -393,11 +388,39 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes

profile_extras = pd.DataFrame()
if not extras_df.empty:
profile_extras = extras_df.loc[extras_df.profile == pi]

# Write the extras dimension to a new profile file
profile_extras = extras_df.loc[extras_df.profile == pi].copy()
if profile_extras.empty:
continue

# Standardize the columns of the "extras" from the matched profile
profile_extras.loc[:, 't'] = profile_extras.index
profile_extras = profile_extras.reset_index(drop=True)
profile_extras.loc[:, 'x'] = profile.x.dropna().iloc[0]
profile_extras.loc[:, 'y'] = profile.y.dropna().iloc[0]

# Fill in extras with empty data
for c in profile:
if c not in profile_extras:
profile_extras.loc[:, c] = np.nan
profile_extras.loc[:, c] = profile_extras[c].astype(profile[c].dtype)

# Fill in regular profile with empty data
for c in profile_extras:
if c not in profile:
profile.loc[:, c] = np.nan
profile.loc[:, c] = profile[c].astype(profile_extras[c].dtype)

try:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode, profile_id_type)
written.append(cr)
except BaseException:
L.exception('Error creating extra netCDF profile {}. Skipping.'.format(pi))
continue

try:
cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type,
extras_df=profile_extras)
cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type)
written.append(cr)
except BaseException:
L.exception('Error creating netCDF for profile {}. Skipping.'.format(pi))
Expand Down
86 changes: 54 additions & 32 deletions gutils/slocum/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,28 @@ def extras(self, data, **kwargs):
using an extras time dimension.
"""

ECOMETRICS_SENSORS = [ 'sci_echodroid_aggindex', 'sci_echodroid_ctrmass', 'sci_echodroid_eqarea', 'sci_echodroid_inertia', 'sci_echodroid_propocc', 'sci_echodroid_sa', 'sci_echodroid_sv']
PSEUDOGRAM_VARS = ['pseudogram_time', 'pseudogram_depth', 'pseudogram_sv']
ECOMETRICS_SENSORS = [
'sci_echodroid_aggindex',
'sci_echodroid_ctrmass',
'sci_echodroid_eqarea',
'sci_echodroid_inertia',
'sci_echodroid_propocc',
'sci_echodroid_sa',
'sci_echodroid_sv',
]

PSEUDOGRAM_VARS = [
'pseudogram_time',
'pseudogram_depth',
'pseudogram_sv',
]

# Default extra settings
pseudograms_attrs = kwargs.get('pseudograms', {})
enable_pseudograms = pseudograms_attrs.get('enable', False)
enable_nc = pseudograms_attrs.get('enable_nc', False)
enable_ascii = pseudograms_attrs.get('enable_ascii', False)

if enable_pseudograms:
if enable_nc and enable_ascii:

# Two possible outcomes:
# (1) If the pseudogram exists, align ecometrics data along
Expand Down Expand Up @@ -123,13 +137,18 @@ def extras(self, data, **kwargs):
# Create ECOMETRICS variable placeholders
if have_pseudogram:
# ecometrics data is inserted into time data as provided by the pseudogram
size = len(self._extras['pseudogram_time'])
for sensor in ECOMETRICS_SENSORS:
self._extras[sensor] = np.full((len(self._extras['pseudogram_time'])), np.nan)
self._extras[sensor] = np.full(size, np.nan)

else:
# with a missing pseudogram, we can use a shorter list of times
# we have to create placeholders for PSEUDOGRAM and ECOMETRICS variables
for sensor in PSEUDOGRAM_VARS + ECOMETRICS_SENSORS:
self._extras[sensor] = np.full((len(ecometricsData)), np.nan)
size = len(ecometricsData)
for sensor in ECOMETRICS_SENSORS:
self._extras[sensor] = np.full(size, np.nan)
for sensor in PSEUDOGRAM_VARS:
self._extras[sensor] = np.full(size, np.nan)

if have_pseudogram:
for _, row in ecometricsData.iterrows():
Expand Down Expand Up @@ -172,15 +191,24 @@ def extras(self, data, **kwargs):
self._extras.pseudogram_time, unit='s', origin='unix'
)

if have_pseudogram:
self._extras = self._extras.sort_values([
'pseudogram_time',
'pseudogram_depth'
])
else:
self._extras = self._extras.sort_values(['pseudogram_time'])

self._extras.set_index("pseudogram_time", inplace=True)
if not self._extras['pseudogram_depth'].any():
self._extras.loc[:, 'pseudogram_depth'] = 0.0

self._extras = self._extras.sort_values([
'pseudogram_time',
'pseudogram_depth'
])

# Return a "standardized" dataframe with "t" as the index
# and a column named "z".
self._extras.rename(
columns={
"pseudogram_time": "t",
"pseudogram_depth": "z"
},
inplace=True
)
self._extras.set_index("t", inplace=True)

return self._extras, data

Expand Down Expand Up @@ -518,8 +546,8 @@ def convert(self):
]

pseudograms_attrs = self.extra_kwargs.get('pseudograms', {})
have_pseudograms = pseudograms_attrs.get('enable', False)
if have_pseudograms:
enable_ascii = pseudograms_attrs.get('enable_ascii', False)
if enable_ascii:
# Perform pseudograms if this ASCII file matches the deployment
# name of things we know to have the data. There needs to be a
# better way to figure this out, but we don't have any understanding
Expand All @@ -534,25 +562,19 @@ def convert(self):
# https://github.com/smerckel/dbdreader

# Defaults
create_images = pseudograms_attrs.get('create_images', False)
enable_image = pseudograms_attrs.get('enable_image', False)
echosounderRange = pseudograms_attrs.get('echosounderRange', 60.0)
echosounderDirection = pseudograms_attrs.get('echosounderDirection', 'down')
if echosounderDirection == 'up':
echosounderRange = - (echosounderRange)

if create_images:
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-i', # Makes the pseudogram images. This is slow!
'-r', f"{echosounderRange}"
]
else:
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-r', f"{echosounderRange}"
]
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-r', f"{echosounderRange}"
]
if enable_image:
pargs.append('-i') # Makes the pseudogram images. This is slow!

pargs.append(self.tmpdir)
pargs.append(self.destination_directory)
Expand Down
27 changes: 1 addition & 26 deletions gutils/templates/slocum_dac.json
Original file line number Diff line number Diff line change
Expand Up @@ -1005,32 +1005,8 @@
"_FillValue": {"type": "float", "data": -9999.9}
}
},
"pseudogram_time": {
"type": "double",
"attributes": {
"long_name": "Pseudogram Time",
"ioos_category": "Other",
"standard_name": "pseudogram_time",
"platform": "platform",
"observation_type": "measured",
"_FillValue": {"type": "double", "data": -1}
}
},
"pseudogram_depth": {
"type": "double",
"attributes": {
"units": "m",
"long_name": "Pseudogram Depth",
"valid_min": 0.0,
"valid_max": 2000.0,
"ioos_category": "Other",
"standard_name": "pseudogram_depth",
"platform": "platform",
"observation_type": "measured",
"_FillValue": {"type": "double", "data": -9999.9}
}
},
"pseudogram_sv": {
"shape": ["time"],
"type": "double",
"attributes": {
"units": "db",
Expand All @@ -1041,7 +1017,6 @@
"standard_name": "pseudogram_sv",
"platform": "platform",
"observation_type": "measured",
"coordinates": "pseudogram_time pseudogram_depth",
"_FillValue": {"type": "double", "data": -9999.9}
}
}
Expand Down
10 changes: 10 additions & 0 deletions gutils/tests/resources/slocum/ecometrics/config/deployment.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
{
"glider": "ecometrics",
"trajectory_date": "20220212T0000",
"extra_kwargs": {
"pseudograms": {
"enable_nc": false,
"enable_ascii": false,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
Expand Down
19 changes: 10 additions & 9 deletions gutils/tests/resources/slocum/ecometrics2/config/deployment.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
{
"glider": "ecometrics",
"trajectory_date": "20220212T0000",
"extra_kwargs": {
"pseudograms": {
"enable_nc": true,
"enable_ascii": true,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "down",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
Expand Down Expand Up @@ -36,15 +46,6 @@
"title": "G507 Slocum Glider Dataset (Feb 2022)",
"wmo_id": 4802989
},
"extra_kwargs": {
"pseudograms": {
"enable": true,
"create_images": false,
"echosounderRange": 60.0,
"echosounderDirection": "down",
"echosounderRangeUnits": "meters"
}
},
"variables": {
"platform": {
"attributes": {
Expand Down
19 changes: 10 additions & 9 deletions gutils/tests/resources/slocum/ecometrics3/config/deployment.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
"filter_points": 5,
"filter_distance": 1
},
"extra_kwargs": {
"pseudograms": {
"enable_nc": false,
"enable_ascii": false,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
Expand Down Expand Up @@ -43,15 +53,6 @@
"title": "G507 Slocum Glider Dataset (Feb 2022)",
"wmo_id": 4802989
},
"extra_kwargs": {
"pseudograms": {
"enable": false,
"create_images": true,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"variables": {
"platform": {
"attributes": {
Expand Down
Loading