Merge pull request #18 from SECOORA/extras-as-profile-files
Move the "extras" data to be in their own profile netCDF file
kwilcox committed Apr 26, 2022
2 parents ebbe59d + 49af4a0 commit f32d479
Showing 9 changed files with 145 additions and 125 deletions.
17 changes: 1 addition & 16 deletions gutils/filters.py
@@ -2,7 +2,6 @@
# coding=utf-8
import os
import pandas as pd
import numpy as np

from gutils.yo import assign_profiles

@@ -177,23 +176,9 @@ def process_dataset(file,
tolerance=pd.Timedelta(minutes=10)
).set_index(extras.index)
extras['profile'] = merge.profile.ffill()

# To have consistent netCDF files, empty "extras" variables need to exist
# for each valid profile calculated above in "filtered".
profile_list = set(filtered['profile'].unique())
extras_list = set(extras['profile'].unique().astype('int32'))
profiles_to_add = profile_list.difference(extras_list)
if profiles_to_add:
first_t_in_profiles = filtered.groupby(by=["profile"]).min()['t']
for profile_to_add in profiles_to_add:
empty_df = pd.DataFrame([[np.nan] * len(extras.columns)], columns=extras.columns)
empty_df['profile'] = profile_to_add
empty_df['pseudogram_time'] = first_t_in_profiles[profile_to_add]
empty_df.set_index('pseudogram_time', inplace=True)
extras = pd.concat([extras, empty_df], sort=True)

except BaseException as e:
L.error(f"Could not merge 'extras' data, skipping: {e}")
extras = pd.DataFrame()

except ValueError as e:
L.exception('{} - Skipping'.format(e))
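For context, a minimal sketch (not part of the diff) of the nearest-timestamp alignment that survives in process_dataset; the pd.merge_asof call itself is truncated in the hunk above, so the direction argument and the example column values are assumptions:

import pandas as pd

# Profiles already assigned by assign_profiles(): one row per sample time.
filtered = pd.DataFrame({
    't': pd.to_datetime(['2022-02-12 00:00', '2022-02-12 00:30']),
    'profile': [0, 1],
})

# "Extras" rows (e.g. pseudogram samples) indexed by their own time axis.
extras = pd.DataFrame(
    {'pseudogram_sv': [-70.0, -65.0]},
    index=pd.to_datetime(['2022-02-12 00:02', '2022-02-12 00:31']),
)

# Align each extras row to the nearest profile time within 10 minutes,
# then forward-fill the resulting profile numbers.
merge = pd.merge_asof(
    extras,
    filtered[['t', 'profile']],
    left_index=True,
    right_on='t',
    direction='nearest',            # assumption: truncated in the hunk above
    tolerance=pd.Timedelta(minutes=10),
).set_index(extras.index)
extras['profile'] = merge.profile.ffill()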
67 changes: 45 additions & 22 deletions gutils/nc.py
@@ -14,6 +14,7 @@
from datetime import datetime
from collections import OrderedDict

import numpy as np
import pandas as pd
import netCDF4 as nc4
from compliance_checker.runner import ComplianceChecker, CheckSuite
@@ -191,11 +192,7 @@ def get_creation_attributes(profile):
}


def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH,
extras_df=None):

if extras_df is None:
extras_df = pd.DataFrame()
def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH):

try:
# Path to hold file while we create it
@@ -281,9 +278,6 @@ def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=Pro
reduce_dims=True,
mode='a') as ncd:

# Set the extras data
set_extra_data(ncd, extras_df)

# We only want to apply metadata from the `attrs` map if the variable is already in
# the netCDF file or it is a scalar variable (no shape defined). This avoids
# creating measured variables that were not measured in this profile.
@@ -359,21 +353,22 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes
# Create NetCDF Files for Each Profile
written_files = []

for df in [data, extras_df]:
reserved_columns = [
'trajectory',
'profile',
't',
'x',
'y',
'z',
'u_orig',
'v_orig'
]

for df in [data, extras_df]:
# Optionally, remove any variables from the dataframe that do not have metadata assigned
if subset is True:
all_columns = set(df.columns)
reserved_columns = [
'trajectory',
'profile',
't',
'x',
'y',
'z',
'u_orig',
'v_orig'
]

removable_columns = all_columns - set(reserved_columns)
orphans = removable_columns - set(attrs.get('variables', {}).keys())
L.debug(
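A minimal, hypothetical sketch of the metadata-subset step above: any column that is neither reserved nor described in the attrs "variables" map is treated as an orphan and dropped before profiles are written (the drop itself is truncated in this hunk, so the last line is an assumption):

import pandas as pd

attrs = {'variables': {'temperature': {}}}
df = pd.DataFrame(columns=['t', 'x', 'y', 'z', 'profile', 'temperature', 'sci_extra_sensor'])

reserved_columns = [
    'trajectory', 'profile', 't', 'x', 'y', 'z', 'u_orig', 'v_orig'
]
removable_columns = set(df.columns) - set(reserved_columns)
orphans = removable_columns - set(attrs.get('variables', {}).keys())
df = df.drop(columns=list(orphans))  # keeps the reserved columns plus "temperature"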
@@ -393,11 +388,39 @@

profile_extras = pd.DataFrame()
if not extras_df.empty:
profile_extras = extras_df.loc[extras_df.profile == pi]

# Write the extras dimension to a new profile file
profile_extras = extras_df.loc[extras_df.profile == pi].copy()
if profile_extras.empty:
continue

# Standardize the columns of the "extras" from the matched profile
profile_extras.loc[:, 't'] = profile_extras.index
profile_extras = profile_extras.reset_index(drop=True)
profile_extras.loc[:, 'x'] = profile.x.dropna().iloc[0]
profile_extras.loc[:, 'y'] = profile.y.dropna().iloc[0]

# Fill in extras with empty data
for c in profile:
if c not in profile_extras:
profile_extras.loc[:, c] = np.nan
profile_extras.loc[:, c] = profile_extras[c].astype(profile[c].dtype)

# Fill in regular profile with empty data
for c in profile_extras:
if c not in profile:
profile.loc[:, c] = np.nan
profile.loc[:, c] = profile[c].astype(profile_extras[c].dtype)

try:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode, profile_id_type)
written.append(cr)
except BaseException:
L.exception('Error creating extra netCDF profile {}. Skipping.'.format(pi))
continue

try:
cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type,
extras_df=profile_extras)
cr = create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type)
written.append(cr)
except BaseException:
L.exception('Error creating netCDF for profile {}. Skipping.'.format(pi))
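To summarize the new flow above: each profile now produces two netCDF files, one for the regular data and one for the "extras", and both frames are padded so they share a schema. A hypothetical helper capturing that padding (float-compatible dtypes assumed, as with the measured columns here):

import numpy as np
import pandas as pd

def align_columns(profile: pd.DataFrame, profile_extras: pd.DataFrame):
    # Pad the extras frame with any column only present in the profile frame,
    # then cast it to the profile's dtype so the two files stay consistent.
    for c in profile.columns:
        if c not in profile_extras.columns:
            profile_extras[c] = np.nan
            profile_extras[c] = profile_extras[c].astype(profile[c].dtype)
    # And the reverse: pad the profile frame with extras-only columns.
    for c in profile_extras.columns:
        if c not in profile.columns:
            profile[c] = np.nan
            profile[c] = profile[c].astype(profile_extras[c].dtype)
    return profile, profile_extras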
86 changes: 54 additions & 32 deletions gutils/slocum/__init__.py
@@ -76,14 +76,28 @@ def extras(self, data, **kwargs):
using an extras time dimension.
"""

ECOMETRICS_SENSORS = [ 'sci_echodroid_aggindex', 'sci_echodroid_ctrmass', 'sci_echodroid_eqarea', 'sci_echodroid_inertia', 'sci_echodroid_propocc', 'sci_echodroid_sa', 'sci_echodroid_sv']
PSEUDOGRAM_VARS = ['pseudogram_time', 'pseudogram_depth', 'pseudogram_sv']
ECOMETRICS_SENSORS = [
'sci_echodroid_aggindex',
'sci_echodroid_ctrmass',
'sci_echodroid_eqarea',
'sci_echodroid_inertia',
'sci_echodroid_propocc',
'sci_echodroid_sa',
'sci_echodroid_sv',
]

PSEUDOGRAM_VARS = [
'pseudogram_time',
'pseudogram_depth',
'pseudogram_sv',
]

# Default extra settings
pseudograms_attrs = kwargs.get('pseudograms', {})
enable_pseudograms = pseudograms_attrs.get('enable', False)
enable_nc = pseudograms_attrs.get('enable_nc', False)
enable_ascii = pseudograms_attrs.get('enable_ascii', False)

if enable_pseudograms:
if enable_nc and enable_ascii:

# Two possible outcomes:
# (1) If the pseudogram exists, align ecometrics data along
@@ -123,13 +137,18 @@ def extras(self, data, **kwargs):
# Create ECOMETRICS variable placeholders
if have_pseudogram:
# ecometrics data is inserted into time data as provided by the pseudogram
size = len(self._extras['pseudogram_time'])
for sensor in ECOMETRICS_SENSORS:
self._extras[sensor] = np.full((len(self._extras['pseudogram_time'])), np.nan)
self._extras[sensor] = np.full(size, np.nan)

else:
# with a missing pseudogram, we can use a shorter list of times
# we have to create placeholders for PSEUDOGRAM and ECOMETRICS variables
for sensor in PSEUDOGRAM_VARS + ECOMETRICS_SENSORS:
self._extras[sensor] = np.full((len(ecometricsData)), np.nan)
size = len(ecometricsData)
for sensor in ECOMETRICS_SENSORS:
self._extras[sensor] = np.full(size, np.nan)
for sensor in PSEUDOGRAM_VARS:
self._extras[sensor] = np.full(size, np.nan)

if have_pseudogram:
for _, row in ecometricsData.iterrows():
@@ -172,15 +191,24 @@ def extras(self, data, **kwargs):
self._extras.pseudogram_time, unit='s', origin='unix'
)

if have_pseudogram:
self._extras = self._extras.sort_values([
'pseudogram_time',
'pseudogram_depth'
])
else:
self._extras = self._extras.sort_values(['pseudogram_time'])

self._extras.set_index("pseudogram_time", inplace=True)
if not self._extras['pseudogram_depth'].any():
self._extras.loc[:, 'pseudogram_depth'] = 0.0

self._extras = self._extras.sort_values([
'pseudogram_time',
'pseudogram_depth'
])

# Return a "standardized" dataframe with "t" as the index
# and a column named "z".
self._extras.rename(
columns={
"pseudogram_time": "t",
"pseudogram_depth": "z"
},
inplace=True
)
self._extras.set_index("t", inplace=True)

return self._extras, data
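For illustration, a small hypothetical example of the "standardized" frame returned above: pseudogram_time/pseudogram_depth are renamed to "t"/"z", "t" becomes the index, and the ecometrics sensors ride along as ordinary columns (placeholder NaNs when only the pseudogram is present):

import numpy as np
import pandas as pd

extras = pd.DataFrame({
    'pseudogram_time': pd.to_datetime([1644624000, 1644624001], unit='s', origin='unix'),
    'pseudogram_depth': [5.0, 10.0],
    'pseudogram_sv': [-70.0, -65.0],
    'sci_echodroid_sv': [np.nan, np.nan],   # ecometrics placeholder
})

extras = extras.sort_values(['pseudogram_time', 'pseudogram_depth'])
extras = extras.rename(columns={'pseudogram_time': 't', 'pseudogram_depth': 'z'})
extras = extras.set_index('t')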

@@ -518,8 +546,8 @@ def convert(self):
]

pseudograms_attrs = self.extra_kwargs.get('pseudograms', {})
have_pseudograms = pseudograms_attrs.get('enable', False)
if have_pseudograms:
enable_ascii = pseudograms_attrs.get('enable_ascii', False)
if enable_ascii:
# Perform pseudograms if this ASCII file matches the deployment
# name of things we know to have the data. There needs to be a
# better way to figure this out, but we don't have any understanding
@@ -534,25 +562,19 @@
# https://github.com/smerckel/dbdreader

# Defaults
create_images = pseudograms_attrs.get('create_images', False)
enable_image = pseudograms_attrs.get('enable_image', False)
echosounderRange = pseudograms_attrs.get('echosounderRange', 60.0)
echosounderDirection = pseudograms_attrs.get('echosounderDirection', 'down')
if echosounderDirection == 'up':
echosounderRange = - (echosounderRange)

if create_images:
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-i', # Makes the pseudogram images. This is slow!
'-r', f"{echosounderRange}"
]
else:
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-r', f"{echosounderRange}"
]
pargs = pargs + [
'-y', sys.executable,
'-g', # Makes the pseudogram ASCII
'-r', f"{echosounderRange}"
]
if enable_image:
pargs.append('-i') # Makes the pseudogram images. This is slow!

pargs.append(self.tmpdir)
pargs.append(self.destination_directory)
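The same argument assembly as above, condensed into a standalone sketch; the base command and the other entries in pargs are truncated in this hunk, so only the flags shown here are taken from the diff:

import sys

pseudograms_attrs = {
    'enable_ascii': True,
    'enable_image': False,
    'echosounderRange': 60.0,
    'echosounderDirection': 'up',
}

echosounderRange = pseudograms_attrs.get('echosounderRange', 60.0)
if pseudograms_attrs.get('echosounderDirection', 'down') == 'up':
    echosounderRange = -echosounderRange   # an upward-looking echosounder uses a negative range

pargs = []                                 # base command omitted here
pargs += [
    '-y', sys.executable,
    '-g',                                  # makes the pseudogram ASCII
    '-r', f'{echosounderRange}',
]
if pseudograms_attrs.get('enable_image', False):
    pargs.append('-i')                     # makes the pseudogram images (slow)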
27 changes: 1 addition & 26 deletions gutils/templates/slocum_dac.json
@@ -1005,32 +1005,8 @@
"_FillValue": {"type": "float", "data": -9999.9}
}
},
"pseudogram_time": {
"type": "double",
"attributes": {
"long_name": "Pseudogram Time",
"ioos_category": "Other",
"standard_name": "pseudogram_time",
"platform": "platform",
"observation_type": "measured",
"_FillValue": {"type": "double", "data": -1}
}
},
"pseudogram_depth": {
"type": "double",
"attributes": {
"units": "m",
"long_name": "Pseudogram Depth",
"valid_min": 0.0,
"valid_max": 2000.0,
"ioos_category": "Other",
"standard_name": "pseudogram_depth",
"platform": "platform",
"observation_type": "measured",
"_FillValue": {"type": "double", "data": -9999.9}
}
},
"pseudogram_sv": {
"shape": ["time"],
"type": "double",
"attributes": {
"units": "db",
@@ -1041,7 +1017,6 @@
"standard_name": "pseudogram_sv",
"platform": "platform",
"observation_type": "measured",
"coordinates": "pseudogram_time pseudogram_depth",
"_FillValue": {"type": "double", "data": -9999.9}
}
}
10 changes: 10 additions & 0 deletions gutils/tests/resources/slocum/ecometrics/config/deployment.json
@@ -1,6 +1,16 @@
{
"glider": "ecometrics",
"trajectory_date": "20220212T0000",
"extra_kwargs": {
"pseudograms": {
"enable_nc": false,
"enable_ascii": false,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
19 changes: 10 additions & 9 deletions gutils/tests/resources/slocum/ecometrics2/config/deployment.json
@@ -1,6 +1,16 @@
{
"glider": "ecometrics",
"trajectory_date": "20220212T0000",
"extra_kwargs": {
"pseudograms": {
"enable_nc": true,
"enable_ascii": true,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "down",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
@@ -36,15 +46,6 @@
"title": "G507 Slocum Glider Dataset (Feb 2022)",
"wmo_id": 4802989
},
"extra_kwargs": {
"pseudograms": {
"enable": true,
"create_images": false,
"echosounderRange": 60.0,
"echosounderDirection": "down",
"echosounderRangeUnits": "meters"
}
},
"variables": {
"platform": {
"attributes": {
19 changes: 10 additions & 9 deletions gutils/tests/resources/slocum/ecometrics3/config/deployment.json
@@ -8,6 +8,16 @@
"filter_points": 5,
"filter_distance": 1
},
"extra_kwargs": {
"pseudograms": {
"enable_nc": false,
"enable_ascii": false,
"enable_image": false,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"attributes": {
"acknowledgement": "This work was supported by funding from NOAA/IOOS/AOOS.",
"comment": "",
@@ -43,15 +53,6 @@
"title": "G507 Slocum Glider Dataset (Feb 2022)",
"wmo_id": 4802989
},
"extra_kwargs": {
"pseudograms": {
"enable": false,
"create_images": true,
"echosounderRange": 60.0,
"echosounderDirection": "up",
"echosounderRangeUnits": "meters"
}
},
"variables": {
"platform": {
"attributes": {
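Taken together, the three deployment.json changes standardize the relocated extra_kwargs block on the new flag names. A hedged sketch of how that block is read back by the code in this PR (paths and plumbing simplified):

import json

with open('config/deployment.json') as f:          # path from the test resources above
    deployment = json.load(f)

pseudograms_attrs = deployment.get('extra_kwargs', {}).get('pseudograms', {})
enable_nc = pseudograms_attrs.get('enable_nc', False)        # gates the extras profile netCDF path
enable_ascii = pseudograms_attrs.get('enable_ascii', False)  # gates the pseudogram ASCII step in convert()
enable_image = pseudograms_attrs.get('enable_image', False)  # gates the (slow) pseudogram images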