# Creating a netCDF file from a standard DataFrame

In [1]:
import logging

from IPython.lib.pretty import pprint

# Surface the 'gutils' logger's records (DEBUG and above) in the notebook
# output. The handler list is assigned rather than appended to, so re-running
# this cell does not stack up duplicate stream handlers.
logger = logging.getLogger('gutils')
logger.setLevel(logging.DEBUG)
logger.handlers = [logging.StreamHandler()]

### Helper for creating a standard DataFrame

In [2]:
from pathlib import Path

from gutils.slocum import SlocumReader
from gutils.filters import process_dataset

# Sample Slocum ASCII file from the gutils test resources, resolved relative
# to the directory two levels above the current working directory.
base_dir = Path('.').absolute().parent.parent
ascii_folder = base_dir.joinpath('gutils', 'tests', 'resources', 'slocum')
ascii_file = ascii_folder / 'usf_bass_2016_252_1_12_sbd.dat'

# Thresholds handed to the profile filters; the log line printed by
# process_dataset reports how many profiles each one removed.
filter_thresholds = {
    'filter_z': 1,
    'filter_points': 3,
    'filter_time': 10,
    'filter_distance': 1,
}

# Do it all in one shot: returns the standard DataFrame plus the `mode`
# value that the create_netcdf calls further down take as an argument.
standard, mode = process_dataset(
    file=str(ascii_file),
    reader_class=SlocumReader,
    **filter_thresholds
)
print('Profiles : ', len(standard.profile.unique()))
standard.head(5)

('Filtered 6/12 profiles from /data/Development/secoora/sgs/GUTILS/gutils/tests/resources/slocum/usf_bass_2016_252_1_12_sbd.dat', 'Depth (1m): 1', 'Points (3): 0', 'Time (10s): 5', 'Distance (1m): 0')


Profiles :  6


Unnamed: 0,c_heading,c_wpt_lat,m_altitude,m_avg_speed,m_ballast_pumped,m_battery,m_battpos,m_depth,m_depth_rate,m_gps_lat,...,t,drv_m_gps_lat,drv_m_gps_lon,y,x,pressure,z,salinity,density,profile
12,,,,,,,,0.036173,,,...,2016-09-09 16:52:25.372530,,,28.367681,-80.298163,,,,,1
13,,,,,,,,0.036173,,,...,2016-09-09 16:52:30.387050,,,28.367684,-80.29817,,,,,1
14,,,,,,,,,,,...,2016-09-09 16:52:33.376830,,,28.367686,-80.298175,,,,,1
15,,,,,,,,,,,...,2016-09-09 16:52:34.410100,,,28.367687,-80.298176,,,,,1
16,,,,,,,0.941406,0.125632,,,...,2016-09-09 16:52:35.397030,,,28.367687,-80.298178,,,,,1


## Create netCDF from a default template

In [3]:
import tempfile
from gutils.nc import create_netcdf, read_attrs

# Fresh temporary directory used as `output_path` for the netCDF files
# created in the cells below.
netcdf_output = tempfile.mkdtemp()

In [4]:
# Load the default "trajectory" metadata template.
default_attrs = read_attrs()

# Fill in the attributes required to identify a unique deployment.
default_attrs.update({
    'glider': 'bass',
    'trajectory_date': '20160909T1733',
})

# Names of the variables the template defines metadata for.
list(default_attrs['variables'])

['crs',
 'conductivity',
 'density',
 'depth',
 'm_altitude',
 'lat',
 'lat_uv',
 'lon',
 'lon_uv',
 'platform',
 'pressure',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'salinity',
 'temperature',
 'time',
 'time_uv',
 'trajectory',
 'u',
 'v',
 'u_orig',
 'v_orig',
 'sci_oxy3835_oxygen',
 'sci_oxy3835_wphase_oxygen',
 'sci_oxy3835_wphase_saturation',
 'sci_bbfl2s_chlor_scaled',
 'sci_flbbcd_chlor_units',
 'sci_flntu_chlor_units',
 'sci_flntu_turb_units',
 'sci_bbfl2s_cdom_scaled',
 'sci_flbbcd_cdom_units']

In [5]:
import netCDF4 as nc4

# Write the standard DataFrame to netCDF using the default template; the
# returned list holds the paths of the files that were created.
netcdf_files = create_netcdf(
    data=standard,
    attrs=default_attrs,
    mode=mode,
    output_path=netcdf_output
)

# Show which variables ended up in the first file.
first_path = netcdf_files[0]
with nc4.Dataset(first_path) as first_profile:
    variable_names = list(first_profile.variables)
pprint(variable_names)

Excluded from output (absent from JSON config):
  * m_roll
  * c_wpt_lat
  * m_pitch
  * sci_bbfl2s_bb_scaled
  * m_gps_lon
  * m_mission_avg_speed_climbing
  * m_present_time
  * m_battery
  * m_leakdetect_voltage
  * drv_m_gps_lon
  * m_vacuum
  * m_water_depth
  * m_battpos
  * m_depth
  * sci_water_pressure
  * m_lat
  * m_vehicle_temp
  * m_lon
  * m_mission_avg_speed_diving
  * m_ballast_pumped
  * m_avg_speed
  * m_depth_rate
  * m_gps_lat
  * sci_m_present_time
  * m_heading
  * drv_m_gps_lat
  * c_heading
Created: /tmp/tmpxalj0k8v/bass_20160909T165225Z_1473439945_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T165714Z_1473440234_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170015Z_1473440415_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170453Z_1473440693_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170754Z_1473440874_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T171123Z_1473441083_rt.nc


['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'm_altitude',
 'sci_bbfl2s_cdom_scaled',
 'sci_bbfl2s_chlor_scaled',
 'sci_oxy3835_oxygen',
 'conductivity',
 'temperature',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']


## Create netCDF from an IOOS template

In [6]:
# IOOS NGDAC metadata
ioos_attrs = read_attrs(template='ioos_ngdac')

# The required attributes to identify a unique deployment
ioos_attrs['glider'] = 'bass'
ioos_attrs['trajectory_date'] = '20160909T1733'

list(ioos_attrs['variables'].keys())

['crs',
 'conductivity',
 'density',
 'depth',
 'lat',
 'lat_uv',
 'lon',
 'lon_uv',
 'platform',
 'pressure',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'salinity',
 'temperature',
 'time',
 'time_uv',
 'trajectory',
 'u',
 'v']

In [7]:
import netCDF4 as nc4

# Same export as before, but driven by the IOOS NGDAC template. The files are
# written to the same output directory as the previous run.
netcdf_files = create_netcdf(
    data=standard,
    attrs=ioos_attrs,
    mode=mode,
    output_path=netcdf_output
)

# Show which variables ended up in the first file.
first_path = netcdf_files[0]
with nc4.Dataset(first_path) as first_profile:
    variable_names = list(first_profile.variables)
pprint(variable_names)

Excluded from output (absent from JSON config):
  * m_roll
  * m_altitude
  * c_wpt_lat
  * m_pitch
  * sci_bbfl2s_bb_scaled
  * m_gps_lon
  * m_mission_avg_speed_climbing
  * sci_bbfl2s_cdom_scaled
  * m_present_time
  * m_battery
  * m_leakdetect_voltage
  * drv_m_gps_lon
  * m_vacuum
  * m_water_depth
  * m_battpos
  * sci_oxy3835_oxygen
  * m_depth
  * sci_bbfl2s_chlor_scaled
  * sci_water_pressure
  * m_lat
  * m_vehicle_temp
  * m_lon
  * m_mission_avg_speed_diving
  * m_ballast_pumped
  * m_avg_speed
  * m_depth_rate
  * m_gps_lat
  * sci_m_present_time
  * m_heading
  * drv_m_gps_lat
  * c_heading
Created: /tmp/tmpxalj0k8v/bass_20160909T165225Z_1473439945_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T165714Z_1473440234_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170015Z_1473440415_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170453Z_1473440693_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170754Z_1473440874_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T171123Z_1473441083_rt.nc

['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'conductivity',
 'temperature',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']


## Create netCDF without subsetting any variables

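By default, only variables that have metadata defined for them in the template are exported. Pass `subset=False` to `create_netcdf` to export every variable in the DataFrame instead.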


In [8]:
import netCDF4 as nc4

# Default template again, but with subsetting switched off so that variables
# without metadata in the template are written too.
netcdf_files = create_netcdf(
    data=standard,
    attrs=default_attrs,
    mode=mode,
    subset=False,
    output_path=netcdf_output
)

# Show which variables ended up in the first file.
first_path = netcdf_files[0]
with nc4.Dataset(first_path) as first_profile:
    variable_names = list(first_profile.variables)
pprint(variable_names)

Created: /tmp/tmpxalj0k8v/bass_20160909T165225Z_1473439945_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T165714Z_1473440234_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170015Z_1473440415_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170453Z_1473440693_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T170754Z_1473440874_rt.nc
Created: /tmp/tmpxalj0k8v/bass_20160909T171123Z_1473441083_rt.nc


['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'c_heading',
 'c_wpt_lat',
 'm_altitude',
 'm_avg_speed',
 'm_ballast_pumped',
 'm_battery',
 'm_battpos',
 'm_depth',
 'm_depth_rate',
 'm_gps_lat',
 'm_gps_lon',
 'm_heading',
 'm_lat',
 'm_leakdetect_voltage',
 'm_lon',
 'm_mission_avg_speed_climbing',
 'm_mission_avg_speed_diving',
 'm_pitch',
 'm_present_time',
 'm_roll',
 'm_vacuum',
 'm_vehicle_temp',
 'm_water_depth',
 'sci_bbfl2s_bb_scaled',
 'sci_bbfl2s_cdom_scaled',
 'sci_bbfl2s_chlor_scaled',
 'sci_m_present_time',
 'sci_oxy3835_oxygen',
 'conductivity',
 'sci_water_pressure',
 'temperature',
 'drv_m_gps_lat',
 'drv_m_gps_lon',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']


## Produced netCDF files have metadata automatically calculated

In [9]:
# Printing an open Dataset shows a summary that includes its global
# attributes. The file is opened read-only, which is the Dataset default.
first_path = netcdf_files[0]
with nc4.Dataset(first_path, mode='r') as first_profile:
    print(first_profile)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    featureType: trajectory
    Conventions: CF-1.6, Unidata Dataset Discovery v1.0
    date_created: 2017-11-06T15:43:59Z
    cdm_data_type: trajectory
    format_version: IOOS_Glider_NetCDF_v3.0-noqartod.nc
    keywords: AUVS > Autonomous Underwater Vehicles, Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Density, Oceans > Salinity/Density > Salinity
    keywords_vocabulary: GCMD Science Keywords
    license: This data may be redistributed and used without restriction.  Data provided as is with no expressed or implied assurance of quality assurance or quality control
    Metadata_Conventions: CF-1.6, Unidata Dataset Discovery v1.0
    platform_type: Slocum Glider
    processing_level: Dataset taken from glider native file format and is provided as is with no expressed or implied assurance

## Built-in compliance checker

In [20]:
from types import SimpleNamespace
from gutils.nc import check_dataset

# check_dataset is handed an object exposing the path as `.file`; a
# SimpleNamespace is enough to stand in for it here.
first = SimpleNamespace(file=netcdf_files[0])

# A return value of 0 is asserted for the untouched file.
first_result = check_dataset(first)
assert first_result == 0

In [23]:
# Open the last file in append mode and blank out its global `id` attribute,
# presumably so the checker has something to complain about.
# NOTE(review): the recorded output of this cell is an OSError (HDF error),
# not a checker result - confirm the file is not still open or locked
# elsewhere before relying on this cell.
last_path = netcdf_files[-1]
with nc4.Dataset(last_path, 'a') as last_profile:
    last_profile.id = ""

# Re-run the checker on the modified file; the bare call displays its result.
last = SimpleNamespace(file=last_path)
check_dataset(last)

OSError: NetCDF: HDF error