# Combine all netCDF files from the GDP hourly dataset into a data set

In [50]:
import netCDF4 as nc
import xarray as xr
import numpy as np
import sys
import matplotlib.pyplot as plt
from pathlib import Path
import dask
import dask.bag as db
from datetime import datetime
import os

# figure out where this warning comes from
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [51]:
# Root folder holding the raw per-drifter netCDF files (v2.00).
path_gdp = '/Users/pmiron/OneDrive - Florida State University/projects/clouddrift/data/raw/netcdf'

# Every *.nc file found recursively below the root, in sorted path order.
files = sorted(Path(path_gdp).rglob('*.nc'))

# Update the dataset

In [52]:
# The initial sync takes ~6 h (!), so it is better to download the data
# manually first and use this script only for incremental updates.
#!./sync_gdp_hourly.sh

# Calculate the number of observations

In [53]:
def number_of_observations(file):
    """
    Return the length of the 'obs' dimension of one netCDF file.

    The dataset is opened with ``decode_times=False`` and is closed again
    before returning, so calling this function for many files does not
    accumulate open file handles.

    :param file: path of the netCDF file to inspect
    :return: size of the 'obs' dimension
    """
    with xr.open_dataset(file, decode_times=False) as df:
        return df.sizes['obs']

In [54]:
# Cache of the number of observations per file, one integer per line.
file_traj_sz = 'process/rowsize.csv'

# The cache is rebuilt when it does not exist yet or when the raw-data folder
# has a more recent modification time than the cache file.
# NOTE(review): only the mtime of the top-level folder is compared; confirm
# that this is enough to detect updated files in nested folders.
cache_is_stale = (
    not os.path.isfile(file_traj_sz)
    or os.path.getmtime(path_gdp) > os.path.getmtime(file_traj_sz)
)

if cache_is_stale:
    # files were modified
    print('Updating the rowsize variable.')
    delayed_sizes = [dask.delayed(number_of_observations)(file) for file in files]
    rowsize = np.array(dask.compute(*delayed_sizes))

    # save to file (create the output folder first so savetxt cannot fail on it)
    os.makedirs(os.path.dirname(file_traj_sz), exist_ok=True)
    np.savetxt(file_traj_sz, rowsize, fmt='%d')
else:
    # load if exists already and up to date
    print('Already up-to-date.')
    # ndmin=1 keeps rowsize one-dimensional even when the cache has a single
    # line; otherwise loadtxt returns a 0-d array and len(rowsize) fails.
    rowsize = np.loadtxt(file_traj_sz, dtype='int', ndmin=1)

Updating the rowsize variable.


# Create metadata structures

In [56]:
# One entry per trajectory (i.e. per input file).
nb_traj = len(rowsize)

# --- identifiers ---
id = np.zeros(nb_traj, dtype=np.uint32)
imei = np.zeros(nb_traj, dtype=np.uint64)
wmo = np.zeros(nb_traj, dtype=np.uint32)
expno = np.zeros(nb_traj, dtype=np.uint32)
location_type = np.zeros(nb_traj, dtype=bool)  # 0 Argos, 1 GPS

# --- deployment, end of record and drogue loss ---
deploy_date = np.zeros(nb_traj, dtype='datetime64[ns]')
deploy_lat = np.zeros(nb_traj, dtype=np.float32)
deploy_lon = np.zeros(nb_traj, dtype=np.float32)
end_date = np.zeros(nb_traj, dtype='datetime64[ns]')
end_lon = np.zeros(nb_traj, dtype=np.float32)
end_lat = np.zeros(nb_traj, dtype=np.float32)
drogue_lost_date = np.zeros(nb_traj, dtype='datetime64[ns]')
type_death = np.zeros(nb_traj, dtype=np.uint8)

# --- fixed-width text fields (15 characters unless stated otherwise) ---
type_buoy = np.chararray((nb_traj,), itemsize=15)
deployment_ship = np.chararray((nb_traj,), itemsize=15)
deployment_status = np.chararray((nb_traj,), itemsize=15)
buoy_type_manufacturer = np.chararray((nb_traj,), itemsize=15)
buoy_type_sensor_array = np.chararray((nb_traj,), itemsize=15)
purchaser_funding = np.chararray((nb_traj,), itemsize=15)
sensor_upgrade = np.chararray((nb_traj,), itemsize=15)
transmissions = np.chararray((nb_traj,), itemsize=15)
deploying_country = np.chararray((nb_traj,), itemsize=15)
# the comments contain characters that need Unicode support
deployment_comments = np.chararray((nb_traj,), itemsize=15, unicode=True)
manufacture_sensor_type = np.chararray((nb_traj,), itemsize=5)
drogue_type = np.chararray((nb_traj,), itemsize=15)
drogue_detect_sensor = np.chararray((nb_traj,), itemsize=15)

# --- program and manufacturing details ---
current_program = np.zeros(nb_traj, dtype=np.uint32)
manufacture_year = np.zeros(nb_traj, dtype=np.uint16)
manufacture_month = np.zeros(nb_traj, dtype=np.uint16)
manufacture_voltage = np.zeros(nb_traj, dtype=np.uint16)

# --- float and drogue geometry ---
float_diameter = np.zeros(nb_traj, dtype=np.float32)
subsfc_float_presence = np.zeros(nb_traj, dtype=bool)
drogue_length = np.zeros(nb_traj, dtype=np.float32)
drogue_ballast = np.zeros(nb_traj, dtype=np.float32)
drag_area_above_drogue = np.zeros(nb_traj, dtype=np.float32)
drag_area_drogue = np.zeros(nb_traj, dtype=np.float32)
drag_area_ratio = np.zeros(nb_traj, dtype=np.float32)
drag_center_depth = np.zeros(nb_traj, dtype=np.float32)

# Create the data structures

In [57]:
# Total number of observations over all trajectories.
nb_obs = np.sum(rowsize)

# --- position and time, one value per observation (time step) ---
longitude = np.zeros(nb_obs, dtype=np.float32)
latitude = np.zeros(nb_obs, dtype=np.float32)
time = np.zeros(nb_obs, dtype='datetime64[ns]')
gap = np.zeros(nb_obs, dtype='datetime64[ns]')

# --- velocities and their errors ---
ve = np.zeros(nb_obs, dtype=np.float32)
vn = np.zeros(nb_obs, dtype=np.float32)
err_lat = np.zeros(nb_obs, dtype=np.float32)
err_lon = np.zeros(nb_obs, dtype=np.float32)
err_ve = np.zeros(nb_obs, dtype=np.float32)
err_vn = np.zeros(nb_obs, dtype=np.float32)

# --- sst data set: values, errors and flags ---
sst = np.zeros(nb_obs, dtype=np.float32)
sst1 = np.zeros(nb_obs, dtype=np.float32)
sst2 = np.zeros(nb_obs, dtype=np.float32)
err_sst = np.zeros(nb_obs, dtype=np.float32)
err_sst1 = np.zeros(nb_obs, dtype=np.float32)
err_sst2 = np.zeros(nb_obs, dtype=np.float32)
flg_sst = np.zeros(nb_obs, dtype=np.float32)
flg_sst1 = np.zeros(nb_obs, dtype=np.float32)
flg_sst2 = np.zeros(nb_obs, dtype=np.uint32)

In [61]:
def str_to_float(value, default=np.nan):
    """
    Convert a value to float, falling back to a default.

    :param value: value to convert (typically a string attribute)
    :param default: returned when the value cannot be converted to a float
        or when the conversion yields NaN
    :return: the converted float, or `default`
    """
    try:
        fvalue = float(value)
    except (TypeError, ValueError):
        # TypeError covers non-string inputs such as None, for which float()
        # does not raise ValueError.
        return default
    return default if np.isnan(fvalue) else fvalue
    
def cut_str(value, max_length):
    """
    Truncate a sequence to its first `max_length` items.

    :param value: string (or other sliceable sequence) to shorten
    :param max_length: slice end; values shorter than this are returned whole
    :return: value[:max_length]
    """
    head = slice(None, max_length)
    return value[head]

def location(value: str):
    """
    Map the location-type label to an integer code.

    :param value: label, expected to be "Argos" or "GPS"
    :return: 0 for "Argos", 1 for "GPS", None for any other value
    """
    if value == 'GPS':
        return 1
    if value == 'Argos':
        return 0
    return None
    
def fill_ragged_array(file, tid, oid):
    """
    Copy one trajectory from its netCDF file into the module-level arrays.

    Per-trajectory values are written at index `tid`; per-observation values
    are written to the slice [oid, oid + size), where size is the length of
    the file's 'obs' dimension. The file is closed before returning.

    :param file: path and filename of the netCDF file
    :param tid: trajectory index
    :param oid: index of the first observation in the ragged array
    """
    # The context manager closes the file once the values are copied, so the
    # loop over all files does not accumulate open handles.
    with xr.open_dataset(file) as ds:
        # `.sizes` is the mapping from dimension name to length (the same
        # access used in number_of_observations).
        size = ds.sizes['obs']
        obs = slice(oid, oid + size)

        # scalar
        id[tid] = ds.ID.data[0]
        imei[tid] = 0 if ds.attrs['imei']=='' else cut_str(ds.attrs['imei'], 15)
        location_type[tid] = location(ds.location_type) # 0 Argos, 1 GPS
        wmo[tid] = ds.WMO.data[0]
        expno[tid] = ds.expno.data[0]
        deploy_date[tid] = ds.deploy_date.data[0]
        deploy_lon[tid] = ds.deploy_lon.data[0]
        deploy_lat[tid] = ds.deploy_lat.data[0]
        end_date[tid] = ds.end_date.data[0]
        end_lon[tid] = ds.end_lon.data[0]
        end_lat[tid] = ds.end_lat.data[0]
        drogue_lost_date[tid] = ds.drogue_lost_date.data[0]
        type_death[tid] = ds.typedeath.data[0]
        type_buoy[tid] = ds.typebuoy.data[0]

        # vectors
        longitude[obs] = ds.longitude.data[0]
        latitude[obs] = ds.latitude.data[0]
        time[obs] = ds.time.data[0]
        ve[obs] = ds.ve.data[0]
        vn[obs] = ds.vn.data[0]
        err_lat[obs] = ds.err_lat.data[0]
        err_lon[obs] = ds.err_lon.data[0]
        err_ve[obs] = ds.err_ve.data[0]
        err_vn[obs] = ds.err_vn.data[0]
        gap[obs] = ds.gap.data[0]
        sst[obs] = ds.sst.data[0]
        sst1[obs] = ds.sst1.data[0]
        sst2[obs] = ds.sst2.data[0]
        err_sst[obs] = ds.err_sst.data[0]
        err_sst1[obs] = ds.err_sst1.data[0]
        err_sst2[obs] = ds.err_sst2.data[0]
        flg_sst[obs] = ds.flg_sst.data[0]
        flg_sst1[obs] = ds.flg_sst1.data[0]
        flg_sst2[obs] = ds.flg_sst2.data[0]

        # those values were stored in the global attributes; the trailing
        # slices strip the unit suffix noted at the end of each line
        deployment_ship[tid] = cut_str(ds.attrs['DeployingShip'], 15)
        deployment_status[tid] = cut_str(ds.attrs['DeploymentStatus'], 15)
        buoy_type_manufacturer[tid] = cut_str(ds.attrs['BuoyTypeManufacturer'], 15)
        buoy_type_sensor_array[tid] = cut_str(ds.attrs['BuoyTypeSensorArray'], 15)
        current_program[tid] = str_to_float(ds.attrs['CurrentProgram'])
        purchaser_funding[tid] = cut_str(ds.attrs['PurchaserFunding'], 15)
        sensor_upgrade[tid] = cut_str(ds.attrs['SensorUpgrade'], 15)
        transmissions[tid] = cut_str(ds.attrs['Transmissions'], 15)
        deploying_country[tid] = cut_str(ds.attrs['DeployingCountry'], 15)
        deployment_comments[tid] = cut_str(ds.attrs['DeploymentComments'], 15)
        manufacture_year[tid] = str_to_float(ds.attrs['ManufactureYear'], -1)
        manufacture_month[tid] = str_to_float(ds.attrs['ManufactureMonth'], -1)
        manufacture_sensor_type[tid] = cut_str(ds.attrs['ManufactureSensorType'], 5)
        manufacture_voltage[tid] = str_to_float(ds.attrs['ManufactureVoltage'][:-6], -1) # X volts
        float_diameter[tid] = str_to_float(ds.attrs['FloatDiameter'][:-3]) # X cm
        subsfc_float_presence[tid] = str_to_float(ds.attrs['SubsfcFloatPresence'])
        drogue_type[tid] = cut_str(ds.attrs['DrogueType'], 7)
        drogue_length[tid] = str_to_float(ds.attrs['DrogueLength'][:-2]) # X m
        drogue_ballast[tid] = str_to_float(ds.attrs['DrogueBallast'][:-3]) # X kg
        drag_area_above_drogue[tid] = str_to_float(ds.attrs['DragAreaAboveDrogue'][:-4]) # X m^2
        drag_area_drogue[tid] = str_to_float(ds.attrs['DragAreaOfDrogue'][:-4]) # X m^2
        drag_area_ratio[tid] = str_to_float(ds.attrs['DragAreaRatio'])
        drag_center_depth[tid] = str_to_float(ds.attrs['DrogueCenterDepth'][:-2]) # X m
        drogue_detect_sensor[tid] = cut_str(ds.attrs['DrogueDetectSensor'], 15)

In [62]:
# Start offset of each trajectory in the contiguous ragged array: the
# cumulative sum of the row sizes with a leading 0 (one more entry than
# there are trajectories).
index_traj = np.insert(np.cumsum(rowsize), 0, 0)

# Trajectory id repeated once per observation (ids[obs]);
# might be removed if grouping performed more efficiently
ids = np.repeat(id, repeats=rowsize)

# Previous files with date issues:
- /Users/pmiron/OneDrive - Florida State University/projects/clouddrift/data/raw/gps/drifter_12294040.nc
- /Users/pmiron/OneDrive - Florida State University/projects/clouddrift/data/raw/gps/drifter_12469250.nc
- /Users/pmiron/OneDrive - Florida State University/projects/clouddrift/data/raw/gps/drifter_60725400.nc


In [None]:
%%time

list_problem_dates = []

# Fill the arrays one file at a time; trajectory i starts at index_traj[i].
for i, filename in enumerate(files):
    # progress counter, rewritten in place thanks to the carriage return
    print('{:d}/{:d}'.format(i, len(files)-1), end='\r')
    fill_ragged_array(filename, i, index_traj[i])

# Create the xarray Datasets

## Metadata

In [44]:
# Metadata-only dataset: one value per trajectory (dimension 'traj'),
# with the buoy ID as coordinate.
ds_traj = xr.Dataset(
    data_vars=dict(
        rowsize=(['traj'], rowsize, {"long_name": "Number of observations per trajectory", "units":""}), # don't really need anymore
        imei=(['traj'], imei, {"long_name": "International Mobile Equipment Identity", "units":""}),
        location_type=(['traj'], location_type, {"long_name": "Argos (0), GPS (1)", "units":""}),
        WMO=(['traj'], wmo, {"long_name": "World Meteorological Organization buoy identification number", "units":""}),
        expno=(['traj'], expno, {"long_name": "Experiment number", "units":""}),
        deploy_date=(['traj'], deploy_date, {"long_name": "Deployment date and time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        deploy_lon=(['traj'], deploy_lon, {"long_name": "Deployment longitude", "units":"degrees_east"}),
        deploy_lat=(['traj'], deploy_lat, {"long_name": "Deployment latitude", "units":"degrees_north"}),
        end_date=(['traj'], end_date, {"long_name": "End date and time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        end_lat=(['traj'], end_lat, {"long_name": "End latitude", "units":"degrees_north"}),
        end_lon=(['traj'], end_lon, {"long_name": "End longitude", "units":"degrees_east"}),
        drogue_lost_date=(['traj'], drogue_lost_date, {"long_name": "Date and time of drogue loss"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        type_death=(['traj'], type_death, {"long_name": "Type of death (0=buoy still alive, 1=buoy ran aground, 2=picked up by vessel, 3=stop transmitting, 4=sporadic transmissions, 5=bad batteries, 6=inactive status)", "units":""}),
        type_buoy=(['traj'], type_buoy, {"long_name": "Buoy type (see https://www.aoml.noaa.gov/phod/dac/dirall.html)", "units":""}),
        DeploymentShip=(['traj'], deployment_ship, {"long_name": "Name of deployment ship", "units":""}),
        DeploymentStatus=(['traj'], deployment_status, {"long_name": "Deployment status", "units":""}),
        BuoyTypeManufacturer=(['traj'], buoy_type_manufacturer, {"long_name": "Buoy type manufacturer", "units":""}),
        BuoyTypeSensorArray=(['traj'], buoy_type_sensor_array, {"long_name": "Buoy type sensor array", "units":""}),
        CurrentProgram=(['traj'], current_program, {"long_name": "Current Program", "units":""}),
        PurchaserFunding=(['traj'], purchaser_funding, {"long_name": "Purchaser funding", "units":""}),
        SensorUpgrade=(['traj'], sensor_upgrade, {"long_name": "Sensor upgrade", "units":""}),
        Transmissions=(['traj'], transmissions, {"long_name": "Transmissions", "units":""}),
        DeployingCountry=(['traj'], deploying_country, {"long_name": "Deploying country", "units":""}),
        DeploymentComments=(['traj'], deployment_comments, {"long_name": "Deployment comments", "units":""}),
        ManufactureYear=(['traj'], manufacture_year, {"long_name": "Manufacture year", "units":""}),
        ManufactureMonth=(['traj'], manufacture_month, {"long_name": "Manufacture month", "units":""}),
        # long_name previously read "Manufacture voltage" (copy/paste from the next line)
        ManufactureSensorType=(['traj'], manufacture_sensor_type, {"long_name": "Manufacture sensor type", "units":""}),
        ManufactureVoltage=(['traj'], manufacture_voltage, {"long_name": "Manufacture voltage", "units":""}),
        FloatDiameter=(['traj'], float_diameter, {"long_name": "Diameter of surface floater", "units":"cm"}),
        SubsfcFloatPresence=(['traj'], subsfc_float_presence, {"long_name": "Subsurface Float Presence", "units":""}),
        # uses the drogue_type array (type_buoy was passed here by mistake) and
        # the standard "long_name" attribute key
        DrogueType=(['traj'], drogue_type, {"long_name": "Drogue Type", "units":""}),
        DrogueLength=(['traj'], drogue_length, {"long_name": "Length of drogue.", "units":"m"}),
        DrogueBallast=(['traj'], drogue_ballast, {"long_name": "Weight of the drogue's ballast.", "units":"kg"}),
        DragAreaAboveDrogue=(['traj'], drag_area_above_drogue, {"long_name": "Drag area above drogue.", "units":"m^2"}),
        DragAreaOfDrogue=(['traj'], drag_area_drogue, {"long_name": "Drag area drogue.", "units":"m^2"}),
        # a ratio of two areas is dimensionless
        DragAreaRatio=(['traj'], drag_area_ratio, {"long_name": "Drag area ratio", "units":""}),
        DrogueCenterDepth=(['traj'], drag_center_depth, {"long_name": "Average depth of the drogue.", "units":"m"}),
        DrogueDetectSensor=(['traj'], drogue_detect_sensor, {"long_name": "Drogue detection sensor", "units":""}),
    ),

    coords=dict(
        ID=(['traj'], id, {"long_name": "Global Drifter Program Buoy ID", "units":""}),
    ),

    attrs={
        'title': 'Global Drifter Program hourly drifting buoy collection',
        'id': 'Global Drifter Program ID 13555',
        'history': 'Version 1.04.  Metadata from dirall.dat and deplog.dat',
        'Conventions': 'CF-1.6',
        'date_created': datetime.now().isoformat(),
        'publisher_name': 'GDP Drifter DAC',
        'publisher_email': 'aoml.dftr@noaa.gov',
        'publisher_url': 'https://www.aoml.noaa.gov/phod/gdp',
        'licence': 'MIT License',
        'processing_level': 'Level 2 QC by GDP drifter DAC',
        'metadata_link': 'https://www.aoml.noaa.gov/phod/dac/dirall.html',
        'contributor_name': 'NOAA Global Drifter Program',
        'contributor_role': 'Data Acquisition Center',
        'institution': 'NOAA Atlantic Oceanographic and Meteorological Laboratory',
        'acknowledgement': 'Elipot et al. (2016). Global Drifter Program quality-controlled hourly interpolated data from ocean surface drifting buoys, version 1.04. NOAA National Centers for Environmental Information. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2016JC011716TBA. Accessed [date].',
        'doi': '10.1002/2016JC011716TBA',
        'summary': 'Global Drifter Program hourly data'
    }
)

# Dates are written as seconds since the Unix epoch.
ds_traj.deploy_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"
ds_traj.end_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"
ds_traj.drogue_lost_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"

## Data

In [46]:
# Observation-only dataset: one value per observation (dimension 'obs').
# It is bound to `ds_obs`, the name the save cell uses for the split data file.
ds_obs = xr.Dataset(
    data_vars=dict(
        ve=(['obs'], ve, {"long_name": "Eastward velocity", "units":"m/s"}),
        vn=(['obs'], vn, {"long_name": "Northward velocity", "units":"m/s"}),
        err_lat=(['obs'], err_lat, {"long_name": "Standard error in latitude", "units":"degrees_north"}),
        err_lon=(['obs'], err_lon, {"long_name": "Standard error in longitude", "units":"degrees_east"}),
        err_ve=(['obs'], err_ve, {"long_name": "Standard error in eastward velocity", "units":"m/s"}),
        err_vn=(['obs'], err_vn, {"long_name": "Standard error in northward velocity", "units":"m/s"}),

        # sst: each variable uses its own array (all of them previously
        # pointed at err_vn with velocity units)
        # NOTE(review): long names and Kelvin units follow the GDP hourly SST
        # product description; confirm against the attributes of the source files.
        sst=(['obs'], sst, {"long_name": "Fitted sea water temperature", "units":"Kelvin"}),
        sst1=(['obs'], sst1, {"long_name": "Fitted non-diurnal sea water temperature", "units":"Kelvin"}),
        sst2=(['obs'], sst2, {"long_name": "Fitted diurnal sea water temperature anomaly", "units":"Kelvin"}),
        err_sst=(['obs'], err_sst, {"long_name": "Standard uncertainty of fitted sea water temperature", "units":"Kelvin"}),
        err_sst1=(['obs'], err_sst1, {"long_name": "Standard uncertainty of fitted non-diurnal sea water temperature", "units":"Kelvin"}),
        err_sst2=(['obs'], err_sst2, {"long_name": "Standard uncertainty of fitted diurnal sea water temperature anomaly", "units":"Kelvin"}),
        flg_sst=(['obs'], flg_sst, {"long_name": "Fitted sea water temperature quality flag", "units":""}),
        flg_sst1=(['obs'], flg_sst1, {"long_name": "Fitted non-diurnal sea water temperature quality flag", "units":""}),
        flg_sst2=(['obs'], flg_sst2, {"long_name": "Fitted diurnal sea water temperature anomaly quality flag", "units":""}),

        # gap is held as datetime64[ns]; subtracting the epoch turns it into
        # a duration in nanoseconds
        gap=(['obs'], (gap-np.datetime64('1970-01-01')), {"long_name": "time interval between previous and next location"}),
    ),

    coords=dict(
        longitude=(['obs'], longitude, {"long_name": "Longitude", "units":"degrees_east"}),
        latitude=(['obs'], latitude, {"long_name": "Latitude", "units":"degrees_north"}),
        time=(['obs'], time, {"long_name": "Time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        ids=(['obs'], ids, {"long_name": "Trajectory index of vars['traj'] for all observations", "units":""}),
    ),

    attrs={
        'title': 'Global Drifter Program hourly drifting buoy collection',
        'id': 'Global Drifter Program ID 13555',
        'history': 'Version 1.04.  Metadata from dirall.dat and deplog.dat',
        'Conventions': 'CF-1.6',
        'date_created': datetime.now().isoformat(),
        'publisher_name': 'GDP Drifter DAC',
        'publisher_email': 'aoml.dftr@noaa.gov',
        'publisher_url': 'https://www.aoml.noaa.gov/phod/gdp',
        'licence': 'MIT License',
        'processing_level': 'Level 2 QC by GDP drifter DAC',
        'metadata_link': 'https://www.aoml.noaa.gov/phod/dac/dirall.html',
        'contributor_name': 'NOAA Global Drifter Program',
        'contributor_role': 'Data Acquisition Center',
        'institution': 'NOAA Atlantic Oceanographic and Meteorological Laboratory',
        'acknowledgement': 'Elipot et al. (2016). Global Drifter Program quality-controlled hourly interpolated data from ocean surface drifting buoys, version 1.04. NOAA National Centers for Environmental Information. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2016JC011716TBA. Accessed [date].',
        'doi': '10.1002/2016JC011716TBA',
        'summary': 'Global Drifter Program hourly data'
    }
)

ds_obs.gap.encoding['units'] = "nanoseconds"
ds_obs.time.encoding['units'] = "seconds since 1970-01-01 00:00:00"

# Keep the original name `ds` bound as well so existing references to it
# continue to work.
ds = ds_obs

# Combined Metadata and Data

In [None]:
# Combined dataset: per-trajectory metadata (dimension 'traj') together with
# the per-observation data (dimension 'obs').
# It is bound to `ds`, not `ds_traj`: the encoding assignments below and the
# save cell use `ds` for the combined dataset, and reusing `ds_traj` would
# overwrite the metadata-only dataset.
# NOTE(review): the sst variables are not part of this combined dataset;
# confirm whether that is intended.
ds = xr.Dataset(
    data_vars=dict(
        rowsize=(['traj'], rowsize, {"long_name": "Number of observations per trajectory", "units":""}), # don't really need anymore
        imei=(['traj'], imei, {"long_name": "International Mobile Equipment Identity", "units":""}),
        location_type=(['traj'], location_type, {"long_name": "Argos (0), GPS (1)", "units":""}),
        WMO=(['traj'], wmo, {"long_name": "World Meteorological Organization buoy identification number", "units":""}),
        expno=(['traj'], expno, {"long_name": "Experiment number", "units":""}),
        deploy_date=(['traj'], deploy_date, {"long_name": "Deployment date and time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        deploy_lon=(['traj'], deploy_lon, {"long_name": "Deployment longitude", "units":"degrees_east"}),
        deploy_lat=(['traj'], deploy_lat, {"long_name": "Deployment latitude", "units":"degrees_north"}),
        end_date=(['traj'], end_date, {"long_name": "End date and time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        end_lat=(['traj'], end_lat, {"long_name": "End latitude", "units":"degrees_north"}),
        end_lon=(['traj'], end_lon, {"long_name": "End longitude", "units":"degrees_east"}),
        drogue_lost_date=(['traj'], drogue_lost_date, {"long_name": "Date and time of drogue loss"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        type_death=(['traj'], type_death, {"long_name": "Type of death (0=buoy still alive, 1=buoy ran aground, 2=picked up by vessel, 3=stop transmitting, 4=sporadic transmissions, 5=bad batteries, 6=inactive status)", "units":""}),
        type_buoy=(['traj'], type_buoy, {"long_name": "Buoy type (see https://www.aoml.noaa.gov/phod/dac/dirall.html)", "units":""}),
        DeploymentShip=(['traj'], deployment_ship, {"long_name": "Name of deployment ship", "units":""}),
        DeploymentStatus=(['traj'], deployment_status, {"long_name": "Deployment status", "units":""}),
        BuoyTypeManufacturer=(['traj'], buoy_type_manufacturer, {"long_name": "Buoy type manufacturer", "units":""}),
        BuoyTypeSensorArray=(['traj'], buoy_type_sensor_array, {"long_name": "Buoy type sensor array", "units":""}),
        CurrentProgram=(['traj'], current_program, {"long_name": "Current Program", "units":""}),
        PurchaserFunding=(['traj'], purchaser_funding, {"long_name": "Purchaser funding", "units":""}),
        SensorUpgrade=(['traj'], sensor_upgrade, {"long_name": "Sensor upgrade", "units":""}),
        Transmissions=(['traj'], transmissions, {"long_name": "Transmissions", "units":""}),
        DeployingCountry=(['traj'], deploying_country, {"long_name": "Deploying country", "units":""}),
        DeploymentComments=(['traj'], deployment_comments, {"long_name": "Deployment comments", "units":""}),
        ManufactureYear=(['traj'], manufacture_year, {"long_name": "Manufacture year", "units":""}),
        ManufactureMonth=(['traj'], manufacture_month, {"long_name": "Manufacture month", "units":""}),
        # long_name previously read "Manufacture voltage" (copy/paste from the next line)
        ManufactureSensorType=(['traj'], manufacture_sensor_type, {"long_name": "Manufacture sensor type", "units":""}),
        ManufactureVoltage=(['traj'], manufacture_voltage, {"long_name": "Manufacture voltage", "units":""}),
        FloatDiameter=(['traj'], float_diameter, {"long_name": "Diameter of surface floater", "units":"cm"}),
        SubsfcFloatPresence=(['traj'], subsfc_float_presence, {"long_name": "Subsurface Float Presence", "units":""}),
        # uses the drogue_type array (type_buoy was passed here by mistake) and
        # the standard "long_name" attribute key
        DrogueType=(['traj'], drogue_type, {"long_name": "Drogue Type", "units":""}),
        DrogueLength=(['traj'], drogue_length, {"long_name": "Length of drogue.", "units":"m"}),
        DrogueBallast=(['traj'], drogue_ballast, {"long_name": "Weight of the drogue's ballast.", "units":"kg"}),
        DragAreaAboveDrogue=(['traj'], drag_area_above_drogue, {"long_name": "Drag area above drogue.", "units":"m^2"}),
        DragAreaOfDrogue=(['traj'], drag_area_drogue, {"long_name": "Drag area drogue.", "units":"m^2"}),
        # a ratio of two areas is dimensionless
        DragAreaRatio=(['traj'], drag_area_ratio, {"long_name": "Drag area ratio", "units":""}),
        DrogueCenterDepth=(['traj'], drag_center_depth, {"long_name": "Average depth of the drogue.", "units":"m"}),
        DrogueDetectSensor=(['traj'], drogue_detect_sensor, {"long_name": "Drogue detection sensor", "units":""}),

        ve=(['obs'], ve, {"long_name": "Eastward velocity", "units":"m/s"}),
        vn=(['obs'], vn, {"long_name": "Northward velocity", "units":"m/s"}),
        err_lat=(['obs'], err_lat, {"long_name": "Standard error in latitude", "units":"degrees_north"}),
        err_lon=(['obs'], err_lon, {"long_name": "Standard error in longitude", "units":"degrees_east"}),
        err_ve=(['obs'], err_ve, {"long_name": "Standard error in eastward velocity", "units":"m/s"}),
        err_vn=(['obs'], err_vn, {"long_name": "Standard error in northward velocity", "units":"m/s"}),

        # gap is held as datetime64[ns]; subtracting the epoch turns it into
        # a duration in nanoseconds
        gap=(['obs'], (gap-np.datetime64('1970-01-01')), {"long_name": "time interval between previous and next location"}),
    ),

    coords=dict(
        ID=(['traj'], id, {"long_name": "Global Drifter Program Buoy ID", "units":""}),
        longitude=(['obs'], longitude, {"long_name": "Longitude", "units":"degrees_east"}),
        latitude=(['obs'], latitude, {"long_name": "Latitude", "units":"degrees_north"}),
        time=(['obs'], time, {"long_name": "Time"}),#, "units":"seconds since 1970-01-01 00:00:00 UTC"}),
        ids=(['obs'], ids, {"long_name": "Trajectory index of vars['traj'] for all observations", "units":""}),
    ),

    attrs={
        'title': 'Global Drifter Program hourly drifting buoy collection',
        'id': 'Global Drifter Program ID 13555',
        'history': 'Version 1.04.  Metadata from dirall.dat and deplog.dat',
        'Conventions': 'CF-1.6',
        'date_created': datetime.now().isoformat(),
        'publisher_name': 'GDP Drifter DAC',
        'publisher_email': 'aoml.dftr@noaa.gov',
        'publisher_url': 'https://www.aoml.noaa.gov/phod/gdp',
        'licence': 'MIT License',
        'processing_level': 'Level 2 QC by GDP drifter DAC',
        'metadata_link': 'https://www.aoml.noaa.gov/phod/dac/dirall.html',
        'contributor_name': 'NOAA Global Drifter Program',
        'contributor_role': 'Data Acquisition Center',
        'institution': 'NOAA Atlantic Oceanographic and Meteorological Laboratory',
        'acknowledgement': 'Elipot et al. (2016). Global Drifter Program quality-controlled hourly interpolated data from ocean surface drifting buoys, version 1.04. NOAA National Centers for Environmental Information. https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2016JC011716TBA. Accessed [date].',
        'doi': '10.1002/2016JC011716TBA',
        'summary': 'Global Drifter Program hourly data'
    }
)

# Dates and times are written as seconds since the Unix epoch, the gap as a
# duration in nanoseconds.
ds.deploy_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"
ds.end_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"
ds.drogue_lost_date.encoding['units'] = "seconds since 1970-01-01 00:00:00"
ds.gap.encoding['units'] = "nanoseconds"
ds.time.encoding['units'] = "seconds since 1970-01-01 00:00:00"

In [48]:
# Write the datasets to netCDF files in the 'process' folder.
# save to one giant netcdf
ds.to_netcdf('process/gdp_v2.00.nc')

# split metadata/data files
# NOTE(review): make sure `ds_traj` still holds the metadata-only dataset and
# that `ds_obs` has been assigned before running this cell - confirm against
# the cells above.
ds_traj.to_netcdf('process/gdp_v2.00_traj.nc')
ds_obs.to_netcdf('process/gdp_v2.00_obs.nc')

# and to zarr
#ds_meta.to_zarr('../data/process/gdp_1.04c_meta', consolidated=True)
#ds.to_zarr('../data/process/gdp_1.04c', consolidated=True)