In [None]:
import os
import sys
sys.path.append(r'C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\ADV')
import pandas as pd
import numpy as np
from vector import Vector
from datetime import datetime

In [2]:
# Define data

# Data locations and filenames ---------------------------------------------------------------------------------
# location of raw data: one folder per ADV, listed in the same order as name_all
dataFolder_all = [ r'O:\HybridDune experiment\data ADV, OBS\ADV RWS1\Deployment 1, until 23dec',
                   r'O:\HybridDune experiment\data ADV, OBS\ADV RWS2\Deployment 1, until 23dec',
                   r'O:\HybridDune experiment\data ADV, OBS\ADV RWS3\S3, Deployment1, until dec23',
                   r'O:\HybridDune experiment\data ADV, OBS\ADV RWS4\Deployment until 23dec',
                   r'O:\HybridDune experiment\data ADV, OBS\ADV TUD10\Deployment until 23dec' ]

# name passed to the Vector constructor for each ADV
# (the processing loop later writes vec.name to the netCDF 'title' attribute)
name_all = [ 'S1ADV1 raw data - period 1',
             'S2ADV1 raw data - period 1',
             'S3ADV1 raw data - period 1',
             'S4ADV1 raw data - period 1',
             'S3ADV2 raw data - period 1'  ]

# start time from which to read data (must be later than the first recorded time)
tstart = '2024-12-13 09:00:00'  # Start date of all ADVs 

# stop time until which to read data, one entry per ADV (must be earlier than the last recorded time)
tstop_all = [ '2024-12-23 19:00:00', # last full hour of ADV
              '2024-12-26 04:00:00', # last full hour of ADV (NB: some incomplete hours available after this time. empty battery?)
              '2024-12-27 16:00:00', # last full hour of ADV  
              '2024-12-27 17:00:00', # last available air pressure data (last full hour of ADV was 30-Dec 10:00, some incomplete hours afterwards)
              '2024-12-23 04:00:00'] # last full hour of ADV (NB: some incomplete hours available after this time. empty battery?)            
# installation time per ADV; stored in the netCDF as variable t_installed
t_installed_all = ['2024-12-17 09:00:00', # datetime that ADV was installed at location at the beach (time before this can be used to calibrate pressure sensors, but is otherwise useless)
                   '2024-12-17 09:00:00',
                   '2024-12-16 15:00:00',
                   '2024-12-17 09:00:00',
                   '2024-12-17 10:00:00'] 
# removal time per ADV; stored in the netCDF as variable t_removed
t_removed_all = [ '2024-12-23 10:00:00',
                  '2024-12-26 11:00:00',
                  '2024-12-27 11:00:00',
                  '2024-12-27 11:00:00',
                  '2024-12-23 04:00:00']

# Output file name per ADV, derived from name_all so that the two lists cannot drift apart.
# Pattern: 'ADV <name>.nc', e.g. 'ADV S1ADV1 raw data - period 1.nc'
filename_out_all = [ 'ADV {}.nc'.format(name_ADV) for name_ADV in name_all ]

# Output directory for the netCDF files. It is created (including missing parent folders) when absent.
# exist_ok=True makes the call idempotent: the cell can be re-run, and there is no gap between
# checking for the folder and creating it.
ncOutDir = r'O:\HybridDune experiment\data ADV, OBS\raw NetCDF'
os.makedirs(ncOutDir, exist_ok=True)

# metadata: position/elevation sensor and bed ------------------------------------------------------------------------
# x, y location of each ADV in RD coordinates [m]; same ADV order as name_all
xRD_all = [  72471.921,  72455.816,  72439.635,  72425.352,  72404.520] # S1ADV1, S2ADV1, S3ADV1, S4ADV1, S3ADV2
yRD_all = [ 452143.646, 452121.479, 452099.868, 452080.129, 452125.233]

# bed level at each ADV [m NAP]; zb_i# pairs element-wise with the measurement times in t_zb_i#
# (values are laid out in columns per survey date; blank columns presumably mean no measurement for that ADV on that date)
zb_i1 = np.array([          0.700,               0.633,                                    0.488,               0.488,              0.468             ]) # S1ADV1  [m NAP]
zb_i2 = np.array([          0.673,               0.628,                                    0.551,               0.521,              0.451             ]) # S2ADV1
zb_i3 = np.array([          0.665,               0.620,                                    0.675,               0.505,              0.437             ]) # S3ADV1
zb_i4 = np.array([          0.671,               0.611,                                    0.631,               0.456,              0.376             ]) # S4ADV1
zb_i5 = np.array([         -0.989,                                   -0.614,              -0.776,                                   -0.568            ]) # S3ADV2

# times at which the bed levels above were measured (same number of entries as the matching zb_i#)
t_zb_i1 = pd.to_datetime([ '2024-12-17 11:30',  '2024-12-19 12:00',                       '2024-12-21 14:00',  '2024-12-22 15:30',  '2024-12-23 12:00']) 
t_zb_i2 = pd.to_datetime([ '2024-12-17 11:30',  '2024-12-19 12:00',                       '2024-12-21 14:00',  '2024-12-22 15:30',  '2024-12-23 12:00']) 
t_zb_i3 = pd.to_datetime([ '2024-12-17 11:30',  '2024-12-19 12:00',                       '2024-12-21 14:00',  '2024-12-22 15:30',  '2024-12-23 12:00']) 
t_zb_i4 = pd.to_datetime([ '2024-12-17 11:30',  '2024-12-19 12:00',                       '2024-12-21 14:00',  '2024-12-22 15:30',  '2024-12-23 12:00']) 
t_zb_i5 = pd.to_datetime([ '2024-12-17 11:30',                       '2024-12-20 12:00',  '2024-12-21 14:00',                       '2024-12-23 12:00']) 

# Instrument: elevation, orientation
# ti_i#: times at which the instrument elevations (zi, zi_p, zi_OBShigh, zi_OBSlow) were measured.
# The orientation theta has its own measurement times, see t_theta_i# further below.
ti_i1 = pd.to_datetime([  '2024-12-17 11:30', '2024-12-21 14:00'                    ])  # datetime of zi, zi_p and zi_OBS
ti_i2 = pd.to_datetime([  '2024-12-17 11:30', '2024-12-21 14:00'                    ])  
ti_i3 = pd.to_datetime([  '2024-12-17 11:30',                     '2024-12-22 15:30'])  
ti_i4 = pd.to_datetime([  '2024-12-17 11:30', '2024-12-21 14:00', '2024-12-22 15:30'])  
ti_i5 = pd.to_datetime([  '2024-12-17 11:30', '2024-12-21 14:00'])  

# one value per entry of the matching ti_i#
zi_i1 = np.array([         0.953,              0.731                                ]) # NAP elevation of ADV measurement volume
zi_i2 = np.array([         0.906,              0.794                                ])
zi_i3 = np.array([         0.908,                                   0.750           ])
zi_i4 = np.array([         0.924,              0.874,               0.699           ])
zi_i5 = np.array([        -0.736,             -0.535                                ]) 

zi_OBShigh_i1 = np.array([ 0.995,              0.778                                ]) # NAP elevation of highest OBS sensor
zi_OBShigh_i2 = np.array([ 0.953,              0.841                                ])
zi_OBShigh_i3 = np.array([ 0.954,                                   0.797           ])
zi_OBShigh_i4 = np.array([ 0.985,              0.921,               0.746           ])
zi_OBShigh_i5 = np.array([-0.704,             -0.488                                ]) 

# pressure sensor elevation is derived from zi, not measured separately
zi_p_i1      = zi_i1 + 0.157 + 0.22 # NAP elevation of pressure sensor (measurement volume is 15.7 cm below ADV head, pressure sensor 22 cm above head, if sensor vertical)
zi_p_i2      = zi_i2 + 0.157 + 0.22
zi_p_i3      = zi_i3 + 0.157 + 0.22
zi_p_i4      = zi_i4 + 0.157 + 0.22
zi_p_i5      = zi_i5 + 0.157 + 0.22

# low OBS elevation is derived from the high OBS elevation
zi_OBSlow_i1 = zi_OBShigh_i1 - 0.08 # for all sensors 8 cm below high OBS
zi_OBSlow_i2 = zi_OBShigh_i2 - 0.08
zi_OBSlow_i3 = zi_OBShigh_i3 - 0.08
zi_OBSlow_i4 = zi_OBShigh_i4 - 0.08
zi_OBSlow_i5 = zi_OBShigh_i5 - 0.08

# orientation of ADV (marked leg with respect to north, clockwise positive, in degrees)
# t_theta_i# and theta_i# pair element-wise: one orientation value per measurement time
t_theta_i1 = pd.to_datetime([ '2024-12-17 09:00',  '2024-12-22 18:00'                                           ])  # time that ADV orientation was measured
t_theta_i2 = pd.to_datetime([ '2024-12-17 09:00',  '2024-12-22 23:00',  '2024-12-23 07:00'                      ])
t_theta_i3 = pd.to_datetime([ '2024-12-16 15:00',  '2024-12-22 15:21'                                           ])
t_theta_i4 = pd.to_datetime([ '2024-12-17 09:00',  '2024-12-20 05:00',  '2024-12-21 11:00',  '2024-12-22 15:34' ])
t_theta_i5 = pd.to_datetime([ '2024-12-17 10:00',  '2024-12-21 12:00',  '2024-12-22 11:00'                      ])

theta_i1   = np.array([       304.9,               312.6                                                        ]) # degrees
theta_i2   = np.array([       308.5,               314.5,               338.2                                   ])
theta_i3   = np.array([       312.6,               304.8                                                        ])
theta_i4   = np.array([       310.3,               315.7,               309.8,                306.2             ])
theta_i5   = np.array([       306.3,               303.7,               307.5                                   ])

# Translate and rotate the RD positions into the local coordinate system:
# subtract the RD position of the local origin, then apply a 2x2 rotation over 36 degrees.
a = np.deg2rad(36)
cos_a, sin_a = np.cos(a), np.sin(a)
transformation_matrix = np.array([[ cos_a, sin_a],
                                  [-sin_a, cos_a]])
xy_RD = np.array([xRD_all, yRD_all]).T                                  # one row per ADV: [x_RD, y_RD]
xy_loc = (xy_RD - [71683.584, 452356.055]) @ transformation_matrix     # one row per ADV: [x_loc, y_loc]
x_loc_all, y_loc_all = xy_loc.T                                         # split the two columns into per-ADV vectors

# Metadata: rest ------------------------------------------------------------------------------------------------------
# One entry per ADV, same order as name_all
serial_number_all = ['VEC13638', 'VEC14793', 'VEC14808', 'VEC13625', 'VEC13933']
# NOTE(review): 'Cambell' is presumably meant to be 'Campbell'; this string is written to the netCDF
# global attribute 'connected OBS type', so confirm before changing it
OBS_type_all      = ['Cambell OBS3+', 'Cambell OBS3+', 'Cambell OBS3+', 'Cambell OBS3+', 'Seapoint STM'] 
offset_all        = [ 102718,     97849,      98378,      99549,      104195   ] # offset to add to pressure sensor data to convert to absolute pressure [Pa]

In [3]:
# Convert the raw data of every ADV to one netCDF file each: read and block the raw data, calibrate the
# pressure, attach global attributes and position/orientation metadata, and write a compressed netCDF file.

# Per-ADV survey arrays, collected in the same order as name_all (S1ADV1, S2ADV1, S3ADV1, S4ADV1, S3ADV2) so
# the loop can index them directly instead of building variable names for eval().
# They are kept as separate arrays because the number of measurements differs per ADV.
zb_per_ADV         = (zb_i1,         zb_i2,         zb_i3,         zb_i4,         zb_i5)
t_zb_per_ADV       = (t_zb_i1,       t_zb_i2,       t_zb_i3,       t_zb_i4,       t_zb_i5)
zi_per_ADV         = (zi_i1,         zi_i2,         zi_i3,         zi_i4,         zi_i5)
t_zi_per_ADV       = (ti_i1,         ti_i2,         ti_i3,         ti_i4,         ti_i5)
zi_OBShigh_per_ADV = (zi_OBShigh_i1, zi_OBShigh_i2, zi_OBShigh_i3, zi_OBShigh_i4, zi_OBShigh_i5)
zi_OBSlow_per_ADV  = (zi_OBSlow_i1,  zi_OBSlow_i2,  zi_OBSlow_i3,  zi_OBSlow_i4,  zi_OBSlow_i5)
zi_p_per_ADV       = (zi_p_i1,       zi_p_i2,       zi_p_i3,       zi_p_i4,       zi_p_i5)
theta_per_ADV      = (theta_i1,      theta_i2,      theta_i3,      theta_i4,      theta_i5)
t_theta_per_ADV    = (t_theta_i1,    t_theta_i2,    t_theta_i3,    t_theta_i4,    t_theta_i5)

for n_ADV in range(len(name_all)):
    # DEFINE FILE NAME, METADATA, ETC
    # file name etc.  (tstart is the same for all ADVs, does not need to be defined in loop)
    name          = name_all[n_ADV]
    dataFolder    = dataFolder_all[n_ADV]
    tstop         = tstop_all[n_ADV]
    filename_out  = filename_out_all[n_ADV]

    # Metadata
    serial_number = serial_number_all[n_ADV]
    OBS_type      = OBS_type_all[n_ADV]
    offset        = offset_all[n_ADV]
    xRD           = xRD_all[n_ADV]
    yRD           = yRD_all[n_ADV]
    x_loc         = x_loc_all[n_ADV]
    y_loc         = y_loc_all[n_ADV]
    t_installed   = t_installed_all[n_ADV]
    t_removed     = t_removed_all[n_ADV]

    # Instrument, bed heights + corresponding times
    zb            = zb_per_ADV[n_ADV]          # bed level [m NAP]
    t_zb          = t_zb_per_ADV[n_ADV]        # time that zb was measured
    zi            = zi_per_ADV[n_ADV]          # NAP elevation of ADV measurement volume
    t_zi          = t_zi_per_ADV[n_ADV]        # time that zi, zi_OBShigh, zi_OBSlow and zi_p apply to
    zi_OBShigh    = zi_OBShigh_per_ADV[n_ADV]  # NAP elevation of highest OBS sensor, measured at same time as zi
    zi_OBSlow     = zi_OBSlow_per_ADV[n_ADV]   # NAP elevation of lowest OBS sensor
    zi_p          = zi_p_per_ADV[n_ADV]        # NAP elevation of pressure sensor
    theta         = theta_per_ADV[n_ADV]       # orientation of ADV (marked leg with respect to north, clockwise positive, in degrees)
    t_theta       = t_theta_per_ADV[n_ADV]     # time that theta was measured

    # IMPORT RAW DATA ---------------------------------------------------------------------------------
    vec = Vector(name, dataFolder, tstart=tstart, tstop=tstop)

    # reads the raw data from tstart to tstop and casts all data in a pandas DataFrame that is stored under vec.dfpuv.
    # in case there is no data between tstart and tstop the DataFrame is not instantiated
    vec.read_raw_data()

    # break up the data into burst blocks
    # NB: because data is measured in bursts, with 10s missing per hour, the function will always cast exactly in these burst, no matter what blockwidth is selected
    vec.cast_to_blocks_in_xarray(blockWidth=3590) # 3590 s = 1 hr

    # compute burst averages (make sure to read vector.py what is happening exactly!)
    vec.compute_block_averages()

    # all data is collected in an xarray Dataset ds. We extract this from the class instantiation and
    # we can easily write it to netCDF
    ds = vec.ds

    # CALIBRATE PRESSURE ---------------------------------------------------------------------------------
    ds.p.values[ds.p.values == 0] = np.nan       # make p nan where P=0: absolute pressure unknown: either be equal to offset or lower!
    ds.p.values = ds.p.values + offset  # add offset for calibration, to obtain absolute pressure

    # ADD METADATA: GLOBAL ATTRIBUTES AND METADATA VARIABLES ---------------------------------------------------
    # Update pressure metadata
    if n_ADV == 4: # S3ADV2 had a different calibration period and method: during the regular calibration period, the atmospheric pressure was below the minimum presure it could measure
        cal_text = '{}'.format(offset) + ' Pa added to raw pressure to obtain absolute pressure, based on comparison with reference pressure sensor S3.P2 and S3.P3, for calibratinon period 17 Dec 2024, 16:00-18:00'

    else:
        cal_text = '{}'.format(offset) + ' Pa added to raw pressure to obtain absolute pressure, based on comparison with reference pressure sensor ref.P1 during the calibration period (16dec, 21:00-22:00, when ADV S1.ADV1-S4.ADV1 were all dry)'
    ds.p.attrs.update({'comments': 'see variable zi_p for the NAP elevation of the pressure sensor', 'calibration': cal_text})

    # Update OBS variables metadata
    ds.anl1.attrs.update({'long_name': 'analog input 1: OBShigh', 'comment': 'unitless counts, 0-65535'})
    ds.anl2.attrs.update({'long_name': 'analog input 2: OBSlow', 'comment': 'unitless counts, 0-65535'})

    # Global attributes (this replaces any global attributes already present on ds)
    ds.attrs = {'Conventions': 'CF-1.6',
                'title': '{}'.format(vec.name),
                'instrument': 'Nortek Vector',
                'instrument serial number': serial_number,
                'connected OBS type': OBS_type,
                'time zone': 'UTC+1',
                'coordinate type': 'XYZ',
                'summary': 'hybrid-Dune field campaign',
                'contact person': 'Daan Poppema',
                'emailadres': 'd.w.poppema@tudelft.nl',
                'construction datetime': datetime.now().strftime("%d-%b-%Y (%H:%M:%S)"),
                'version': 'v1',
                'version comments': 'constructed with xarray'}

    # Add instrument variables for metadata (position etc are saved as variables, to make it easier to explain their meaning in the netCDF attributes)
    # Scalar variables
    ds['x_RD'] = xRD                                         # x position of instrument, in RDNAP coordinates [m]
    ds['y_RD'] = yRD                                         # y position of instrument, in RDNAP coordinates [m]
    ds['x_local'] = x_loc                                    # x position of instrument, in local coordinate system [m]
    ds['y_local'] = y_loc                                    # y position of instrument, in local coordinate system [m]
    ds['t_installed'] = t_installed                          # time that the instrument was installed at the indicated height and location at the beach
    ds['t_removed'] = t_removed                              # time that the instrument was removed

    # Matrix variables: position, orientation (measured at multiple times, so vectors instead of scalars)
    # Syntax: first add the dimension (a bare 1-D array assigned under its own name becomes a dimension coordinate),
    # then assign each variable to that dimension with a (dimension name, values) tuple
    ds['t_theta'] = t_theta                              # time that theta was measured
    ds['theta'] = ('t_theta',theta)                      # orientation of ADV (marked leg with respect to north, clockwise positive, in degrees)
    ds['t_zb'] = t_zb                                    # time that zb was measured
    ds['zb'] = ('t_zb',zb)                               # bed level [m NAP]
    ds['t_zi'] = t_zi                                    # time that sensor height was measured (measurement volume, pressure sensor and OBSs)
    ds['zi'] = ('t_zi',zi)                               # NAP elevation of ADV measurement volume; must be tied to t_zi, a bare array would create a separate 'zi' dimension
    ds['zi_OBShigh'] = ('t_zi',zi_OBShigh)               # NAP elevation of highest OBS sensor
    ds['zi_OBSlow'] = ('t_zi',zi_OBSlow)                 # NAP elevation of lowest OBS sensor
    ds['zi_p'] = ('t_zi',zi_p)                           # NAP elevation of pressure sensor

    # Add attributes to metadata variables (the attribute key is 'long_name' throughout, as used for the other variables)
    local_coord_sys  = 'x=cross-shore (positive=landward); y=alongshore (positive is to north-east); (800,200) is the southern seaward corner of the containers'
    coord_conv       = '(0,0) local is (71683.584,452356.055) RD coordinates; local x-axis is 36° clockwise from RD x-axis; i.e. [x_loc y_loc] = [x_RD y_RD] - [x0 y0] .* [cosd(36) sind(36); -sind(36) cosd(36)]'
    ds.x_RD.attrs    = {'units': 'm', 'long_name': 'x position of instrument in RDNAP coordinates', 'epsg': 28992} # epsg: RD new
    ds.y_RD.attrs    = {'units': 'm', 'long_name': 'y position of instrument in RDNAP coordinates', 'epsg': 28992}
    ds.x_local.attrs = {'units': 'm', 'long_name': 'cross-shore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    ds.y_local.attrs = {'units': 'm', 'long_name': 'alongshore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    ds['t_installed'].attrs = {'long_name': 'date and time that the instrument was installed at the indicated height and location at the beach'}
    ds['t_removed'].attrs = {'long_name': 'date and time that the instrument was removed'}

    ds.theta.attrs      = {'units': 'degrees', 'long_name': 'ADV orientation', 'definition': 'orientation of ADV x-pod with respect to north, clockwise positive', 'comment': 'measured manually with RTK GPS. For orientaton as continuously measured by vector, see heading, pitch, roll variables'}
    ds.zb.attrs         = {'units': 'm +NAP', 'long_name': 'bed level'}
    ds.zi.attrs         = {'units': 'm +NAP', 'long_name': 'instrument elevation: ADV measurement volume'}  # instrument height
    ds.zi_OBShigh.attrs = {'units': 'm +NAP', 'long_name': 'instrument elevation: high OBS sensor'}
    ds.zi_OBSlow.attrs  = {'units': 'm +NAP', 'long_name': 'instrument elevation: low OBS sensor'}
    ds.zi_p.attrs       = {'units': 'm +NAP', 'long_name': 'instrument elevation: pressure sensor'}
    ds.t_theta.attrs    = {'long_name': 'time that instrument orientation was measured'}
    ds.t_zb.attrs       = {'long_name': 'time that bed level at instrument was measured'}
    ds.t_zi.attrs       = {'long_name': 'time that instrument elevation was measured (measurement volume, pressure sensor and OBSs)'}

    # Save the dataset to netCDF --------------------------------------------------------
    # For compression, define a custom encoding dictionary for the ADV variables, to save variables with the same accuracy (same number of decimals) as the
    # original text file with data
    # NOTE(review): anl1/anl2 are packed as uint16 without a _FillValue; if they can contain NaN, those NaNs have no
    # defined representation in the file - confirm, and add a fill value (or a wider dtype) if needed
    encoding = {'p': { 'scale_factor': 10.0, 'dtype': 'uint16', '_FillValue': 0, 'add_offset': offset, 'shuffle': False}, #NB: scale factor 10.0, not 10, to make unpacked p float (able to contain NaN)
                'u': { 'scale_factor': 0.001, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},  # three decimals originally, so scale factor 0.001. max value is 7m/s, with 3 decimals is 7000 options, so int16 scale of ± 32767 is sufficient
                'v': { 'scale_factor': 0.001, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},  # shuffle: flag for bit order. I just tried for which variables it saves data. (default flag is True when using deflate compression)
                'w': { 'scale_factor': 0.001, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'anl1': { 'dtype': 'uint16'},
                'anl2': { 'dtype': 'uint16'},
                'a1': { 'dtype': 'int16', '_FillValue': -9999},
                'a2': { 'dtype': 'int16', '_FillValue': -9999},
                'a3': { 'dtype': 'int16', '_FillValue': -9999},
                'cor1': { 'dtype': 'int8', '_FillValue': -99},
                'cor2': { 'dtype': 'int8', '_FillValue': -99},
                'cor3': { 'dtype': 'int8', '_FillValue': -99},
                'snr1': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'snr2': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'snr3': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'voltage': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'heading': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999},
                'pitch': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'roll': { 'scale_factor': 0.1, 'dtype': 'int16', '_FillValue': -9999, 'shuffle': False},
                'burst': { 'scale_factor': 1.0, 'dtype': 'int16', '_FillValue': -9999} }

    # Then extend the dictionary: add deflate compression level 4 to all variables and coordinates in netCDF, without overwriting existing keys
    compression = {var: {"zlib": True, "complevel": 4} for var in list(ds.data_vars) + list(ds.coords)}  # temporary dict, with only compression settings
    for var, comp in compression.items():  # for each variable in the dataset,
        if var in encoding:                # if the variable already has an encoding, update it with the compression settings
            encoding[var].update(comp)
        else:                              # if the variable does not have an encoding yet, add it
            encoding[var] = comp
    ds.encoding = encoding  # add the encoding to the dataset (not really necessary, but allows retrieval later on)

    ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)

.dat file was read
.sen file was read


  df3 = df3.fillna(method='ffill')
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)


.dat file was read
.sen file was read


  df3 = df3.fillna(method='ffill')
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)


.dat file was read
.sen file was read


  df3 = df3.fillna(method='ffill')
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)


.dat file was read
.sen file was read


  df3 = df3.fillna(method='ffill')
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)


.dat file was read
.sen file was read


  df3 = df3.fillna(method='ffill')
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
  ds.to_netcdf(os.path.join(ncOutDir, filename_out), encoding=encoding)
