In [21]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


import xarray as xr
import matplotlib.pyplot as plt
import sys
# sys.path.append(r'C:\Users\ruro\OneDrive - Boskalis\Documents\python\OSSI')

sys.path.append(r'C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\Pressure_sensors\S1\RBR_05')
from scipy.signal import welch
from scipy.ndimage import uniform_filter1d
import puv 
from copy import copy


In [12]:
def solo_data_reader(dataFile, sf):
    '''
    Function to read solo datafile.
    Returns a dataframe with a time column and pressure column in Pascal
    '''
    p = []
    datt = []
    with open(dataFile) as myfile:
        for index, line in enumerate(myfile):
            if index >= 1:
                lin = line.split(',')
                datt.append(lin[0])
                p.append(float(lin[1]))
    p = np.array(p) * 1e4  # dBar to Pa

    t = pd.date_range(datt[0], periods=len(datt), freq='{}s'.format(1 / sf))

    dfp = pd.DataFrame(data={'p': p}, index=t)

    dfp.index.name = 't'
    return dfp

In [3]:
def movmean(x, N):
    '''
    Moving mean of a 1D series with window length N; the output has the same length as x.

    Near the edges the window is truncated and the mean is taken over the values that
    are actually available (instead of dividing by the full window length).
    For an even window the average is backward-looking, e.g. for N=2:
    x_m(i) = [x(i-1) + x(i)] / 2 and x_m(0) = x(0).

    Parameters
    ----------
    x : array-like, 1D
        Input series. Converted to a float array, so integer input is not truncated.
    N : int
        Window length in samples, 1 <= N <= len(x).

    Returns
    -------
    numpy.ndarray
        Float array with the moving mean.

    Raises
    ------
    ValueError
        If N is smaller than 1 or larger than the length of x.
    '''
    # uniform_filter1d returns an array of the input dtype: without the cast, integer
    # input would be rounded down before the edge compensation is applied
    x = np.asarray(x, dtype=float)
    n = len(x)
    if N < 1 or N > n:
        raise ValueError('window length N={} must be between 1 and len(x)={}'.format(N, n))

    # zero-padded moving mean: at the edges the sum of the available values is divided by N
    y = uniform_filter1d(x, size=N, mode='constant')

    # compensate edges for number of values, i.e. the truncated window length
    S1 = np.arange(np.ceil(N/2), N)          # growing window at the start
    S2 = np.ones(n-N+1)*N                    # full window in the interior
    S3 = np.arange(N-1, np.floor(N/2), -1)   # shrinking window at the end
    S = np.concatenate((S1, S2, S3))
    return y * N / S
    # for 2D inspiration, see https://stackoverflow.com/questions/23000260/numpy-two-dimensional-moving-average

In [None]:
# # LOAD RAW DATA, SAVE TO NETCDF
# Configuration for the raw-data conversion: physical constants, per-instrument settings
# (one list entry per instrument, all lists in the same instrument order) and the data folder.
# Physical constants -----------------------------------------------------------------------------------
# (rho and g are defined here but not used in the cells shown in this part of the notebook)
rho = 1027 #kg/m3, for seawater at 9C, avg temp at HKZ measurement station
g = 9.8125  # value Zandmotor

# # input parameters per file ----------------------------------
subfolder_in_all = ['refP1 RBR4', 'S1P3 RBR5', 'S2P3 RBR1', 'S3P3 RBR6', 'S4P3 RBR2','S1P2 RBR3'] # subfolder where file is sitting within experimentFolder (on O drive Daan)
instrumentname_all = subfolder_in_all  # instrument names are identical to the subfolder names (same list object)
namedatafile_all = ['S4 - 202441_20241227_1702_data.txt',       # name of the datafile
                    'S5 - 208681_20241227_1715_data.txt',
                    'RBR 1 - 202438_20241223_2203_data.txt',
                    'RBR6 - 208682_20241227_1744_data.txt',
                    'RBR2 - 202439_20241227_1624_data.txt',
                    'S3 - 202440_20241227_1627_data.txt']                                                          
# Column order of the tables below (same order as subfolder_in_all); entry 0 is the reference sensor:
#                   RBR4(ref),       RBR5,       RBR1,       RBR6,       RBR2,       RBR3
sf_all            = [       8,         16,          8,         16,          8,          8]              # [hz] sampling frequency
offset_all        = [    -597,       -225,        315,         30,        353,        124]              # [Pa] offset to be added to the pressure data, for instrument calibration
xRD_all           = [  np.nan,  72429.072,  72412.478,  72398.360,  72382.977,  72444.875]              # x position of placement in field
yRD_all           = [  np.nan, 452174.014, 452150.390, 452130.334, 452111.119, 452163.053]              # y position of placement in field
zi_all            = [  np.nan,     -0.463,     -0.630,     -0.704,     -0.910,      0.301]              # z position of instrument (sensor) in field
serial_number_all = ['202441',   '208681',   '202438',   '208682',   '202439',   '202440']              # unique serial number of the instrument

# Bed level at each instrument: one row per instrument (same order as above), one column per
# measurement time in t_zb_all. The first row (reference sensor) has no bed level.
zb_all = np.array([ [np.nan, np.nan, np.nan],
                    [-0.969, -0.880, -0.933],
                    [-1.142, -0.957, -0.910],
                    [-1.229, -1.103, -0.964],
                    [-1.291, -1.199, -1.120],
                    [-0.023, -0.029, -0.007] ])  
t_zb_all = pd.to_datetime(['2024-12-17 10:30',  # Applies to all RBRs except refP1 RBR4.
                           '2024-12-20 12:20',
                           '2024-12-23 12:00'])

# convert RD coordinates to local coordinates:
# subtract the local origin (71683.584, 452356.055) and rotate over 36 degrees
xy_RD = np.array([xRD_all, yRD_all]).T   # one row [x_RD, y_RD] per instrument
a = np.deg2rad(36)
transformation_matrix = np.array([ [np.cos(a), np.sin(a)],[-np.sin(a), np.cos(a)] ])
xy_loc = ( xy_RD - [71683.584, 452356.055] ) @ transformation_matrix
x_loc_all = xy_loc.T[0]   # local x per instrument (NaN for the reference sensor)
y_loc_all = xy_loc.T[1]   # local y per instrument (NaN for the reference sensor)

# # input parameters general ---------------------------------------------
experimentFolder = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1'                   # path where the data is sitting

# # Define input parameters for file i   (start loop) -----------------------
# For every selected instrument: read the raw text export, add position, bed level and instrument
# metadata, and prepare the dataset (rounding + compression settings) for saving as netCDF.
# Index 0 is the reference sensor; indices 1-5 are the sensors installed at the beach.
for i in [0]:#range(0,6):
    sf = sf_all[i]  #[hz] sampling frequency
    data_inDir = os.path.join(experimentFolder, subfolder_in_all[i])
    namedatafile = namedatafile_all[i]
    instrumentName = instrumentname_all[i]                                                              # designated name of the instrument
    xRD = xRD_all[i]                                                                                    # x position of placement in field
    yRD =  yRD_all[i]                                                                                  # y position of placement in field
    x_loc = x_loc_all[i]                                                                                # x position of instrument in local coordinate system [m]
    y_loc = y_loc_all[i]                                                                                # y position of instrument in local coordinate system [m]
    if i >= 1: # instrument 0 is reference sensor, instrument and bed height only applicable for sensors installed at beach
        zi = zi_all[i]
        zb = zb_all[i] 
        t_zb = t_zb_all  # bed height: n/a for iteration 0 (ref sensor), the same for all other RBRs
        t_zi =  pd.to_datetime('2024-12-17 10:30')

    # Do the reading from file and cast in xarray dataset ----------------
    dataFile =  os.path.join(data_inDir, namedatafile)                                                 # path + name datafile
    dfp = solo_data_reader(dataFile, sf)
    ds = dfp.to_xarray()

    # add dimension t_zb and variables zb, zi and t_zi, for bed level observations over time and instrument height
    if i >= 1: # instrument 0 is reference sensor, instrument and bed height only applicable for the remaining sensors
        ds['t_zb'] = t_zb # t_zb is a vector instead of scalar, so this syntax adds a dimension (instead of variable)
        ds['zb'] = ('t_zb',zb)                                   # bed level at the times t_zb [m NAP]
        ds['t_zi'] = t_zi                                        # time that zi was measured
        ds['zi'] = zi                                            # instrument height [m NAP]

    # Add instrument variables for metadata: location, frequency
    ds['x_RD'] = xRD                                         # x position of instrument, in RDNAP coordinates [m]
    ds['y_RD'] = yRD                                         # y position of instrument, in RDNAP coordinates [m]
    ds['x_local'] = x_loc                                    # x position of instrument, in local coordinate system [m]
    ds['y_local'] = y_loc                                    # y position of instrument, in local coordinate system [m] 
    ds['sf'] = sf                                            # sampling frequency [hz]

    # Add global attribute metadata
    # NOTE(review): 'time zone' is stored as UTC+2 while the measurements are from December - confirm the logger clock setting
    ds.attrs = {
        'Conventions': 'CF-1.6',
        'name': 'Pressure sensor ' + instrumentName[0:-4] + 'RBR, period 1',
        'instrument': 'Pressure sensor ' + instrumentName,
        'instrument type': 'Ruskin RBR Solo',
        'instrument serial number': '{}'.format(serial_number_all[i]),
        'time zone': 'UTC+2',
        'start time': pd.to_datetime(ds.t.values[0]).strftime("%d-%b-%Y %H:%M:%S"),
        'end time':   pd.to_datetime(ds.t.values[-1]).strftime("%d-%b-%Y %H:%M:%S"),
        'summary': 'HybridDune experiment: raw pressure data',
        'contact person': 'Daan Poppema',
        'emailadres': 'd.w.poppema@tudelft.nl',
        'construction datetime': datetime.now().strftime("%d-%b-%Y %H:%M:%S"),
        'version': 'v1',
        'comment_1': 'constructed with xarray',
        'url of online dataset': 'ADD LATER'}      # DAAN: ADD URL LATER
    
    # Add attributes to variables for metadata
    local_coord_sys = 'x=cross-shore (positive=landward); y=alongshore (positive is to north-east); (800,200) is the southern seaward corner of the containers'
    coord_conv   = '(0,0) local is (71683.584,452356.055) RD coordinates; local x-axis is 36° clockwise from RD x-axis; i.e. [x_loc y_loc] = [x_RD y_RD] - [x0 y0] .* [cosd(36) sind(36); -sind(36) cosd(36)]'
    ds.p.attrs = {'units': 'Pa', 'long_name': 'pressure', 'comments': 'raw data'}
    ds.x_RD.attrs = {'units': 'm', 'long_name': 'x position of instrument in RDNAP coordinates', 'epsg': 28992}
    ds.y_RD.attrs = {'units': 'm', 'long_name': 'y position of instrument in RDNAP coordinates', 'epsg': 28992}
    ds.x_local.attrs = {'units': 'm', 'long_name': 'cross-shore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    ds.y_local.attrs = {'units': 'm', 'long_name': 'alongshore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    ds.sf.attrs = {'units': 'Hz', 'long_name': 'sampling frequency'}
    if i >= 1: # instrument 0 is reference sensor, instrument and bed height only applicable for the remaining sensors
        ds.zi.attrs = {'units': 'm +NAP', 'long_name': 'elevation of sensor'}  # instrument height
        ds.zb.attrs = {'units': 'm +NAP', 'long_name': 'bed level'}  
        # use the key 'long_name' (with underscore), consistent with all other variables
        ds.t_zi.attrs = {'long_name': 'time that instrument elevation was measured'}
        ds.t_zb.attrs = {'long_name': 'time that bed level at instrument was measured'}

    # Save to netcdf -------------------------------
    ds.p.values = np.round(ds.p.values)    # Round pressure to 1 Pa = 0.1 mm (file size 7 times smaller)
    encoding = {var: {"zlib": True, "complevel": 4} for var in list(ds.data_vars) + list(ds.coords)}  # Apply deflate compression to all variables and coordinates in netCDF
    
    ncOutDir = os.path.join(experimentFolder, 'raw NetCDF')
    os.makedirs(ncOutDir, exist_ok=True)   # create the output folder if needed; no error if it already exists
    # Saving is switched off here. The cells that compute air pressure and relative pressure read
    # '<instrumentName> raw data - period 1.nc' from ncOutDir: enable the line below (and loop over
    # range(0,6)) to regenerate those files.
    #ds.to_netcdf(os.path.join(ncOutDir, instrumentName + ' raw data - period 1.nc'), encoding=encoding)  

In [34]:
# Save a rounded, deflate-compressed copy of the dataset that is left over from the loading loop
# (i.e. the last instrument processed there), under a separate file name.
ds['p'].values = np.round(ds['p'].values)    # pressure at 1 Pa = 0.1 mm resolution (file size 7 times smaller)
encoding = {name: dict(zlib=True, complevel=4) for name in [*ds.data_vars, *ds.coords]}  # deflate level 4 for all variables and coordinates

ds.to_netcdf(os.path.join(ncOutDir, instrumentName + ' raw data - period 1 round deflate4.nc'), encoding=encoding)  

In [None]:
# # CALCULATE ATMOSPHERIC AIR PRESSURE, ...
# Reads the raw netCDF file of the reference sensor, crops it to the period of interest, smooths the
# air pressure with a moving mean and provides it on an 8 hz and a 16 hz time axis.

# Set smoothing window for atmospheric pressure
t_smooth_air = 10 # [s]     # measured with 8 hz. But p_water and p_air are measured up to 100m apart (and p_air inside). Affected different by wind gusts, so filter out short-term variation

# Open raw data file  of reference sensor -------------------------------------------------------------------
dataFile =r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\refP1 RBR4 raw data - period 1.nc'
ds = xr.open_dataset(dataFile)

# crop dataset to the time range of interest. 5 RBRs with about the same end time, make exactly the same
t0 = datetime(2024, 12, 17, 11, 0, 0)         # first full hour that all 5 instruments were installed at the beach
t_end = datetime(2024, 12, 23, 13, 0)         # last full hour that all 5 instruments were installed at the beach
# t0 = datetime(2024, 12, 12, 9, 0, 0)        # old lines: cropped files to the same length, but contained obs before instruments were installed at beach, ,,,
# t_end = datetime(2024, 12, 27, 16, 20, 0)   # and after they were already removed
ds_air_8hz = ds.sel(t=slice(t0, t_end))
tAir_8hz = ds_air_8hz['t']

# Determine moving average. 
pAir_smooth_8hz = movmean(ds_air_8hz['p'], 8*t_smooth_air) # smooth over 8hz * n seconds

# interpolate for 16hz sensors
tAir_16hz   = pd.date_range(t0, t_end, freq='{}s'.format(1 / 16)) # 16hz time vector
# np.interp needs numeric coordinates (datetime64 cannot be cast safely to float), so express both
# time axes as seconds since t0. Relative seconds keep full float precision at sub-second steps.
t0_ns = np.datetime64(t0, 'ns')
sec_8hz  = (tAir_8hz.values  - t0_ns) / np.timedelta64(1, 's')
sec_16hz = (tAir_16hz.values - t0_ns) / np.timedelta64(1, 's')
pAir_smooth_16hz = np.interp(sec_16hz, sec_8hz, pAir_smooth_8hz) # interpolate to 16hz time vector

# Add to dataset
ds_air_8hz['pAir_smooth'] = (('t'), pAir_smooth_8hz )
ds_air_16hz = xr.Dataset( # make dataset for 16hz air pressure
    data_vars={'pAir_smooth': (('t',), pAir_smooth_16hz)},
    coords={'t': tAir_16hz} )

# Make shorter series for RBR1 (instrument turned off earlier)
t_end_RBR1 = t_end                              # not needed anymore. But RBR1 was turned off 0.5day after de-installation from beach, the rest 4 days later. So old code to deal with
#t_end_RBR1 = datetime(2024, 12, 23, 22, 4, 36)  # RBR1 stopping earlier than the rest. Now all cropped to the same length anyway
ds_air_8hz_RBR1 = ds_air_8hz.sel(t=slice(t0, t_end_RBR1))

In [None]:
# Instrument to be corrected, filtered
# For every beach instrument: open its raw netCDF file, crop it to [t0, t_end], subtract the smoothed
# air pressure of the reference sensor (both with their calibration offset) and save the result
# as '<instrumentName> p_rel - period 1.nc' in the QC folder.

for i in [1,2,3,4,5]:
    instrumentName = instrumentname_all[i]                                                                               # designated name of the instrument
    print(instrumentName)
    dataFile = os.path.join(experimentFolder,'raw NetCDF',instrumentName + ' raw data - period 1.nc')
    print(dataFile)

    ds0 = xr.open_dataset(dataFile)
    # crop air pressure and instrument dataset to the same time
    ds0 = ds0.sel(t=slice(t0, t_end))  # crop dataset to the time range of interest

    # Select the air pressure series that matches the time axis of this instrument
    if i == 2: # RBR1: has its own (possibly shorter) air pressure series
        ds_air = ds_air_8hz_RBR1
    elif sf_all[i] == 8:
        ds_air = ds_air_8hz
    else:
        ds_air = ds_air_16hz

    # Relative pressure: correct the pressure signal with pAir. Add calibration offset per instrument.
    ds0['p_rel'] = ds0['p'] + offset_all[i] - (ds_air['pAir_smooth'] + offset_all[0])

    # The subtraction is aligned on the time stamps: samples without an exactly matching air pressure
    # time stamp end up as NaN. Report this instead of silently saving gaps.
    n_nan = int(ds0['p_rel'].isnull().sum())
    if n_nan:
        print('WARNING: {} of {} samples of p_rel are NaN for {}; check the time alignment with the air pressure series'.format(n_nan, ds0['p_rel'].size, instrumentName))

    cal_text = '{}'.format(offset_all[i]) + ' Pa added to raw pressure of instrument (and ' + '{}'.format(offset_all[0]) + ' Pa added to air pressure of sensor refP1), based on the period between 12dec 20:00 and 13dec 6:00 that all RBRs measured just atmospheric pressure'
    ds0['p_rel'].attrs = {'units': 'Pa', 'long_name': 'Relative pressure', 'comments': 'corrected for air pressure and calibrated','calibration': cal_text}
    ds0.attrs['summary'] = 'Hybrid-Dune campaign: pressure corrected for air pressure'
    # start/end time of the cropped instrument record itself (ds0), not of the reference sensor file (ds)
    ds0.attrs['start time'] = pd.to_datetime(ds0.t.values[0]).strftime("%d-%b-%Y %H:%M:%S")
    ds0.attrs['end time'] =   pd.to_datetime(ds0.t.values[-1]).strftime("%d-%b-%Y %H:%M:%S")

    os.makedirs(os.path.join(experimentFolder,'QC'), exist_ok=True)  # create output folder if needed
    ncFilePath = os.path.join(experimentFolder, 'QC', instrumentName + ' p_rel - period 1.nc')
    encoding = {var: {"zlib": True, "complevel": 4} for var in list(ds0.data_vars) + list(ds0.coords)}  # Apply deflate compression to all variables and coordinates in netCDF
    ds0.to_netcdf(ncFilePath, encoding=encoding) # save to netcdf

S1P3 RBR5
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S1P3 RBR5.nc
S2P3 RBR1
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S2P3 RBR1.nc
S3P3 RBR6
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S3P3 RBR6.nc
S4P3 RBR2
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S4P3 RBR2.nc
S1P2 RBR3
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S1P2 RBR3.nc


In [10]:
# Scratch check: print the first part of the calibration text for the current value of i
# (note that this overwrites cal_text)
cal_text = str(offset_all[i]) + ' Pa added to raw pressure of instrument (and '

print(cal_text)


124 Pa added to raw pressure of instrument (and 
