In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


import xarray as xr
import matplotlib.pyplot as plt
import sys
# sys.path.append(r'C:\Users\ruro\OneDrive - Boskalis\Documents\python\OSSI')

sys.path.append(r'C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\Pressure_sensors\S1\RBR_05')
from scipy.signal import welch
from scipy.ndimage import uniform_filter1d
import puv 


In [2]:
def solo_data_reader(dataFile, sf):
    '''
    Function to read solo datafile.
    Returns a dataframe with a time column and pressure column in Pascal
    '''
    p = []
    datt = []
    with open(dataFile) as myfile:
        for index, line in enumerate(myfile):
            if index >= 1:
                lin = line.split(',')
                datt.append(lin[0])
                p.append(float(lin[1]))
    p = np.array(p) * 1e4  # dBar to Pa

    t = pd.date_range(datt[0], periods=len(datt), freq='{}s'.format(1 / sf))

    dfp = pd.DataFrame(data={'p': p}, index=t)

    dfp.index.name = 't'
    return dfp

In [3]:
def movmean(x, N):
    '''
    Moving mean with truncated windows at the edges.

    The window is the one used by scipy's uniform_filter1d: centred for odd N;
    for even N it reaches one sample further back than forward (window 2 is a
    backward average, x_m(i) = [x(i-1) + x(i)] / 2). Near the start and end of
    the series, the mean is taken over the samples that actually fall inside
    the series, instead of dividing by the full window length.

    Parameters
    ----------
    x : array-like
        Input series; it is converted to float before filtering. The filter
        runs along the last axis.
    N : int
        Window length in samples.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as x.
    '''
    # Cast to float: uniform_filter1d returns the dtype of its input, so
    # integer input would be truncated before the edge correction is applied.
    x = np.asarray(x, dtype=float)
    n = x.shape[-1]

    # Zero-padded filter: sums the in-range samples and divides by N everywhere
    y = uniform_filter1d(x, size=N, mode='constant')

    # Number of samples that really fall inside the window at each position.
    # The window spans [i - N//2, i + N - 1 - N//2], clipped to [0, n - 1].
    # Counting instead of concatenating fixed-length edge pieces also works
    # when the series is shorter than the window.
    idx = np.arange(n)
    reach_back = N // 2
    reach_fwd = N - 1 - reach_back
    S = np.minimum(idx + reach_fwd, n - 1) - np.maximum(idx - reach_back, 0) + 1

    # Undo the division by N and divide by the true number of samples instead
    return y * N / S
    # for 2D inspiration, see https://stackoverflow.com/questions/23000260/numpy-two-dimensional-moving-average

In [4]:
# # LOAD RAW DATA, SAVE TO NETCDF
# Reads the RBR text export of each selected instrument, attaches metadata and
# writes one raw NetCDF file per instrument to '<experimentFolder>/raw NetCDF'.

# Physical constants -----------------------------------------------------------------------------------
rho = 1027   # kg/m3, for seawater at 9C, avg temp at HKZ measurement station
g = 9.8125   # m/s2, value Zandmotor

# # input parameters per file ----------------------------------
# All lists below share the same order: entry k of each list belongs to the same instrument.
subfolder_in_all = ['refP1 RBR4', 'S1P2 RBR3', 'S1P3 RBR5', 'S2P3 RBR1', 'S3P3 RBR6', 'S4P3 RBR2']  # subfolder of experimentFolder holding the data file
instrumentname_all = subfolder_in_all                                                                # instrument name = subfolder name
namedatafile_all = ['S4 - 202441_20241227_1702_data.txt',
                    'S3 - 202440_20241227_1627_data.txt',
                    'S5 - 208681_20241227_1715_data.txt',
                    'RBR 1 - 202438_20241223_2203_data.txt',
                    'RBR6 - 208682_20241227_1744_data.txt',
                    'RBR2 - 202439_20241227_1624_data.txt']                                          # name of the datafile
sf_all = [8, 8, 16, 8, 16, 8]             # [Hz] sampling frequency
offset = [-597, 124, -225, 315, 30, 353]  # [Pa] offset to be added to the pressure data, for instrument calibration

# # input parameters general ---------------------------------------------
experimentFolder = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1'  # path where the data is sitting

# # Define input parameters for file i   (start loop) -----------------------
for i in [0, 4]:  # indices into the lists above; use range(len(sf_all)) to convert all instruments
    sf = sf_all[i]  # [Hz] sampling frequency
    data_inDir = os.path.join(experimentFolder, subfolder_in_all[i])
    namedatafile = namedatafile_all[i]
    instrumentName = instrumentname_all[i]  # designated name of the instrument

    # Serial number: the data files are named '<label> - <serial>_<date>_<time>_data.txt',
    # so take the part between ' - ' and the first underscore. This replaces a single
    # hard-coded serial number that was written to the metadata of every instrument.
    serial_number = namedatafile.split(' - ')[-1].split('_')[0]

    # # Position info, TEMPORARILY THE SAME FOR ALL INSTRUMENTS. --------------------
    xRD = 72398.448                   # x position of placement in field
    yRD = 452130.253                  # y position of placement in field
    zRD_bedlevel = zb = -1.229        # m NAP, bed level
    zRD_sensor = zi = -0.704          # m NAP, sensor height

    # Do the reading from file and cast in xarray dataset ----------------
    dataFile = os.path.join(data_inDir, namedatafile)  # path + name datafile
    dfp = solo_data_reader(dataFile, sf)
    ds = dfp.to_xarray()
    ds.p.attrs = {'long_name': 'pressure', 'units': 'Pa'}

    # Add global attribute metadata
    # NOTE(review): 'time zone' says UTC+2 while the file names are dated December - confirm the logger clock setting.
    ds.attrs = {
        'Conventions': 'CF-1.6',
        'name': '{}'.format(instrumentName),
        'instrument': '{}'.format(instrumentName),
        'instrument type': 'Ruskin RBR Solo',
        'instrument serial number': '{}'.format(serial_number),
        'epsg': 28992,
        'x': xRD,
        'y': yRD,
        'time zone': 'UTC+2',
        'coordinate type': 'XYZ',
        'summary': 'Hybrid-Dune experiment',
        'contact person': 'Daan Poppema',
        'emailadres': 'd.w.poppema@tudelft.nl',
        'construction datetime': datetime.now().strftime("%d-%b-%Y (%H:%M:%S)"),
        'version': 'v1',
        'version comments': 'constructed with xarray'}

    # Save to netcdf
    ncOutDir = os.path.join(experimentFolder, 'raw NetCDF')
    os.makedirs(ncOutDir, exist_ok=True)  # no error when the folder already exists
    ds.to_netcdf(os.path.join(ncOutDir, instrumentName + '.nc'))

In [6]:
# # SAVE RELATIVE AIR PRESSURE, ...
# Builds the smoothed air-pressure series of the reference sensor, once on the
# time grid of the reference sensor itself and once interpolated to 16 Hz.

# Open raw data file of reference sensor -------------------------------------------------------------------
# The reference sensor is entry 0 of the instrument lists; the path is built from
# the same folder and name settings that were used to write the raw NetCDF files.
dataFile = os.path.join(experimentFolder, 'raw NetCDF', instrumentname_all[0] + '.nc')
ds = xr.open_dataset(dataFile)

# crop dataset to the time range of interest. 5 RBRs with about the same end time, make exactly the same
t0 = datetime(2024, 12, 12, 9, 0, 0)
t_end = datetime(2024, 12, 27, 16, 20, 0)
ds_air_8hz = ds.sel(t=slice(t0, t_end))
tAir_8hz = ds_air_8hz['t']

# Determine moving average over 10 s of reference-sensor samples
sf_ref = sf_all[0]  # [Hz] sampling frequency of the reference sensor
pAir_smooth_8hz = movmean(ds_air_8hz['p'], sf_ref * 10)

# interpolate for 16hz sensors
tAir_16hz = pd.date_range(t0, t_end, freq='{}s'.format(1 / 16))  # 16hz time vector
# np.interp works on floats, so express both time axes as seconds since t0
# instead of relying on an implicit datetime64 -> float conversion.
sec_8hz = (tAir_8hz.values - np.datetime64(t0)) / np.timedelta64(1, 's')
sec_16hz = (tAir_16hz.values - np.datetime64(t0)) / np.timedelta64(1, 's')
# np.interp holds the first/last value constant outside the range of sec_8hz
pAir_smooth_16hz = np.interp(sec_16hz, sec_8hz, pAir_smooth_8hz)  # interpolate to 16hz time vector

# Add to dataset
ds_air_8hz['pAir_smooth'] = (('t',), pAir_smooth_8hz)
ds_air_16hz = xr.Dataset(  # make dataset for 16hz air pressure
    data_vars={'pAir_smooth': (('t',), pAir_smooth_16hz)},
    coords={'t': tAir_16hz})

# Make shorter series for RBR1 (instrument turned off earlier)
t_end_RBR1 = datetime(2024, 12, 23, 22, 4, 36)  # RBR1 stopped at this time
ds_air_8hz_RBR1 = ds_air_8hz.sel(t=slice(t0, t_end_RBR1))

In [7]:
# Instrument to be corrected, filtered
# For every selected instrument: subtract the smoothed air pressure, cut the
# record into bursts of 20 minutes, mask dry periods and save to '<experimentFolder>/QC'.
for i in [4]:  # [1,2,3,4,5] for all instruments except the reference sensor
    instrumentName = instrumentname_all[i]  # designated name of the instrument
    print(instrumentName)
    dataFile = os.path.join(experimentFolder, 'raw NetCDF', instrumentName + '.nc')
    print(dataFile)

    ds0 = xr.open_dataset(dataFile)
    # crop air pressure and instrument dataset to the same time.
    # S2P3 RBR1 (i == 3) has a shorter time series, so it gets its own end time.
    # It is still cropped at t0: otherwise its record before t0 has no air
    # pressure (NaN) and its bursts do not start at t0 like the other sensors.
    t_stop = t_end_RBR1 if i == 3 else t_end
    ds0 = ds0.sel(t=slice(t0, t_stop))
    instr = ds0.instrument

    # Relative pressure: correct the pressure signal with pAir.
    # Add calibration offset per instrument; offset[0] belongs to the reference (air pressure) sensor.
    if i == 3:  # RBR1
        ds0['pc'] = ds0['p'] + offset[i] - (ds_air_8hz_RBR1['pAir_smooth'] + offset[0])
    elif sf_all[i] == 8:
        ds0['pc'] = ds0['p'] + offset[i] - (ds_air_8hz['pAir_smooth'] + offset[0])
    else:
        ds0['pc'] = ds0['p'] + offset[i] - (ds_air_16hz['pAir_smooth'] + offset[0])
        # sanity check: instrument and interpolated air pressure series should have the same length
        print(i)
        print(len(ds0['p']))
        print(len(ds_air_16hz['pAir_smooth']))

    # remove all values below 0 Pa
    # ds0['pc'] = ds0['pc'].where(ds0['pc'] > 0)

    ds0['pc'].attrs = {'units': 'Pa', 'long_name': 'Relative pressure', 'comments': 'corrected for air pressure'}

    # frequency resolution in fourier space
    # NOTE(review): these values assume blocks of 1800 s, while the data below is cut into
    # bursts of 1200 s, and D_length is re-used further down as a string - confirm which is intended.
    p_blocks = 30    # number of segments within block, for the Welch method
    D_length = 1800  # Duration of block in seconds (30 minutes)
    fresolution = p_blocks / D_length  # Frequency resolution is 1/T_segment = n_segments / T_block
    #print(f'fresolution is: {fresolution}')
    nperseg = D_length * sf_all[i] / p_blocks - 0.5  # dim should be len(ds.f); whelch has (nperseg/2 +1)
    #print(f'nperseg is: {nperseg}')

    # -----------------------------------------------------------------------------
    # reshape to one row per burst in data array
    pt = ds0.pc.values  # relative pressure, pAir subtracted
    nSamples = len(pt)
    dt = ds0.isel(t=1).t - ds0.isel(t=0).t  # sampling interval, taken from the first two time stamps
    #print(dt)

    D_length = '1200s'

    burstDuration = pd.Timedelta(D_length)  # Burst duration (1200 seconds = 20 minutes)
    burstLength = int(burstDuration / dt)   # samples per burst
    nBursts = int(np.floor(nSamples / burstLength))  # complete bursts only

    pt = pt[:nBursts * burstLength]  # drop the samples after the last complete burst
    t = ds0.t[::burstLength]  # take every nth step, so t = t0 of every burst
    t = t[:nBursts]  # take value for every complete burst, i.e. skip final value

    N = (ds0.t.values[:burstLength] - ds0.t.values[0]) / np.timedelta64(1, 's')  # time in seconds since start of burst

    # --------------------------------------------------------------------------
    # cast into a 2D array: one row per burst (t), one column per sample within the burst (N)
    ds = xr.Dataset(data_vars={},
                    coords={'t': t, 'N': N})
    # copy all data over into this new structure
    ds['p'] = (('t', 'N'), pt.reshape((nBursts, burstLength)))  # relative pressure, pAir subtracted
    ds['zi'] = zi                                               # instrument height
    ds['zb'] = zb                                               # initial bed level
    ds['sf'] = sf_all[i]                                        # sampling frequency

    # temp copy unfiltered p
    ds['p_rel'] = ds['p']

    # Remove water height below 30 cm (samples at or below the threshold become NaN)
    ds['p'] = ds['p'].where(ds['p'] > 0.3 * (rho * g))

    # Remove bursts where the standard deviation is too low, indicating the instrument fell dry
    ds['p'] = ds['p'].where(ds.p.std(dim='N') > 70)  # keep when std > 70 Pa, i.e. > 7 mm water height equivalent

    # --------------------------------------------------------------------------
    ds['p'].attrs = {'units': 'Pa', 'long_name': 'pressure', 'comments': 'corrected for air pressure'}
    ds['zi'].attrs = {'units': 'm+NAP', 'long_name': 'z instrument'}  # instrument height
    ds['zb'].attrs = {'units': 'm+NAP', 'long_name': 'z bed'}  # bed level
    ds['sf'].attrs = {'units': 'Hz', 'long_name': 'sampling frequency'}
    ds.attrs = ds0.attrs
    ds.attrs['summary'] = 'Hybrid-Dune campaign, pressure corrected for air pressure and cast in bursts of 20 minutes'
    ds['name'] = instr
    os.makedirs(os.path.join(experimentFolder, 'QC'), exist_ok=True)  # no error when the folder already exists
    ncFilePath = os.path.join(experimentFolder, 'QC', instr + ' p_rel.nc')
    ds.to_netcdf(ncFilePath)

S3P3 RBR6
O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\S3P3 RBR6.nc
4
21158401
21158401
