In [206]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


import xarray as xr
import matplotlib.pyplot as plt
import sys
# sys.path.append(r'C:\Users\ruro\OneDrive - Boskalis\Documents\python\OSSI')

sys.path.append(r'C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\Pressure_sensors\S1\RBR_05')
from KNMI_readers import read_knmi_uurgeg
from scipy.signal import welch

import puv 


In [207]:
def solo_data_reader(dataFile, sf):
    '''
    Function to read solo datafile.
    Returns a dataframe with a time column and pressure column in Pascal
    '''
    p = []
    dattt = []
    with open(dataFile) as myfile:
        for index, line in enumerate(myfile):
            if index >= 1:
                lin = line.split(',')
                datt.append(lin[0])
                p.append(float(lin[1]))
    p = np.array(p) * 1e4  # dBar to Pa

    t = pd.date_range(datt[0], periods=len(datt), freq='{}S'.format(1 / sf))
    print(datt[0])
    dfp = pd.DataFrame(data={'p': p}, index=t)

    dfp.index.name = 't'
    return dfp

In [208]:
def movmean(x, N):
    # calculate moving mean. NB: this divides values at the edges by the window length, instead of the available number of values
    y = uniform_filter1d(x, size=N, mode='constant') # for even window: backward avg. So window 2: x_m(i)=[x(i-1)+x(i)]/2. x_m(i=1) = x(i=1)/2

    # compensate edges for number of values, i.e. the truncated window length
    S1 = np.arange(np.ceil(N/2), N)
    S2 = np.ones(len(x)-N+1)*N
    S3 = np.arange(N-1, np.floor(N/2), -1)
    S = np.concatenate((S1, S2, S3)) 
    return y * N / S
    # for 2D inspiration, see https://stackoverflow.com/questions/23000260/numpy-two-dimensional-moving-average

In [230]:
# # LOAD RAW DATA, SAVE TO NETCDF
# # input parameters per file ----------------------------------
subfolder_in_all = ['refP1 RBR4','S1P2 RBR3', 'S1P3 RBR5', 'S2P3 RBR1', 'S3P3 RBR6', 'S4P3 RBR2'] # subfolder where file is sitting within experimentFolder (on O drive Daan)
instrumentname_all = subfolder_in_all #['refP1 RBR4', 'S3P6 RBR6']
namedatafile_all = ['S4 - 202441_20241227_1702_data.txt',
                    'S3 - 202440_20241227_1627_data.txt',
                    'S5 - 208681_20241227_1715_data.txt',
                    'RBR 1 - 202438_20241223_2203_data.txt',
                    'RBR6 - 208682_20241227_1744_data.txt',
                    'RBR2 - 202439_20241227_1624_data.txt']                                                          # name of the datafile
F_all = [8, 8, 16, 8, 16, 8] #[hz] sampling frequency

# # input parameters general ---------------------------------------------
experimentFolder = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1'                  # path where the data is sitting # Rubens Laptop
knmiFile = r"C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\uurgeg_330_2021-2030.txt"  # path to the KNMI data


# # Define input parameters for file N   (start loop) -----------------------
for i in range(5,6):
    F = F_all[i]  #[hz] sampling frequency
    data_inDir = os.path.join(experimentFolder, subfolder_in_all[i])
    namedatafile = namedatafile_all[i]
    instrumentName = instrumentname_all[i]                                                                               # designated name of the instrument


    # # Instrument info, TEMPORARILY THE SAME FOR ALL INSTRUMENTS. --------------------
    serial_number = '202438'                                                                                 # unique serial number of the instrument
    xRD = 72398.448                                                                                          # x position of placement in field
    yRD =  452130.253                                                                                        # y position of placement in field
    zRD_bedlevel =zb = -1.229         # m NAP
    zRD_sensor = zi =  -0.704         # m NAP

    # Do the reading from file and cast in xarray dataset ----------------
    dataFile =  os.path.join(data_inDir, namedatafile)                                                 # path + name datafile
    dfp = solo_data_reader(dataFile, F)
    ds = dfp.to_xarray()
    ds.p.attrs = {'long_name': 'pressure', 'units': 'Pa'}

    # Add global attribute metadata
    ds.attrs = {
        'Conventions': 'CF-1.6',
        'name': '{}'.format(instrumentName),
        'instrument': '{}'.format(instrumentName),
        'instrument type': 'Ruskin RBR Solo',
        'instrument serial number': '{}'.format(serial_number),
        'epsg': 28992,
        'x': xRD,
        'y': yRD,
        'time zone': 'UTC+2',
        'coordinate type': 'XYZ',
        'summary': 'Hybrid-Dune experiment',
        'contact person': 'Daan Poppema',
        'emailadres': 'd.w.poppema@tudelft.nl',
        'construction datetime': datetime.now().strftime("%d-%b-%Y (%H:%M:%S)"),
        'version': 'v1',
        'version comments': 'constructed with xarray'}

    # Save to netcdf
    ncOutDir = os.path.join(experimentFolder, 'raw NetCDF')
    if not os.path.isdir(ncOutDir):
        os.mkdir(ncOutDir)
    #ds.to_netcdf(os.path.join(ncOutDir, instrumentName + '.nc')) #uncomment to save to NETCDF

  t = pd.date_range(datt[0], periods=len(datt), freq='{}S'.format(1 / sf))


In [234]:
# # SAVE RELATIVE AIR PRESSURE, ...
# Open raw data file -------------------------------------------------------------------
# Reference sensor
dataFile =r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\refP1 RBR4.nc'
ds = xr.open_dataset(dataFile)

# crop dataset to the time range of interest. 5 RBRs with about the same end time, make exactly the same
t0 = datetime(2024, 12, 12, 9, 0, 0)
t_end = datetime(2024, 12, 27, 16, 20, 0)
ds_air_8hz = ds0.sel(t=slice(t0, t_end)) 

pAir = ds_air_8hz['p']
tAir_8hz = ds_air_8hz['t']

# Determine moving average. 
pAir_smooth_8hz = movmean(pAir, 8*10) # smooth over 8hz * 10s

# interpolate for 16hz sensors
tAir_16hz   = pd.date_range(t0, t_end, freq='{}S'.format(1 / 16)) # 16hz time vector
pAir_smooth_16hz = np.interp(tAir_16hz, tAir_8hz, pAir_smooth_8hz) # interpolate to 16hz time vector

# Add to dataset
ds_air_8hz['pAir_smooth']   = pAir_smooth_8hz # add to dataSet for 8 hz air pressure

ds_air_16hz = xr.Dataset( # make dataset for 16hz air pressure
    data_vars={'p_air_smooth': (('t',), pAir_smooth_16hz)},
    coords={'t': tAir_16hz}
)

  tAir_16hz   = pd.date_range(t0, t_end, freq='{}S'.format(1 / 16)) # 16hz time vector


In [235]:
# Instrument to be corrected, filtered
for i in range(5,6):
    instrumentName = instrumentname_all[i]                                                                               # designated name of the instrument
    dataFile = os.path.join(experimentFolder,'raw_netcdf',instrumentName + '.nc')
    print(dataFile)

    ds0 = xr.open_dataset(dataFile)
    ds0 = ds0.sel(t=slice(t0, t_end))  # crop dataset to the time range of interest
    instr = ds0.instrument


    # frequency resolution in fourier space
    ### delta_f = p_blocks/D_timeframe ###
    p_blocks = 20    # minutes per block?
    D_length = 1200  # seconds per block?
    fresolution = p_blocks / D_length
    print(f'fresolution is: {fresolution}')
    nperseg = D_length * sf / p_blocks - 0.5 #dim should be len(ds.f); whelch has (nperseg/2 +1)

    # -----------------------------------------------------------------------------------
    rho = 10277 #kg/m3, for seawater at 9C, avg temp at HKZ measurement station
    g = 9.8125  # value Zandmotor

    # Relative pressure: correct the pressure signal with pAir
    if F_all[i] == 8:
        ds0['pc'] = ds0['p'] - ds_air_8hz['pAir_smooth']

    else:
        ds0['pc'] = ds0['p'] - ds_air_16hz['pAir_smooth']

    # remove all values below 0 Pa
    # ds0['pc'] = ds0['pc'].where(ds0['pc'] > 0)

    ds0['pc'].attrs = {'units': 'Pa', 'long_name': 'Relative pressure', 'comments': 'corrected for air pressure'}
    print(np.timedelta64())

# -----------------------------------------------------------------------------
# reshape to one row per burst in data array
pt = ds0.pc.values
nSamples = len(pt)
dt = ds0.isel(t=1).t - ds0.isel(t=0).t
sf = np.timedelta64(1, 's') / dt.values

D_length = '1200S'

burstDuration = pd.Timedelta(D_length)  # Burst duration (1200 seconds = 20 minutes)
burstLength = int(burstDuration / dt)
nBursts = int(np.floor(nSamples / burstLength))

pt = pt[:nBursts * burstLength]
t = ds0.t[::burstLength]
t = t[:nBursts]
N = (ds0.t.values[:burstLength] - ds0.t.values[0]) / np.timedelta64(1, 's')
# pdb.set_trace()

# --------------------------------------------------------------------------
# cast into a 2D array
ds = xr.Dataset(data_vars={},
                coords={'t': t, 'N': N})
# copy all data over into this new structure
ds['p'] = (('t', 'N'), pt.reshape((nBursts, burstLength)))
ds['zi'] = zi
ds['zb'] = zb
ds['sf'] = sf



# Remove water height below 30 cm
ds['p'] = ds['p'].where(ds['p'] > 0.3 * (rho * g))

# Remove bursts where the standard deviation is too low, indicating the instrument fell dry
ds['p'] = ds['p'].where(ds.p.std(dim='N') > 70)

# --------------------------------------------------------------------------
# pdb.set_trace()
ds['p'].attrs = {'units': 'Pa +NAP', 'long_name': 'pressure', 'comments': 'corrected for air pressure'}
ds['zi'].attrs = {'units': 'm+NAP', 'long_name': 'z instrument'}  # instrument height
ds['zb'].attrs = {'units': 'm+NAP', 'long_name': 'z bed'}  # bed level  
ds['sf'].attrs = {'units': 'Hz', 'long_name': 'sampling frequency'}
ds.attrs = ds0.attrs
ds.attrs['summary'] = 'Hybrid-Dune campaign, pressure corrected for air pressure and cast in bursts of 20 minutes'
ds['name'] = instr
if not os.path.isdir(os.path.join(experimentFolder,'QC')):
    os.mkdir(os.path.join(experimentFolder,'QC'))
ncFilePath = os.path.join(experimentFolder, 'QC', instr + '.nc')
ds.to_netcdf(ncFilePath)

O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw_netcdf\S4P3 RBR2.nc


FileNotFoundError: [Errno 2] No such file or directory: 'O:\\HybridDune experiment\\data RBR, OSSI\\copy RBR Udrive series1\\raw_netcdf\\S4P3 RBR2.nc'

In [204]:
x = xr.DataArray([1, 2, 2, 4])
print(x)

movmean(pAir,3)
print(pAir)



<xarray.DataArray (dim_0: 4)> Size: 32B
array([1, 2, 2, 4])
Dimensions without coordinates: dim_0
<xarray.DataArray 'p' (t: 10599504)> Size: 85MB
array([104460.634, 104455.326, 104460.068, ..., 104546.329, 104538.295,
       104560.272], shape=(10599504,))
Coordinates:
  * t        (t) datetime64[ns] 85MB 2024-12-12T09:00:00 ... 2024-12-27T17:02...
Attributes:
    long_name:  pressure
    units:      Pa
