In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

import xarray as xr
import matplotlib.pyplot as plt
import sys
from scipy.signal import welch
sys.path.append(r'C:\Users\dpoppema\Documents\GitHub\HybridDune\Ruben\Pressure_sensors\S1\RBR_05') # to find the puv.py file
import puv 

In [2]:
# # Input data: parameters, instrument names, file locations
# Physical constants -----------------------------------------------------------------------------------
rho = 1027 #kg/m3, for seawater at 9C, avg temp at HKZ measurement station
g = 9.8125  # value Zandmotor

# # input parameters per file ----------------------------------
subfolder_in_all = ['refP1 RBR4','S1P2 RBR3', 'S1P3 RBR5', 'S2P3 RBR1', 'S3P3 RBR6', 'S4P3 RBR2'] # subfolder where file is sitting within experimentFolder (on O drive Daan)
sf_all = [8, 8, 16, 8, 16, 8] #[hz] sampling frequency

# # input parameters general ---------------------------------------------
experimentFolder = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1'                  # path where the data is sitting # Rubens Laptop
instrumentname_all = subfolder_in_all #['refP1 RBR4', 'S3P6 RBR6']

# Settings spectral analysis: segments (Welch method) -------------------
p_blocks = 20          # number of segments within block, for the Welch method
D_length = 1200        # Duration of block in seconds (20 minutes)
D_length_s = '1200s'   # Duration of block in seconds (20 minutes). (same as above, but as string for xarray)

In [None]:
i = 5
instrument = instrumentname_all[i]  # select instrument
instrFile = os.path.join(experimentFolder,'QC', instrument +' p_rel.nc')
ncOutFile = os.path.join(experimentFolder,'tailored', instrument +'.nc')

# %% load the data from netcdf
ds0 = xr.open_dataset(instrFile)   # dataset with relative pressure
#print(np.size(ds0.p.values), 'time steps in dataset')

# frequency resolution in fourier space --------------------------------------------
### delta_f = p_blocks/D_timeframe ###
fresolution = p_blocks / D_length # Frequency resolution is 1/T_segment = n_segments / T_block
nperseg = D_length * sf_all[i] / p_blocks - 0.5 #dim should be len(ds.f); whelch has (nperseg/2 +1)

# -------------------------------------------------------------------------------------------
# reshape to one column per burst in data array
pt = ds0.p_rel.values # relative pressure, pAir subtracted
nSamples = len(pt)
dt = ds0.isel(t=1).t - ds0.isel(t=0).t

burstDuration = pd.Timedelta(D_length_s)  # Burst duration (1200 seconds = 20 minutes)
burstLength = int(burstDuration / dt)
nBursts = int(np.floor(nSamples / burstLength))

pt = pt[:nBursts * burstLength]
t_full = ds0.t.values[:nBursts * burstLength]  # time vector for all samples, up to the last complete burst. skip incomplete burst at end
t_block = t_full[::burstLength]  # take every nth step, so t = t0 of every burst

N = (ds0.t.values[:burstLength] - ds0.t.values[0]) / np.timedelta64(1, 's')  # time in seconds since start of burst

# Cast pressure into a 2D array --------------------------
ds_2D = xr.Dataset(data_vars={},    # Temporary 2D dataset, with cooridnates t (no. of blocks), N (obs within block)
                coords={'t_full': t_full, #ds0.t.values,                 
                        't_block': t_block,
                        'N': N,
                        'f': np.arange(0, ds0.sf.values / 2, fresolution)})

ds_2D['p'] = (('t', 'N'), pt.reshape((nBursts, burstLength)))      # relative pressure, pAir subtracted


# Filtering ----------------------------------------------------------------------------------------
# Remove water height below 30 cm
# ds_2D['p'] = ds_2D['p'].where(ds_2D['p'] > 0.3 * (rho * g))

# Remove bursts where the standard deviation is too low, indicating the instrument fell dry
# ds_2D['p'] = ds_2D['p'].where(ds_2D.p.std(dim='N') > 70)                  # keep when std > 70 Pa, i.e. > 7 mm water height equivalent

# make a new dataset that has an extra dimension to accomodate for the frequency axis ----------------------------------
ds_out = xr.Dataset(data_vars={},
                coords={'t_full': t_full, #ds0.t.values,                 
                        't_block': t_block,
                        'f': np.arange(0, ds0.sf.values / 2, fresolution)})

# # put all variables in this new dataset
ds_out.attrs = ds0.attrs
for key in ds0.data_vars:
    ds_out[key] = ds0[key]

# Add metadata, drop raw pressure
ds_out['f'].attrs = {'units': 'Hz'}
ds_out['p'] = ds_out.p_rel # renaming p_rel to p (overwriting existing p variable)
ds_out = ds_out.drop_vars('p_rel') # and  dropping the old p_rel variable


In [None]:
# temp: resize p -----------------------------------------------------

# Create a continuous time array
# Make full time vector
t0 = datetime(2024, 12, 12, 9, 0, 0)
t_end = datetime(2024, 12, 27, 16, 20, 0)
t_end = datetime(2024, 12, 27, 16, 19, 59, 999999)

t_full   = pd.date_range(t0, t_end, freq='{}s'.format(1 / 16)) # 16hz time vector
x = np.arange(0, len(t_full))  # create an index for the full time vector;

ds_new = xr.Dataset(data_vars={},
                coords={'t_full': t_full,
                        't': ds0.t.values,
                        'N': N,
                        'f': np.arange(0, ds0.sf.values / 2, fresolution)})

# put all variables in this new dataset
for key in ds.data_vars:
    ds_new[key] = ds[key]

#print(ds_new)

# Save with compression
ncFilePath1 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy.nc'
ncFilePath2 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy with t_full index.nc'
ncFilePath3 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy deflate4.nc'
ncFilePath4 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy with t_full deflate4.nc'
ncFilePath7 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\B7-2 with t_full, deflate4 var_coor.nc'

encoding3 = {var: {"zlib": True, "complevel": 4} for var in ds.data_vars}
encoding4 = {var: {"zlib": True, "complevel": 4} for var in ds_new.data_vars}
encoding7 = {var: {"zlib": True, "complevel": 4} for var in list(ds_new.data_vars) + list(ds_new.coords)}

#ds.to_netcdf(    ncFilePath1)
#ds_new.to_netcdf(ncFilePath2)
#ds.to_netcdf(    ncFilePath3, encoding=encoding3)
#ds_new.to_netcdf(ncFilePath4, encoding=encoding4)
ds_new.to_netcdf(ncFilePath7, encoding=encoding7)

nBursts = 1102
burstLength = 19200  # number of samples per burst
#ds_new['t_2d'] = (('t', 'N'), x.reshape((nBursts, burstLength)))      # relative pressure, pAir subtracted

# reshape variable t_2d to 1d vector, stacking dimension t and N to single dimension t_full
ds_new['p_1d'] = ds_new.p.stack(t_full=('t', 'N'))
ds_new.p_1d.attrs = ds_new.p.attrs
#ds_new.p2.name = 'p2'

# remove variable p from dateset ds_net
ds_new = ds_new.drop_vars('p')

# save reshaped
#ncFilePath5 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy with t_full reshape.nc'
ncFilePath6 = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\QC\copy with t_full reshape deflate4.nc'
encoding6 = {var: {"zlib": True, "complevel": 4} for var in ds_new.data_vars}
# ds_new.to_netcdf(ncFilePath5)
# ds_new.to_netcdf(ncFilePath6, encoding=encoding6)


In [None]:
# # DAAN: REMOVE FILTERING WITH OWN VERSION !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

def remove_outliers(data, threshold=3):
    mean = np.mean(data)
    std_dev = np.std(data)
    mask = np.abs(data - mean) < threshold * std_dev
    # filtered_data = np.where(mask, data, mean + threshold * std_dev * np.sign(data - mean))  # Replace outliers with closest value within threshold
    filtered_data = np.where(mask, data, np.nan) # Replace outliers with nan
    return filtered_data

# Create a lambda function to pass the threshold parameter
ufunc = lambda x: remove_outliers(x, threshold=3)

# Apply the outlier removal function to the pressure data
ds['p'] = xr.apply_ufunc(ufunc, 
                          ds['p'],
                          input_core_dims=[['N']],
                          output_core_dims=[['N']],
                          vectorize=True)

# Interpolate NaN values in the pressure signal
ds['p'] = ds['p'].interpolate_na(dim='N', method='linear')


ds.p.isel(t=111).plot()

In [6]:
# Print the initial number of NaN values
initial_nan_count = np.sum(np.isnan(ds_2D['p'].values))
print("Initial NaN count:", initial_nan_count)

# Interpolating NaN values using linear interpolation
#ds['p'] = ds['p'].interpolate_na(dim='N', method='linear')

# Handling any remaining NaN values
#ds['p'] = ds['p'].ffill(dim='N').bfill(dim='N')

# Print the number of NaN values after interpolation
#remaining_nan_count = np.sum(np.isnan(ds['p'].values))
#print("Remaining NaN count:", remaining_nan_count)

Initial NaN count: 0


In [None]:
### pressure filtering ####
ufunc = lambda x: puv.band_pass_filter2(ds0.sf.values, x, fmin=0.004, fmax=1.5)

ds_2D['p'] = xr.apply_ufunc(ufunc, 
                          ds_2D['p'],
                          input_core_dims=[['N']],
                          output_core_dims=[['N']],
                          vectorize=True)

# Compute water depth
ds_2D['h'] = (ds_2D['p'] / (rho * g) + (ds0['zi'] - ds0['zb'])).mean(dim='N')  # Mean water depth per burst, based on pressure, sensor height and bed level
ds_2D['h'].attrs = {'long_name': 'mean water depth', 'units': '[m]'} # avg per burst

In [None]:
# TEMPORARY: STUDY IMPORTANCE BED LEVEL
#    zRD_bedlevel =zb = -1.229         # m NAP
#    zRD_sensor = zi =  -0.704         # m NAP

# basis ---------------------------------------------------------------
zb_test = -1.23 # m NAP
zi_test = -0.7

# bed 20cm lower ---------------------------------------------------------------
zb_test = -1.43 # m NAP
zi_test = -0.9
ds_2D['h'] = (ds_2D['p'] / (rho * g) + (zi_test - zb_test)).mean(dim='N')  # Relative to bed level

ufunc = lambda x, h: puv.attenuate_signal(
    'pressure', 
    ds0.sf.values, x, h, 
    zi_test,
    zb_test,
    rho = rho,
    g = g,
    removeNoise=False,
    detrend=True)
fx, h = xr.apply_ufunc(ufunc, 
                              ds_2D['p'], ds_2D['h'],
                              input_core_dims=[['N'], []],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)
ds_2D['zs'] = h + zb_test  # water level (surface elevation) = bed level + depth
z1 = ds_2D['zs'].isel(t=531)  # select frequency 0.5 Hz, column 133

#ufunc = lambda p: welch(p, fs=ds0.sf.values, nperseg=nperseg, detrend='constant', window='hann') # Detrend: is per segment. 1min, about constant, so false used
#ds_2D['frequencies'], ds_2D['psd'] = xr.apply_ufunc(ufunc,
#                                                ds_2D['zs'],
#                                                input_core_dims=[['N']],
#                                                output_core_dims=[['f'], ['f']],
#                                                vectorize=True)

#ufunc = lambda psd: puv.compute_wave_params(ds_2D.f.values, psd, fmin=0.004 , fmax=5)   # DAAN: CHECK: WHY fmax=5? Different result than 1.5?
#ds_2D['Hm0'], ds_2D['Tp'], ds_2D['Tm01'], ds_2D['Tm02'], ds_2D['Tmm10'], ds_2D['Tps'] = xr.apply_ufunc(ufunc,
#                                                                          ds_2D['psd'],
#                                                                          input_core_dims=[['f']],
#                                                                          output_core_dims=[[], [], [], [], [], []],
#                                                                          vectorize=True)

# bed 20cm higher ---------------------------------------------------------------
zb_test = -1.03 # m NAP
zi_test = -0.5

ds_2D['h'] = (ds_2D['p'] / (rho * g) + (zi_test - zb_test)).mean(dim='N')  # Relative to bed level

ufunc = lambda x, h: puv.attenuate_signal(
    'pressure', 
    ds0.sf.values, x, h, 
    zi_test,
    zb_test,
    rho = rho,
    g = g,
    removeNoise=False,
    detrend=True)
fx, h = xr.apply_ufunc(ufunc, 
                              ds_2D['p'], ds_2D['h'],
                              input_core_dims=[['N'], []],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)
ds_2D['zs'] = h + zb_test  # water level (surface elevation) = bed level + depth
z3 = ds_2D['zs'].isel(t=531)  # select frequency 0.5 Hz, column 133

# Compare results ----------------------------------------------------
print( np.mean(z1))
print( np.mean(z3))

#print(z1[1])
#print(z3[1])
#print(ds_2D['Hm0'].isel(t=531))  # select frequency 0.5 Hz, column 133
#print(ds_2D['Hm0'].isel(t=531))  # select frequency 0.5 Hz, column 133

# plot ds_2D
z_dif = z1-z3
#z_dif.plot(label='zs at 0.5 Hz')
#plt.xlabel('Time')

# Conclusion: different bed levels give no difference in z_mean, but difference in z(t)


  Sw = np.cosh(k*h)/np.cosh(k*elev)
  Sw = np.cosh(k*h)/np.cosh(k*elev)


<xarray.DataArray 'zs' ()> Size: 8B
array(1.77398555)
<xarray.DataArray 'zs' ()> Size: 8B
array(2.17398555)


In [None]:
# Several wave statistics computations, only based on pressure, for full bandpass
# Threshold= 4    # DAAN: SKIP
ufunc = lambda x, h: puv.attenuate_signal(
    'pressure', 
    ds0.sf.values, x, h, 
    ds0.zi.values,
    ds0.zb.values,
    rho = rho,
    g = g,
    removeNoise=False,
    detrend=True)

fx, h = xr.apply_ufunc(ufunc, 
                              ds_2D['p'], ds_2D['h'],
                              input_core_dims=[['N'], []],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)
ds_2D['zs'] = ds0.zb.values + h # water level (surface elevation) = bed level + depth
ds_2D['zs'].attrs = {'units': 'm +NAP', 'long_name': 'surface elevation'}   # DAAN: CHECK: MORE METADATA NEEDED?

# ufunc = lambda zs: remove_outliers(zs, threshold=Threshold)   # DAAN: SKIP

# ds['zs'] = xr.apply_ufunc(ufunc, 
#                           ds['zs'],
#                           input_core_dims=[['N']],
#                           output_core_dims=[['N']],
#                           vectorize=True)

# # Interpolate NaN values in the pressure signal
# ds['zs'] = ds['zs'].interpolate_na(dim='N', method='linear')

# Drop values that are below bed level. 
# ds['zs'] = ds['zs'].where(ds['zs'] >= 0.0, drop=True)

ufunc = lambda p: welch(p, fs=ds0.sf.values, nperseg=nperseg, detrend='constant', window='hann') # Detrend: is per segment. 1min, about constant, so false used

ds_2D['frequencies'], ds_2D['psd'] = xr.apply_ufunc(ufunc,
                                                ds_2D['zs'],
                                                input_core_dims=[['N']],
                                                output_core_dims=[['f'], ['f']],
                                                vectorize=True)

ufunc = lambda psd: puv.compute_wave_params(ds_2D.f.values, psd, fmin=0.004 , fmax=5)   # DAAN: CHECK: WHY fmax=5? Different result than 1.5?

ds_2D['Hm0'], ds_2D['Tp'], ds_2D['Tm01'], ds_2D['Tm02'], ds_2D['Tmm10'], ds_2D['Tps'] = xr.apply_ufunc(ufunc,
                                                                          ds_2D['psd'],
                                                                          input_core_dims=[['f']],
                                                                          output_core_dims=[[], [], [], [], [], []],
                                                                          vectorize=True)

In [None]:
# # TIME-VARYING BEDLEVEL --------------------------------------------------------
# Define z_bed_obs and t_bed_obs for RBR6 -----------------
t_bed_obs = pd.to_datetime(['2024-12-17 10:30',
                            '2024-12-20 12:20',
                            '2024-12-23 12:00'])
t_block2 = pd.to_datetime(t_block)  # Convert to pandas datetime for consistency

z_bed_obs = np.array([-1.229, -1.103, -0.964] )  # TEMP EXAMPLE

# Interpolate z_bed_obs to z_bed_block for time t_block
z_bed_block = np.interp(t_block2, t_bed_obs, z_bed_obs)  # Interpolate bed level to block time vector
z_bed_block2 = np.reshape(z_bed_block,(len(z_bed_block),1)) # reshape to column vector for compatibility with p
z_bed_block3 = z_bed_block2.repeat(19200,axis=1)

# Calculate effect time-varying bedlevel on water depth ----------------------------------------
print(np.shape(ds_2D['p']))
print(np.shape(z_bed_block2))
print(np.shape(z_bed_block3))

#print(np.shape(ds_2D['z_bed_block']))
ds_2D['h'] = ( (ds_2D['p'] / rho / g) + ds0['zi'] - z_bed_block2 ).mean(dim='N') # Mean water depth per burst: h=p/rho/g + (z_i-z_b)

ufunc = lambda x, h, zb: puv.attenuate_signal(
    'pressure', 
    ds0.sf.values, x, h, 
    ds0.zi.values,
    zb, 
    rho = rho,
    g = g,
    removeNoise=False,
    detrend=True)

fx, h = xr.apply_ufunc(ufunc, 
                              ds_2D['p'], ds_2D['h'], z_bed_block,
                              input_core_dims=[['N'], [],[]],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)
ds_2D['zs'] = z_bed_block2 + h # water level (surface elevation) = bed level + depth

(438, 9600)
(438, 1)
(438, 19200)
<xarray.DataArray 'h' (t: 438)> Size: 4kB
array([0.67325847, 0.64387871, 0.60886407, 0.59656408, 0.6284327 ,
       0.66613148, 0.75689849, 0.8656114 , 0.98823653, 1.19220973,
       1.47550996, 1.8574465 , 2.21934196, 2.45503898, 2.5695246 ,
       2.57193093, 2.48148932, 2.40052476, 2.31656315, 2.24155846,
       2.12814885, 2.01761469, 1.88810426, 1.70769759, 1.49474718,
       1.31695377, 1.14076224, 0.97109879, 0.82766111, 0.77615699,
       0.73318488, 0.74938464, 0.75396073, 0.75989304, 0.72319393,
       0.64644241, 0.60209574, 0.55251325, 0.50580988, 0.48284198,
       0.46240305, 0.43320333, 0.43474024, 0.4337127 , 0.503797  ,
       0.54254058, 0.66374661, 0.83511156, 1.11252689, 1.47528213,
       1.75998795, 1.95991412, 2.01778279, 1.96411902, 1.87134399,
       1.77130097, 1.6633274 , 1.52970974, 1.38804032, 1.23956063,
       1.05435012, 0.87797699, 0.72447308, 0.56461696, 0.44915381,
       0.3674024 , 0.33565933, 0.34259458, 0.3804389 

In [None]:
## low frequencies: IG waves ##
## now first filter bandpass, then compute wave_params

ufunc = lambda x: puv.band_pass_filter2(ds.sf.values, x, fmin=0.004, fmax=0.05)

ds['p_low'] = xr.apply_ufunc(ufunc, 
                          ds['p'],
                          input_core_dims=[['N']],
                          output_core_dims=[['N']],
                          vectorize=True)

ufunc = lambda x, h: puv.attenuate_signal(
    'pressure', 
    ds.sf.values, x, h, 
    ds.zi.values,
    ds.zb.values,
    rho=1027,
    g=9.8125,
    removeNoise=False,
    detrend=True)

fx, ds['zs_low'] = xr.apply_ufunc(ufunc, 
                              ds['p_low'], ds['h'],
                              input_core_dims=[['N'], []],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)

# ufunc = lambda zs: remove_outliers(zs, threshold=Threshold)

# ds['zs_low'] = xr.apply_ufunc(ufunc, 
#                           ds['zs_low'],
#                           input_core_dims=[['N']],
#                           output_core_dims=[['N']],
#                           vectorize=True)

# ds['zs_low'] = ds['zs_low'].interpolate_na(dim='N', method='linear')

# ds['zs_low'] = ds['zs_low'].where(ds['zs_low'] >= 0.0, drop=True)

ds['zs_low'].attrs = {'units': 'm', 'long_name': 'surface elevation low freq.'}


ufunc = lambda p: welch(p, fs=ds.sf.values, nperseg=nperseg, detrend='constant', window='hann')

ds['frequencies_low'], ds['psd_low'] = xr.apply_ufunc(ufunc,
                                                ds['zs_low'],
                                                input_core_dims=[['N']],
                                                output_core_dims=[['f'], ['f']],
                                                vectorize=True)

ufunc = lambda psd: puv.compute_wave_params(ds.f.values, psd, fmin=0.004 , fmax=0.05)

ds['Hm0_low'], ds['Tp_low'], ds['Tm01_low'], ds['Tm02_low'], ds['Tmm10_low'], ds['Tps_low'] = xr.apply_ufunc(ufunc,
                                                                          ds['psd_low'],
                                                                          input_core_dims=[['f']],
                                                                          output_core_dims=[[], [], [], [], [], []],
                                                                          vectorize=True)




In [None]:
## High frequencies: wind-waves ##
## now first filter bandpass, then compute wave_params

ufunc = lambda x: puv.band_pass_filter2(ds.sf.values, x, fmin=0.05, fmax=5)

ds['p_high'] = xr.apply_ufunc(ufunc, 
                          ds['p'],
                          input_core_dims=[['N']],
                          output_core_dims=[['N']],
                          vectorize=True)

ufunc = lambda x, h: puv.attenuate_signal(
    'pressure', 
    ds.sf.values, x, h, 
    ds.zi.values,
    ds.zb.values,
    rho=1027,
    g=9.8125,
    removeNoise=False,
    detrend=True)

t, ds['zs_high'] = xr.apply_ufunc(ufunc, 
                              ds['p_high'], ds['h'],
                              input_core_dims=[['N'], []],
                              output_core_dims=[['f'], ['N']],
                              vectorize=True)

ufunc = lambda zs: remove_outliers(zs, threshold=Threshold)

# ds['zs_high'] = xr.apply_ufunc(ufunc, 
#                           ds['zs_high'],
#                           input_core_dims=[['N']],
#                           output_core_dims=[['N']],
#                           vectorize=True)

# ds['zs_high'] = ds['zs_high'].interpolate_na(dim='N', method='linear')

# ds['zs_high'] = ds['zs_high'].where(ds['zs_high'] >= 0.0, drop=True)

ds['zs_high'].attrs = {'units': 'm', 'long_name': 'surface elevation high freq.'}



ufunc = lambda p: welch(p, fs=ds.sf.values, nperseg=nperseg, detrend='constant', window='hann')

ds['frequencies_high'], ds['psd_high'] = xr.apply_ufunc(ufunc,
                                                ds['zs_high'],
                                                input_core_dims=[['N']],
                                                output_core_dims=[['f'], ['f']],
                                                vectorize=True)

ufunc = lambda psd: puv.compute_wave_params(ds.f.values, psd, fmin=0.05 , fmax=5)

ds['Hm0_high'], ds['Tp_high'], ds['Tm01_high'], ds['Tm02_high'], ds['Tmm10_high'], ds['Tps_high'] = xr.apply_ufunc(ufunc,
                                                                          ds['psd_high'],
                                                                          input_core_dims=[['f']],
                                                                          output_core_dims=[[], [], [], [], [], []],
                                                                          vectorize=True)


In [None]:
## skewness of waves ##
ufunc = lambda p: puv.compute_SkAs(ds.sf.values,p,fpfac =None, fbounds = None)   

ds['Sk'], ds['As'], ds['sig'] =  xr.apply_ufunc(ufunc,
                                                ds['p'], 
                                                input_core_dims=[['N']],
                                                output_core_dims=[[], [], []],
                                                vectorize=True)


In [None]:
# Flatten all variables with dimension 'N'
# for var in ds.data_vars:
#     if 'N' in ds[var].dims:
#         ds[var] = ds[var].stack(t_N=('t', 'N'))

# %% write to file
# we strip all information on burst scale from the dataset to reduce size (and this info is already present in the raw_netcdf version of the data)
# dsTailored = ds.drop_dims('N')
dsTailored = ds


if not os.path.isdir(os.path.join(experimentFolder,'tailored')):
    os.mkdir(os.path.join(experimentFolder,'tailored'))
ncFilePath = os.path.join(experimentFolder, 'tailored', ds0.instrument + '.nc')
dsTailored.to_netcdf(ncFilePath)



In [None]:
# Save with compression
ncFilePath = os.path.join(experimentFolder, 'tailored', ds0.instrument + 'deflate4.nc')
encoding = {var: {"zlib": True, "complevel": 6} for var in dsTailored.data_vars}
dsTailored.to_netcdf(ncFilePath, encoding=encoding)

In [None]:
# Reshape XArray ds

# Make full time vector
t0 = datetime(2024, 12, 12, 9, 0, 0)
t_end = datetime(2024, 12, 27, 16, 20, 0)
t_full   = pd.date_range(t0, t_end, freq='{}s'.format(1 / 16)) # 16hz time vector

# Add t_full to dsTailered as additional dimension
dsTailored = dsTailored.assign_coords(t_full=t_full)
dsTailored = dsTailored.expand_dims('t_full')

In [None]:
# Create a continuous time array
t_continuous = np.array([t + np.timedelta64(int(n), 's') for t in ds.t.values for n in ds.N.values])

# Flatten the burst structure for plotting
zs_flat = ds.zs.values.flatten()
zs_low_flat = ds.zs_low.values.flatten()
zs_high_flat = ds.zs_high.values.flatten()


In [None]:
# Plotting
plt.figure(figsize=(15, 8))

plt.plot(t_continuous, zs_flat, label='zs', alpha=0.7, linewidth=0.9)
plt.plot(t_continuous, zs_high_flat, label='zs_high', alpha=0.7, linewidth=0.9)
plt.plot(t_continuous, zs_low_flat, label='zs_low', alpha=0.7, linewidth=0.9)

ds.h.plot(label='Waterlevel')



# give values below y=0 a different color
# plt.fill_between(t_continuous, zs_flat, 0, where=zs_flat < 0, color='grey', alpha=1, zorder=4, linewidth=1.5)
# plt.fill_between(t_continuous, zs_low_flat, 0, where=zs_low_flat < 0, color='grey', alpha=1, zorder=4, linewidth=1.5)
# plt.fill_between(t_continuous, zs_high_flat, 0, where=zs_high_flat < 0, color='grey', alpha=1, zorder=4, linewidth=1.5)
plt.axhline(y=0, color='k', linestyle='--', label='Bed level')

# Set x-axis limits using specific date stamps
start_time = pd.Timestamp("2024-12-18T12:00:00")
end_time = pd.Timestamp("2024-12-21T00:00:00")
# plt.xlim(start_time, end_time)

plt.axhline((ds.zi-ds.zb).values, color='k', linestyle='--', label='Instrument height above bed')

plt.xlabel('Time')
plt.ylabel('Surface Elevation (m)')
plt.title('Surface Elevation vs Time RBR06 2024-12-16 to 2024-12-27')
plt.legend(loc='upper right')
plt.grid(True)
plt.show()

In [None]:
# plot hmo 
plt.figure(figsize=(15, 8))
ds.Hm0.plot(label='Hm0')
ds.Hm0_low.plot(label=f'Hm0_low (mean={ds.Hm0_low.mean().values:.3g} m)')

ds.Hm0_high.plot(label='Hm0_high')
plt.legend()

plt.xlabel('Time')
plt.ylabel('Hm0 (m)')
plt.title('Hm0 vs Time')
plt.grid(True)

# Set x-axis limits using specific date stamps
start_time = pd.Timestamp("2024-12-18T12:00:00")
end_time = pd.Timestamp("2024-12-21T00:00:00")
# plt.xlim(start_time, end_time)

plt.show()


In [None]:
# plot hs and h
plt.figure(figsize=(16, 3))
ds.h.plot(label='Waterlevel')
# (ds.h + ds.zb).plot(label='Waterlevel + zb (NAP)')

# Interpolate between points
ds.Hm0.plot(label=f'Hm0 (mean={ds.Hm0.mean().values:.3g} m)')
ds.Hm0_low.plot(label=f'Hm0_low (mean={ds.Hm0_low.mean().values:.3g} m)')

# ds.Hm0_high.plot(label='Hm0_high')
plt.legend()

plt.xlabel('Time')
plt.ylabel('(m)')
plt.title('Hm0 vs Time')
plt.grid(True)

# Set x-axis limits using specific date stamps
start_time = pd.Timestamp("2024-12-21T16:00:00")
end_time = pd.Timestamp("2024-12-23T13:00:00")
# plt.xlim(start_time, end_time)

plt.show()

In [None]:
t = 111
end =120

plt.figure(figsize=(16, 2))
(ds.zs.isel(t=t)-ds.zs.isel(t=t).mean()).plot(label = f'Water elevation; Hm0: {ds.Hm0.isel(t=t).values:.3g}m')
# (ds.zs.isel(t=t)-ds.h.isel(t=t).mean()).plot(label = f'Water elevation; Hm0: {ds.Hm0.isel(t=t).values:.3g}m')
plt.grid()
plt.xlim(0,end)
plt.ylabel(f'\u03B7 [m]')
plt.legend(loc='upper right')

plt.figure(figsize=(16, 2))
ds.zs.isel(t=t).plot(color='grey', label= 'total elevation')
ds.zs_low.isel(t=t).plot(color='green', label = f'infragravity waves; Hm0: {ds.Hm0_low.isel(t=t).values:.3g}m')
plt.grid()
plt.xlim(0,end)
plt.legend(loc='upper right')

plt.figure(figsize=(16, 2))
ds.zs.isel(t=t).plot(color='grey', label= 'total elevation')
ds.zs_high.isel(t=t).plot(color='red', label = f'wind waves; Hm0: {ds.Hm0_high.isel(t=t).values:.3g}m')

plt.grid()
plt.xlim(0,end)
plt.legend(loc='upper right');

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Function to remove outliers
# def remove_outliers(data, threshold=3):
#     mean = np.mean(data)
#     std_dev = np.std(data)
#     filtered_data = [x if abs(x - mean) < threshold * std_dev else mean for x in data]
#     return np.array(filtered_data)

def remove_outliers(data, threshold=3):
    mean = np.mean(data)
    std_dev = np.std(data)
    mask = np.abs(data - mean) < threshold * std_dev
    # filtered_data = np.where(mask, data, mean + threshold * std_dev * np.sign(data - mean))  # Replace outliers with closest value within threshold
    filtered_data = np.where(mask, data, np.nan) # Replace outliers with nan
    return filtered_data


data = ds.zs.isel(t=t).values  # Extract the data values for the selected time index t

# Remove outliers from the data
cleaned_data = remove_outliers(data)

# Interpolate NaN values in the pressure signal
# cleaned_data = cleaned_data.interpolate_na(method='linear')

# Calculate the mean of the cleaned data
mean_cleaned_data = np.mean(cleaned_data)

# Plotting the cleaned data
plt.figure(figsize=(16, 2))
plt.plot(ds.N, cleaned_data - mean_cleaned_data, label=f'Water elevation; Hm0: {ds.Hm0.isel(t=t).values:.3g}m')
plt.grid()
plt.xlim(0, 200)
plt.ylabel(f'\u03B7 [m]')
plt.legend(loc='upper right')
plt.show()

print(len(cleaned_data)/16)

In [None]:
from scipy.stats import chi2
t=111
freq = ds.frequencies.isel(t=t)
psd = ds.psd.isel(t=t)


# Calculate confidence intervals
nBlocks = len(ds.zs.isel(t=t)) // nperseg  # Estimate the number of blocks used by Welch's method
edf = round(nBlocks * 2)     # Degrees of freedom (approximately 2 per segment)
alpha = 0.1                  # 90% confidence level

confLow = edf / chi2.ppf(1 - alpha / 2, edf)  # Lower confidence limit
confUpper = edf / chi2.ppf(alpha / 2, edf)    # Upper confidence limit

# Confidence interval bounds for PSD
psd_lower = psd * confLow
psd_upper = psd * confUpper



### background plot 
freq_bg, psd_bg = welch(ds.zs.isel(t=t).values, fs=16, nperseg=19200, detrend='constant', window='hann')
nBlocks_bg = len(ds.zs.isel(t=t).values) // 19200  # Estimate the number of blocks used by Welch's method
edf_bg = round(nBlocks * 2)     # Degrees of freedom (approximately 2 per segment)
confLow_bg = edf / chi2.ppf(1 - alpha / 2, edf)  # Lower confidence limit
confUpper_bg = edf / chi2.ppf(alpha / 2, edf)    # Upper confidence limit
psd_lower_bg = psd_bg * confLow_bg
psd_upper_bg = psd_bg * confUpper_bg
# plt.plot(freq_bg, psd_upper_bg, color='gray', linestyle= '--')
plt.fill_between(freq_bg, psd_lower_bg, psd_upper_bg, color='gray', alpha=0.3, label='Raw spectrum', linestyle='--')

### plotting
plt.fill_between(freq, psd_lower, psd_upper, color='black', alpha=0.3, label='90% Confidence Interval')
plt.semilogy(freq, psd, label='Power Density Spectrum')

plt.xlabel('Frequency (Hz)')
plt.ylabel('PSD (m²/Hz)')
plt.title(f'Power Spectral Density using Welch\'s Method for t ={t}')
plt.grid()
plt.yscale('linear')
# plt.xscale('log')
plt.xscale('linear')
plt.xlim(0,0.6)
plt.legend()
plt.show()

In [None]:
def find_time_index(ds, specific_time):
    """
    Function to find the index 't' for a specific time in the dataset.
    
    Parameters:
    ds (xarray.Dataset): The dataset containing the time dimension.
    specific_time (str or datetime): The specific time to find the index for.
    
    Returns:
    int: The index corresponding to the specific time.
    """
    specific_time = np.datetime64(specific_time)
    time_index = np.where(ds.t.values == specific_time)[0]
    
    if len(time_index) == 0:
        raise ValueError(f"Time {specific_time} not found in the dataset.")
    
    return time_index[0]

# Example usage
specific_time = "2024-12-20T12:00:00"
t_index = find_time_index(ds, specific_time)
print(f"The index for the specific time {specific_time} is {t_index}.")