In [2]:
# based on OSSI_04_Final_scripts from Ruben
import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


import xarray as xr
import matplotlib.pyplot as plt
import sys
#sys.path.append(r'C:\Users\ruro\OneDrive - Boskalis\Documents\python\Hybrid_Dune\Pressure_sensors\S1\OSSI_04')
from scipy.signal import welch
from scipy.ndimage import uniform_filter1d


#import puv 

In [3]:
def ossi_data_reader(datafolder):
    '''
    author: Paul van Wiechen

    Read all WLOG_XXX files in a folder and concatenate them into one pressure time series.

    Make sure that only WLOG_XXX files are in this folder and no other files: every regular
    file in the folder is parsed. Files are processed in sorted filename order, so that
    consecutive log files end up in chronological order (this assumes the file names sort
    in recording order, e.g. zero-padded WLOG_001, WLOG_002, ...).

    File layout as read by this function:
      * first parsed row:  the 7th field holds the sampling frequency [Hz], after a
                           one-character prefix;
      * second parsed row: the first six fields hold two-digit year, month, day, hour,
                           minute and second of the first sample, each after a
                           one-character prefix;
      * from the 4th line of the file onwards: 12 pressure samples per row.
    Files from which fewer than 3 rows can be parsed are ignored.

    Parameters
    ----------
    datafolder : str or path-like
        Folder containing the WLOG_XXX files.

    Returns
    -------
    pandas.DataFrame
        Indexed by time 't', with one column 'p': the file values multiplied by 1e5
        (bar to Pa). Samples without a pressure value are dropped. The frame is empty
        if no usable file is found.
    '''
    directory = str(datafolder)
    frames = []   # one DataFrame per file; concatenated once at the end

    # os.listdir returns names in arbitrary order, so sort to get a reproducible, chronological concatenation
    for filename in sorted(os.listdir(directory)):
        f = os.path.join(directory, filename)

        # skip sub-folders and anything else that is not a regular file
        if not os.path.isfile(f):
            continue

        print('Currently concatenating file ' + f)
        ossi_raw = pd.read_csv(f, header=None, nrows=4, sep=',')
        if len(ossi_raw.index) <= 2:
            continue   # too short to contain configuration, start time and data

        # start time: fields 0..5 of the second parsed row, each stripped of its one-character prefix
        stamp = [ossi_raw[col][1][1:] for col in range(6)]
        t_0 = datetime(int(str(20) + stamp[0]), int(stamp[1]), int(stamp[2]),
                       int(stamp[3]), int(stamp[4]), int(stamp[5]))

        # time step from the sampling frequency in field 6 of the first row
        dt = 1 / float(ossi_raw[6][0][1:])

        # the samples are stored row by row, 12 per line, so flatten row-major into one series
        ossi_tot = pd.read_csv(f, skiprows=3, usecols=list(range(12)), header=None,
                               sep=',', skipinitialspace=True).to_numpy().flatten()

        ossi_temp = pd.DataFrame({
            't': np.array([t_0 + timedelta(seconds=dt * i) for i in range(len(ossi_tot))]),
            'p': ossi_tot * 1e5})   # Bar to Pa

        # an incomplete last row is padded with NaN by read_csv: drop those samples
        frames.append(ossi_temp.dropna())

    if frames:
        ossi = pd.concat(frames, ignore_index=True)
    else:
        ossi = pd.DataFrame({'t': [], 'p': []})

    ossi['p'] = pd.to_numeric(ossi['p'])
    ossi['t'] = pd.to_datetime(ossi['t'])

    return ossi.set_index('t')

In [4]:
def movmean(x, N):
    """
    Moving mean of a 1-D series with window length N, using only the available samples at the edges.

    The window of sample i runs from i - N//2 up to and including i + (N - 1 - N//2). For an
    odd N this is a centred window; for an even N it contains one more sample before i than
    after it (so N=2 gives x_m(i) = [x(i-1) + x(i)] / 2).
    Near the start and end of the series the window is truncated, and the mean is taken over
    the samples that actually fall inside the series (instead of dividing by N).

    Parameters
    ----------
    x : array-like, 1-D
        Input series. It is converted to float, so integer input is not truncated.
    N : int
        Window length in samples. It may exceed len(x).

    Returns
    -------
    numpy.ndarray
        Float array with the same length as x.

    For 2D inspiration, see https://stackoverflow.com/questions/23000260/numpy-two-dimensional-moving-average
    """
    # uniform_filter1d returns the dtype of its input, so integer data would be floored: work in float
    x = np.asarray(x, dtype=float)
    n = len(x)

    # zero-padded moving mean: at the edges this divides the partial sum by N instead of the number of values
    y = uniform_filter1d(x, size=N, mode='constant')

    # number of samples really inside each window: clip the window bounds to the valid index range
    idx = np.arange(n)
    first = np.maximum(idx - N // 2, 0)
    last = np.minimum(idx + (N - 1 - N // 2), n - 1)
    S = last - first + 1

    # rescale from "sum / N" to "sum / number of available values"
    return y * N / S

In [5]:
# # LOAD RAW DATA, SAVE TO NETCDF
# Physical constants -----------------------------------------------------------------------------------
rho = 1027    # kg/m3, for seawater at 9C, avg temp at HKZ measurement station
g = 9.8125    # value Zandmotor

# # Input parameters per file ----------------------------------
# Every list below holds one entry per instrument, in the same order: index 0 is the reference sensor (Ossi 8),
# indices 1-3 are Ossi 1-3 and indices 4-7 are Ossi 4-7.

# subfolder where the files are stored within experimentFolder
subfolder_in_all = ['OSSI 8', 'OSSI 1', 'OSSI 2', 'OSSI 3', 'OSSI 4', 'OSSI 5', 'OSSI 6', 'OSSI 7']

# designated name of the instrument
instrumentname_all = ['refP2 Ossi8', 'S2P2 Ossi1', 'S3P2 Ossi2', 'S4P2 Ossi3',
                      'S1P1 Ossi4', 'S2P1 Ossi5', 'S3P1 Ossi6', 'S4P1 Ossi7']

# [Pa] offset to be added to the pressure data, for instrument calibration
offset_all = [93757, 84717, 99188, 87040, 96189, 93167, 83848, 94741]

# x position of placement in field (mean of measured during period 1 and 2, such that RBRs period 2 get the same coordinates)
xRD_all = [np.nan, 72428.624, 72412.935, 72398.642, 72478.510, 72462.613, 72446.111, 72431.977]

# y position of placement in field
yRD_all = [np.nan, 452140.147, 452119.047, 452098.976, 452138.638, 452116.841, 452094.373, 452074.641]

# z position of instrument (sensor) in field
zi_all = [np.nan, 0.235, 0.238, 0.403, 1.321, 1.411, 1.295, 1.326]

# unique serial number of the instrument
serial_number_all = ['TUD OSSI 8', 'TUD OSSI 1', 'TUD OSSI 2', 'TUD OSSI 3',
                     'TUD OSSI 4', 'TUD OSSI 5', 'TUD OSSI 6', 'TUD OSSI 7']

# factor to correct for clock drift
t_cal_factor_all = [1.0000449, 1.0000429, 1.00003464, 1.0000259, 1.0000329, 1.0000414, 1.0000365, 1.0000250]

# resolution of OSSI in Pa
ossi_resolution = [10, 1, 1, 1, 10, 10, 1, 10]

# Bed level per instrument (rows) at each survey time (columns).
# The highest row of instruments had the bed level measured more often, so they're treated separately here.
zb_i0to3 = np.array([
    [np.nan, np.nan, np.nan],   # i0=Ossi8
    [-0.023, -0.209, -0.009],
    [-0.102, -0.071, -0.119],
    [-0.098, -0.216, -0.182],
])
t_zb_i0to3 = pd.to_datetime([
    '2024-12-17 10:30',
    '2024-12-20 12:20',
    '2024-12-23 11:30',
])

zb_i4to7 = np.array([
    [0.929, 0.810, 0.761, 0.687],   # i4=Ossi4
    [0.884, 0.798, 0.779, 0.685],
    [0.893, 0.804, 0.778, 0.678],
    [0.915, 0.784, 0.749, 0.607],
])
t_zb_i4to7 = pd.to_datetime([
    '2024-12-17 10:30',
    '2024-12-20 12:20',
    '2024-12-22 14:00',
    '2024-12-23 11:30',
])

# Convert RD coordinates to local coordinates: shift to the local origin, then rotate over 36 degrees
a = np.deg2rad(36)
cos_a, sin_a = np.cos(a), np.sin(a)
transformation_matrix = np.array([[cos_a, sin_a],
                                  [-sin_a, cos_a]])
origin_RD = np.array([71683.584, 452356.055])      # RD coordinates of the local origin (0,0)
xy_RD = np.column_stack((xRD_all, yRD_all))        # one (x, y) row per instrument
xy_loc = (xy_RD - origin_RD) @ transformation_matrix
x_loc_all = xy_loc[:, 0]
y_loc_all = xy_loc[:, 1]

t_installed = pd.to_datetime(['2024-12-16 14:00'])
t_removed = pd.to_datetime(['2024-12-23 11:00'])
offset_rbr_ref = -543

# # Input parameters general ---------------------------------------------
experimentFolder = r'O:\HybridDune experiment\data RBR, OSSI\Ossi data'   # main folder, where subfolders per OSSI etc are stored
sf = 20

# # Define input parameters for file i   (start loop) -----------------------
for i in [0]: #range(0,8):
    # Pick the entries of instrument i from the per-instrument lists
    subfolder_in, instrumentName = subfolder_in_all[i], instrumentname_all[i]   # subfolder and designated name of the instrument
    xRD, yRD = xRD_all[i], yRD_all[i]                                           # x, y position of placement in field
    x_loc, y_loc = x_loc_all[i], y_loc_all[i]                                   # x, y position of instrument in local coordinate system [m]
    t_cal_factor = t_cal_factor_all[i]

    # Instrument 0 is the reference sensor: instrument and bed height are only applicable for sensors installed at the beach
    if i >= 1:
        zi = zi_all[i]
        t_zi = pd.to_datetime('2024-12-17 10:30')
        if i >= 4:
            # instruments 4-7 have their own bed level table, which starts at instrument 4
            zb, t_zb = zb_i4to7[i - 4], t_zb_i4to7
        else:
            zb, t_zb = zb_i0to3[i], t_zb_i0to3

    # UNCOMMENT SECTION TO LOAD RAW DATA
    # # Do the reading from file and cast in xarray dataset ----------------
    # dfp = ossi_data_reader(os.path.join(experimentFolder,subfolder_in))                                                #specific map for data OSSI
    # ds = dfp.to_xarray()
    # #ds['p'] = ds.p.astype('int32')  #Code ruben. Daan: check effect. 
    # #ds.p.attrs = {'long_name': 'pressure', 'units': 'Pa'}

    # # add dimension t_zb and variables zb, zi and t_zi, for bed level observations over time and instrument height
    # if i >= 1: # instrument 0 is reference sensor, instrument and bed height only applicable for the remaining sensors
    #     ds['t_zb'] = t_zb                                    # t_zb is a vector instead of scalar, so this syntax adds a dimension (instead of variable)
    #     ds['zb'] = ('t_zb',zb)                               # initial bed level [m NAP]
    #     ds['t_zi'] = t_zi                                    # time that zi was measured
    #     ds['zi'] = zi                                        # instrument height [m NAP]

    # # Add instrument variables for metadata: location, frequency
    # ds['x_RD'] = xRD                                         # x position of instrument, in RDNAP coordinates [m]
    # ds['y_RD'] = yRD                                         # y position of instrument, in RDNAP coordinates [m]
    # ds['x_local'] = x_loc                                    # x position of instrument, in local coordinate system [m]
    # ds['y_local'] = y_loc                                    # y position of instrument, in local coordinate system [m] 
    # ds['sf'] = sf                                            # sampling frequency [hz]
    # ds['t_cal_factor'] = t_cal_factor                        # factor to use to synchronize time
    # ds['t_installed'] = t_installed                          # time that the instrument was installed at the indicated height and location at the beach
    # ds['t_removed'] = t_removed                              # time that the instrument was removed

    # # Add global attribute metadata
    # ds.attrs = {
    #     'Conventions': 'CF-1.6',
    #     'name': 'Pressure sensor ' + instrumentName[0:-4] + 'Ossi, period 1',  # Daan: check if OSssi is already in instrumentName, prevent double
    #     'instrument': 'Pressure sensor ' + instrumentName,
    #     'instrument type': 'OSSI',
    #     'instrument serial number': '{}'.format(serial_number_all[i]),
    #     'epsg': 28992,
    #     'time zone': 'UTC+1',
    #     'start time': pd.to_datetime(ds.t.values[0]).strftime("%d-%b-%Y %H:%M:%S"),
    #     'end time':   pd.to_datetime(ds.t.values[-1]).strftime("%d-%b-%Y %H:%M:%S"),
    #     'summary': 'HybridDune experiment: raw pressure data',
    #     'contact person': 'Daan Poppema',
    #     'emailadres': 'd.w.poppema@tudelft.nl',
    #     'construction datetime': datetime.now().strftime("%d-%b-%Y %H:%M:%S"),
    #     'version': 'v1',
    #     'comment_1': 'constructed with xarray',
    #     'url of online dataset': 'ADD LATER'}      # DAAN: ADD URL LATER

    # # Add attributes to variables for metadata
    # local_coord_sys = 'x=cross-shore (positive=landward); y=alongshore (positive is to north-east); (800,200) is the southern seaward corner of the containers'
    # coord_conv   = '(0,0) local is (71683.584,452356.055) RD coordinates; local x-axis is 36° clockwise from RD x-axis; i.e. [x_loc y_loc] = [x_RD y_RD] - [x0 y0] .* [cosd(36) sind(36); -sind(36) cosd(36)]'
    # cal_text = 'Add ' + '{}'.format(offset_all[i]) + ' Pa to raw pressure to obtain absolute pressure. Calibration factor based on the period 23dec, 19:11 to 19:26 (during the calibration test)'
    # ds.p.attrs = {'units': 'Pa', 'long_name': 'pressure', 'comments': 'raw data', 'calibration': cal_text}
    # ds.x_RD.attrs = {'units': 'm', 'long_name': 'x position of instrument in RDNAP coordinates', 'epsg': 28992}
    # ds.y_RD.attrs = {'units': 'm', 'long_name': 'y position of instrument in RDNAP coordinates', 'epsg': 28992}
    # ds.x_local.attrs = {'units': 'm', 'long_name': 'cross-shore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    # ds.y_local.attrs = {'units': 'm', 'long_name': 'alongshore position of instrument in local coordinate system','local_coordinate_system': local_coord_sys, 'coordinate_conversion': coord_conv}
    # ds.sf.attrs = {'units': 'Hz', 'long_name': 'sampling frequency'}
    # ds.t_cal_factor.attrs = {'long_name': 'time dillation factor to correct for clock drift', 'comment': 'to apply, multiply time since instrument start (time series start) with t_cal_factor. Not yet applied, the time series contains the raw data'}
    # ds['t_installed'].attrs = {'long name': 'date and time that the instrument was installed at the indicated height and location at the beach'}
    # ds['t_removed'].attrs = {'long name': 'date and time that the instrument was removed'}

    # if i >= 1: # instrument 0 is reference sensor, instrument and bed height only applicable for the remaining sensors
    #     ds.zi.attrs = {'units': 'm +NAP', 'long_name': 'elevation of sensor'}  # instrument height
    #     ds.zb.attrs = {'units': 'm +NAP', 'long_name': 'bed level'}  
    #     ds.t_zi.attrs = {'long name': 'time that instrument elevation was measured'}
    #     ds.t_zb.attrs = {'long name': 'time that bed level at instrument was measured'}

    # # Save to netcdf -------------------------------
    # encoding = {var: {"zlib": True, "complevel": 4} for var in list(ds.data_vars) + list(ds.coords)}  # Apply deflate compression to all variables and coordinates in netCDF
    # ds.encoding = encoding  # add the encoding to the dataset (not really necessary, but allows retrieval later on)

    # ncOutDir = os.path.join(experimentFolder, 'raw NetCDF')
    # if not os.path.isdir(ncOutDir):
    #     os.mkdir(ncOutDir)
    # ds.to_netcdf(os.path.join(ncOutDir, 'Pressure sensor ' + instrumentName + ' raw data - period 1.nc'), encoding=encoding)

In [6]:
# # CALCULATE ATMOSPHERIC AIR PRESSURE, ...
# Smoothing window for the atmospheric pressure.
# Measured with 8 hz. But p_water and p_air are measured up to 100m apart (and p_air inside), so they are
# affected differently by wind gusts: filter out the short-term variation.
t_smooth_air = 10 # [s]

# Open raw data file of reference sensor -------------------------------------------------------------------
dataFile = r'O:\HybridDune experiment\data RBR, OSSI\copy RBR Udrive series1\raw NetCDF\Pressure sensor refP1 RBR4 raw data - period 1.nc'
ds = xr.open_dataset(dataFile)

# Calibrate reference sensor: add offset to pressure data
ds['p'] = ds.p + offset_rbr_ref

# Moving average over a window of 8 hz * t_smooth_air seconds
n_smooth_air = 8 * t_smooth_air
pAir_smooth_8hz = movmean(ds['p'], n_smooth_air)

# Interpolate to a 20 hz time vector spanning the reference record
t_first, t_last = ds.t.values[0], ds.t.values[-1]
tAir_20hz = pd.date_range(start=t_first, end=t_last, freq=f'{1 / 20}s')
pAir_smooth_20hz = np.interp(tAir_20hz, ds['t'], pAir_smooth_8hz)

# Dataset holding the smoothed 20 hz air pressure
ds_air_20hz = xr.Dataset(
    {'pAir_smooth': (('t',), pAir_smooth_20hz)},
    coords={'t': tAir_20hz},
)

In [7]:
# Instrument to be corrected, filtered
# For every beach instrument (i = 1..7; i = 0 is the reference sensor): correct the clock drift, resample to an
# exact 20 hz time vector, calibrate, subtract the smoothed air pressure and save the result to the QC folder.

def _seconds_since(t, t_ref):
    """Return the datetime-like values `t` as float seconds elapsed since `t_ref`.

    np.interp works on floats; times relative to a nearby reference keep sub-sample precision.
    """
    t_ns = np.asarray(t, dtype='datetime64[ns]')
    t_ref_ns = np.asarray(t_ref, dtype='datetime64[ns]')
    return (t_ns - t_ref_ns) / np.timedelta64(1, 's')


# Output folder (created together with any missing parent folder)
qc_dir = os.path.join(experimentFolder, 'QC')
os.makedirs(qc_dir, exist_ok=True)

# Smoothed 20 hz air pressure of the reference sensor, as plain arrays
tAir_values = ds_air_20hz['t'].values
pAir_values = ds_air_20hz['pAir_smooth'].values

for i in range(1,8):
    instrumentName = instrumentname_all[i]                                                                               # designated name of the instrument
    dataFile = os.path.join(experimentFolder,'raw NetCDF', 'Pressure sensor ' + instrumentName + ' raw data - period 1.nc')
    print(dataFile)
    ds_raw = xr.load_dataset(dataFile)  # load into memory and close the file again

    # Correct clock drift
    t = ds_raw.t.values                 # raw time
    dt = t - t[0]                       # time since instrument start
    t = t[0] + dt * ds_raw.t_cal_factor.values   # drift-corrected time of every raw sample

    # Exact 20hz time vector for the period that the instrument was installed at the beach
    # NOTE(review): t_installed / t_removed are read from `ds`, the reference-sensor dataset opened in the
    # air pressure cell, not from the file of this instrument - confirm that both hold the same times.
    t0 = ds.t_installed.values
    t_end = ds.t_removed.values
    t_20hz   = pd.date_range(t0, t_end, freq='{}s'.format(1 / 20)) # 20hz time vector

    # Interpolate instrument and air pressure to the same 20hz time vector, so that they can be subtracted
    t_ref = t_20hz.values[0]
    s_20hz = _seconds_since(t_20hz, t_ref)
    P_exact20hz = np.interp(s_20hz, _seconds_since(t, t_ref), ds_raw['p'].values)
    pAir_20hz = np.interp(s_20hz, _seconds_since(tAir_values, t_ref), pAir_values,
                          left=np.nan, right=np.nan)   # NaN (not the end value) where no air pressure is available

    # Absolute pressure: add calibration offset per instrument
    p_abs = P_exact20hz + offset_all[i]

    # Relative pressure: correct the pressure signal with pAir (air pressure is already calibrated)
    p_rel = p_abs - pAir_20hz

    # Correct negative pressures
    p_rel = np.where(p_rel < 0, 0, p_rel)               # set negative pressures to zero

    # Build output dataset: metadata variables and global attributes of the raw file, with the raw time
    # dimension replaced by the 20hz time vector. Pressures are rounded to 1 Pa = 0.1 mm
    ds0 = ds_raw.drop_dims('t').assign_coords(t=t_20hz)
    ds0['p_abs'] = ('t', np.round(p_abs))
    ds0['p_rel'] = ('t', np.round(p_rel))

    # Add/replace instrument metadata
    cal_text = '{}'.format(offset_all[i]) + ' Pa added to raw pressure, based on the period 23dec, 19:11 to 19:26, during the calibration test'
    ds0['p_abs'].attrs = {'units': 'Pa', 'long_name': 'Absolute pressure', 'comments': 'calibrated and corrected for clock drift','calibration': cal_text}
    cal_text = '{}'.format(offset_all[i]) + ' Pa added to raw pressure of instrument (and ' + '{}'.format(offset_rbr_ref) + ' Pa added to air pressure of sensor refP1), based on the period 23dec, 19:11 to 19:26, during the calibration test'
    ds0['p_rel'].attrs = {'units': 'Pa', 'long_name': 'Relative pressure', 'comments': 'corrected for air pressure and calibrated','calibration': cal_text}
    ds0.attrs['summary'] = 'Hybrid-Dune campaign: pressure corrected for air pressure'
    ds0.attrs['start time'] = pd.to_datetime(t_20hz[0]).strftime("%d-%b-%Y %H:%M:%S")    # start/end of the series in this file
    ds0.attrs['end time'] =   pd.to_datetime(t_20hz[-1]).strftime("%d-%b-%Y %H:%M:%S")
    ds0.t_cal_factor.attrs = {'long_name': 'time dillation factor applied to correct for clock drift', 'comment': 'applied by multiplying time since instrument start (start of raw data series) with t_cal_factor.'}
    ds0.p_abs.attrs['resolution_raw_data'] = '{} Pa'.format(ossi_resolution[i])  # add resolution of OSSI as attribute
    ds0.p_rel.attrs['resolution_raw_data'] = '{} Pa'.format(ossi_resolution[i])

    # Save to netcdf -------------------------------
    encoding = {var: {"zlib": True, "complevel": 4} for var in list(ds0.data_vars) + list(ds0.coords)}  # Apply deflate compression to all variables and coordinates in netCDF
    ds0.encoding = encoding  # add the encoding to the dataset (not really necessary, but allows retrieval later on)

    ncFilePath = os.path.join(qc_dir, 'Pressure sensor ' + instrumentName + ' p_rel - period 1.nc')
    ds0.to_netcdf(ncFilePath, encoding=encoding) # save to netcdf

O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S2P2 Ossi1 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S3P2 Ossi2 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S4P2 Ossi3 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S1P1 Ossi4 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S2P1 Ossi5 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S3P1 Ossi6 raw data - period 1.nc
O:\HybridDune experiment\data RBR, OSSI\Ossi data\raw NetCDF\Pressure sensor S4P1 Ossi7 raw data - period 1.nc


In [8]:
# Show the clock drift factor (with its attributes) of the last processed instrument
ds0['t_cal_factor']