In [1]:

import pandas as pd
import numpy as np
import time
import netCDF4 as nc

# NWA

In [2]:
obs_fldr = "raw_data/NWA/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NWTideA.2022.06.QC_dataReturn.Rev0.nc"]

ds = nc.Dataset(obs_dir[0])
ds

<class 'netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    Conventions: MetOcean/v3
    client: RVO
    project: J3707
    location: NW A
    logging_system: mantacom
    history: Thu Jul 21 15:58:28 2022: ncatted -O -a height_ref,DepthHeight,o,c,LAT NWTideA.2022.06.QC_dataReturn.nc
2022-07-01 14:47:14 [CJC] moecat 42674 with args: NWTideA.2022.06.Standby.QC_dataReturn.nc NWTideA.2022.06.Main.QC_dataReturn.nc --output=NWTideA.2022.06.QC_dataReturn.nc
2022-07-04 16:09:11 [JTL] edited in moeqc
2022-07-05 09:09:07 [JTL] edited in moeqc
2022-07-05 10:15:44 [JTL] edited in moeqc
2022-07-05 14:03:25 [JTL] edited in moeqc
2022-07-05 14:15:11 [JTL] edited in moeqc
2022-07-05 14:27:10 [JTL] edited in moeqc
2022-07-07 17:28:00 [noreikas] edited in moeqc
2022-07-08 11:32:30 [noreikas] edited in moeqc
2022-07-08 11:33:06 [noreikas] edited in moeqc
Processed with ncsetscalar DepthHeight -29.1 by CJC on Thu Jul 21 16:37:32 2022
    df_name: NWTideA
    arrival_mechan

In [3]:
ds["Time"]

<class 'netCDF4.Variable'>
int32 Time(records)
    long_name: Time
    units: seconds since 1970-01-01T00:00:00+0000
unlimited dimensions: records
current shape = (4266,)
filling on, default _FillValue of -2147483647 used

In [4]:
obs_fldr = "raw_data/NWA/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NWTideA.2022.06.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2022.07.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2022.08.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2022.09.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2022.10.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2022.11.QC_dataReturn.Rev0.part1.nc",
           f"{obs_fldr}NWTideA.2022.11.QC_dataReturn.Rev0.part2.nc",
           f"{obs_fldr}NWTideA.2022.12.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}NWTideA.2023.01.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.02.QC_dataReturn.Rev1.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.03.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.04.QC_dataReturn.Rev1.nc", 
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.05.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.06.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.07.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.08.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.09.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_11_NW-A1_D03.Tide.2023.10.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_18_NW-A2_D04.Tide.2023.11.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_18_NW-A2_D04.Tide.2023.12.QC_dataReturn.Rev0.nc"]

all_tide_list = []
longitudes = []
latitudes = []
for o in obs_dir:
    ds = nc.Dataset(o)
    formatted_time = []
    for epoch_time in ds["Time"][:].data:
        formatted_time.append(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(epoch_time)))

    data = ds["TideHeight"][:].data.tolist()
    if len(ds["Longitude"][:].data.shape) >0:
        lon = ds["RefLongitude"][:].data.item()
        lat = ds["RefLatitude"][:].data.item()
    else:
        lon = ds["Longitude"][:].data.item()
        lat = ds["Latitude"][:].data.item()
    longitudes.append(lon)
    latitudes.append(lat) 

    tide = pd.DataFrame({"tide": data},index= pd.DatetimeIndex(formatted_time))
    
    if "TideHeightQual" in ds.variables:
        quality_filter = ds.variables["TideHeightQual"][:].data<2 # Quality filter
        tide = tide[quality_filter]
    tide = tide.drop(tide[tide['tide'] < -100].index)# Remove invalid values
    tide["tide"] = tide["tide"]-np.mean(tide["tide"])# Center around zero
    all_tide_list.append(tide)

all_tide=pd.concat(all_tide_list)
# Replace missing entries with nan such that we have a full date time index
all_tide = all_tide.set_index(all_tide.index).resample('10min').sum().replace(0.00, np.nan) 


In [5]:
longitudes

[3.13108,
 3.13108,
 3.13108,
 3.13108,
 3.13108,
 3.130933,
 3.13108,
 3.1311,
 3.1311,
 3.1311,
 3.1311,
 3.140433,
 3.141667,
 3.141667,
 3.141667,
 3.141667,
 3.141667,
 3.141667,
 3.141667,
 3.141667]

In [6]:
latitudes

[53.379,
 53.379,
 53.379,
 53.379,
 53.379,
 53.379083,
 53.379,
 53.38793,
 53.38793,
 53.38793,
 53.38793,
 53.3800083,
 53.380278,
 53.380278,
 53.380278,
 53.380278,
 53.380278,
 53.380278,
 53.380278,
 53.380278]

In [7]:
# Calculate rolling mean value
all_tide_rollmean = np.round(all_tide.rolling(window='30min',closed="left").mean()[::3],decimals=4)
all_tide_rollmean = all_tide_rollmean.dropna()

In [8]:
all_tide_rollmean.index.name = "datetime_UTC"
all_tide_rollmean.columns = ["water_level"]
all_tide_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-06-01 00:30:00,-0.5643
2022-06-01 01:00:00,-0.6550
2022-06-01 01:30:00,-0.7199
2022-06-01 02:00:00,-0.6690
2022-06-01 02:30:00,-0.6384
...,...
2023-12-31 21:00:00,0.6983
2023-12-31 21:30:00,0.7428
2023-12-31 22:00:00,0.7753
2023-12-31 22:30:00,0.7262


In [9]:
all_tide_rollmean.to_csv(f"../observations/NWA_wl.csv")

In [10]:
locations_NWA = pd.DataFrame({"station": ["NWA"],
                              "longitude": [np.round(np.mean(longitudes),decimals=6)],
                              "latitude": [np.round(np.mean(latitudes),decimals=6)]})
# locations_NWA.to_csv("../../MIKE21HD_DutchCoast/data/observations/NWA_locations.csv")

In [11]:
locations_NWA

Unnamed: 0,station,longitude,latitude
0,NWA,3.135779,53.381352


# IJVA

In [12]:

obs_fldr = "raw_data/IJVA/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}IJVTideA.2022.05.QC_dataReturn.Rev0.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables

{'Time': <class 'netCDF4.Variable'>
 float64 Time(records)
     long_name: Time
     units: seconds since 1970-01-01T00:00:00.000Z
 unlimited dimensions: records
 current shape = (4257,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'EllipsoidHeight': <class 'netCDF4.Variable'>
 float64 EllipsoidHeight(records)
     missing_value: -1.9938419936773738e+37
     _FillValue: -1.9938419936773738e+37
     coordinates: Time Longitude Latitude
     description: Height above Ellipsoid
     units: m
     quality_variable: EllipsoidHeightQual
     long_name: Ellipsoid Height
 unlimited dimensions: records
 current shape = (4257,)
 filling on,
 'SigmaHeight': <class 'netCDF4.Variable'>
 float64 SigmaHeight(records)
     missing_value: -1.9938419936773738e+37
     _FillValue: -1.9938419936773738e+37
     coordinates: Time Longitude Latitude
     description: Height standard deviation
     units: m
     quality_variable: SigmaHeightQual
     long_name: Sigma Height
 unlimited dimen

In [13]:
obs_fldr = "raw_data/IJVA/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}IJVTideA.2022.05.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.06.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.07.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.08.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.09.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.10.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.11.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}IJVTideA.2022.12.QC_dataReturn.Rev0.part1.nc",
           f"{obs_fldr}IJVTideA.2022.12.QC_dataReturn.Rev0.part2.nc",
           f"{obs_fldr}IJVTideA.2023.01.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_09_IJV-A2_D02.Tide.2023.02.QC_dataReturn.Rev1.nc",
           f"{obs_fldr}J3707_09_IJV-A2_D02.Tide.2023.03.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_09_IJV-A2-D02.Tide.2023.04.QC_dataReturn.Rev1.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.05.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.06.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.07.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.08.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.09.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_15_IJV-A1_D03.Tide.2023.10.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_16_IJV-A2_D04.Tide.2023.11.QC_dataReturn.Rev0.nc",
           f"{obs_fldr}J3707_16_IJV-A2_D04.Tide.2023.12.QC_dataReturn.Rev0.nc"]

all_tide_list = []
longitudes_IJVA = []
latitudes_IJVA = []
for o in obs_dir:
    ds = nc.Dataset(o)
    formatted_time = []
    for epoch_time in ds["Time"][:].data:
        formatted_time.append(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(epoch_time)))

    data = ds["TideHeight"][:].data.tolist()
    if len(ds["Longitude"][:].data.shape) >0:
        lon = ds["RefLongitude"][:].data.item()
        lat = ds["RefLatitude"][:].data.item()
    else:
        lon = ds["Longitude"][:].data.item()
        lat = ds["Latitude"][:].data.item()
    longitudes_IJVA.append(lon)
    latitudes_IJVA.append(lat) 

    tide = pd.DataFrame({"tide": data},index=pd.DatetimeIndex(formatted_time))
    if "TideHeightQual" in ds.variables:
        quality_filter = ds.variables["TideHeightQual"][:].data<2
        tide = tide[quality_filter]
    tide = tide.drop(tide[tide['tide'] < -100].index)# Remove invalid values
    tide["tide"] = tide["tide"]-np.mean(tide["tide"])# Center around zero
    
    
    all_tide_list.append(tide)

    

all_tide=pd.concat(all_tide_list)
# Replace missing entries with nan such that we have a full date time index
all_tide = all_tide.set_index(all_tide.index).resample('10min').sum().replace(0.00, np.nan) 
all_tide

Unnamed: 0,tide
2022-05-01 00:00:00,-0.801037
2022-05-01 00:10:00,-0.851124
2022-05-01 00:20:00,
2022-05-01 00:30:00,-0.706967
2022-05-01 00:40:00,-0.789162
...,...
2023-12-15 20:20:00,0.500285
2023-12-15 20:30:00,0.476667
2023-12-15 20:40:00,0.433284
2023-12-15 20:50:00,0.410645


In [14]:
latitudes_IJVA

[52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.88486,
 52.87621667,
 52.88558333,
 52.88558333,
 52.88558333,
 52.88558333,
 52.88558333,
 52.88558333,
 52.87603,
 52.87603]

In [15]:
longitudes_IJVA

[3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.71038,
 3.70968333,
 3.71098333,
 3.71098333,
 3.71098333,
 3.71098333,
 3.71098333,
 3.71098333,
 3.70953,
 3.70953]

In [16]:
# Rolling mean value
all_tide_rollmean = np.round(all_tide.rolling(window='30min',closed="left").mean()[::3],decimals=4)
all_tide_rollmean = all_tide_rollmean.dropna()

In [17]:
all_tide_rollmean.index.name = "datetime_UTC"
all_tide_rollmean.columns = ["water_level"]
all_tide_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-05-01 00:30:00,-0.8261
2022-05-01 01:00:00,-0.7621
2022-05-01 01:30:00,-0.7461
2022-05-01 02:00:00,-0.4822
2022-05-01 02:30:00,-0.2937
...,...
2023-12-15 19:00:00,0.3398
2023-12-15 19:30:00,0.4233
2023-12-15 20:00:00,0.4669
2023-12-15 20:30:00,0.4907


In [18]:
all_tide_rollmean.to_csv("../observations/IJVA_wl.csv")

In [19]:
locations_IJVA = pd.DataFrame({"station": ["IJVA"],
                              "longitude": [np.round(np.mean(longitudes_IJVA),decimals=6)],
                              "latitude": [np.round(np.mean(latitudes_IJVA),decimals=6)]})
locations_all = pd.concat([locations_NWA,locations_IJVA])

In [20]:
locations_all.to_csv("../observations/stations.csv")

# DDW1A

In [21]:
# The modelskill package can be used to compare model results with observations.
# For more info on modelskill, see https://github.com/DHI/modelskill
obs_fldr = "raw_data/DDW1A/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}DDW1A2_GEOxyz_D01_Hydro_WL_2023-06_dataReturn_Rev2_20230915.nc"]

ds = nc.Dataset(obs_dir[0])
ds

<class 'netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    instrument: Tritech
    platform: In Situ Ocean-based Platforms > MOORINGS
    title: Oceanographic and meteorological data collected from an Accurasea Floating LiDAR System by GeoXYZ for RVO, project VSA MC Lot1&Lot2
    creator_type: person
    creator_institution: Aqua Vision BV
    publisher_type: person
    publisher_institution: RVO
    program: Accurasea Data Processing Chain (ADPC)
    date_created: 2023-09-15 09:53:35+0:00
    summary: Oceanographic and meteorological data collected from an Accurasea Floating LiDAR System by GeoXYZ for RVO, project VSA MC Lot1&Lot2
    source: Accurasea Data Processing Chain (ADPC)
    cdm_data_type: Station
    sea_name: North Sea
    id: DDW1A2_GEOxyz_D01_Hydro_WL_2023-06_dataReturn_Rev2_20230915.nc
    time_coverage_start: 2023-06-01 00:00:00+0:00
    time_coverage_end: 2023-06-30 23:50:00+0:00
    time_coverage_duration: 1month
    time_coverage_resolu

In [22]:
ds.variables

{'time': <class 'netCDF4.Variable'>
 int32 time(time)
     long_name: time
     standard_name: time
     units: seconds since 1970-01-01 00:00:00+0:00
     calendar: julian
     _FillValue: 2147483647
 unlimited dimensions: 
 current shape = (4247,)
 filling on,
 'lat': <class 'netCDF4.Variable'>
 float32 lat(time)
     _FillValue: -9999.0
     long_name: Latitude
     standard_name: latitude
     units: degrees_north
     data_min: 54.197144
     data_max: 54.19799
 unlimited dimensions: 
 current shape = (4247,)
 filling on,
 'lon': <class 'netCDF4.Variable'>
 float32 lon(time)
     _FillValue: -9999.0
     long_name: Longitude
     standard_name: longitude
     units: degrees_east
     data_min: 5.366872
     data_max: 5.368743
 unlimited dimensions: 
 current shape = (4247,)
 filling on,
 'WaterLevel': <class 'netCDF4.Variable'>
 float32 WaterLevel(time)
     long_name: waterlevel_above_msl
     standard_name: waterlevel
     units: m
     _FillValue: 3.4028235e+38
     data_min: -

In [23]:
# Collect observation directories in list
obs_dir = [f"{obs_fldr}DDW1A2_GEOxyz_D01_Hydro_WL_2023-06_dataReturn_Rev2_20230915.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D01_Hydro_WL_2023-07_dataReturn_Rev2_20230927.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D01_Hydro_WL_2023-08_dataReturn_Rev2_20231020.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D02_Hydro_WL_2023-08_dataReturn_Rev2_20231020.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D02_Hydro_WL_2023-09_dataReturn_Rev2_20231027.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D02_Hydro_WL_2023-10_dataReturn_Rev2_20231127.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D02_Hydro_WL_2023-11_dataReturn_Rev3_20240319.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D03_Hydro_WL_2023-11_dataReturn_Rev3_20240319.nc",
           f"{obs_fldr}DDW1A2_GEOxyz_D03_Hydro_WL_2023-12_dataReturn_Rev2_20240205.nc"]

all_wl_list = []
longitudes_DDW1A = []
latitudes_DDW1A = []
for o in obs_dir:
    ds = nc.Dataset(o)
    formatted_time = []
    for epoch_time in ds["time"][:].data:
        formatted_time.append(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(epoch_time)))

    data = ds["WaterLevel"][:].data.tolist()
    lon = np.nanmean(ds["lon"][:].data)
    lat = np.nanmean(ds["lat"][:].data)
    longitudes_DDW1A.append(lon)
    latitudes_DDW1A.append(lat) 

    wl = pd.DataFrame({"wl": data}, index = pd.DatetimeIndex(formatted_time))
    if "WaterLevel" in ds.variables:
        quality_filter = ds.variables["WaterLevel_qc"][:].data<2
        wl = wl[quality_filter]
    wl = wl.drop(wl[wl['wl'] < -100].index)# Remove invalid values
    wl["wl"] = wl["wl"]-np.mean(wl["wl"])# Center around zero
    
    
    all_wl_list.append(wl)

all_wl=pd.concat(all_wl_list)
# Replace missing entries with nan such that we have a full date time index
all_wl = all_wl.set_index(all_wl.index).resample('10min').sum().replace(0.00, np.nan) 
all_wl

Unnamed: 0,wl
2023-06-01 00:00:00,-0.807840
2023-06-01 00:10:00,-0.816840
2023-06-01 00:20:00,-0.806840
2023-06-01 00:30:00,-0.785840
2023-06-01 00:40:00,-0.765840
...,...
2023-12-31 23:10:00,0.370619
2023-12-31 23:20:00,0.400619
2023-12-31 23:30:00,0.370619
2023-12-31 23:40:00,0.340619


In [24]:
longitudes_DDW1A

[5.3678,
 5.368164,
 5.3680196,
 5.3680987,
 5.3680024,
 5.368092,
 5.3684177,
 5.368141,
 5.3681545]

In [25]:
# Rolling mean value
all_wl_rollmean = np.round(all_wl.rolling(window='30min',closed="left").mean()[::3],decimals=4)
all_wl_rollmean = all_wl_rollmean.dropna()

In [26]:
all_wl_rollmean.index.name = "datetime_UTC"
all_wl_rollmean.columns = ["water_level"]
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2023-06-01 00:30:00,-0.8105
2023-06-01 01:00:00,-0.7692
2023-06-01 01:30:00,-0.6743
2023-06-01 02:00:00,-0.5285
2023-06-01 02:30:00,-0.3228
...,...
2023-12-31 21:30:00,0.2873
2023-12-31 22:00:00,0.3373
2023-12-31 22:30:00,0.3906
2023-12-31 23:00:00,0.3440


In [27]:
all_wl_rollmean.to_csv(f"../observations/DDW1A_wl.csv")

In [28]:
locations_DDW1A = pd.DataFrame({"station": ["DDW1A"],
                              "longitude": [np.round(np.mean(longitudes_DDW1A),decimals=6)],
                              "latitude": [np.round(np.mean(latitudes_DDW1A),decimals=6)]})
locations_all = pd.concat([locations_NWA,locations_IJVA,locations_DDW1A])

In [29]:
locations_all.to_csv("../observations/stations.csv")

# F3 platform

In [30]:
# The modelskill package can be used to compare model results with observations.
# For more info on modelskill, see https://github.com/DHI/modelskill
obs_fldr = "raw_data/F3platform/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_F3platformTG_202201.nc"]

ds = nc.Dataset(obs_dir[0])
ds

<class 'netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    data_mode: R
    id: NO_TS_TG_F3platformTG_202201
    institution: Rijkswaterstaat Water- Traffic and Environment
    institution_country: Netherlands
    institution_edmo_code: 1526
    naming_authority: Copernicus Marine In Situ
    platform_code: F3platformTG
    source: land/onshore structure
    source_platform_category_code: 14
    title: NWS - NRT in situ Observations
    last_latitude_observation: 54.85000
    last_longitude_observation: 4.72000
    geospatial_lat_min: 54.85
    geospatial_lat_max: 54.85
    geospatial_lon_min: 4.72
    geospatial_lon_max: 4.72
    last_date_observation: 2022-01-31T23:50:00Z
    institution_references: https://www.rijkswaterstaat.nl/english/index.aspx
    platform_name: 
    summary: Oceanographic data collated by the North West Shelf Production Unit
    cdm_data_type: timeSeries
    geospatial_vertical_min: 0
    geospatial_vertical_max: 0
    time_coverage_start:

In [31]:
ds["SLEV_QC"]
ds["TIME"]

<class 'netCDF4.Variable'>
float64 TIME(TIME)
    long_name: Time
    standard_name: time
    units: days since 1950-01-01T00:00:00Z
    valid_min: -90000.0
    valid_max: 90000.0
    axis: T
    ancillary_variables: TIME_QC
    calendar: standard
unlimited dimensions: 
current shape = (4446,)
filling on, default _FillValue of 9.969209968386869e+36 used

In [32]:
import datetime

In [33]:
formatted_time = []
start = datetime.date(1950,1,1)
for days in ds["TIME"][:].data:
    formatted_time.append(start + datetime.timedelta(days = days))

data = ds["SLEV"][:].data.squeeze().tolist()
lon = np.nanmean(ds["LONGITUDE"][:].data)
lat = np.nanmean(ds["LATITUDE"][:].data)

wl = pd.DataFrame({"slev": data}, index = pd.DatetimeIndex(formatted_time))
quality_filter = ds["SLEV_QC"][:].data<=2
wl = wl[quality_filter]
wl = wl.drop(wl[wl['slev'] < -100].index)# Remove invalid values
wl["slev"] = wl["slev"]-np.mean(wl["slev"])# Center around zero


# all_wl=pd.concat(all_wl_list)
# # Replace missing entries with nan such that we have a full date time index
# all_wl = all_wl.set_index(all_wl.index).resample('10min').sum().replace(0.00, np.nan) 
# all_wl

In [34]:
# There are several data points on the same day.
# We assume that data is uniformly distributed across the day.
# We will add a small time increment to each data point to make them unique.

def add_time_stamps(df):
    # Resample dates with duplicate indices by adding hours and minutes
    # Find duplicate dates in index
    duplicate_dates = df.index[df.index.duplicated(keep=False)]
    if len(duplicate_dates) == 0:
        return df
    
    for date in set(duplicate_dates):
        duplicates = df.loc[date]
        # Calculate time increment based on the number of duplicates
        num_duplicates = len(duplicates)
        time_increment = datetime.timedelta(days=1) / num_duplicates

        # Assign new unique timestamps to each duplicate
        times = [date + i * time_increment for i in range(len(duplicates))]
        df.loc[date, 'new_index'] = times
    
    # Set the new unique datetime index
    df.set_index('new_index', inplace=True)
    df.index.name = 'datetime'  # Rename the index for clarity
    
    return df

In [35]:
wl = add_time_stamps(wl)

In [36]:
def load_and_save_CMEMS_data(obs_dir,platform_name):

    all_wl_list = []
    longitudes = []
    latitudes = []
    for o in obs_dir:
        ds = nc.Dataset(o)
        formatted_time = []
        start = datetime.date(1950,1,1)
        for days in ds["TIME"][:].data:
            formatted_time.append(start + datetime.timedelta(days = days))

        data = ds["SLEV"][:].data.squeeze().tolist()
        lon = np.nanmean(ds["LONGITUDE"][:].data)
        lat = np.nanmean(ds["LATITUDE"][:].data)
        longitudes.append(lon)
        latitudes.append(lat) 

        wl = pd.DataFrame({"slev": data}, index = pd.DatetimeIndex(formatted_time))
        quality_filter = ds["SLEV_QC"][:].data<=2
        wl = wl[quality_filter]
        wl = wl[(wl.index >"2021-12-31") & (wl.index <"2024-01-01")]
        wl = wl.drop(wl[wl['slev'] < -100].index)# Remove invalid values
        wl = wl.drop(wl[wl['slev'] > 1000].index)# Remove invalid values
        wl["slev"] = wl["slev"]-np.mean(wl["slev"])# Center around zero

        if len(wl)==0:
            continue
        #  Add timestamps
        wl = add_time_stamps(wl)
        
        all_wl_list.append(wl)

    all_wl=pd.concat(all_wl_list)
    # Replace missing entries with nan such that we have a full date time index
    freq=wl.index[1]-wl.index[0]
    all_wl = all_wl.set_index(all_wl.index).resample(freq).sum().replace(0.00, np.nan) 
    # Rolling mean value
    all_wl_rollmean = all_wl.rolling(window='30min',closed="left").mean()[::int(pd.Timedelta(minutes=30)/freq)]
    all_wl_rollmean = np.round(all_wl_rollmean.dropna(),decimals=4)
    all_wl_rollmean.index.name = "datetime_UTC"
    all_wl_rollmean.columns = ["water_level"]
    all_wl_rollmean.to_csv(f"../observations/{platform_name}_wl.csv")

    locations = pd.DataFrame({"station": [platform_name],
                              "longitude": [np.round(np.mean(longitudes),decimals=6)],
                              "latitude": [np.round(np.mean(latitudes),decimals=6)]})
    locations_all = pd.read_csv("../observations/stations.csv",index_col=0)
    locations_all = pd.concat([locations_all,locations])
    locations_all.to_csv("../observations/stations.csv")

    return all_wl_rollmean,all_wl

In [37]:
# Collect observation directories in list
obs_fldr = "raw_data/F3platform/"
obs_dir = [f"{obs_fldr}NO_TS_TG_F3platformTG_{year}{month:02d}.nc" for year in [2022,2023] for month in range(1, 13)]
load_and_save_CMEMS_data(obs_dir,"F3platform")

(                     water_level
 datetime_UTC                    
 2022-01-01 00:30:00      -0.3012
 2022-01-01 01:00:00      -0.2712
 2022-01-01 01:30:00      -0.1979
 2022-01-01 02:00:00      -0.0779
 2022-01-01 02:30:00       0.0554
 ...                          ...
 2023-12-31 21:30:00       0.0759
 2023-12-31 22:00:00       0.0525
 2023-12-31 22:30:00       0.0459
 2023-12-31 23:00:00       0.0325
 2023-12-31 23:30:00       0.0125
 
 [35009 rows x 1 columns],
                          slev
 datetime                     
 2022-01-01 00:00:00 -0.311217
 2022-01-01 00:10:00 -0.301217
 2022-01-01 00:20:00 -0.291217
 2022-01-01 00:30:00 -0.281217
 2022-01-01 00:40:00 -0.271217
 ...                       ...
 2023-12-31 23:00:00  0.012522
 2023-12-31 23:10:00  0.022522
 2023-12-31 23:20:00  0.002522
 2023-12-31 23:30:00 -0.007478
 2023-12-31 23:40:00 -0.017478
 
 [105119 rows x 1 columns])

# K13a

In [38]:
# The modelskill package can be used to compare model results with observations.
# For more info on modelskill, see https://github.com/DHI/modelskill
obs_fldr = "raw_data/K13a/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_K13aTG_{year}{month:02d}.nc" for year in [2022,2023] for month in range(1, 13)]

ds = nc.Dataset(obs_dir[0])
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC
    data_mode: R
    time_sampling: 10.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (4276, 1)
filling on

In [39]:
load_and_save_CMEMS_data(obs_dir,"K13a")

(                     water_level
 datetime_UTC                    
 2022-01-01 00:30:00      -0.2127
 2022-01-01 01:00:00      -0.1127
 2022-01-01 01:30:00      -0.0027
 2022-01-01 02:00:00       0.1239
 2022-01-01 02:30:00       0.2439
 ...                          ...
 2023-12-31 21:30:00       0.7149
 2023-12-31 22:00:00       0.7249
 2023-12-31 22:30:00       0.6783
 2023-12-31 23:00:00       0.5949
 2023-12-31 23:30:00       0.5049
 
 [33887 rows x 1 columns],
                          slev
 datetime                     
 2022-01-01 00:00:00 -0.242732
 2022-01-01 00:10:00 -0.212732
 2022-01-01 00:20:00 -0.182732
 2022-01-01 00:30:00 -0.152732
 2022-01-01 00:40:00 -0.112732
 ...                       ...
 2023-12-31 23:10:00  0.504932
 2023-12-31 23:20:00  0.464932
 2023-12-31 23:30:00  0.444932
 2023-12-31 23:40:00  0.384932
 2023-12-31 23:50:00  0.354932
 
 [105120 rows x 1 columns])

# Europlatform

In [40]:
# The modelskill package can be used to compare model results with observations.
# For more info on modelskill, see https://github.com/DHI/modelskill
obs_fldr = "raw_data/Europlatform/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_EuroplatformTG_{year}{month:02d}.nc" for year in [2022,2023] for month in range(1, 13)]

ds = nc.Dataset(obs_dir[0])
ds["TIME"]

<class 'netCDF4.Variable'>
float64 TIME(TIME)
    long_name: Time
    standard_name: time
    units: days since 1950-01-01T00:00:00Z
    valid_min: -90000.0
    valid_max: 90000.0
    axis: T
    ancillary_variables: TIME_QC
    calendar: standard
unlimited dimensions: 
current shape = (4456,)
filling on, default _FillValue of 9.969209968386869e+36 used

In [41]:
load_and_save_CMEMS_data(obs_dir,"Europlatform")

(                     water_level
 datetime_UTC                    
 2022-01-01 00:30:00       0.8830
 2022-01-01 01:00:00       0.7130
 2022-01-01 01:30:00       0.5563
 2022-01-01 02:00:00       0.3730
 2022-01-01 02:30:00       0.1630
 ...                          ...
 2023-12-31 21:30:00      -0.2332
 2023-12-31 22:00:00      -0.2198
 2023-12-31 22:30:00      -0.2065
 2023-12-31 23:00:00      -0.1998
 2023-12-31 23:30:00      -0.1532
 
 [34560 rows x 1 columns],
                          slev
 datetime                     
 2022-01-01 00:00:00  0.936302
 2022-01-01 00:10:00  0.886302
 2022-01-01 00:20:00  0.826302
 2022-01-01 00:30:00  0.766302
 2022-01-01 00:40:00  0.706302
 ...                       ...
 2023-12-31 23:10:00 -0.159820
 2023-12-31 23:20:00 -0.139820
 2023-12-31 23:30:00 -0.129820
 2023-12-31 23:40:00 -0.099820
 2023-12-31 23:50:00 -0.089820
 
 [105120 rows x 1 columns])

# J61

In [42]:
# The modelskill package can be used to compare model results with observations.
# For more info on modelskill, see https://github.com/DHI/modelskill
obs_fldr = "raw_data/J61/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_J61TG_{year}{month:02d}.nc" for year in [2022,2023] for month in range(1, 13)]

ds = nc.Dataset(obs_dir[0])
ds["TIME"]

<class 'netCDF4.Variable'>
float64 TIME(TIME)
    long_name: Time
    standard_name: time
    units: days since 1950-01-01T00:00:00Z
    valid_min: -90000.0
    valid_max: 90000.0
    axis: T
    ancillary_variables: TIME_QC
    calendar: standard
unlimited dimensions: 
current shape = (4454,)
filling on, default _FillValue of 9.969209968386869e+36 used

In [43]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"J61")

List of remaining platforms:

- [] A2                         unknown - maybe skip because of three dimensional data??
- [] BolVanHeist                unknown
- [] Brouwershavensegat         unknown
- [] D151                       unknown
- [] Haringvliet10              unknown
- [] HastingsPier               unknown
- [x] Helgoland                  GOOD 
- [x] K141                       GOOD
- [x] L91                        GOOD
- [] LichteilandGoeree          unknown
- [x] Q11                        GOOD
- [] ScheurWielingen            unknown
- [] Wandelaar                  unknown
- [] WestHinder                 unknown
- [x] Westkapelle                GOOD
- [x] WhitbyHarbour              GOOD
- [x] ZeebruggeWielingendok      GOOD

# A2

In [44]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_A2TG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()

dict_keys(['TIME', 'TIME_QC', 'DEPH', 'LATITUDE', 'LONGITUDE', 'STATION', 'TEMP', 'TEMP_QC', 'ATMP', 'ATMP_QC', 'SLEV', 'SLEV_QC', 'SLEV_DM'])

In [45]:
ds["SLEV"] #???? Three dimensions?

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC SLEV_DM
    data_mode: M
    time_sampling: 5.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (2480384, 3)
filling off

# Helgoland

In [46]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_HelgolandTG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()

dict_keys(['TIME', 'TIME_QC', 'DEPH', 'LATITUDE', 'LONGITUDE', 'STATION', 'SLEV', 'SLEV_QC'])

In [47]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"Helgoland")

# K141

In [48]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_K141TG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()

dict_keys(['TIME', 'TIME_QC', 'DEPH', 'LATITUDE', 'LONGITUDE', 'STATION', 'SLEV', 'SLEV_QC'])

In [49]:
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC
    data_mode: R
    time_sampling: 10.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (474497, 1)
filling off

In [50]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"K141")

# L91

In [51]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_L91TG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC
    data_mode: R
    time_sampling: 10.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (478899, 1)
filling off

In [52]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"L91")
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-01-01 00:30:00,-0.6641
2022-01-01 01:00:00,-0.5607
2022-01-01 01:30:00,-0.3341
2022-01-01 02:00:00,-0.0474
2022-01-01 02:30:00,0.2159
...,...
2023-12-31 21:30:00,0.7859
2023-12-31 22:00:00,0.8526
2023-12-31 22:30:00,0.8959
2023-12-31 23:00:00,0.9093


# Q11

In [53]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_Q11TG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC
    data_mode: R
    time_sampling: 10.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (376375, 1)
filling off

In [54]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"Q11")
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-01-01 00:30:00,0.2567
2022-01-01 01:00:00,0.4967
2022-01-01 01:30:00,0.6067
2022-01-01 02:00:00,0.5834
2022-01-01 02:30:00,0.5434
...,...
2022-11-29 21:30:00,-0.0300
2022-11-29 22:00:00,-0.0900
2022-11-29 22:30:00,-0.1550
2022-11-29 23:00:00,-0.2450


# Westkapelle

In [55]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_WestkapelleTG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC
    data_mode: R
    time_sampling: 10.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (527272, 1)
filling off

In [56]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"Westkapelle")
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-01-01 00:30:00,1.5972
2022-01-01 01:00:00,1.3372
2022-01-01 01:30:00,1.0272
2022-01-01 02:00:00,0.6739
2022-01-01 02:30:00,0.2606
...,...
2023-12-31 21:30:00,-0.7994
2023-12-31 22:00:00,-0.7861
2023-12-31 22:30:00,-0.7294
2023-12-31 23:00:00,-0.6394


# WhitbyHarbour

In [57]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_WhitbyHarbourTG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH DEPLOY_LATITUDE DEPLOY_LONGITUDE STATION
    ancillary_variables: SLEV_QC SLEV_DM
    data_mode: M
    time_sampling: 10.0
    sea_level_datum: Chart Datum
    processing_method: 40 seconds average
unlimited dimensions: 
current shape = (544489, 1)
filling off

In [58]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"WhitbyHarbour")
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-09-15 00:17:08.571429,0.1647
2022-09-15 00:34:17.142858,0.0117
2022-09-15 00:51:25.714287,-0.1753
2022-09-15 01:08:34.285716,-0.3113
2022-09-15 01:25:42.857145,-0.4733
...,...
2023-12-30 22:34:17.159847,-2.6627
2023-12-30 22:51:25.731276,-3.1797
2023-12-30 23:08:34.302705,-1.7813
2023-12-30 23:25:42.874134,-3.8097


# ZeebruggeWielingendok

In [59]:
obs_fldr = "raw_data/CMEMS/"
# Collect observation directories in list
obs_dir = [f"{obs_fldr}NO_TS_TG_ZeebruggeWielingendokTG.nc"]

ds = nc.Dataset(obs_dir[0])
ds.variables.keys()
ds["SLEV"]

<class 'netCDF4.Variable'>
float32 SLEV(TIME, DEPTH)
    standard_name: water_surface_height_above_reference_datum
    units: m
    _FillValue: 9.96921e+36
    long_name: Water surface height above a specific datum
    valid_min: -20.0
    valid_max: 20.0
    coordinates: TIME LATITUDE LONGITUDE DEPH STATION
    ancillary_variables: SLEV_QC SLEV_DM
    data_mode: M
    time_sampling: 5.0
    sea_level_datum: MSL
    processing_method: unknown
unlimited dimensions: 
current shape = (797905, 1)
filling off

In [60]:
all_wl_rollmean,all_wl = load_and_save_CMEMS_data(obs_dir,"ZeebruggeWielingendok")
all_wl_rollmean

Unnamed: 0_level_0,water_level
datetime_UTC,Unnamed: 1_level_1
2022-01-01 00:30:00,1.4591
2022-01-01 01:00:00,1.1408
2022-01-01 01:30:00,0.7791
2022-01-01 02:00:00,0.3641
2022-01-01 02:30:00,-0.0825
...,...
2023-12-31 21:30:00,-1.0609
2023-12-31 22:00:00,-0.9775
2023-12-31 22:30:00,-0.8459
2023-12-31 23:00:00,-0.7175


# stations.csv file

In [61]:
# Reindex the stations file 
locations_all = pd.read_csv("../observations/stations.csv",index_col=0)
locations_all.index = locations_all["station"].values.tolist()
locations_all.index.name="station"
locations_all.drop(columns=["station"],inplace=True)
locations_all.columns = ["Longitude","Latitude"]
locations_all.to_csv("../observations/stations.csv")