In [1]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import animation
from mpl_toolkits.basemap import Basemap
import contextily as cx  
import xarray as xr
from matplotlib.colors import TwoSlopeNorm
from matplotlib.colors import LinearSegmentedColormap
from scipy.spatial import cKDTree
from datetime import datetime
from matplotlib import gridspec
import scipy.stats as stats
import wavespectra
import netCDF4 as nc
import h5py
import numbers
import copy
from statsmodels.tsa.stattools import acf
from statsmodels.stats.diagnostic import acorr_ljungbox
import pymannkendall as mk
import calendar
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
import re
from scipy.stats import norm


#import 
%matplotlib inline

In [2]:
base_path = "/gpfs/work3/0/ai4nbs/hurry_wave/north_sea"
output_dir = os.path.join(base_path, "04_modelruns" , "YearSims", "01_postprocess", "plots","test")
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# parser = argparse.ArgumentParser(description="Postprocess Hurrywave output.")
# parser.add_argument("--model_name", type=str, required=True, help="Name of the model run")
# parser.add_argument("--data_name", type=str, required=True, help="Name of the data set")
# args = parser.parse_args()

# model_name = args.model_name
# data_name = args.data_name

year_start = 2023 # Latest year
year_end = 1950 # Earliest year

month_start = 12 # Latest month
month_end = 1 # Earliest month

day_start = 31 # Latest day
day_end = 1 # Earliest day

hour_start = 23 # Latest hour
hour_end = 0 # Earliest hour

years  = np.arange(year_start, year_end - 1, -1)
months = np.arange(month_start, month_end - 1, -1)
days   = np.arange(day_start, day_end - 1, -1)
hours  = np.arange(hour_start, hour_end - 1, -1)

run_tests = True

area = [65, -12, 48, 10] # DCSM area in the North Sea (degrees): North, West, South, East

# model_path = os.path.join(base_path, '04_modelruns', 'YearSims',model_name)
 
model_path = os.path.join(base_path, '04_modelruns', 'YearSims')
model_path_start = os.path.join(model_path, str(year_start)) 
postprocess_path = os.path.join(base_path, '05_postprocessing', 'his_files_per_station')
spectral_data_path = os.path.join(base_path, '01_data', 'spectral_buoy_data')
ERA5_data_path = '/gpfs/work3/0/ai4nbs/ERA5_data/data'


inp_file = os.path.join(model_path_start, 'hurrywave.inp')
tstart = f"{year_end}{month_end}{day_end} {hour_end}0000" # earliest time
tstop = f"{year_start}{month_start}{day_start} {hour_start}0000" # latest time

# DCSM bounding box: [North, West, South, East]
area = area  # (N, W, S, E)

# Find the start and stop times from the input file
with open(inp_file, 'r') as f:
    for line in f:
        if line.strip().startswith('tspinup'):
            tspinup = line.split('=')[1].strip()


# Ensure time is in "yyyymmdd hhmmss" format
def parse_time(s):
    parts = s.strip().split()
    if len(parts) == 2:
        date, time = parts
    elif len(parts) == 1:
        date = parts[0][:8]
        time = parts[0][8:] if len(parts[0]) > 8 else "000000"
    else:
        date, time = "00000000", "000000"  # fallback
    return f"{date} {time}"


tstart_str = parse_time(tstart)
tstop_str = parse_time(tstop)

tstart_dt = datetime.strptime(tstart_str, "%Y%m%d %H%M%S")
tstop_dt = datetime.strptime(tstop_str, "%Y%m%d %H%M%S")

In [3]:
# Path to Waterinfo_RWS data
waterinfo_base = os.path.join(base_path, "01_data", "Waterinfo_RWS")

# Collect all years available in Waterinfo_RWS
obs_years = sorted([d for d in os.listdir(waterinfo_base) if d.isdigit()])
# Only keep years from year_start down to year_end (inclusive)
obs_years = [year for year in obs_years if int(year) <= year_start and int(year) >= year_end]
obs_years = sorted(obs_years, reverse=True)  # Descending order: year_start to year_end

print(obs_years)
# Dictionary to hold all observational data per station
obs_data_per_station = {}

for year in obs_years:
    print(f"Loading observational data for year: {year}")
    year_dir = os.path.join(waterinfo_base, year)
    for fname in os.listdir(year_dir):
        if fname.endswith('.csv'):
            station = fname.replace('.csv', '')
            fpath = os.path.join(year_dir, fname)
            if not os.path.exists(fpath):
                print(f"File not found: {fpath}, skipping.")
                continue

            # Check if file is empty or contains no columns
            with open(fpath, 'r') as checkfile:
                first_line = checkfile.readline()
                if not first_line or len(first_line.strip().split(',')) < 2:
                    print(f"File {fpath} contains no columns, skipping.")
                    continue
            df = pd.read_csv(fpath, parse_dates=True)
            if df.empty:
                print(f"Skipping {fpath} as it contains no data records.")
                continue  # Skip file if no data record
            if df.empty or len(df) < 10:
                print(f"Skipping {fpath} as it contains no data records or less than 10 records.")
                continue  # Skip file if no data record or too few records
            df['year'] = int(year)
            if station not in obs_data_per_station:
                obs_data_per_station[station] = [df]
            else:
                obs_data_per_station[station].append(df)

    print(f"Finished loading observational data from Waterinfo_RWS for {len(obs_data_per_station)} stations.")

# Concatenate yearly dataframes for each station
for station in obs_data_per_station:
    obs_data_per_station[station] = pd.concat(obs_data_per_station[station], ignore_index=True)
    #divide all values of hm0 by 100 as they are in cm
    if 'hm0' in obs_data_per_station[station]:
        obs_data_per_station[station]['hm0'] = obs_data_per_station[station]['hm0'] / 100

    # t13 is divided by 0.95 to move to Tp
    if 't13' in obs_data_per_station[station]:
        obs_data_per_station[station]['t13'] = obs_data_per_station[station]['t13'] / 0.95

    # If the dataframe has
    # 'Unnamed: 0' as a column, rename it to 'time'
    for station in obs_data_per_station:
        if isinstance(obs_data_per_station[station], pd.DataFrame):
            if 'Unnamed: 0' in obs_data_per_station[station]:
                # Convert to pandas datetime first, then to numpy.datetime64 array
                print(station)
                time_pd = pd.to_datetime(obs_data_per_station[station].pop('Unnamed: 0'))
                obs_data_per_station[station]['time'] = time_pd.values.astype('datetime64[ns]')

['2023', '2022', '2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009', '2008', '2007', '2006', '2005', '2004', '2003', '2002', '2001', '2000', '1999', '1998', '1997', '1996', '1995', '1994', '1993', '1992', '1991', '1990', '1989', '1988', '1987', '1986', '1985', '1984', '1983', '1982', '1981', '1980', '1979', '1978', '1977', '1976', '1975', '1974', '1973', '1972', '1971', '1970', '1969', '1968', '1967', '1966', '1965', '1964', '1963', '1962', '1961', '1960', '1959', '1958', '1957', '1956', '1955', '1954', '1953', '1952', '1951', '1950']
Loading observational data for year: 2023
Skipping /gpfs/work3/0/ai4nbs/hurry_wave/north_sea/01_data/Waterinfo_RWS/2023/Platform Hoorn Q1-A.csv as it contains no data records or less than 10 records.
Skipping /gpfs/work3/0/ai4nbs/hurry_wave/north_sea/01_data/Waterinfo_RWS/2023/Platform F16-A.csv as it contains no data records or less than 10 records.
Finished loading observational data from Waterinfo_RWS

  time_pd = pd.to_datetime(obs_data_per_station[station].pop('Unnamed: 0'))
  obs_data_per_station[station]['time'] = time_pd.values.astype('datetime64[ns]')


North Cormorant
Euro platform
Platform Hoorn Q1-A
Platform F16-A


In [4]:
for station, df in obs_data_per_station.items():
    if 'hm0' in df and not df['hm0'].dropna().empty:
        start_time = df.loc[df['hm0'].first_valid_index(), 'time']
        end_time = df.loc[df['hm0'].last_valid_index(), 'time']
        print(f"{station}: hm0 data from {start_time} to {end_time}")

IJmuiden munitiestortplaats: hm0 data from 2023-01-01 00:00:00 to 1976-05-01 06:00:00
Platform D15-A: hm0 data from 2017-01-01 00:00:00 to 2009-12-31 00:00:00
K13a platform: hm0 data from 2023-01-01 00:00:00 to 1978-12-29 21:00:00
K13 Alpha: hm0 data from 2022-12-31 23:50:00 to 2023-12-01 22:40:00
North Cormorant: hm0 data from 2023-01-01 00:00:00 to 1987-12-24 23:00:00
Euro platform: hm0 data from 2023-01-01 00:00:00 to 1982-12-31 00:00:00
Platform Hoorn Q1-A: hm0 data from 2022-01-01 00:20:00 to 2009-12-31 00:00:00
Platform F16-A: hm0 data from 2020-01-01 00:00:00 to 2009-12-31 00:00:00


In [5]:
for station, df in obs_data_per_station.items():
    if 't13' in df and not df['t13'].dropna().empty:
        start_time = df.loc[df['t13'].first_valid_index(), 'time']
        end_time = df.loc[df['t13'].last_valid_index(), 'time']
        print(f"{station}: t13 data from {start_time} to {end_time}")

IJmuiden munitiestortplaats: t13 data from 2023-01-01 00:00:00 to 2007-12-31 00:00:00
Platform D15-A: t13 data from 2017-01-01 00:00:00 to 2009-12-31 00:00:00
K13a platform: t13 data from 2023-01-01 00:00:00 to 2007-12-31 00:00:00
K13 Alpha: t13 data from 2022-12-31 23:50:00 to 2023-12-01 22:40:00
Euro platform: t13 data from 2023-01-01 00:00:00 to 2007-12-31 00:00:00
Platform Hoorn Q1-A: t13 data from 2022-01-01 00:20:00 to 2009-12-31 00:00:00
Platform F16-A: t13 data from 2020-01-01 00:00:00 to 2009-12-31 00:00:00


In [6]:
for station, df in obs_data_per_station.items():
    if 'th0' in df and not df['th0'].dropna().empty:
        start_time = df.loc[df['th0'].first_valid_index(), 'time']
        end_time = df.loc[df['th0'].last_valid_index(), 'time']
        print(f"{station}: th0 data from {start_time} to {end_time}")

IJmuiden munitiestortplaats: th0 data from 2023-01-01 00:00:00 to 1989-12-31 00:00:00
K13a platform: th0 data from 2023-01-01 00:00:00 to 1985-12-31 00:00:00
North Cormorant: th0 data from 1989-02-28 23:00:00 to 1987-12-24 23:00:00
Euro platform: th0 data from 2023-01-01 00:00:00 to 1985-12-31 00:00:00
Platform Hoorn Q1-A: th0 data from 2022-01-14 08:10:00 to 2009-12-31 00:00:00
