# MOHID Postprocessing

This Jupyter Notebook aims to help analyse the results of the MOHID Water model.

***
**Note 1**: Execute each cell through the <button class="btn btn-default btn-xs"><i class="icon-play fa fa-play"></i></button> button from the top MENU (or keyboard shortcut `Shift` + `Enter`).<br>
<br>
**Note 2**: Use the Kernel and Cell menus to restart the kernel and clear outputs.<br>
***

# Table of contents
- [1. Import required libraries](#1.-Import-required-libraries)
- [2. Time series](#2.-Time-series)
    - [2.1 Convert and merge multiple MOHID time series files to csv](#2.1-Convert-and-merge-multiple-MOHID-time-series-files-to-csv)
    - [2.2 Extract time series from HDF5 files](#2.2-Extract-time-series-from-HDF5-files)
        - [2.2.1 Read one or multiple MOHID HDF5 files](#2.2.1-Read-one-or-multiple-MOHID-HDF5-files)
        - [2.2.2 Load or create a new file with monitoring stations](#2.2.2-Load-or-create-a-new-file-with-monitoring-stations)
        - [2.2.3 Adjust or define new monitoring stations on the map](#2.2.3-Adjust-or-define-new-monitoring-stations-on-the-map)
        - [2.2.4 Create Input_table.dat](#2.2.4-Create-Input_table.dat)
        - [2.2.5 Create InputValida4D.dat](#2.2.5-Create-InputValida4D.dat)
        - [2.2.6 Run Valida4D tool](#2.2.6-Run-Valida4D-tool)
        - [2.2.7 Convert OutTable.dat to csv files](#2.2.7-Convert-OutTable.dat-to-csv-files)
    - [2.3 Load csv files](#2.3-Load-csv-files)
    - [2.4 Statistics](#2.4-Statistics)
    - [2.5 Plot time series](#2.5-Plot-time-series)
    - [2.6 Compare with measurements](#2.6-Compare-with-measurements)
    - [2.7 Harmonic analysis](#2.7-Harmonic-analysis)
        - [2.7.1 Calculate the anomaly](#2.7.1-Calculate-the-anomaly)
        - [2.7.2 Solve to obtain the coefficients](#2.7.2-Solve-to-obtain-the-coefficients)
        - [2.7.3 Save the amplitudes and phases](#2.7.3-Save-the-amplitudes-and-phases)
        - [2.7.4 Generate the astronomical tide](#2.7.4-Generate-the-astronomical-tide)
        - [2.7.5 Plot and save](#2.7.5-Plot-and-save)
- [3. Maps](#3.-Maps)
    - [3.1 Plot maps](#3.1-Plot-maps)
    - [3.2 Plot statistics](#3.2-Plot-statistics)
    - [3.3 Plot vertical cut](#3.3-Plot-vertical-cut)
        - [3.3.1 Define paths and file names](#3.3.1-Define-paths-and-file-names)
        - [3.3.2 Load or create a new file with a polyline](#3.3.2-Load-or-create-a-new-file-with-a-polyline)
        - [3.3.3 Visualise or define a new polyline on the map](#3.3.3-Visualise-or-define-a-new-polyline-on-the-map)
        - [3.3.4 Run script](#3.3.4-Run-script)

# 1. Import required libraries

In [None]:
import os
from ipyleaflet import Map, TileLayer, DrawControl, GeoJSON, Marker, basemaps, Popup, Polyline, Circle, GeoData, Polygon, LayerGroup
import json
import re
import datetime
import time
from datetime import datetime, timedelta
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize, to_hex
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import shutil
import subprocess
import sys
import matplotlib as mpl
from folium.plugins import MeasureControl
import glob
import zipfile
import h5py
import requests
import pathlib
from tqdm import tqdm
from IPython.display import Video
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from urllib.request import Request, urlopen
from PIL import Image
import io
from math import radians, cos, sin
import matplotlib.colors as mcolors
from io import StringIO
from pathlib import Path
import tempfile
from collections import defaultdict
import csv
from typing import Iterable, Optional
import utide

print("Done.")

# 2. Time series

## 2.1 Convert and merge multiple MOHID time series files to csv

In [None]:
# --- User settings ---------------------------------------------------------
master_dir = os.path.join(os.getcwd(),'res') #files can be inside subfolders of master_dir
filename='FPOLIS.srh'

out_dir   = os.path.join(os.getcwd(),'out')
os.makedirs(out_dir, exist_ok=True)

# out_dir is already absolute, so it is joined directly (same resulting path).
output_csv = os.path.join(out_dir,'FPOLIS.csv')

script_folder = os.path.join(os.getcwd(), "work", "Merge_TimeSeries")
script_name = os.path.join(script_folder, "merge_timeseries.py")
input_file  = os.path.join(script_folder, "input_merge_timeseries.py")

# --- Write the settings file next to the helper script ---------------------
with open(input_file, 'w', encoding='utf-8') as f:
    f.write(f"master_dir       = r'{master_dir}'\n")
    f.write(f"filename         =r'{filename}'\n")
    f.write(f"output_csv=r'{output_csv}'\n")

# --- Run the helper script --------------------------------------------------
# sys.executable is the interpreter running this kernel, so the helper sees
# the same environment as the notebook; a bare "python" resolved through PATH
# may be a different installation.
try:
    result = subprocess.run(
        [sys.executable, os.path.basename(script_name)],
        cwd=script_folder,
        capture_output=True,
        text=True,
        check=True,  # raise CalledProcessError on a non-zero exit code
    )
    print("STDOUT:\n", result.stdout)
    print("Completed successfully.")
except subprocess.CalledProcessError as e:
    # Show everything the script printed before re-raising the failure.
    print("ERROR: exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise

print("Done.")

## 2.2 Extract time series from HDF5 files
#Based on Valida4D from MOHID tools (https://github.com/Mohid-Water-Modelling-System/Mohid/tree/master/Software/SmallTools/Valida4D)

### 2.2.1 Read one or multiple MOHID HDF5 files

In [None]:
# True: collect hdf5_file from every dated sub-folder of backup_root whose
# date lies between start_date_str and end_date_str.
# False: use the single file backup_root/hdf5_file.
multiple_files    = True
backup_root       = os.path.join(os.getcwd(),'res')
hdf5_file         =r'Hydrodynamic_2_Surface.hdf5'
# Dates are parsed with the "%Y-%m-%d" format further down.
start_date_str    = '2025-9-25'
end_date_str      = '2025-9-27'
# ----------------------------------------
def collect_hdf5_paths(root, h5file, sd, ed):
    """Return the sorted paths of ``h5file`` found in dated sub-folders of ``root``.

    A sub-folder is considered when the part of its name before the first
    underscore parses as ``YYYYMMDD`` and that date lies in ``[sd, ed]``
    (both ends inclusive). ``h5file`` is joined to the folder path and expanded
    with ``glob``, so it may contain wildcards. Only the folder itself is
    searched, not its sub-folders.

    Parameters
    ----------
    root : str
        Directory containing the dated sub-folders.
    h5file : str
        File name (or glob pattern) to look for inside each sub-folder.
    sd, ed : datetime.date
        First and last day to include.

    Returns
    -------
    list of str
        Matching file paths, sorted.
    """
    paths = []
    # The context manager closes the directory iterator even if an error occurs.
    with os.scandir(root) as entries:
        for entry in entries:
            if not entry.is_dir():
                continue
            try:
                day = datetime.strptime(entry.name.split('_')[0], "%Y%m%d").date()
            except ValueError:
                # Folder name does not start with a YYYYMMDD stamp: ignore it.
                continue
            if not sd <= day <= ed:
                continue
            # Look directly inside the date-folder
            pattern = os.path.join(entry.path, h5file)
            paths.extend(f for f in glob.glob(pattern) if os.path.isfile(f))
    return sorted(paths)
# ----------------------------------------

if multiple_files:
    # Inclusive date window used to select the dated result folders.
    sd = datetime.strptime(start_date_str, "%Y-%m-%d").date()
    ed = datetime.strptime(end_date_str,   "%Y-%m-%d").date()

    hdf5_files = collect_hdf5_paths(backup_root, hdf5_file, sd, ed)
else:
    hdf5_files = [os.path.join(backup_root, hdf5_file)]

if not hdf5_files:
    # Report where the search happened, not only the file name searched for.
    raise RuntimeError(
        f"No '{hdf5_file}' files found under '{backup_root}' "
        f"between {start_date_str} and {end_date_str}"
    )

# ----------------------------------------
# GRID
# ----------------------------------------
# The grid is read from the first selected file only.
with h5py.File(hdf5_files[0], "r") as h5f:
    Xr = h5f["Grid"]["Longitude"][:]
    Yr = h5f["Grid"]["Latitude"][:]
    zi = h5f["Grid"]["Bathymetry"][:]

print("Done.")

### 2.2.2 Load or create a new file with monitoring stations 

In [None]:
stations_file = 'stations.csv'

# The stations file is header-less, one "lon,lat,name" row per station.
if not os.path.exists(stations_file):
    # Nothing saved yet: start from an empty table with the same columns.
    stations_df = pd.DataFrame(columns=['lon', 'lat', 'station_name'])
    print(f"No '{stations_file}' found. Starting with zero stations.")
else:
    stations_df = pd.read_csv(
        stations_file,
        header=None,
        names=['lon', 'lat', 'station_name'],
        sep=',',
        engine='python',
    )
    print(f"Loaded {len(stations_df)} stations from {stations_file}")

### 2.2.3 Adjust or define new monitoring stations on the map  
#Move or draw markers on the map to define the monitoring stations

In [None]:
# -------------------------------
# Start timing
# -------------------------------
# Reference time for the "Raster layering time" message printed later.
start_time = time.time()

# -------------------------------
# Grid arrays: Xr, Yr and zi must already exist (they are read from the
# HDF5 "Grid" group in section 2.2.1)
# -------------------------------
LonGrid = np.array(Xr)
LatGrid = np.array(Yr)

# -------------------------------
# Build discrete colormap
# -------------------------------
# Module-level state shared by map_value_to_color and precompute_color_grid;
# _bins and _discrete_colors stay None until precompute_color_grid has run.
_nbins = 10
_bins = None
_discrete_colors = None

def map_value_to_color(value):
    """Return the hex colour of the discrete bin that ``value`` falls into.

    The value -99 maps to the fully transparent "#ffffff00". Any other value
    is placed with ``np.digitize`` against the module-level ``_bins`` and the
    index is clamped to ``[0, _nbins - 1]``, so values outside the bin range
    take the first or last colour. ``precompute_color_grid`` must have been
    called first to fill ``_bins`` and ``_discrete_colors``.
    """
    if value != -99:
        bin_index = int(np.clip(np.digitize(value, _bins) - 1, 0, _nbins - 1))
        return _discrete_colors[bin_index]
    return "#ffffff00"

def precompute_color_grid(zi, nbins=10):
    """Set up the discrete colour scale for ``zi`` and colour every cell.

    Updates the module-level ``_nbins``, ``_bins`` and ``_discrete_colors``:
    ``nbins`` equal-width bins between the minimum and maximum of the cells
    that are not -99 (or between 0 and 1 when every cell is -99), each paired
    with a colour sampled from matplotlib's 'viridis' colormap.

    Returns an array shaped like ``zi`` holding one hex colour string per cell.
    """
    global _bins, _nbins, _discrete_colors
    _nbins = nbins

    # Cells equal to -99 are left out of the colour range.
    data_cells = zi[zi != -99]
    if data_cells.size:
        lowest, highest = data_cells.min(), data_cells.max()
    else:
        lowest, highest = 0, 1
    _bins = np.linspace(lowest, highest, nbins + 1)

    viridis = plt.colormaps.get_cmap('viridis')
    samples = viridis(np.linspace(0, 1, nbins))
    _discrete_colors = list(map(mcolors.to_hex, samples))

    colour_of = np.vectorize(map_value_to_color)
    return colour_of(zi)

# Also initialises the colour bins used by map_value_to_color below.
color_mapped_zi = precompute_color_grid(zi)

# -------------------------------
# Create base map
# -------------------------------
# `output` collects the widgets (name prompt, save button) shown with the map.
output = widgets.Output()
display(output)
m = Map(center=(LatGrid.mean(), LonGrid.mean()), zoom=8)

# -------------------------------
# Split raster into GeoJSON blocks
# -------------------------------
# Shifted views of the coordinate grids: element [i, j] of each array is one
# corner of the quadrilateral drawn for cell (i, j).
# NOTE(review): assumes LonGrid/LatGrid hold cell-corner coordinates with one
# more row and column than zi - confirm against the HDF5 layout.
lon_sw = LonGrid[:-1, :-1]; lon_se = LonGrid[:-1, 1:]
lon_ne = LonGrid[1:, 1:];   lon_nw = LonGrid[1:, :-1]
lat_sw = LatGrid[:-1, :-1]; lat_se = LatGrid[:-1, 1:]
lat_ne = LatGrid[1:, 1:];   lat_nw = LatGrid[1:, :-1]

# Cells are grouped into block_size x block_size tiles, one GeoJSON layer each.
block_size = 10
block_layers = {}

def generate_block_geojson(br, bc):
    """Build the GeoJSON FeatureCollection for raster block ``(br, bc)``.

    The block covers up to ``block_size`` x ``block_size`` cells of ``zi``
    starting at row ``br * block_size`` and column ``bc * block_size``.
    Cells equal to -99 are skipped. Every other cell becomes a closed polygon
    through its four corners, with its fill colour, outline style and (i, j)
    indices stored in the feature properties.
    """
    n_rows, n_cols = zi.shape
    row_start = br * block_size
    col_start = bc * block_size
    row_stop = min(row_start + block_size, n_rows)
    col_stop = min(col_start + block_size, n_cols)

    def corner(lon_arr, lat_arr, i, j):
        # GeoJSON positions are [lon, lat] pairs of plain Python floats.
        return [float(lon_arr[i, j]), float(lat_arr[i, j])]

    features = []
    for i in range(row_start, row_stop):
        for j in range(col_start, col_stop):
            cell_value = zi[i, j]
            if cell_value == -99:
                continue
            ring = [
                corner(lon_sw, lat_sw, i, j),
                corner(lon_se, lat_se, i, j),
                corner(lon_ne, lat_ne, i, j),
                corner(lon_nw, lat_nw, i, j),
                corner(lon_sw, lat_sw, i, j),  # repeat the first point to close the ring
            ]
            properties = {
                "fill": map_value_to_color(cell_value),
                "stroke": "#000000",
                "fill-opacity": 0.5,
                "stroke-width": 0.2,
                "i": i, "j": j
            }
            features.append({
                "type": "Feature",
                "geometry": {"type": "Polygon", "coordinates": [ring]},
                "properties": properties,
            })
    return {"type": "FeatureCollection", "features": features}

def update_all_blocks():
    """Rebuild every raster block layer on the map ``m``.

    Removes the layers currently registered in ``block_layers``, then creates
    one GeoJSON layer per block that contains at least one feature and stores
    it in ``block_layers`` under its ``(block_row, block_col)`` key.
    """
    for old_layer in block_layers.values():
        m.remove_layer(old_layer)
    block_layers.clear()

    def feature_style(feature):
        # Translate the stored per-feature properties into Leaflet style keys.
        props = feature["properties"]
        return {
            "fillColor": props["fill"],
            "color": props["stroke"],
            "weight": props["stroke-width"],
            "fillOpacity": props["fill-opacity"],
        }

    n_rows, n_cols = zi.shape
    # Ceiling division: a partial block at the edge still counts as a block.
    block_rows = -(-n_rows // block_size)
    block_cols = -(-n_cols // block_size)

    for br in range(block_rows):
        for bc in range(block_cols):
            collection = generate_block_geojson(br, bc)
            if collection["features"]:
                layer = GeoJSON(data=collection, style_callback=feature_style)
                m.add_layer(layer)
                block_layers[(br, bc)] = layer

update_all_blocks()
print(f"Raster layering time: {time.time() - start_time:.2f} sec")

# -------------------------------
# Station markers
# -------------------------------
# markers_dict maps a numeric marker id to {'location': [lat, lon], 'name': ...}
# and is the data written out by save_stations.
markers_dict = {}
marker_counter = 0
preloaded_ids = []   # will capture IDs of originally loaded stations

# Add each preloaded station and record its ID
for _, row in stations_df.iterrows():
    mid = marker_counter
    marker_counter += 1
    preloaded_ids.append(mid)

    lon, lat, nm = row.lon, row.lat, row.station_name
    # Stored as [lat, lon], the order used for the Marker location below.
    markers_dict[mid] = {'location': [lat, lon], 'name': nm}

    mkr = Marker(location=[lat, lon], draggable=True)
    mkr.marker_id = mid
    mkr.marker_name = nm

    # m_id=mid freezes the current id as a default argument, so each callback
    # keeps its own station instead of the loop's last value.
    def on_move(change, m_id=mid):
        new_loc = change['new']
        markers_dict[m_id]['location'] = new_loc
        print(f"Station '{markers_dict[m_id]['name']}' moved to {new_loc}")

    # Called whenever the marker's 'location' trait changes (e.g. after a drag).
    mkr.observe(on_move, names='location')
    m.add_layer(mkr)

# -------------------------------
# Drawing new markers
# -------------------------------
def ask_marker_name(marker, mid):
    """Display a small form in ``output`` asking for the name of station ``mid``.

    When the Confirm button is clicked, the entered text (stripped; falling
    back to "Station <mid>" when empty) is stored on ``marker.marker_name``
    and in ``markers_dict[mid]['name']``, and the form is closed.
    """
    name_field = widgets.Text(
        description='Station name:',
        placeholder='Enter station name',
        style={'description_width': 'initial'},
    )
    confirm_btn = widgets.Button(description='Confirm', button_style='success')
    form = widgets.VBox([name_field, confirm_btn])

    def _apply_name(_button):
        chosen = name_field.value.strip()
        if not chosen:
            chosen = f"Station {mid}"
        marker.marker_name = chosen
        markers_dict[mid]['name'] = chosen
        print(f"New station {mid} named '{chosen}'")
        form.close()

    confirm_btn.on_click(_apply_name)
    with output:
        display(form)

def handle_draw(target, action, geo_json):
    """Draw-control callback: register created markers, forget deleted ones.

    On "created", the drawn point becomes a new draggable Marker with a fresh
    id in ``markers_dict`` and the user is asked for its name. On "deleted",
    stations whose id is found in the deleted features are removed from
    ``markers_dict``.
    """
    global marker_counter
    if action == "created":
        # GeoJSON point coordinates are ordered (lon, lat).
        lon, lat = geo_json["geometry"]["coordinates"]
        mid = marker_counter
        marker_counter += 1
        # The name stays None until the user confirms one in ask_marker_name.
        markers_dict[mid] = {'location': [lat, lon], 'name': None}

        mkr = Marker(location=[lat, lon], draggable=True)
        mkr.marker_id = mid
        ask_marker_name(mkr, mid)

        # m_id=mid binds this marker's id to the callback at definition time.
        def on_move(change, m_id=mid):
            markers_dict[m_id]['location'] = change["new"]
            print(f"Station {m_id} moved to {change['new']}")

        mkr.observe(on_move, names="location")
        m.add_layer(mkr)

        # Remove any GeoJSON point layer found on the map, so only the
        # draggable Marker added above represents the station.
        # NOTE(review): this only inspects m.layers - confirm that the draw
        # control's own point actually appears there.
        for lyr in list(m.layers):
            if isinstance(lyr, GeoJSON) and lyr.data.get("geometry", {}).get("type") == "Point":
                m.remove_layer(lyr)

    elif action == "deleted":
        # Accept either a FeatureCollection or a single feature.
        feats = geo_json.get("features", []) or [geo_json]
        for f in feats:
            # NOTE(review): nothing in this cell writes "marker_id" into the
            # GeoJSON properties (it is only set as a Python attribute on the
            # Marker), so this lookup presumably returns None and no station
            # is removed - verify against the payload the draw control sends.
            mid = f.get("properties", {}).get("marker_id")
            if mid in markers_dict:
                print(f"Deleted station {mid} named '{markers_dict[mid]['name']}'")
                markers_dict.pop(mid)
        print("Remaining stations:", markers_dict)

# Only the marker tool is given options; every other shape gets an empty
# option dict (presumably disabling that tool - confirm in the ipyleaflet docs).
draw_control = DrawControl(
    polygon={}, polyline={}, rectangle={}, circle={}, circlemarker={},
    marker={"repeatMode": False}
)
# handle_draw receives every draw action reported by the control.
draw_control.on_draw(handle_draw)
m.add_control(draw_control)

# -------------------------------
# Save stations, preserving original names/order
# -------------------------------
def save_stations(path):
    """
    Write the stations in ``markers_dict`` to ``path`` as "lon,lat,name" rows.

    Stations are written in two phases:
      1) Preloaded stations in their original order with unchanged names
      2) Newly drawn stations appended afterwards (unnamed ones are saved
         as "Station_<id>")

    Rows go through ``csv.writer`` so that a name containing a comma, a quote
    or a line break is quoted and still loads back as a single column. Names
    without such characters produce exactly the same text as a plain
    "lon,lat,name" line.
    """
    preloaded = set(preloaded_ids)
    new_ids = [mid for mid in markers_dict if mid not in preloaded]

    with open(path, 'w') as f:
        # '\n' keeps the line endings identical to a plain f.write(...+"\n").
        writer = csv.writer(f, lineterminator='\n')

        # 1) Write preloaded stations first
        for mid in preloaded_ids:
            data = markers_dict.get(mid)
            if not data:
                # Station removed from the map since loading.
                continue
            lat, lon = data['location']
            name = data['name']
            # Fields are formatted explicitly so values are rendered exactly
            # as an f-string would render them.
            writer.writerow([f"{lon}", f"{lat}", f"{name}"])

        # 2) Append any new stations
        for mid in new_ids:
            data = markers_dict[mid]
            lat, lon = data['location']
            name = data['name'] or f"Station_{mid}"
            writer.writerow([f"{lon}", f"{lat}", f"{name}"])

    print(f"Saved {len(markers_dict)} stations to '{path}'")

# Button that writes the current stations back to stations_file.
save_btn = widgets.Button(description='Save stations', button_style='info')
save_btn.on_click(lambda b: save_stations(stations_file))

# Shown in the same output area as the station-name prompts.
with output:
    display(save_btn)

# -------------------------------
# Show the map
# -------------------------------
display(m)

In [None]:
print(markers_dict)

### 2.2.4 Create Input_table.dat

In [None]:
Z_DEPTHS = 0. #depth relative to the surface

# Re-read the stations from disk so that stations saved from the map are used.
stations_df = pd.read_csv(
    stations_file,
    sep=',',
    header=None,
    names=['lon', 'lat', 'station_name'],
    engine='python',
    dtype={'lon': float, 'lat': float, 'station_name': str}
)
print(f"Loaded {len(stations_df)} stations from {stations_file}")

script_folder = os.path.join(os.getcwd(), "work", "valida4D")
script_name = os.path.join(script_folder, "Valida4D.exe")
input_table = os.path.join(script_folder, "Input_table.dat")

float_fmt='{:.6f}'
delimiter = ' ' 
encoding = 'utf-8'  # change if needed

# Only the date is parsed, so the time fields written below are 00 00 00.
sd = datetime.strptime(start_date_str, "%Y-%m-%d").date()

lines = []
lines.append('SERIE_INITIAL_DATA :' + sd.strftime('%Y %m %d %H %M %S'))
lines.append('<BeginTable>')

# One table row per station: lon lat depth , name
for _, row in stations_df.iterrows():
    # Safely format lon/lat and clean station name
    lon = float_fmt.format(row['lon'])
    lat = float_fmt.format(row['lat']) 
    name = str(row['station_name']).strip()
    # NOTE(review): with delimiter = ' ' this replaces a space by a space,
    # i.e. it is a no-op; it only has an effect for another delimiter.
    if delimiter and delimiter in name:
        name = name.replace(delimiter, ' ')
    lines.append(delimiter.join([lon, lat, str(Z_DEPTHS), ',', name]))

lines.append('<EndTable>')

# Atomic write: write to temp file then replace
dest_dir = os.path.dirname(input_table) or '.'

# The temp file is created in the destination folder so that os.replace
# stays on the same filesystem.
fd, tmp_path = tempfile.mkstemp(dir=dest_dir, prefix='._tmp_dat_', text=True)
os.close(fd)
try:
    with open(tmp_path, 'w', encoding=encoding, newline='\n') as f:
        for line in lines:
            f.write(line + '\n')
    os.replace(tmp_path, input_table)
    print(f"Wrote table to {input_table}")
finally:
    # Only reached with the temp file still present when writing/replacing failed.
    if os.path.exists(tmp_path):
        try:
            os.remove(tmp_path)
        except OSError:
            pass

### 2.2.5 Create InputValida4D.dat

In [None]:
# Folder and file that receive the extracted table (written to the
# OUTPUT_TABLE entry of InputValida4D.dat below).
output_dir = os.path.join(os.getcwd(),'out')
output_table = os.path.join(output_dir, "OutTable.dat")

#Get hdf5 variables and units

def decode_attr(x):
    """Convert an HDF5 attribute value into plain Python text where possible.

    Byte strings are decoded as UTF-8. Lists, tuples and NumPy arrays are
    converted element by element (recursively) and returned as a tuple.
    Any other value is returned unchanged.
    """
    if isinstance(x, (list, tuple, np.ndarray)):
        return tuple(map(decode_attr, x))
    if isinstance(x, (bytes, np.bytes_)):
        return x.decode("utf-8")
    return x

# List of (property name, units) tuples, one per group under "Results" in the
# first HDF5 file.
variable = []
with h5py.File(hdf5_files[0], "r") as h5f:
    results = h5f["Results"]
    for name, group in results.items():
        # expected child inside the group
        child_name = f"{name}_00001"
        if child_name in group:
            item = group[child_name]
            units = item.attrs.get("Units", None)
            units = decode_attr(units) if units is not None else None
        else:
            # No "<name>_00001" child: units stay unknown.
            units = None
        variable.append((name, units))
        print(f"{name}  units = {units}")

script_folder = os.path.join(os.getcwd(), "work", "valida4D")
script_name = os.path.join(script_folder, "Valida4D.exe")
input_file  = os.path.join(script_folder, "InputValida4D.dat")
input_table = os.path.join(script_folder, "Input_table.dat")

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Only dates are parsed, so START and END below are written with time 00 00 00.
sd = datetime.strptime(start_date_str, "%Y-%m-%d").date()
ed = datetime.strptime(end_date_str,   "%Y-%m-%d").date()

# Keyword lines of InputValida4D.dat.
# Z_DEPTHS comes from the "Create Input_table.dat" cell, which must run first.
lines = []
lines.append('FIELD4D         : 1')
lines.append('EXTRAPOLATE     : 0')
lines.append('INPUT_TABLE     : ' + input_table)
lines.append('Z_DEPTHS        : ' + str(Z_DEPTHS))
lines.append('X_COLUMN        : 1')
lines.append('Y_COLUMN        : 2')
lines.append('Z_COLUMN        : 3')
lines.append('OUTPUT_TABLE    : ' + output_table)
lines.append('START           :' + sd.strftime('%Y %m %d %H %M %S'))
lines.append('END             :' + ed.strftime('%Y %m %d %H %M %S'))
lines.append('DT              : 3600')

# One HDF5 path per line between the block markers.
lines.append('<BeginHDF5>')

for filename in hdf5_files:
    # Convert to str and strip whitespace
    fname = str(filename).strip()
    # Optionally ensure consistent path style (uncomment if needed)
    # fname = os.path.normpath(fname)
    lines.append(fname)
lines.append('<EndHDF5>')

# col is incremented before use, so the first property gets COLUMN 5.
col = 4
# variable is a list of tuples, e.g. [("water level", "m"), ("velocity modulus", "m/s")]
for item in variable:
    col += 1
    name, units = item[0], item[1]
    # The tuples built above have two elements, so the description falls back
    # to the property name unless a third element is supplied.
    description = item[2] if len(item) > 2 else name
    lines.append('<beginproperty>')
    lines.append(f'NAME        :  {name}')
    lines.append(f'UNITS       :  {units}')
    lines.append(f'DESCRIPTION :  {description}')
    lines.append(f'COLUMN      :  {col}')
    lines.append('<endproperty>')

# Atomic write: write to temp file then replace
dest_dir = os.path.dirname(input_file) or '.'

# The temp file lives in the destination folder so os.replace stays on the
# same filesystem.
fd, tmp_path = tempfile.mkstemp(dir=dest_dir, prefix='._tmp_dat_', text=True)
os.close(fd)
try:
    # No explicit encoding here: the platform default is used.
    with open(tmp_path, 'w', newline='\n') as f:
        for line in lines:
            f.write(line + '\n')
    os.replace(tmp_path, input_file)
    print(f"Wrote table to {input_file}")
finally:
    # Only reached with the temp file still present when writing/replacing failed.
    if os.path.exists(tmp_path):
        try:
            os.remove(tmp_path)
        except OSError:
            pass


### 2.2.6 Run Valida4D tool

In [None]:
# Run the Valida4D executable from its own folder. check=True makes a
# non-zero exit code raise CalledProcessError.
try:
    result = subprocess.run(
        [script_name],
        check=True,
        text=True,
        capture_output=True,
        cwd=script_folder,
    )
except subprocess.CalledProcessError as e:
    # Show everything the tool printed before propagating the failure.
    print("ERROR: exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise
else:
    print("STDOUT:\n", result.stdout)
    print("Completed successfully.")

print(f"Wrote results to {output_table}")

### 2.2.7 Convert OutTable.dat to csv files

In [None]:
"""
- Finds the first numeric line with at least 3 numbers and interprets them as YEAR MONTH DAY.
- Finds the table header line (contains the token 'Seconds' and 'Station' or 'StationName' case-insensitive).
- Uses header tokens as CSV column names (keeps order).
- Parses rows after the header, supports variable whitespace and station names containing spaces.
- Adds a Datetime column computed as start_of_day + Seconds (Seconds must be numeric).
- Groups rows by station and writes <Station>.csv in outdir; also writes outdir/groups.txt with detected pre-table tokens.
"""

def read_lines(path):
    """Return the UTF-8 text file at ``path`` as a list of lines without line endings."""
    with open(path, encoding='utf-8') as handle:
        content = handle.read()
    return content.splitlines()

def find_start_date(lines):
    """Return the start date found in ``lines`` as a ``datetime`` at midnight.

    The first line containing at least three numbers whose first three values
    form a valid (year, month, day) is used. Lines whose numbers do not form
    a valid date are skipped and the search continues.

    Raises
    ------
    ValueError
        If no line yields a valid date.
    """
    # Non-capturing groups make findall return the matched numbers themselves.
    num_re = re.compile(r'[+\-]?\d+(?:\.\d+)?(?:[Ee][+\-]?\d+)?')
    for line in lines:
        tokens = num_re.findall(line)
        if len(tokens) < 3:
            continue
        try:
            year, month, day = (int(float(tok)) for tok in tokens[:3])
            return datetime(year, month, day)
        except (ValueError, OverflowError):
            # ValueError: values out of range for a date.
            # OverflowError: a number too large to convert (e.g. "1e999").
            continue
    raise ValueError("Start date not found: expected a line with at least three numeric tokens (year month day).")

def extract_pre_table(lines):
    """Return the lines that come before the first ``<BeginTable>`` marker.

    The marker is matched case-insensitively at the start of the stripped
    line. When there is no marker, all lines are returned.
    """
    head = []
    for raw in lines:
        marker = raw.strip().upper()
        if marker.startswith('<BEGINTABLE>'):
            return head
        head.append(raw)
    return head

def detect_header_and_table_start(lines):
    """Locate the table header and return ``(header_tokens, first_data_index)``.

    The header is the first line containing the word 'Seconds' and a word
    starting with 'Station' (both case-insensitive). Its whitespace-separated
    tokens are returned in order, together with the index of the next line.

    Raises
    ------
    ValueError
        If no such line exists.
    """
    for idx, line in enumerate(lines):
        has_seconds = re.search(r'\bSeconds\b', line, re.IGNORECASE) is not None
        has_station = re.search(r'\bStation', line, re.IGNORECASE) is not None
        if has_seconds and has_station:
            columns = re.findall(r'\S+', line.strip())
            return columns, idx + 1  # data rows begin on the following line
    raise ValueError("Table header not found: expected a line containing 'Seconds' and 'Station'.")

def parse_row_by_header(line, header):
    """Map one whitespace-separated data line onto the ``header`` columns.

    All columns except the last take one token each, left to right. The last
    column (the station) receives every remaining token joined by single
    spaces, so station names may contain spaces.

    Returns a dict keyed by header name, or ``None`` when the line is blank
    or has fewer tokens than there are header columns.
    """
    fields = re.findall(r'\S+', line.strip())
    width = len(header)
    if not fields or len(fields) < width:
        return None
    row = dict(zip(header[:-1], fields))
    # Everything from the last column onwards belongs to the station name.
    row[header[-1]] = ' '.join(fields[width - 1:])
    return row

def is_numeric(s):
    """Return True when ``float(s)`` succeeds, False when it raises."""
    try:
        float(s)
    except Exception:
        return False
    else:
        return True

def parse_table(lines, header, start_idx):
    """Parse the data rows that follow the table header.

    Parameters
    ----------
    lines : list of str
        All lines of the file.
    header : list of str
        Column names from the header line.
    start_idx : int
        Index of the first data line.

    Returns
    -------
    list of dict
        One dict per accepted row, keyed by header name. Blank lines, rows
        with fewer tokens than header columns, and rows whose Seconds value
        is not a finite number are skipped. A Seconds value written with a
        'D'/'d' exponent is stored with 'E' instead.

    Raises
    ------
    ValueError
        If a data row is found but the header has no 'Seconds' column.
    """
    from math import isfinite

    # The header never changes, so the Seconds column is located once.
    seconds_key = next((c for c in header if c.lower() == 'seconds'), None)

    entries = []
    for line in lines[start_idx:]:
        if not line.strip():
            continue
        mapped = parse_row_by_header(line, header)
        if not mapped:
            continue
        # Raised only once a usable row is seen, so a table without data rows
        # still returns an empty list.
        if not seconds_key:
            raise ValueError("Header missing 'Seconds' column after detection.")
        sec_val = mapped.get(seconds_key, '')
        if not is_numeric(sec_val):
            # Retry with 'D'/'d' exponents rewritten as 'E' (e.g. 1.0D+03).
            sec_val = sec_val.replace('D', 'E').replace('d', 'E')
            if not is_numeric(sec_val):
                continue
            mapped[seconds_key] = sec_val
        # NaN and Infinity parse as floats but cannot be turned into a time
        # offset later, so such rows are dropped like other non-numeric ones.
        if not isfinite(float(sec_val)):
            continue
        entries.append(mapped)
    return entries

def write_station_files(entries, header, start_dt, outdir):
    """Write one CSV file per station into ``outdir``.

    Parameters
    ----------
    entries : list of dict
        Parsed rows keyed by header name.
    header : list of str
        Column names; must contain a 'Seconds' column (case-insensitive) and
        a column whose name contains 'station'.
    start_dt : datetime
        Instant corresponding to Seconds = 0.
    outdir : str or Path
        Output folder (created if missing).

    Returns
    -------
    (list of str, list of str)
        The station names written, in order of first appearance, and the
        output column names ('Datetime' followed by the header without the
        Seconds column).

    Raises
    ------
    ValueError
        If the header lacks the Seconds or the Station column.
    """
    seconds_key = next((c for c in header if c.lower() == 'seconds'), None)
    if seconds_key is None:
        raise ValueError("Header missing Seconds column.")
    # find header token that contains 'station' ignoring case
    station_key = next((c for c in header if 'station' in c.lower()), None)
    if not station_key:
        raise ValueError("Header missing Station column.")

    # Group rows by station, adding the absolute timestamp to each row.
    grouped = defaultdict(list)
    for row in entries:
        dt = start_dt + timedelta(seconds=float(row[seconds_key]))
        stamped = {**row, 'Datetime': dt.strftime('%Y-%m-%d %H:%M:%S')}
        grouped[row[station_key]].append(stamped)

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Exclude the Seconds column from output columns
    out_columns = ['Datetime'] + [c for c in header if c != seconds_key]

    written = []
    used_names = set()
    for station, rows in grouped.items():
        base_name = re.sub(r'[^\w\-_. ]', '_', station).strip().replace(' ', '_')
        # Different stations can sanitise to the same file name (e.g. "B/1"
        # and "B_1"); add a numeric suffix instead of overwriting the file.
        safe_name, suffix = base_name, 1
        while safe_name in used_names:
            suffix += 1
            safe_name = f"{base_name}_{suffix}"
        used_names.add(safe_name)

        fname = outdir / f"{safe_name}.csv"
        with fname.open('w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=out_columns)
            writer.writeheader()
            # Columns missing from a row are written as empty fields.
            writer.writerows({col: r.get(col, '') for col in out_columns} for r in rows)
        written.append(station)
    return written, out_columns

def extract_pre_table_groups(pre_lines):
    """Return the distinct non-numeric tokens found in ``pre_lines``.

    Punctuation ``, : ; ( ) / [ ]`` is treated as a separator. Each token is
    reduced to letters, digits, '_' and '-'; empty and purely numeric results
    are dropped. Tokens are de-duplicated case-insensitively, keeping the
    first spelling and the order of first appearance.
    """
    first_seen = {}
    for line in pre_lines:
        spaced = re.sub(r'[,:;()\/\[\]]', ' ', line)
        for raw in re.findall(r'\S+', spaced):
            token = re.sub(r'[^A-Za-z0-9_\-]', '', raw)
            if not token:
                continue
            # skip pure numeric tokens
            if re.fullmatch(r'[+\-]?\d+(\.\d+)?([Ee][+\-]?\d+)?', token):
                continue
            # setdefault keeps the first spelling seen for each upper-cased key
            first_seen.setdefault(token.upper(), token)
    return list(first_seen.values())

def main():
    """Convert ``output_table`` into one CSV per station inside ``output_dir``.

    Stops through ``sys.exit`` with code 3 when no start date is found, 4 when
    the table header is missing and 5 when no data row could be parsed.
    Otherwise prints a summary of what was written.
    """
    def _abort(exit_code, *message):
        # Print the reason, then stop with the given exit code.
        print(*message)
        sys.exit(exit_code)

    lines = read_lines(output_table)
    groups = extract_pre_table_groups(extract_pre_table(lines))

    try:
        start_dt = find_start_date(lines)
    except ValueError as e:
        _abort(3, "Error:", e)

    try:
        header, table_start_idx = detect_header_and_table_start(lines)
    except ValueError as e:
        _abort(4, "Error:", e)

    entries = parse_table(lines, header, table_start_idx)
    if not entries:
        _abort(5, "No data rows parsed. Check table formatting.")

    written_stations, out_columns = write_station_files(entries, header, start_dt, output_dir)

    summary = (
        f"Wrote {len(written_stations)} station files to {output_dir}.",
        f"Detected header columns: {', '.join(header)}.",
        f"Output columns (with Datetime): {', '.join(out_columns)}.",
        f"Detected pre-table groups: {', '.join(groups) if groups else '(none)'}.",
    )
    for message in summary:
        print(message)

if __name__ == '__main__':
    main()

## 2.3 Load csv files  

In [None]:
# Alternative inputs: keep exactly one csv_file_1 assignment active.
#csv_file_1 = os.path.join(os.getcwd(),'out','PR.csv')
#csv_file_1 = os.path.join(os.getcwd(),'out','FPOLIS.csv')
#csv_file_1 = os.path.join(os.getcwd(),'res','res.csv')
csv_file_1 = os.path.join(os.getcwd(),'res','nivel_itapema.csv')

df_1 = pd.read_csv(csv_file_1)

# Default the second file to the first one, so csv_file_2 is defined even
# when the optional cell that loads a second file is not run.
csv_file_2 = csv_file_1
df_1.head()

In [None]:
# Optional: load a second dataframe to plot time series from different csv
# files or to compare with measurements. Overrides the csv_file_2 default.
csv_file_2 = os.path.join(os.getcwd(),'res','obs.csv')
df_2 = pd.read_csv(csv_file_2)
df_2.head()

## 2.4 Statistics

In [None]:
# Data frame to summarise (the first loaded csv).
df = df_1

# Relative folder name; it is joined to the current working directory below.
out_dir = "out"
os.makedirs(out_dir, exist_ok=True)

csv_path = os.path.join(os.getcwd(),out_dir, "statistics.csv")

def column_stats_to_csv_exclude(
    df: pd.DataFrame,
    out_csv: str,
    exclude: Optional[Iterable[str]] = None,
    columns: Optional[Iterable[str]] = None,
    decimals: Optional[int] = None
) -> pd.DataFrame:
    """
    Summarise columns of `df` and save the summary table as CSV.

    For every selected column the table holds max, p99, p95, p90, median,
    mean and min (one row per column, index named "column").

    Parameters
    - df: input pandas DataFrame
    - out_csv: path of the CSV file to write
    - exclude: column names to leave out (e.g., ['x','y','z'])
    - columns: optional explicit list of columns to use; names missing from
      `df` are ignored. When omitted, all numeric columns are used.
    - decimals: number of decimals to round to; None means no rounding

    Returns the summary DataFrame. Raises ValueError when no column is left
    after the selection.
    """
    skipped = set(exclude or [])
    if columns is None:
        candidates = df.select_dtypes(include=[np.number]).columns.tolist()
    else:
        candidates = columns
    cols = [c for c in candidates if c in df.columns and c not in skipped]

    if not cols:
        raise ValueError("No numeric columns to compute after applying exclusion.")

    data = df[cols]
    column_order = ["max", "p99", "p95", "p90", "median", "mean", "min"]
    summary = {
        "max": data.max(skipna=True),
        "p99": data.quantile(0.99),
        "p95": data.quantile(0.95),
        "p90": data.quantile(0.90),
        "median": data.median(skipna=True),
        "mean": data.mean(skipna=True),
        "min": data.min(skipna=True),
    }

    result = pd.DataFrame(summary, index=cols)[column_order]
    if decimals is not None:
        result = result.round(decimals)

    result.index.name = "column"
    result.to_csv(out_csv)

    return result

# Columns named X, Y and Z are left out of the statistics; values are rounded
# to two decimals.
stats_df = column_stats_to_csv_exclude(df, csv_path, exclude=["X","Y","Z"], decimals=2)

print(f"Saved:\n{csv_path}")
stats_df.head()

## 2.5 Plot time series

In [None]:
output_dir = os.path.join(os.getcwd(),'out')
# Create the folder that is actually handed to the plotting script.
os.makedirs(output_dir, exist_ok=True)

output_file = "timeseries.png"

# Choose your column names (or set var2 = '' if you only want one trace)
var1 = 'water_level'
var2 = ''  # set to '' if you don't want a second trace

days_between_ticks = 1  # number of days between ticks
mode_var1 ='lines' #lines+markers, lines, markers
mode_var2 ='lines' #lines+markers, lines, markers
color_var1 = 'blue'
color_var2 = 'red'
legend_1 = 'res'
legend_2 = 'obs'

# Axis label per column name; columns not listed here get None.
axis_labels = {
  'water_level': 'Water Level (m)',
  'velocity_modulus' : 'Velocity modulus (m/s)',
  'temperature': 'Temperature (°C)'}

date_format     = "%d-%m-%Y"
dpi = 150

script_folder = os.path.join(os.getcwd(), "work", "Plot_TimeSeries")
script_name = os.path.join(script_folder, "plot_timeseries.py")
input_file  = os.path.join(script_folder, "input_plot_timeseries.py")

# Settings handed to the plotting script (csv_file_1 / csv_file_2 come from
# section 2.3).
config = {
    'csv_file_1':      csv_file_1,
    'csv_file_2':      csv_file_2,
    'output_dir':      output_dir,
    'output_file':     output_file,
    'var1':            var1,
    'var2':            var2,
    'mode_var1':       mode_var1,
    'mode_var2':       mode_var2,
    'color_var1':      color_var1,
    'color_var2':      color_var2,
    'legend_1':        legend_1,
    'legend_2':        legend_2,
    'days_between':    days_between_ticks,
    'axis_label_1':    axis_labels.get(var1),
    'axis_label_2':    axis_labels.get(var2),
    'date_format':     date_format,
    'dpi':             dpi
}

# Write config to input_file
with open(input_file, 'w', encoding='utf-8') as f:
    f.write("# Auto-generated config\n\n")
    for key, val in config.items():
        # repr() yields a valid Python literal for each value.
        f.write(f"{key} = {val!r}\n")

print(f"Wrote configuration to {input_file}")

# sys.executable is the interpreter running this kernel, so the script sees
# the same environment as the notebook; a bare "python" resolved through PATH
# may be a different installation.
try:
    result = subprocess.run(
        [sys.executable, os.path.basename(script_name)],
        cwd=script_folder,
        capture_output=True,
        text=True,
        check=True,  # raise CalledProcessError on a non-zero exit code
    )
    print("STDOUT:\n", result.stdout)
    print("Completed successfully.")
except subprocess.CalledProcessError as e:
    # Show everything the script printed before re-raising the failure.
    print("ERROR: exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise

print("Done.")

## 2.6 Compare with measurements

In [None]:
"""
Compare observed and modelled time series by nearest-time matching (±30 min),
compute statistics, save CSV and PNG plots.
Assumes the first column in input DataFrames or CSVs is the time column.
"""
# Name used in the output file names and to pick the y-axis label.
variable = "water_level"

# Axis label per variable name.
axis_labels = {
  'water_level': 'Water Level (m)',
  'velocity_modulus' : 'Velocity modulus (m/s)',
  'temperature': 'Temperature (°C)'}

# Load modelled and observed data
# (df_2 only exists after the optional second-csv cell of section 2.3 has run)
df_mod = df_1
df_obs = df_2

# Settings
TOLERANCE = pd.Timedelta("30min")
out_dir = "out"
dpi = 150

# Output files, all placed in <current working directory>/<out_dir>.
csv_path = os.path.join(os.getcwd(),out_dir, f"obs_vs_mod_{variable}.csv")
fig_path = os.path.join(os.getcwd(),out_dir, f"obs_vs_mod_{variable}.png")
residuals_path = os.path.join(os.getcwd(),out_dir, f"residuals_{variable}.png")

def compute_metrics(obs_series, mod_series):
    """
    Summarise the agreement between paired observed and modelled values.

    Returns a dict of floats:
      - "bias": mean of (model - observation)
      - "rmse": root of the mean squared (model - observation)
      - "corr": obs_series.corr(mod_series)
      - "r2":   1 - SS_res / SS_tot, or NaN when the observations have zero
                total sum of squares about their mean
    """
    error = mod_series - obs_series
    squared_error = error ** 2

    total_sum_squares = ((obs_series - obs_series.mean()) ** 2).sum()
    if total_sum_squares != 0.0:
        r_squared = 1.0 - squared_error.sum() / total_sum_squares
    else:
        # The ratio is undefined for constant observations.
        r_squared = float("nan")

    metrics = {}
    metrics["bias"] = float(error.mean())
    metrics["rmse"] = float(np.sqrt(squared_error.mean()))
    metrics["corr"] = float(obs_series.corr(mod_series))
    metrics["r2"] = float(r_squared)
    return metrics
def save_plots(df_cmp: pd.DataFrame, out_dir: str = out_dir):
    """
    Save two PNG figures for the paired comparison.

    1. Observed and modelled series on one axis, written to `fig_path`.
    2. Residual (model - observation) with a zero reference line, written to
       `residuals_path`.

    Parameters
    ----------
    df_cmp : DataFrame containing the columns "obs" and "mod".
    out_dir : folder created if missing. NOTE: the figures themselves are
        written to the module-level `fig_path` / `residuals_path`, not to a
        path derived from this argument.

    Raises
    ------
    ValueError if "obs" or "mod" is missing from `df_cmp`.
    """
    os.makedirs(out_dir, exist_ok=True)
    if not {"obs", "mod"}.issubset(df_cmp.columns):
        raise ValueError("df_cmp must contain 'obs' and 'mod' columns")

    value_label = axis_labels.get(variable)

    # The residual has the same unit as the compared variable; take it from the
    # trailing "(unit)" of the configured axis label instead of assuming metres.
    unit = ""
    if value_label and "(" in value_label and value_label.rstrip().endswith(")"):
        unit = value_label[value_label.rfind("(") + 1:value_label.rfind(")")]

    # Figure 1: observed vs. modelled
    fig, ax = plt.subplots(figsize=(12, 4))
    df_cmp[["obs", "mod"]].plot(ax=ax)
    ax.set_title("Observed vs. Modeled")
    ax.set_ylabel(value_label)
    fig.tight_layout()
    fig.savefig(fig_path, dpi=dpi)
    plt.close(fig)

    # Figure 2: residual
    residual = df_cmp["mod"] - df_cmp["obs"]
    fig, ax = plt.subplots(figsize=(12, 2))
    residual.plot(ax=ax, color="black", legend=False)
    ax.axhline(0, color="red", linestyle="--")
    ax.set_title("Residual (Model – Obs)")
    ax.set_ylabel(unit)
    fig.tight_layout()
    fig.savefig(residuals_path, dpi=dpi)
    plt.close(fig)

def read_firstcol_time(path_or_df, value_name, value_col=None):
    """
    Load one value column as a time-indexed Series.

    Parameters
    ----------
    path_or_df : path to a CSV file (str or os.PathLike, read with the first
        row as header) or a pandas DataFrame (copied, never modified).
    value_name : name given to the returned Series.
    value_col : name of the column holding the values. When omitted, the
        module-level `variable` is used.

    The first column is parsed as the time column and becomes the index.

    Returns
    -------
    pandas Series of the selected column, indexed by time and sorted by time.

    Raises
    ------
    TypeError  if `path_or_df` is neither a path nor a DataFrame.
    ValueError if the table has fewer than two columns.
    KeyError   if the value column is not present.
    """
    if isinstance(path_or_df, (str, os.PathLike)):
        df = pd.read_csv(path_or_df, header=0)
    elif isinstance(path_or_df, pd.DataFrame):
        df = path_or_df.copy()
    else:
        raise TypeError("Input must be a file path or a pandas DataFrame")

    if df.shape[1] < 2:
        raise ValueError("Input must have at least two columns (time + value)")

    if value_col is None:
        value_col = variable
    if value_col not in df.columns:
        # Fail with the list of candidates instead of a bare KeyError.
        raise KeyError(
            f"Column {value_col!r} not found; available columns: {list(df.columns)}"
        )

    time_col = df.columns[0]
    df[time_col] = pd.to_datetime(df[time_col])
    series = pd.Series(df[value_col].values, index=df[time_col], name=value_name)
    return series.sort_index()

def pair_by_nearest(obs_series: pd.Series, mod_series: pd.Series, tolerance: pd.Timedelta = TOLERANCE):
    """
    Pair every modelled sample with the observation nearest in time.

    The modelled series is the left side of `pd.merge_asof`, so the result has
    at most one row per modelled timestamp and is indexed by that timestamp.
    Rows with no observation within `tolerance` are dropped.

    NOTE(review): because matching runs from model to observation, one
    observation can be paired with several modelled samples when the model
    output is more frequent than the observations.

    Returns a DataFrame with float columns "obs" and "mod" (empty when nothing
    matches).
    """
    def _to_time_frame(series, label):
        # Turn the series into a two-column frame ("time", label) sorted by time.
        index_col = series.index.name or "index"
        frame = series.rename(label).reset_index().rename(columns={index_col: "time"})
        frame["time"] = pd.to_datetime(frame["time"])
        return frame.sort_values("time")

    matched = pd.merge_asof(
        _to_time_frame(mod_series, "mod"),
        _to_time_frame(obs_series, "obs"),
        on="time",
        direction="nearest",
        tolerance=tolerance,
        suffixes=("_mod", "_obs"),
    ).dropna(subset=["mod", "obs"])

    if not matched.empty:
        return matched.set_index("time")[["obs", "mod"]]
    return pd.DataFrame(columns=["obs", "mod"]).astype(float)

def main(obs_input, mod_input, out_dir=out_dir, tolerance=TOLERANCE):
    """
    Run the observed-vs-modelled comparison end to end.

    Reads both inputs with `read_firstcol_time`, pairs them with
    `pair_by_nearest`, computes the metrics, writes the paired table to
    `csv_path`, saves the figures through `save_plots` and prints a summary.

    Returns
    -------
    (metrics, paired): the metrics dict and the paired DataFrame.

    Raises
    ------
    RuntimeError if no pair is found within `tolerance`.
    """
    os.makedirs(out_dir, exist_ok=True)

    obs_series = read_firstcol_time(obs_input, value_name="obs")
    mod_series = read_firstcol_time(mod_input, value_name="mod")

    # When exactly one series is timezone-aware, give the naive one the same timezone.
    obs_tz = obs_series.index.tz
    mod_tz = mod_series.index.tz
    if (obs_tz is None) != (mod_tz is None):
        if mod_tz is None:
            mod_series = mod_series.tz_localize(obs_tz)
        else:
            obs_series = obs_series.tz_localize(mod_tz)

    paired = pair_by_nearest(obs_series, mod_series, tolerance=tolerance)
    if paired.empty:
        raise RuntimeError("No pairs found within tolerance. Check your indices and tolerance value.")

    metrics = compute_metrics(paired["obs"], paired["mod"])

    # Persist results
    paired.to_csv(csv_path)
    save_plots(paired, out_dir=out_dir)

    # Summary
    obs_times = obs_series.index
    mod_times = mod_series.index
    print("OBS range:", obs_times.min(), "→", obs_times.max())
    print("MOD range:", mod_times.min(), "→", mod_times.max())
    print("Paired points:", len(paired))
    print("Metrics (nearest snap):", metrics)
    print(f"Saved:\n{csv_path}\n{fig_path}\n{residuals_path}")
    return metrics, paired

if __name__ == "__main__":
    # Run the comparison with the settings defined at the top of this cell.
    metrics, paired = main(
        obs_input=df_obs,
        mod_input=df_mod,
        out_dir=out_dir,
        tolerance=TOLERANCE,
    )

## 2.7 Harmonic analysis
#Based on the Python port of the MATLAB package UTide (https://github.com/wesleybowman/UTide)

### 2.7.1 Calculate the anomaly

In [None]:
'''
Calculate the deviation of the selected variable from its mean (stored in a new
column called "anomaly"), then interpolate to fill the NaN values in the anomaly.
'''
# Work on a copy: the flagged values are overwritten and a column is added below,
# and df_1 is reused by other cells of the notebook.
obs = df_1.copy()
variable = "water_level"

# Values equal to -99 are treated as missing
bad = obs[variable] == -99

obs.loc[bad, variable] = np.nan
# The mean is taken after the flagged values were replaced by NaN
obs["anomaly"] = obs[variable] - obs[variable].mean()
obs["anomaly"] = obs["anomaly"].interpolate()
print(f"{bad.sum()} points were flagged 'bad' and interpolated")

# Index the table by time
obs["Datetime"] = pd.to_datetime(obs["Datetime"])
obs = obs.set_index("Datetime")

obs.head()

### 2.7.2 Solve to obtain the coefficients

In [None]:
# Latitude of the data location: set this to the correct value for your data
lat = -27

# Options forwarded to utide.solve
solve_options = {
    "lat": lat,
    "method": "ols",
    "conf_int": "MC",
    "verbose": True,
}

# Fit the anomaly series against its time index
coef = utide.solve(obs.index, obs["anomaly"], **solve_options)

### 2.7.3 Save the amplitudes and phases

In [None]:
# Print the fitted tidal constituents and store them in a csv file

out_dir = "out"
os.makedirs(out_dir, exist_ok=True)
csv_path = os.path.join(os.getcwd(), out_dir, "tidal_constituents.csv")

# Constituent names, amplitudes and phases taken from the fit result
names, amplitudes, phases_deg = coef['name'], coef['A'], coef['g']

for constituent in zip(names, amplitudes, phases_deg):
    nm, A, g = constituent
    print(f"{nm:6s}  amplitude = {A:.4f} m  phase = {g:.2f} deg")

# One row per constituent; amplitudes rounded to 4 decimals, phases to 2
table_columns = {
    'name': names,
    'amplitude': pd.Series(amplitudes).round(4),
    'phase_deg': pd.Series(phases_deg).round(2),
}
df = pd.DataFrame(table_columns)
df.to_csv(csv_path, index=False, encoding='utf-8')

print(f"Saved:\n{csv_path}")

### 2.7.4 Generate the astronomical tide

In [None]:
'''The amplitudes and phases from the fit are now in the coef data structure, 
which can be used directly in the reconstruct function to generate the tides.
'''

# Rebuild the tide at the timestamps of `obs` from the fitted coefficients.
# The result is stored in `tide`; the plotting cell reads its `h` attribute.
tide = utide.reconstruct(obs.index, coef, verbose=True)

### 2.7.5 Plot and save

In [None]:
# Plot and save the anomaly, the astronomical tide and the residual

out_dir = "out"
os.makedirs(out_dir, exist_ok=True)

fig_path = os.path.join(os.getcwd(),out_dir, "tidal_analysis.png")
csv_path = os.path.join(os.getcwd(),out_dir, "tidal_analysis.csv")

dpi = 150

# Time axis as Python datetime objects
t = obs.index.to_pydatetime()

# Two stacked panels sharing both axes: data + tide on top, residual below
fig, (ax0, ax1) = plt.subplots(figsize=(17, 5), nrows=2, sharey=True, sharex=True)

ax0.plot(t, obs.anomaly, label="Data", color="C0")
ax0.plot(t, tide.h, label="Astronomical tide", color="C1")
ax0.set_ylabel("Water level (m)")

# Residual = data minus reconstructed tide
residual = obs.anomaly - tide.h
ax1.plot(t, residual, label="Residual", color="C2")
ax1.grid(which="major", axis="y", linestyle="--", color="0.8", linewidth=0.8)

ax1.set_ylabel("Data - Astronomical tide (m)")

fig.legend(ncol=3, loc="upper center")

fig.savefig(fig_path, dpi=dpi)

# Same three series as a table, rounded to 2 decimals
# (column header corrected from the misspelt 'Rsidual')
df = pd.DataFrame({
    'Datetime': t,
    'Anomaly': obs.anomaly.round(2),
    'Astronomical_tide': tide.h.round(2),
    'Residual': residual.round(2)
})
df.to_csv(csv_path, index=False, encoding='utf-8')

print(f"Saved:\n{fig_path}\n{csv_path}")

# 3. Maps

## 3.1 Plot maps

In [None]:
# ============================
# SET-UP: paths, file names and period
# ============================
backup_root = os.path.join(os.getcwd(), 'res')
hdf5_file = 'Hydrodynamic_2_Surface.hdf5'
hdf5_file_vectors = 'Hydrodynamic_2_Surface.hdf5'
start_date_str = '2025-9-25'
end_date_str = '2025-9-27'

variable = "velocity modulus"  # Change as needed

out_dir = os.path.join(os.getcwd(), 'out', "maps")
os.makedirs(out_dir, exist_ok=True)

# ============================
# VARIABLE -> COLOUR-BAR LABEL
# ============================
variable_label_dict = {
    "velocity modulus": "Velocity Modulus(m/s)",
    "salinity": "Salinity(psu)",
    "temperature": "Temperature(°C)",
    "water level": "Water Level(m)",
}
label = variable_label_dict.get(variable, "Unknown Variable")

variable_vector = ["velocity U", "velocity V"]

# Parsing the dates here makes a malformed date fail before the script is launched
start_date = datetime.strptime(start_date_str, "%Y-%m-%d").date()
end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

# Switches
show_vectors = True      # Set to False to disable the vector overlay
save_frames = True       # Set to False to disable saving individual image frames

# Plot tuning
skip_time = 3            # Sample every nth time step
extent_cells = 1         # Number of extra cells added to the plot extent
increase_zoom_level = 1  # Added to the computed zoom level to improve background image resolution
skip_vector = 5          # Skip factor when plotting vectors (to reduce clutter)
vector_scale = 10        # Scale for the vector arrows
vector_color = 'white'   # Colour of the vector arrows
transparency_factor = 1.0
dpi = 150                # Figure resolution
cmap = "jet"             # Colour scale (jet, viridis,...)

# Script to run and the input file it is given
script_folder = os.path.join(os.getcwd(), "work", "Plot_HDF5")
script_name = os.path.join(script_folder, "plot_hdf5.py")
input_file = os.path.join(script_folder, "Input_Plot_HDF5.py")

# -----------------------
# WRITE Input_Plot_HDF5.py
# -----------------------
input_lines = [
    f"backup_root       = r'{backup_root}'\n",
    f"hdf5_file         =r'{hdf5_file}'\n",
    f"hdf5_file_vectors=r'{hdf5_file_vectors}'\n",
    f"figures_folder    = r'{out_dir}'\n",
    f"start_date_str    = '{start_date_str}'\n",
    f"end_date_str      = '{end_date_str}'\n",
    f"variable          = '{variable}'\n",
    f"label             = '{label}'\n",
    f"variable_vector   = {variable_vector}\n",
    f"show_vectors      = {show_vectors}\n",
    f"save_frames       = {save_frames}\n",
    f"skip_time         = {skip_time}\n",
    f"extent_cells      = {extent_cells}\n",
    f"increase_zoom_level = {increase_zoom_level}\n",
    f"skip_vector       = {skip_vector}\n",
    f"vector_scale      = {vector_scale}\n",
    f"vector_color      = '{vector_color}'\n",
    f"transparency_factor = {transparency_factor}\n",
    f"dpi               = {dpi}\n",
    f"cmap               = '{cmap}'\n",
]
with open(input_file, 'w', encoding='utf-8') as f:
    f.writelines(input_lines)

# -----------------------
# RUN THE ANIMATION SCRIPT ONCE
# -----------------------
print("Starting multi-day animation…")
command = ["python", os.path.basename(script_name)]
try:
    result = subprocess.run(command, cwd=script_folder, capture_output=True, text=True)
    result.check_returncode()
except subprocess.CalledProcessError as e:
    # Show everything the child process printed before propagating the failure.
    print("ERROR: plot_hdf5.py exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise
else:
    print("STDOUT:\n", result.stdout)
    print("Animation completed successfully.")

print("Done.")

## 3.2 Plot statistics

In [None]:
# ============================
# SET-UP: paths, file names and period
# ============================
backup_root = os.path.join(os.getcwd(), 'res')
hdf5_file = 'Hydrodynamic_2_Surface.hdf5'
hdf5_file_vectors = 'Hydrodynamic_2_Surface.hdf5'
start_date_str = '2025-9-25'
end_date_str = '2025-9-27'

variable = "velocity modulus"  # Change as needed

out_dir = os.path.join(os.getcwd(), 'out', "statistics")
os.makedirs(out_dir, exist_ok=True)

# Statistic to plot
percentil = 50            # max = 100, min = 0, median = 50
countour_levels = '[]'    # e.g. '[0.1, 0.2]'
vmin = 0.0
vmax = 1.0

# For 3D results: max_value, surface or layer
percentil_map = "surface"

# Only used when percentil_map = layer
nlayer = 1

# ============================
# VARIABLE -> COLOUR-BAR LABEL
# ============================
variable_label_dict = {
    "velocity modulus": "Velocity Modulus(m/s)",
    "salinity": "Salinity(psu)",
    "temperature": "Temperature(°C)",
    "water level": "Water Level(m)",
}
label = variable_label_dict.get(variable, "Unknown Variable")

# Plot tuning
extent_cells = 1          # Number of extra cells added to the plot extent
increase_zoom_level = 1   # Added to the computed zoom level to improve background image resolution
transparency_factor = 1.0
dpi = 150                 # Figure resolution
cmap = "jet"              # Colour scale (jet, viridis,...)

# Vector overlay
show_vectors = True
variable_vector = ['velocity U', 'velocity V']
skip_vector = 5
vector_scale = 10
vector_color = 'white'

# Script to run and the input file it is given
script_folder = os.path.join(os.getcwd(), "work", "Plot_HDF5_Statistics")
script_name = os.path.join(script_folder, "plot_hdf5_statistics.py")
input_file = os.path.join(script_folder, "input_plot_hdf5_statistics.py")

# Parsing the dates here makes a malformed date fail before the script is launched
start_date = datetime.strptime(start_date_str, "%Y-%m-%d").date()
end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

# -----------------------
# WRITE input_plot_hdf5_statistics.py
# -----------------------
input_lines = [
    f"backup_root       = r'{backup_root}'\n",
    f"hdf5_file         =r'{hdf5_file}'\n",
    f"hdf5_file_vectors=r'{hdf5_file_vectors}'\n",
    f"out_dir           = r'{out_dir}'\n",
    f"start_date_str    = '{start_date_str}'\n",
    f"end_date_str      = '{end_date_str}'\n",
    f"variable          = '{variable}'\n",
    f"label             = '{label}'\n",
    f"percentil         = {percentil}\n",
    f"countour_levels   = {countour_levels}\n",
    f"vmin              = {vmin}\n",
    f"vmax              = {vmax}\n",
    f"percentil_map     = '{percentil_map}'\n",
    f"nlayer            = {nlayer}\n",
    f"extent_cells      = {extent_cells}\n",
    f"increase_zoom_level = {increase_zoom_level}\n",
    f"transparency_factor = {transparency_factor}\n",
    f"dpi               = {dpi}\n",
    f"cmap              = '{cmap}'\n",
    f"skip_vector       = {skip_vector}\n",
    f"vector_scale      = {vector_scale}\n",
    f"vector_color      = '{vector_color}'\n",
    f"variable_vector   = {variable_vector}\n",
    f"show_vectors      = {show_vectors}\n",
]
with open(input_file, 'w', encoding='utf-8') as f:
    f.writelines(input_lines)

# -----------------------
# RUN THE SCRIPT
# -----------------------
command = ["python", os.path.basename(script_name)]
try:
    result = subprocess.run(command, cwd=script_folder, capture_output=True, text=True)
    result.check_returncode()
except subprocess.CalledProcessError as e:
    # Show everything the child process printed before propagating the failure.
    print("ERROR: plot_hdf5_statistics.py exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise
else:
    print("STDOUT:\n", result.stdout)
    print("Statistics completed successfully.")

print("Done.")

## 3.3 Plot vertical cut

### 3.3.1 Define paths and file names

In [None]:
# Folder that is scanned for result sub-folders. Defaults to ./res, like the
# other map cells; replace it with an absolute path if your results live
# elsewhere, e.g. backup_root = r'D:\my_project\backup'
backup_root       = os.path.join(os.getcwd(),'res')
hdf5_file         =r'WaterProperties_2.hdf5'
hdf5_file_vectors =r'Hydrodynamic_2.hdf5'
# Period to plot: adjust to the dates available in backup_root
start_date_str    = '2025-1-6'
end_date_str      = '2025-1-7'

variable = "salinity"  # Change as needed
variable_vector = ["velocity U","velocity V"]

# Output folder for the vertical-cut figures (created if missing)
out_dir   = os.path.join(os.getcwd(),'out', "vertical_cut")
os.makedirs(out_dir, exist_ok=True)


# ----------------------------------------
def collect_hdf5_paths(root, h5file, sd, ed):
    """
    Collect the files named `h5file` found in the dated sub-folders of `root`.

    A sub-folder is considered when the part of its name before the first '_'
    parses as a YYYYMMDD date lying in [sd, ed] (both inclusive). `h5file` is
    used as a glob pattern directly inside each such folder.

    Parameters
    ----------
    root : folder whose direct sub-folders are scanned.
    h5file : file name or glob pattern to look for.
    sd, ed : first and last date (datetime.date) of the period.

    Returns
    -------
    Sorted list of matching file paths (empty when nothing matches).
    """
    paths = []
    # The context manager releases the directory handle even if an error occurs.
    with os.scandir(root) as entries:
        for entry in entries:
            if not entry.is_dir():
                continue
            try:
                day = datetime.strptime(entry.name.split('_')[0], "%Y%m%d").date()
            except ValueError:
                # Folder name does not start with a date: not a result folder.
                continue
            if sd <= day <= ed:
                # Look directly inside the date-folder
                pattern = os.path.join(entry.path, h5file)
                paths.extend(f for f in glob.glob(pattern) if os.path.isfile(f))
    return sorted(paths)
# ----------------------------------------


# Period limits as dates
sd, ed = (
    datetime.strptime(text, "%Y-%m-%d").date()
    for text in (start_date_str, end_date_str)
)

hdf5_files = collect_hdf5_paths(backup_root, hdf5_file, sd, ed)
if len(hdf5_files) == 0:
    raise RuntimeError(f"No  {hdf5_file} between {start_date_str} and {end_date_str}")

# ----------------------------------------
# GRID (read from the first file found)
# ----------------------------------------
with h5py.File(hdf5_files[0], "r") as h5f:
    grid_group = h5f["Grid"]
    Xr = grid_group["Longitude"][:]
    Yr = grid_group["Latitude"][:]
    zi = grid_group["Bathymetry"][:]

print("Done.")

### 3.3.2 Load or create a new file with a polyline 

In [None]:
polyline_file = os.path.join(os.getcwd(), "VerticalCutPath.csv")

# The file holds one polyline vertex per line, as "lon,lat" without a header.
if not os.path.exists(polyline_file):
    # No file yet: start from an empty table and create an empty file on disk.
    stations_df = pd.DataFrame(columns=['lon', 'lat'])
    with open(polyline_file, mode='w', newline=''):
        pass
    print("Starting with zero stations.")
else:
    stations_df = pd.read_csv(
        polyline_file,
        sep=',',
        header=None,
        names=['lon', 'lat'],
        engine='python',
    )
    print(f"Loaded points from {polyline_file}")

### 3.3.3 Visualise or define a new polyline on the map  


In [None]:
# -------------------------------
# Reference time used to report how long the raster layering takes
# -------------------------------
start_time = time.time()

# -------------------------------
# Grid coordinates as NumPy arrays (Xr, Yr and zi come from an earlier cell)
# -------------------------------
LatGrid = np.array(Yr)
LonGrid = np.array(Xr)

# -------------------------------
# Discrete colormap state, filled in by precompute_color_grid()
# -------------------------------
_nbins = 10
_bins = _discrete_colors = None

def map_value_to_color(value):
    """
    Return the hex colour of the bin that `value` falls into.

    The value -99 maps to the fully transparent "#ffffff00". Any other value is
    located in `_bins`, with the bin index clipped to [0, _nbins - 1], and the
    matching entry of `_discrete_colors` is returned.
    """
    if value != -99:
        bin_index = int(np.clip(np.digitize(value, _bins) - 1, 0, _nbins - 1))
        return _discrete_colors[bin_index]
    return "#ffffff00"

def precompute_color_grid(zi, nbins=10):
    """
    Set up the discrete colour scale for `zi` and return its colour grid.

    Updates the module-level `_nbins`, `_bins` and `_discrete_colors`:
    `nbins + 1` evenly spaced bin edges between the minimum and maximum of the
    values different from -99 (0 and 1 when there are none), and `nbins` hex
    colours sampled from the 'viridis' colormap.

    Returns an array shaped like `zi` holding the colour of every cell, as given
    by `map_value_to_color`.
    """
    global _bins, _nbins, _discrete_colors
    _nbins = nbins

    valid_mask = zi != -99
    if np.any(valid_mask):
        valid_values = zi[valid_mask]
        low, high = valid_values.min(), valid_values.max()
    else:
        low, high = 0, 1
    _bins = np.linspace(low, high, nbins + 1)

    palette = plt.colormaps.get_cmap('viridis')
    sample_positions = np.linspace(0, 1, nbins)
    _discrete_colors = [mcolors.to_hex(rgba) for rgba in palette(sample_positions)]

    to_color = np.vectorize(map_value_to_color)
    return to_color(zi)

# Initialise the colour scale (also fills the module-level colormap state)
color_mapped_zi = precompute_color_grid(zi)

# -------------------------------
# Base map, centred on the mean grid coordinates
# -------------------------------
output = widgets.Output()
display(output)
m = Map(zoom=8, center=(LatGrid.mean(), LonGrid.mean()))

# -------------------------------
# Four corner arrays: element [i, j] of each array is one corner of cell (i, j)
# -------------------------------
lon_sw = LonGrid[:-1, :-1]
lat_sw = LatGrid[:-1, :-1]
lon_se = LonGrid[:-1, 1:]
lat_se = LatGrid[:-1, 1:]
lon_ne = LonGrid[1:, 1:]
lat_ne = LatGrid[1:, 1:]
lon_nw = LonGrid[1:, :-1]
lat_nw = LatGrid[1:, :-1]

# The raster is added to the map in blocks of block_size x block_size cells
block_layers = {}
block_size = 10

def generate_block_geojson(br, bc):
    """
    Build the GeoJSON FeatureCollection of one raster block.

    `br` / `bc` are the block's row and column numbers; the block covers up to
    `block_size` x `block_size` cells of `zi`, truncated at the grid edge. Cells
    equal to -99 are left out. Each remaining cell becomes a closed polygon
    through its four corners, carrying its fill colour, stroke settings and
    its (i, j) indices as properties.
    """
    n_rows, n_cols = zi.shape
    row_start = br * block_size
    row_stop = min(row_start + block_size, n_rows)
    col_start = bc * block_size
    col_stop = min(col_start + block_size, n_cols)

    def _corner(lon_arr, lat_arr, i, j):
        # One [lon, lat] vertex as plain Python floats.
        return [float(lon_arr[i, j]), float(lat_arr[i, j])]

    features = []
    for i in range(row_start, row_stop):
        for j in range(col_start, col_stop):
            cell_value = zi[i, j]
            if cell_value == -99:
                continue
            # The ring ends on its first vertex to close the polygon.
            ring = [
                _corner(lon_sw, lat_sw, i, j),
                _corner(lon_se, lat_se, i, j),
                _corner(lon_ne, lat_ne, i, j),
                _corner(lon_nw, lat_nw, i, j),
                _corner(lon_sw, lat_sw, i, j),
            ]
            properties = {
                "fill": map_value_to_color(cell_value),
                "stroke": "#000000",
                "fill-opacity": 0.5,
                "stroke-width": 0.2,
                "i": i, "j": j
            }
            features.append({
                "type": "Feature",
                "geometry": {"type": "Polygon", "coordinates": [ring]},
                "properties": properties,
            })
    return {"type": "FeatureCollection", "features": features}

def update_all_blocks():
    """
    Rebuild every raster block layer on the map.

    Removes the layers currently registered in `block_layers`, then adds one
    GeoJSON layer per non-empty block and registers it under its (br, bc) key.
    """
    for old_layer in block_layers.values():
        m.remove_layer(old_layer)
    block_layers.clear()

    def _style_from_properties(feature):
        # Translate the properties stored on each feature into layer style options.
        props = feature["properties"]
        return {
            "fillColor": props["fill"],
            "color": props["stroke"],
            "weight": props["stroke-width"],
            "fillOpacity": props["fill-opacity"],
        }

    n_rows, n_cols = zi.shape
    # Ceiling division: a partially filled block at the edge still counts.
    n_block_rows = -(-n_rows // block_size)
    n_block_cols = -(-n_cols // block_size)

    for br in range(n_block_rows):
        for bc in range(n_block_cols):
            collection = generate_block_geojson(br, bc)
            if collection["features"]:
                layer = GeoJSON(data=collection, style_callback=_style_from_properties)
                m.add_layer(layer)
                block_layers[(br, bc)] = layer

update_all_blocks()
elapsed = time.time() - start_time
print(f"Raster layering time: {elapsed:.2f} sec")

# -------------------------------
from ipyleaflet import Polyline

# Internal state of the polyline editor
polyline_layer = None

# Vertices loaded from stations_df, in file order, stored as [lat, lon]
polyline_points = [
    [float(lat_value), float(lon_value)]
    for lon_value, lat_value in zip(stations_df["lon"], stations_df["lat"])
]

# Indexes of the preloaded vertices (0..n-1)
preloaded_point_ids = list(range(len(polyline_points)))

# Keep the polyline layer on the map in sync with polyline_points
def add_or_update_polyline_layer():
    """
    Show the current `polyline_points` on the map.

    If a polyline layer already exists, its locations are replaced in place
    (no layer is removed or added). Otherwise a new blue polyline layer is
    created and added to the map, unless there are no points to show.
    """
    global polyline_layer
    if polyline_layer is None:
        if polyline_points:
            polyline_layer = Polyline(
                locations=polyline_points,
                color="blue",
                weight=3,
                opacity=0.8,
            )
            m.add_layer(polyline_layer)
        return
    polyline_layer.locations = polyline_points

# Draw the preloaded polyline on the map (nothing is added when there are no points)
add_or_update_polyline_layer()

# -------------------------------
# DrawControl handlers: create / edited / deleted
# -------------------------------
def _latlon_from_line_geometry(geom):
    """Return [[lat, lon], ...] for a (Multi)LineString geometry dict, or None for any other type."""
    geom_type = geom.get("type")
    if geom_type not in ("LineString", "MultiLineString"):
        return None
    coords = geom.get("coordinates") or []
    # For a MultiLineString only the first part is used
    if geom_type == "MultiLineString":
        coords = coords[0] if coords else []
    # GeoJSON stores [lon, lat]; the polyline state stores [lat, lon]
    return [[float(c[1]), float(c[0])] for c in coords]


def _is_leftover_line_layer(layer):
    """Heuristic: does this GeoJSON layer look like a temporary drawn line feature?"""
    data = layer.data or {}
    features = data.get("features")
    # Inspect the first feature of a collection, or the data itself for a bare feature
    first = features[0] if features else data
    props = first.get("properties") or {}
    geom_type = (data.get("geometry") or {}).get("type")
    return bool(
        props.get("_leaflet_id")
        or props.get("feature_type")
        or geom_type in ("LineString", "MultiLineString")
    )


def handle_draw_polyline(target, action, geo_json):
    """
    DrawControl callback that keeps the polyline state in sync with the map.

    - action == "created": `geo_json` is the new feature. Its vertices replace
      the current polyline and the preloaded-vertex bookkeeping is reset.
    - action == "edited": `geo_json` is a feature or a collection of features.
      The first polyline feature found replaces the current vertices.
    - action == "deleted": the polyline state is cleared and the layer removed.

    Updates the module-level `polyline_layer`, `polyline_points` and
    `preloaded_point_ids`, and prints a short status message.
    """
    global polyline_layer, polyline_points, preloaded_point_ids

    if action == "created":
        points = _latlon_from_line_geometry(geo_json.get("geometry") or {})
        if points is None:
            print("Created geometry is not a polyline; ignoring.")
            return

        # Replace current polyline with the newly drawn one.
        polyline_points = points
        # Reset preloaded ids since the drawn polyline replaces the initial vertices
        preloaded_point_ids = []
        add_or_update_polyline_layer()

        # Best-effort clean-up of GeoJSON layers that look like temporary drawn lines
        for lyr in list(m.layers):
            if isinstance(lyr, GeoJSON) and _is_leftover_line_layer(lyr):
                try:
                    m.remove_layer(lyr)
                except Exception as exc:
                    # Clean-up is optional: report the problem and carry on.
                    print(f"Could not remove temporary layer: {exc}")

        print(f"Created polyline with {len(polyline_points)} points")

    elif action == "edited":
        # The event may carry several features; use the first polyline found
        feats = geo_json.get("features", []) or [geo_json]
        for f in feats:
            points = _latlon_from_line_geometry(f.get("geometry") or {})
            if points is None:
                continue
            polyline_points = points
            add_or_update_polyline_layer()
            print(f"Edited polyline now has {len(polyline_points)} points")
            break
        else:
            print("Edited event contained no polyline features")

    elif action == "deleted":
        # Whatever was deleted, clear the internal state
        polyline_points = []
        preloaded_point_ids = []
        if polyline_layer is not None:
            try:
                m.remove_layer(polyline_layer)
            except Exception as exc:
                # The layer may already be gone from the map; the state is reset anyway.
                print(f"Could not remove polyline layer: {exc}")
            polyline_layer = None
        print("Polyline deleted")


# Only the polyline tool is configured (drawn in blue); every other tool gets empty options.
draw_control = DrawControl(
    polyline={"shapeOptions": {"color": "#0000FF"}},
    polygon={},
    rectangle={},
    circle={},
    circlemarker={},
    marker={},
)
draw_control.on_draw(handle_draw_polyline)
m.add_control(draw_control)

# -------------------------------
# Save polyline vertices preserving preloaded ordering first
# -------------------------------
def save_polyline(path):
    """
    Write the polyline vertices to `path`, one "lon,lat" line per vertex.

    Vertices whose index is listed in `preloaded_point_ids` (and is a valid
    index) are written first, in that list's order; all remaining vertices
    follow in polyline order. Nothing is written when there are no points.
    """
    if not polyline_points:
        print("No polyline points to save")
        return

    n_points = len(polyline_points)
    # Preloaded indexes that still refer to an existing vertex
    preloaded = [i for i in preloaded_point_ids if i in range(n_points)]
    already_listed = set(preloaded)
    remaining = [i for i in range(n_points) if i not in already_listed]

    with open(path, "w") as f:
        # Points are stored as [lat, lon] but written as lon,lat
        f.writelines(
            f"{polyline_points[i][1]},{polyline_points[i][0]}\n"
            for i in preloaded + remaining
        )

    print(f"Saved {len(polyline_points)} polyline points to '{path}'")

def _on_save_polyline_clicked(_button):
    # Button callback: write the current polyline to polyline_file.
    save_polyline(polyline_file)


save_btn_poly = widgets.Button(description="Save polyline", button_style="info")
save_btn_poly.on_click(_on_save_polyline_clicked)
with output:
    display(save_btn_poly)

# -------------------------------
# Show the map
# -------------------------------
display(m)

### 3.3.4 Run script

In [None]:
# User-specified parameters
contour_levels   = '[]' #'[0.1, 0.2]'
vmin = None # Define None for global color limits
vmax = 20 # Define None for global color limits
skip_time = 3           # Sample every nth time step (defined here so this section runs without section 3.1)
extent_cells = 1        # Number of extra cells added to the plot extent
increase_zoom_level = 1 # Increase computed zoom level by this amount to improve background image resolution
transparency_factor = 1.
dpi = 150               #specify the DPI
cmap = "jet"       # colour scale (jet, viridis,...)
save_frames = True       # Set to False to disable saving individual image frames

# Set to True to show velocity vectors, False to hide them.
show_vectors = True

# Quiver visual tuning (adjust interactively if needed)
scale_quiver = 2.0  # larger -> arrows shorter; tune for your units
quiver_width = 0.003
quiver_color = "white"
max_arrows_across = 40
max_arrows_vertical = 20
min_vector_mag = 1e-4  # mask vectors below this magnitude

# ============================
# DEFINE VARIABLE-LABEL DICTIONARY
# ============================
variable_label_dict = {
    "velocity modulus": "Velocity Modulus(m/s)",
    "salinity": "Salinity(psu)",
    "temperature": "Temperature(°C)",
    "water level": "Water Level(m)"
}

label = variable_label_dict.get(variable, "Unknown Variable")  # Fetch label from dictionary

# Script to run (plot_hdf5_vertical_cut.py) and the input file it is given
script_folder = os.path.join(os.getcwd(), "work", "Plot_Vertical_Cut")
script_name = os.path.join(script_folder, "plot_hdf5_vertical_cut.py")
input_file  = os.path.join(script_folder, "Input_Plot_HDF5_Cut.py")

# Parsing the dates here makes a malformed date fail before the script is launched
start_date = datetime.strptime(start_date_str, "%Y-%m-%d").date()
end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

# -----------------------
# WRITE Input_Plot_HDF5_Cut.py
# -----------------------
with open(input_file, 'w', encoding='utf-8') as f:
    f.write(f"backup_root       = r'{backup_root}'\n")
    f.write(f"hdf5_file         =r'{hdf5_file}'\n")
    f.write(f"hdf5_file_vectors =r'{hdf5_file_vectors}'\n")
    f.write(f"figures_folder    = r'{out_dir}'\n")
    f.write(f"csv_file          = r'{polyline_file}'\n")
    f.write(f"start_date_str    = '{start_date_str}'\n")
    f.write(f"end_date_str      = '{end_date_str}'\n")
    f.write(f"variable          = '{variable}'\n")
    f.write(f"label             = '{label}'\n")
    f.write(f"variable_vector   = {variable_vector}\n")
    f.write(f"show_vectors      = {show_vectors}\n")
    f.write(f"save_frames       = {save_frames}\n")
    f.write(f"skip_time         = {skip_time}\n")
    f.write(f"contour_levels    = {contour_levels}\n")
    f.write(f"vmin              = {vmin}\n")
    f.write(f"vmax              = {vmax}\n")
    f.write(f"extent_cells      = {extent_cells}\n")
    f.write(f"increase_zoom_level = {increase_zoom_level}\n")
    f.write(f"transparency_factor = {transparency_factor}\n")
    f.write(f"dpi               = {dpi}\n")
    f.write(f"cmap              = '{cmap}'\n")
    f.write(f"quiver_width      = {quiver_width}\n")
    f.write(f"scale_quiver      = {scale_quiver}\n")
    f.write(f"quiver_color      = '{quiver_color}'\n")
    f.write(f"max_arrows_across = {max_arrows_across}\n")
    f.write(f"max_arrows_vertical = {max_arrows_vertical}\n")
    f.write(f"min_vector_mag     = {min_vector_mag}\n")

# -----------------------
# RUN THE SCRIPT
# -----------------------

try:
    # Launch the script by its base name from inside its own folder, capturing its output as text
    result = subprocess.run(
        ["python", os.path.basename(script_name)],
        cwd=script_folder,
        capture_output=True,
        text=True
    )
    result.check_returncode()
    print("STDOUT:\n", result.stdout)
    print("plot_hdf5_vertical_cut completed successfully.")
except subprocess.CalledProcessError as e:
    # Show everything the child process printed before propagating the failure
    print("ERROR: plot_hdf5_vertical_cut.py exited with code", e.returncode)
    print("---- STDOUT ----")
    print(e.stdout)
    print("---- STDERR ----")
    print(e.stderr)
    raise

print("Done.")