# Coastal altimetry tide model rankings

This code compares multiple global ocean tide models against satellite altimetry data, and returns tide model performance and rankings in a standardised format for further analysis.

X-Track coastal altimetry data (v2.1, [10.24400/527896/a01-2022.020](https://doi.org/10.24400/527896/a01-2022.020)) used in this study were developed and validated by CTOH/LEGOS, France, and distributed by Aviso+.

> Birol, F., N. Fuller, F. Lyard, M. Cancet, F. Niño, C. Delebecque, S. Fleury, F. Toublanc, A. Melet, M. Saraceno, F. Léger, 2017. “Coastal Applications from Nadir Altimetry: Example of the X-TRACK Regional Products.” Advances in Space Research, 2017, 59 (4), p.936-953. doi:10.1016/j.asr.2016.11.005

## Getting started
Set working directory to top level of repo to ensure links work correctly:

In [1]:
cd ../..

/home/jovyan/Robbi/dea-intertidal


Install additional packages directly from the requirements file

In [None]:
pip install -r requirements.in --quiet

### Load packages

In [2]:
%load_ext autoreload
%autoreload 2

import os
import os.path
import sys
import glob
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from ftplib import FTP


# def tpxo8_tides(x, y, time):
#     from otps import TimePoint
#     from otps import predict_tide

#     # Use the tidal model to compute tide heights for each observation:
#     print(f"Modelling tides using OTPS and the TPXO8 tidal model")
#     datetimes = time.values.astype("M8[s]").astype("O").tolist()
#     if len(np.atleast_1d(x)) > 1:
#         timepoints = [
#             TimePoint(x_i, y_i, d_i) for d_i, x_i, y_i in zip(datetimes, x, y)
#         ]
#     else:
#         timepoints = [TimePoint(x, y, d_i) for d_i in datetimes]
#     tide_predictions = predict_tide(timepoints)
#     return [i.tide_m for i in tide_predictions]


from dea_tools.coastal import model_tides

# Folder containing the X-TRACK altimetry NetCDF files
altimetry_path = "/gdata1/data/altimetry/X-TRACK/"

# Folder containing the tide model files, exposed via an environment
# variable (presumably read by `model_tides` -- confirm in dea_tools docs)
os.environ["DEA_TOOLS_TIDE_MODELS"] = "/gdata1/data/tide_models_clipped"

# Tide models to compare; the same names are reused further down as
# dataframe column names
models = [
    "FES2022", "FES2014",
    "TPXO9-atlas-v5", "EOT20",
    "HAMTIDE11", "GOT4.10",
    "FES2012", "TPXO8-atlas-v1",
]

### Preprocess altimetry data

In [3]:
# Load the Collection 3 summary grid and use its bounding box as the
# study area. No reprojection is applied here: the bounds are compared
# directly against the altimetry `lon`/`lat` coordinates below, so the
# grid is assumed to be in geographic coordinates -- TODO confirm
c3_path = "https://data.dea.ga.gov.au/derivative/ga_summary_grid_c3.geojson"
c3_grid = gpd.read_file(c3_path)
xmin, ymin, xmax, ymax = c3_grid.total_bounds

# Choose which data to load
paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.*.nc")  # All files
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.S3A*.nc")  # Sentinel-3 only
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TP+*.nc")  # Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TPN*.nc")  # Topex/Jason-1/Jason-2 Interleaved orbit
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TP*.nc")  # All Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.ERS1*.nc")  # ERS-1/ERS-2/Envisat/SARAL/AltiKa
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.HY2*.nc")  # Haiyang-2A
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.GFO*.nc")  # Geosat Follow On

# `glob` returns files in arbitrary order; sort so every run processes
# them (and therefore orders the output rows) identically
paths = sorted(paths)

# Fail early with a clear message instead of an opaque `pd.concat` error
if not paths:
    raise FileNotFoundError(
        f"No X-TRACK altimetry files found in {altimetry_path}"
    )

out = []

for path in paths:
    # The satellite series is the fourth dot-separated token of the file
    # name (e.g. "S3A" in "ctoh.sla.ref.S3A..."). Split the base name
    # only, so a dot in a directory name cannot shift the token position
    satellite_series = os.path.basename(path).split(".")[3]

    # Open inside a context manager so each file handle is released once
    # its data has been copied into a dataframe
    with xr.open_dataset(path, decode_times=False) as ds:
        # Points that fall inside the study area bounding box
        in_study_area = (
            (ds.lon > xmin)
            & (ds.lon < xmax)
            & (ds.lat > ymin)
            & (ds.lat < ymax)
        )
        ds_clean = (
            ds[["time", "ocean_tide", "sla", "solid_tide", "mssh"]]
            # Subset to study area
            .sel(points_numbers=in_study_area)
            # Combine point and cycle dimensions into a single "z" dim
            # so we can convert to a table-like dataframe
            .stack({"z": ["points_numbers", "cycles_numbers"]})
            .to_dataframe()
            # Add satellite series and pass info as a variable
            .assign(pass_number=ds.pass_number, satellites=satellite_series)
            # Remove any rows with missing Sea Level Anomaly or tide data
            .dropna(how="any", axis=0, subset=["sla", "ocean_tide"])
            .reset_index(drop=True)
        )
    out.append(ds_clean)

df = pd.concat(out, axis=0)

# Times were loaded undecoded (`decode_times=False`); convert them from
# a number of days counted from 1950-01-01 into timestamps
df["time"] = datetime.datetime(1950, 1, 1) + pd.to_timedelta(df.time, unit="days")

#### Select subset

In [5]:
# Drop repeated observations, keeping the first record for each unique
# combination of time and location
is_repeat = df.duplicated(subset=["time", "lat", "lon"])
df = df.loc[~is_repeat]

# Restrict the analysis to observations from 2017 up to (not including) 2020.
# Alternative subsets that can be swapped in for quick experiments:
# df_subset = df
# df_subset = df_subset.iloc[::1000]
# df_subset = df_subset.iloc[0:100000]
on_or_after_start = df["time"] >= "2017"
before_end = df["time"] < "2020"
df_subset = df.loc[on_or_after_start & before_end]

# Preview
df_subset

Unnamed: 0,time,ocean_tide,sla,solid_tide,mssh,lat,lon,points_numbers,cycles_numbers,pass_number,satellites
7,2017-01-13 12:19:30.999999961,0.4732,-0.069784,-0.0727,-11.310628,-46.219161,154.160068,1,12,275,S3A
8,2017-03-08 12:19:30.000000046,-0.0540,0.067857,-0.0288,-11.310628,-46.219161,154.160068,1,14,275,S3A
9,2017-05-01 12:19:35.999999869,-0.0106,-0.046348,-0.0275,-11.310628,-46.219161,154.160068,1,16,275,S3A
10,2017-05-28 12:19:41.000000096,0.4354,-0.006784,0.0779,-11.310628,-46.219161,154.160068,1,17,275,S3A
11,2017-06-24 12:19:39.999999861,0.4779,-0.011049,0.2209,-11.310628,-46.219161,154.160068,1,18,275,S3A
...,...,...,...,...,...,...,...,...,...,...,...
56937,2017-04-04 21:24:47.469636950,0.1066,-0.072588,0.0377,7.522210,-10.755479,114.455850,397,536,025,TPN+J1N+J2N
56938,2017-04-14 19:23:19.869975156,-0.1848,0.150388,0.1513,7.522210,-10.755479,114.455850,397,537,025,TPN+J1N+J2N
56939,2017-04-24 17:21:52.222898926,-0.5261,0.192265,0.1370,7.522210,-10.755479,114.455850,397,538,025,TPN+J1N+J2N
56940,2017-05-04 15:20:24.482038943,-0.5045,0.171980,0.0238,7.522210,-10.755479,114.455850,397,539,025,TPN+J1N+J2N


## Tide modelling

In [6]:
%%time
tide_df = model_tides(
    x=df_subset.lon,
    y=df_subset.lat,
    time=df_subset.time,
    model=models,
    mode="one-to-one",
    parallel_splits=100,
    output_format="wide",
)
tide_df

Modelling tides using FES2022, FES2014, TPXO9-atlas-v5, EOT20, HAMTIDE11, GOT4.10, FES2012, TPXO8-atlas-v1 in parallel


100%|██████████| 800/800 [05:02<00:00,  2.64it/s] 


Converting to a wide format dataframe
CPU times: user 1min 30s, sys: 5.78 s, total: 1min 36s
Wall time: 6min 36s


Unnamed: 0_level_0,Unnamed: 1_level_0,tide_model,EOT20,FES2012,FES2014,FES2022,GOT4.10,HAMTIDE11,TPXO8-atlas-v1,TPXO9-atlas-v5
time,x,y,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-13 12:19:30.999999961,154.160068,-46.219161,0.505858,0.519720,0.508526,0.510016,0.493040,0.505296,0.500784,0.506554
2017-03-08 12:19:30.000000046,154.160068,-46.219161,-0.059726,-0.047878,-0.053851,-0.060296,-0.055963,-0.067257,-0.049282,-0.055673
2017-05-01 12:19:35.999999869,154.160068,-46.219161,0.010163,-0.002777,-0.010197,-0.001777,0.000765,0.015471,-0.002218,0.012460
2017-05-28 12:19:41.000000096,154.160068,-46.219161,0.476280,0.455687,0.453888,0.458890,0.449563,0.467211,0.446363,0.458187
2017-06-24 12:19:39.999999861,154.160068,-46.219161,0.517098,0.519390,0.497131,0.495111,0.488257,0.489188,0.490549,0.487887
...,...,...,...,...,...,...,...,...,...,...
2017-04-04 21:24:47.469636950,114.455850,-10.755479,0.140148,0.137240,0.120577,0.131097,0.130543,0.147495,0.122971,0.141355
2017-04-14 19:23:19.869975156,114.455850,-10.755479,-0.171572,-0.173035,-0.205158,-0.194088,-0.189536,-0.191428,-0.188343,-0.174850
2017-04-24 17:21:52.222898926,114.455850,-10.755479,-0.552427,-0.522095,-0.555211,-0.548502,-0.576325,-0.616551,-0.561351,-0.526781
2017-05-04 15:20:24.482038943,114.455850,-10.755479,-0.501046,-0.530120,-0.531396,-0.537522,-0.535228,-0.533416,-0.517779,-0.512562


In [7]:
# Undo the tide correction supplied with the altimetry product by adding
# the product's own ocean tide back onto the sea level anomaly
sla_uncorrected = df_subset["sla"] + df_subset["ocean_tide"]

# Attach the uncorrected SLA and the satellite label to the modelled
# tides. Values are copied by position, so this relies on `tide_df`
# having the same row order as `df_subset`
tide_df = tide_df.assign(
    sla_notidecorr=sla_uncorrected.values,
    satellites=df_subset["satellites"].values,
)

# Go from one column per model to one row per observation and model
tide_df_long = tide_df.melt(
    id_vars=["satellites", "sla_notidecorr"],
    value_vars=models,
    value_name="tide_m",
    ignore_index=False,
)

# Correct the SLA again, this time using each model's own tide prediction
tide_df_long = tide_df_long.assign(
    sla_tidecorr=lambda long_df: long_df["sla_notidecorr"] - long_df["tide_m"]
)
tide_df_long

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,satellites,sla_notidecorr,tide_model,tide_m,sla_tidecorr
time,x,y,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-01-13 12:19:30.999999961,154.160068,-46.219161,S3A,0.403416,FES2022,0.510016,-0.106600
2017-03-08 12:19:30.000000046,154.160068,-46.219161,S3A,0.013857,FES2022,-0.060296,0.074153
2017-05-01 12:19:35.999999869,154.160068,-46.219161,S3A,-0.056948,FES2022,-0.001777,-0.055171
2017-05-28 12:19:41.000000096,154.160068,-46.219161,S3A,0.428616,FES2022,0.458890,-0.030274
2017-06-24 12:19:39.999999861,154.160068,-46.219161,S3A,0.466851,FES2022,0.495111,-0.028260
...,...,...,...,...,...,...,...
2017-04-04 21:24:47.469636950,114.455850,-10.755479,TPN+J1N+J2N,0.034012,TPXO8-atlas-v1,0.122971,-0.088959
2017-04-14 19:23:19.869975156,114.455850,-10.755479,TPN+J1N+J2N,-0.034412,TPXO8-atlas-v1,-0.188343,0.153931
2017-04-24 17:21:52.222898926,114.455850,-10.755479,TPN+J1N+J2N,-0.333835,TPXO8-atlas-v1,-0.561351,0.227516
2017-05-04 15:20:24.482038943,114.455850,-10.755479,TPN+J1N+J2N,-0.332520,TPXO8-atlas-v1,-0.517779,0.185258


In [8]:
# Root-mean-square of the tide-corrected SLA for every model at each
# point, done in three steps: square, average per group, square root
tide_df_long["sla_tidecorr_sq"] = tide_df_long["sla_tidecorr"] ** 2

rms_group_keys = ["x", "y", "tide_model", "satellites"]
mean_squares = tide_df_long.groupby(rms_group_keys)["sla_tidecorr_sq"].mean()

# Spread the models out into columns: one row per point and satellite
df_rms = np.sqrt(mean_squares).unstack("tide_model").reset_index()

In [9]:
# Find the first and last observation year for every point/satellite
# combination. The aggregations are given as strings: passing the Python
# builtins `min`/`max` to `.agg` raises a FutureWarning in recent pandas
date_group_keys = ["x", "y", "satellites"]
dates_df = (
    tide_df_long.reset_index()
    .groupby(date_group_keys)
    .agg(start=("time", "min"), end=("time", "max"))
)
dates_df["start"] = dates_df.start.dt.year
dates_df["end"] = dates_df.end.dt.year

# Attach the years to the RMS table by looking up each row's own
# x/y/satellites key, rather than relying on both tables happening to
# share the same row order
rms_row_keys = pd.MultiIndex.from_frame(df_rms[date_group_keys])
matched_years = dates_df.reindex(rms_row_keys)
df_rms["start"] = matched_years["start"].to_numpy()
df_rms["end"] = matched_years["end"].to_numpy()

# Filter to observations with three years of data
df_rms = df_rms.query("(start == 2017) & (end == 2019)")

  tide_df_long.reset_index()
  tide_df_long.reset_index()


### Process to standard format

In [12]:
# Identifying columns that come before the per-model scores in the
# standardised output table
id_columns = ["x", "y", "valid_perc", "source", "statistic"]

combined_df = (
    df_rms.rename_axis("point_id")
    .assign(valid_perc=1.0, statistic="rms")
    .rename(columns={"satellites": "source"})
    .reindex(columns=id_columns + models)
)

# Turn the satellite label into a descriptive source name
combined_df["source"] = "x-track altimetry (" + combined_df["source"] + ")"

# Per-point summary statistics across all models
model_rms = combined_df[models]
combined_df["min"] = model_rms.min(axis=1)
combined_df["max"] = model_rms.max(axis=1)
combined_df["diff"] = combined_df["max"] - combined_df["min"]
combined_df["ave"] = model_rms.mean(axis=1)
combined_df["std"] = model_rms.std(axis=1)
combined_df["missing"] = model_rms.isna().sum(axis=1)

# Rank the models at each point (rank 1 = lowest RMS), then record the
# best and worst model by stripping the prefix from the winning column
rank_prefix = "rank_"
model_ranks = model_rms.rank(axis=1, ascending=True).add_prefix(rank_prefix)
combined_df = pd.concat([combined_df, model_ranks], axis=1)
combined_df["top_model"] = model_ranks.idxmin(axis=1).str[len(rank_prefix):]
combined_df["worst_model"] = model_ranks.idxmax(axis=1).str[len(rank_prefix):]
combined_df


tide_model,x,y,valid_perc,source,statistic,FES2022,FES2014,TPXO9-atlas-v5,EOT20,HAMTIDE11,...,rank_FES2022,rank_FES2014,rank_TPXO9-atlas-v5,rank_EOT20,rank_HAMTIDE11,rank_GOT4.10,rank_FES2012,rank_TPXO8-atlas-v1,top_model,worst_model
point_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,109.279411,-24.989952,1.0,x-track altimetry (S3A),rms,0.082977,0.082808,0.083244,0.082759,0.083382,...,4.0,3.0,5.0,2.0,6.0,7.0,8.0,1.0,TPXO8-atlas-v1,FES2012
4,109.280453,-11.785102,1.0,x-track altimetry (S3A),rms,0.107927,0.108273,0.110986,0.104665,0.113318,...,3.0,4.0,7.0,1.0,8.0,5.0,2.0,6.0,EOT20,HAMTIDE11
5,109.280463,-37.320975,1.0,x-track altimetry (TP+J1+J2+J3),rms,0.099448,0.098609,0.098730,0.098968,0.098322,...,7.0,3.0,4.0,5.0,2.0,8.0,1.0,6.0,FES2012,GOT4.10
6,109.281241,-40.979623,1.0,x-track altimetry (TP+J1+J2+J3),rms,0.069795,0.069626,0.067110,0.068226,0.068437,...,8.0,7.0,1.0,3.0,4.0,5.0,6.0,2.0,TPXO9-atlas-v5,FES2022
8,109.281365,-40.024006,1.0,x-track altimetry (S3A),rms,0.064967,0.065405,0.065518,0.062829,0.066921,...,2.0,3.0,5.0,1.0,7.0,6.0,8.0,4.0,EOT20,FES2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54617,157.043790,-30.423198,1.0,x-track altimetry (S3A),rms,0.143866,0.146855,0.147071,0.145730,0.145274,...,2.0,6.0,7.0,4.0,3.0,5.0,8.0,1.0,TPXO8-atlas-v1,FES2012
54618,157.044016,-36.661762,1.0,x-track altimetry (TP+J1+J2+J3),rms,0.124134,0.123549,0.122962,0.121266,0.125809,...,7.0,6.0,4.0,1.0,8.0,5.0,2.0,3.0,EOT20,HAMTIDE11
54622,157.044746,-41.312958,1.0,x-track altimetry (S3A),rms,0.068317,0.068114,0.067022,0.066483,0.065455,...,7.0,6.0,4.0,2.0,1.0,5.0,8.0,3.0,HAMTIDE11,FES2012
54623,157.044817,-32.870982,1.0,x-track altimetry (TP+J1+J2+J3),rms,0.295843,0.295876,0.296271,0.287010,0.295144,...,6.0,7.0,8.0,1.0,4.0,5.0,2.0,3.0,EOT20,TPXO9-atlas-v5


### Export

In [13]:
# Build a point geometry from each row's x/y and write the table to file
export_path = "data/raw/rankings_altimetry_2017-2019.geojson"
point_geometries = gpd.points_from_xy(x=combined_df.x, y=combined_df.y)

xtrack_rms_gdf = gpd.GeoDataFrame(
    data=combined_df,
    geometry=point_geometries,
    crs="EPSG:4326",
)
xtrack_rms_gdf.to_file(export_path)