# Coastal altimetry comparison

In [None]:
!pip install -e /home/jovyan/Robbi/dea-notebooks/Tools/

In [None]:
!pip install pyTMD==2.0.8

In [1]:
%load_ext autoreload
%autoreload 2

import os
import os.path
import sys
import glob
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from ftplib import FTP


def tpxo8_tides(x, y, time):
    from otps import TimePoint
    from otps import predict_tide

    # Use the tidal model to compute tide heights for each observation:
    print(f"Modelling tides using OTPS and the TPXO8 tidal model")
    datetimes = time.values.astype("M8[s]").astype("O").tolist()
    if len(np.atleast_1d(x)) > 1:
        timepoints = [
            TimePoint(x_i, y_i, d_i) for d_i, x_i, y_i in zip(datetimes, x, y)
        ]
    else:
        timepoints = [TimePoint(x, y, d_i) for d_i in datetimes]
    tide_predictions = predict_tide(timepoints)
    return [i.tide_m for i in tide_predictions]


# import sys
# sys.path.insert(1, '/home/jovyan/Robbi/dea-notebooks/Tools/')
from dea_tools.coastal import model_tides

os.environ["DEA_TOOLS_TIDE_MODELS"] = "/home/jovyan/tide_models_clipped"
os.chdir("/home/jovyan/altimetry/X-TRACK/")

### Download X-TRACK altimetry data

In [None]:
# Connect and list directory contents
ftp = FTP('ftp-access.aviso.altimetry.fr')
ftp.login("Robbi.BishopTaylor@ga.gov.au", "B9TRsr")

# # List all files in directory and return as Python list
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/S3A/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/S3A/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/TP+J1+J2+J3/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/TP+J1+J2+J3/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/ERS1+ERS2+ENV+SRL/SLA")
nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/ERS1+ERS2+ENV+SRL/SLA")  # Missing?
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/TPN+J1N+J2N/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/TPN+J1N+J2N/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/HY2/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/HY2/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/GFO/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/GFO/SLA")

for nc_file in nc_files:
    filename = nc_file.split("/")[-1]
    
    # If file doesn't already exist
    if not os.path.isfile(filename[0:-5]):

        # Download file
        print(f"Downloading {filename}")
        with open(filename, "wb") as file: 
            ftp.retrbinary(f"RETR {nc_file}", file.write)
            
        # Extract NetCDF
        !unlzma {filename}

In [2]:
# Load Collection 3 summary grid, reproject to Albers
c3_path = "https://data.dea.ga.gov.au/derivative/ga_summary_grid_c3.geojson"
c3_grid = gpd.read_file(c3_path)
xmin, ymin, xmax, ymax = c3_grid.total_bounds

# Choose which data to load
# paths = glob.glob("ctoh.sla.ref.*.nc")  # All files
# paths = glob.glob("ctoh.sla.ref.S3A*.nc")  # Sentinel-3 only
# paths = glob.glob("ctoh.sla.ref.TP+*.nc")  # Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob("ctoh.sla.ref.TPN*.nc")  # Topex/Jason-1/Jason-2 Interleaved orbit
paths = glob.glob("ctoh.sla.ref.TP*.nc")  # All Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob("ctoh.sla.ref.ERS1*.nc")  # ERS-1/ERS-2/Envisat/SARAL/AltiKa
# paths = glob.glob("ctoh.sla.ref.HY2*.nc")  # Haiyang-2A
# paths = glob.glob("ctoh.sla.ref.GFO*.nc")  # Geosat Follow On


out = []

for path in paths:

    ds = xr.open_dataset(path, decode_times=False)
    ds_clean = (
        ds[["time", "ocean_tide", "sla", "solid_tide", "mssh"]]
        
        # Subset to study area
        .sel(
            points_numbers=(ds.lon > xmin)
            & (ds.lon < xmax)
            & (ds.lat > ymin)
            & (ds.lat < ymax)
        )
        
        # Combine point and cycle dimensions into a single "z" dim
        # so we can convert to a table-like dataframe
        .stack({"z": ["points_numbers", "cycles_numbers"]})
        .to_dataframe()
        
        # Add satellite series and pass info as a variable
        .assign(pass_number=ds.pass_number,
                satellites=path.split(".")[3])
        
        # Remove any rows with missing Sea Level Anomaly or tide data
        .dropna(how="any", axis=0, subset=["sla", "ocean_tide"])
        .reset_index(drop=True)
    )
    out.append(ds_clean)
    
df = pd.concat(out, axis=0)
df["time"] = datetime.datetime(1950, 1, 1) + pd.to_timedelta(df.time, unit="days")

In [3]:
df_subset = df  #.loc[(df.time >= '2016') & (df.time <= '2022')]
df_subset = df_subset.loc[~df_subset[["time", "lat", "lon"]].duplicated()]
# df_subset = df_subset.iloc[::1000]
# df_subset = df_subset.iloc[0:100000]
df_subset

Unnamed: 0,time,ocean_tide,sla,solid_tide,mssh,lat,lon,points_numbers,cycles_numbers,pass_number,satellites
0,2002-09-27 20:23:31.664169600,-0.0485,0.189382,-0.0780,-12.332647,-45.377102,149.607496,1,1,199,TPN+J1N+J2N
1,2002-10-07 18:22:03.589766400,-0.3380,0.083611,-0.0900,-12.332647,-45.377102,149.607496,1,2,199,TPN+J1N+J2N
2,2002-10-27 14:19:06.590870400,0.1327,-0.043894,-0.0760,-12.332647,-45.377102,149.607496,1,4,199,TPN+J1N+J2N
3,2002-11-06 12:17:37.910112000,0.3488,-0.024077,-0.1020,-12.332647,-45.377102,149.607496,1,5,199,TPN+J1N+J2N
4,2002-11-16 10:16:09.722784000,0.0918,-0.065637,0.0090,-12.332647,-45.377102,149.607496,1,6,199,TPN+J1N+J2N
...,...,...,...,...,...,...,...,...,...,...,...
94608,2022-02-07 07:54:36.364838400,0.7299,0.231943,0.0828,49.455764,-10.786327,127.927499,102,1067,012,TP+J1+J2+J3
94609,2022-02-17 05:53:07.163779200,0.6337,0.056607,0.1601,49.455764,-10.786327,127.927499,102,1068,012,TP+J1+J2+J3
94610,2022-02-27 03:51:39.895660800,0.1788,0.110365,0.0837,49.455764,-10.786327,127.927499,102,1069,012,TP+J1+J2+J3
94611,2022-03-09 01:50:11.483001600,-0.3406,0.112168,-0.0700,49.455764,-10.786327,127.927499,102,1070,012,TP+J1+J2+J3


In [4]:
%%time
tide_df = model_tides(
    x=df_subset.lon,
    y=df_subset.lat,
    time=df_subset.time,
    model=[
        "FES2014",
        "FES2012",
        "TPXO9-atlas-v5",
        "EOT20",
        "HAMTIDE11",
        "GOT4.10",
        # "TPXO8-atlas",
    ],
    mode="one-to-one",
    parallel_splits=5,
    output_format="wide",
).rename({"TPXO9-atlas-v5": "TPXO9", "TPXO8-atlas": "TPXO8"}, axis=1)

# # Add TPXO8 from OTPS
# tide_df["TPXO8"] = tpxo8_tides(x=df_subset.lon, y=df_subset.lat, time=df_subset.time)
tide_df

Modelling tides using FES2014, FES2012, TPXO9-atlas-v5, EOT20, HAMTIDE11, GOT4.10 in parallel


100%|██████████| 30/30 [05:32<00:00, 11.10s/it]  


Converting to a wide format dataframe
CPU times: user 2min 1s, sys: 19.9 s, total: 2min 21s
Wall time: 7min 44s


Unnamed: 0_level_0,Unnamed: 1_level_0,tide_model,EOT20,FES2012,FES2014,GOT4.10,HAMTIDE11,TPXO9
time,x,y,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2002-09-27 20:23:31.664169600,149.607496,-45.377102,-0.066522,-0.039712,-0.052674,-0.043963,-0.069828,-0.040122
2002-10-07 18:22:03.589766400,149.607496,-45.377102,-0.374161,-0.326374,-0.353483,-0.321390,-0.333827,-0.338492
2002-10-27 14:19:06.590870400,149.607496,-45.377102,0.127187,0.128982,0.145942,0.124646,0.130340,0.129942
2002-11-06 12:17:37.910112000,149.607496,-45.377102,0.369705,0.363109,0.371863,0.358172,0.371412,0.357768
2002-11-16 10:16:09.722784000,149.607496,-45.377102,0.069664,0.118472,0.098330,0.087481,0.095575,0.081517
...,...,...,...,...,...,...,...,...
2022-02-07 07:54:36.364838400,127.927499,-10.786327,0.764131,0.774515,0.764769,0.735162,0.679551,0.748499
2022-02-17 05:53:07.163779200,127.927499,-10.786327,0.668440,0.627070,0.652177,0.633678,0.614406,0.662820
2022-02-27 03:51:39.895660800,127.927499,-10.786327,0.215176,0.166697,0.186840,0.234856,0.246582,0.265094
2022-03-09 01:50:11.483001600,127.927499,-10.786327,-0.316740,-0.347720,-0.350907,-0.340373,-0.293227,-0.285295


In [5]:
# Add non-tide corrected SLA data to dataframe
tide_df["sla_notidecorr"] = (df_subset.sla + df_subset.ocean_tide).values

# Add satellite annotation
tide_df["satellites"] = df_subset.satellites.values

# Reshape to long format
tide_df_long = tide_df.melt(
    ignore_index=False,
    id_vars=["satellites", "sla_notidecorr"],
    value_vars=[
        "EOT20",
        "FES2012",
        "FES2014",
        "GOT4.10",
        "HAMTIDE11",
        "TPXO9",
        # "TPXO8",
    ],
    value_name="tide_m",
)

# Re-apply tide correction with each# of our models
tide_df_long["sla_tidecorr"] = tide_df_long.sla_notidecorr - tide_df_long.tide_m
tide_df_long

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,satellites,sla_notidecorr,tide_model,tide_m,sla_tidecorr
time,x,y,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-09-27 20:23:31.664169600,149.607496,-45.377102,TPN+J1N+J2N,0.140882,EOT20,-0.066522,0.207404
2002-10-07 18:22:03.589766400,149.607496,-45.377102,TPN+J1N+J2N,-0.254389,EOT20,-0.374161,0.119771
2002-10-27 14:19:06.590870400,149.607496,-45.377102,TPN+J1N+J2N,0.088806,EOT20,0.127187,-0.038381
2002-11-06 12:17:37.910112000,149.607496,-45.377102,TPN+J1N+J2N,0.324723,EOT20,0.369705,-0.044982
2002-11-16 10:16:09.722784000,149.607496,-45.377102,TPN+J1N+J2N,0.026163,EOT20,0.069664,-0.043500
...,...,...,...,...,...,...,...
2022-02-07 07:54:36.364838400,127.927499,-10.786327,TP+J1+J2+J3,0.961843,TPXO9,0.748499,0.213344
2022-02-17 05:53:07.163779200,127.927499,-10.786327,TP+J1+J2+J3,0.690307,TPXO9,0.662820,0.027487
2022-02-27 03:51:39.895660800,127.927499,-10.786327,TP+J1+J2+J3,0.289165,TPXO9,0.265094,0.024071
2022-03-09 01:50:11.483001600,127.927499,-10.786327,TP+J1+J2+J3,-0.228432,TPXO9,-0.285295,0.056863


In [6]:
# Calculate RMSE for every model at each point
tide_df_long["sla_tidecorr_sq"] = tide_df_long[["sla_tidecorr"]] ** 2
df_rmse = (
    np.sqrt(tide_df_long.groupby(["x", "y", "tide_model", "satellites"])[["sla_tidecorr_sq"]].mean())
    .unstack("tide_model")["sla_tidecorr_sq"]
    .reset_index()
)

In [7]:
# Add start and end dates
dates_df = tide_df_long.reset_index().groupby(["x", "y", "satellites"]).agg(start=('time', min), end=('time', max))
dates_df["start"] = dates_df.start.dt.year
dates_df["end"] = dates_df.end.dt.year
df_rmse[["start", "end"]] = dates_df.reset_index(drop=True)

In [8]:
# Export to GeoJSON
gdf = gpd.GeoDataFrame(
    data=df_rmse,
    geometry=gpd.points_from_xy(x=df_rmse.x, y=df_rmse.y),
    crs="EPSG:4326",
)
gdf.to_file("xtrack_rms_tp.geojson")

In [None]:
# import numpy as np
# import pandas as pd
# from pyTMD import compute_tide_corrections

# compute_tide_corrections(
#     x=np.linspace(155, 160, 10),
#     y=np.linspace(-30, -40, 10),
#     delta_time=pd.date_range("2020-01", "2020-02", periods=10),
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="bilinear",
# )

In [None]:
# compute_tide_corrections(
#     x=np.linspace(155, 160, 10),
#     y=np.linspace(-30, -40, 10),
#     delta_time=pd.date_range("2020-01", "2020-02", periods=10),
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="linear",
# )

In [None]:
# %%time
# from pyTMD import compute_tide_corrections

# tides_fes2014 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="spline",
#     CUTOFF=np.inf,
# )

In [None]:
# print("Modelling FES2012")
# tides_fes2012 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/home/jovyan/gdata1/data/tide_models_clipped/",
#     # MODEL="FES2014",
#     DEFINITION_FILE="/home/jovyan/gdata1/data/tide_models_clipped/model_FES2012.def",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )

# print("Modelling FES2014")
# tides_fes2014 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/gdata1/data/tide_models_clipped/",
#     MODEL="FES2014",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )

# print("Modelling TPXO9")
# tides_tpxo9 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/gdata1/data/tide_models_clipped/",
#     MODEL="TPXO9-atlas-v5",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )