# Coastal altimetry comparison

In [None]:
!pip install -e /home/jovyan/Robbi/dea-notebooks/Tools/

In [None]:
!pip install pyTMD==2.0.8

In [1]:
%load_ext autoreload
%autoreload 2

import os
import os.path
import sys
import glob
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from ftplib import FTP


def tpxo8_tides(x, y, time):
    """
    Model tide heights with OTPS and the TPXO8 tidal model.

    Parameters
    ----------
    x, y : scalar or array-like
        Coordinates passed as the first and second arguments of
        ``otps.TimePoint`` (presumably longitude and latitude; confirm
        against the OTPS documentation). Each may be either a single
        value, which is reused for every timestep, or one value per
        timestep.
    time : object exposing ``.values``
        Timesteps to model, e.g. a ``pandas.Series`` of datetimes. The
        ``.values`` array must be castable to ``datetime64[s]``.

    Returns
    -------
    list
        The ``tide_m`` attribute of every OTPS prediction, one entry per
        timestep and in the same order as ``time``.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` holds more than one value but not exactly one
        value per timestep.
    """
    # Imported inside the function, so OTPS is only required when this
    # function is actually called
    from otps import TimePoint
    from otps import predict_tide

    # Use the tidal model to compute tide heights for each observation:
    print("Modelling tides using OTPS and the TPXO8 tidal model")

    # Convert to plain Python datetime objects at one-second precision
    datetimes = np.atleast_1d(time.values).astype("M8[s]").astype("O").tolist()

    # Broadcast the coordinates against the timesteps. This treats a scalar
    # and a length-1 array identically (one fixed location), and raises
    # instead of silently truncating when the lengths do not match
    n_times = len(datetimes)
    xs = np.broadcast_to(np.ravel(x), (n_times,)).tolist()
    ys = np.broadcast_to(np.ravel(y), (n_times,)).tolist()

    timepoints = [
        TimePoint(x_i, y_i, d_i) for x_i, y_i, d_i in zip(xs, ys, datetimes)
    ]
    tide_predictions = predict_tide(timepoints)
    return [prediction.tide_m for prediction in tide_predictions]


# Alternative to an editable install: put the local Tools directory on sys.path
# import sys
# sys.path.insert(1, '/home/jovyan/Robbi/dea-notebooks/Tools/')
from dea_tools.coastal import model_tides

# Directory holding the tide model files; presumably read by `dea_tools`
# when `model_tides` is called (TODO confirm against the dea_tools docs)
os.environ["DEA_TOOLS_TIDE_MODELS"] = "/home/jovyan/tide_models_clipped"
# Work inside the X-TRACK data directory: later cells download, glob and
# export files using paths relative to the current working directory
os.chdir("/home/jovyan/altimetry/X-TRACK/")

### Download X-TRACK altimetry data

In [None]:
# Connect and list directory contents
ftp = FTP('ftp-access.aviso.altimetry.fr')
ftp.login("Robbi.BishopTaylor@ga.gov.au", "B9TRsr")

# # List all files in directory and return as Python list
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/S3A/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/S3A/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/TP+J1+J2+J3/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/TP+J1+J2+J3/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/ERS1+ERS2+ENV+SRL/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/ERS1+ERS2+ENV+SRL/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/TPN+J1N+J2N/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/TPN+J1N+J2N/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/HY2/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/HY2/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/EAUSTRALIA/GFO/SLA")
# nc_files = ftp.nlst("regional-xtrack-coastal/version_xtrack_l2p_2022/WAUSTRALIA/GFO/SLA")

for nc_file in nc_files:
    filename = nc_file.split("/")[-1]
    
    # If file doesn't already exist
    if not os.path.isfile(filename[0:-5]):

        # Download file
        print(f"Downloading {filename}")
        with open(filename, "wb") as file: 
            ftp.retrbinary(f"RETR {nc_file}", file.write)
            
        # Extract NetCDF
        !unlzma {filename}

In [None]:
# Load Collection 3 summary grid and use its bounding box as the study area.
# No reprojection is applied: the bounds are compared directly against the
# altimetry lon/lat values below
c3_path = "https://data.dea.ga.gov.au/derivative/ga_summary_grid_c3.geojson"
c3_grid = gpd.read_file(c3_path)
xmin, ymin, xmax, ymax = c3_grid.total_bounds

# Choose which data to load. Paths are sorted because glob returns files in
# arbitrary order, and row order decides which duplicate observation is kept
# when duplicates are dropped later
paths = sorted(glob.glob("ctoh.sla.ref.*.nc"))  # All files
# paths = sorted(glob.glob("ctoh.sla.ref.S3A*.nc"))  # Sentinel-3 only
# paths = sorted(glob.glob("ctoh.sla.ref.TP+*.nc"))  # Topex/Jason-1/Jason-2/Jason-3
# paths = sorted(glob.glob("ctoh.sla.ref.TPN*.nc"))  # Topex/Jason-1/Jason-2 Interleaved orbit
# paths = sorted(glob.glob("ctoh.sla.ref.TP*.nc"))  # All Topex/Jason-1/Jason-2/Jason-3
# paths = sorted(glob.glob("ctoh.sla.ref.ERS1*.nc"))  # ERS-1/ERS-2/Envisat/SARAL/AltiKa
# paths = sorted(glob.glob("ctoh.sla.ref.HY2*.nc"))  # Haiyang-2A
# paths = sorted(glob.glob("ctoh.sla.ref.GFO*.nc"))  # Geosat Follow On

# Fail early with a clear message rather than an opaque pd.concat error
if not paths:
    raise FileNotFoundError(
        f"No X-TRACK NetCDF files matching 'ctoh.sla.ref.*.nc' in {os.getcwd()}"
    )

out = []

for path in paths:

    # The context manager closes each file once its data has been copied
    # into a dataframe, so file handles do not accumulate across the loop.
    # Times are left undecoded and converted manually after concatenation
    with xr.open_dataset(path, decode_times=False) as ds:
        ds_clean = (
            ds[["time", "ocean_tide", "sla", "solid_tide", "mssh"]]

            # Subset to study area
            .sel(
                points_numbers=(ds.lon > xmin)
                & (ds.lon < xmax)
                & (ds.lat > ymin)
                & (ds.lat < ymax)
            )

            # Combine point and cycle dimensions into a single "z" dim
            # so we can convert to a table-like dataframe
            .stack({"z": ["points_numbers", "cycles_numbers"]})
            .to_dataframe()

            # Add satellite series and pass info as a variable; the satellite
            # series is the fourth dot-separated token of the file name
            .assign(pass_number=ds.pass_number,
                    satellites=path.split(".")[3])

            # Remove any rows with missing Sea Level Anomaly or tide data
            .dropna(how="any", axis=0, subset=["sla", "ocean_tide"])
            .reset_index(drop=True)
        )
    out.append(ds_clean)

# Each per-file frame restarts its index at 0; ignore_index gives the combined
# table a single unique index
df = pd.concat(out, axis=0, ignore_index=True)

# Convert the raw time values, taken here as days since 1950-01-01, to timestamps
df["time"] = datetime.datetime(1950, 1, 1) + pd.to_timedelta(df.time, unit="days")

In [None]:
# Optional: restrict the analysis period first, e.g.
# df.loc[(df.time >= '2016') & (df.time <= '2022')]

# Keep only the first row of every unique time/lat/lon combination
df_subset = df.drop_duplicates(subset=["time", "lat", "lon"], keep="first")

# Optional: thin the data for quick test runs
# df_subset = df_subset.iloc[::1000]
# df_subset = df_subset.iloc[0:100000]
df_subset

In [None]:
%%time
tide_df = model_tides(
    x=df_subset.lon,
    y=df_subset.lat,
    time=df_subset.time,
    model=[
        "FES2014",
        "FES2012",
        "TPXO9-atlas-v5",
        "EOT20",
        "HAMTIDE11",
        "GOT4.10",
        "TPXO8-atlas-v1",
    ],
    mode="one-to-one",
    parallel_splits=200,
    output_format="wide",
).rename(
    {"TPXO9-atlas-v5": "TPXO9", 
     "TPXO8-atlas": "TPXO8", 
     "TPXO8-atlas-v1": "TPXO8",
    },
    axis=1,
)

# # Add TPXO8 from OTPS
# tide_df["TPXO8"] = tpxo8_tides(x=df_subset.lon, y=df_subset.lat, time=df_subset.time)
tide_df

In [None]:
# Undo the tide correction supplied with the altimetry product by adding its
# ocean tide back onto the sea level anomaly. Values are attached by position,
# so `tide_df` must hold one row per `df_subset` row, in the same order
tide_df["sla_notidecorr"] = (df_subset.sla + df_subset.ocean_tide).to_numpy()

# Carry the satellite series label across as well (also by position)
tide_df["satellites"] = df_subset.satellites.to_numpy()

# One column per tide model in the wide table
model_columns = [
    "EOT20",
    "FES2012",
    "FES2014",
    "GOT4.10",
    "HAMTIDE11",
    "TPXO9",
    "TPXO8",
]

# Reshape to long format: one row per observation and model, keeping the
# original index. The model-name column takes its name from the column index
# of `tide_df` (presumably "tide_model", which the RMSE cell groups by)
tide_df_long = tide_df.melt(
    id_vars=["satellites", "sla_notidecorr"],
    value_vars=model_columns,
    value_name="tide_m",
    ignore_index=False,
)

# Re-apply the tide correction with each of our models
tide_df_long["sla_tidecorr"] = (
    tide_df_long["sla_notidecorr"] - tide_df_long["tide_m"]
)
tide_df_long

In [None]:
# Calculate RMSE for every model at each point: square the tide-corrected
# SLA, average within each point/model/satellite group, then take the root
tide_df_long["sla_tidecorr_sq"] = tide_df_long["sla_tidecorr"] ** 2

rmse_keys = ["x", "y", "tide_model", "satellites"]
mean_sq = tide_df_long.groupby(rmse_keys)["sla_tidecorr_sq"].mean()

# Spread the models into columns, leaving one row per point and satellite series
df_rmse = np.sqrt(mean_sq).unstack("tide_model").reset_index()

In [None]:
# Add start and end dates: first and last observation year for every
# point/satellite combination
date_keys = ["x", "y", "satellites"]
dates_df = (
    tide_df_long.reset_index()
    .groupby(date_keys)
    # String names rather than the builtins `min`/`max`: recent pandas
    # versions emit a FutureWarning when builtins are passed to `agg`
    .agg(start=("time", "min"), end=("time", "max"))
)
dates_df["start"] = dates_df.start.dt.year
dates_df["end"] = dates_df.end.dt.year

# Attach the years by matching on the point/satellite keys instead of relying
# on both tables happening to share the same row order
dates_aligned = dates_df.reindex(pd.MultiIndex.from_frame(df_rmse[date_keys]))
df_rmse["start"] = dates_aligned["start"].to_numpy()
df_rmse["end"] = dates_aligned["end"].to_numpy()

In [None]:
# Build one point geometry per RMSE row from its x/y values, treated as
# EPSG:4326 coordinates
rms_points = gpd.points_from_xy(x=df_rmse.x, y=df_rmse.y)
xtrack_rms_gdf = gpd.GeoDataFrame(data=df_rmse, geometry=rms_points, crs="EPSG:4326")

# Export to GeoJSON (written relative to the current working directory)
xtrack_rms_gdf.to_file("xtrack_rms_all.geojson")

## Further analysis

In [3]:
# Uncomment to reload the previously exported results instead of relying on
# `xtrack_rms_gdf` still being in memory from the export cell
# xtrack_rms_gdf = gpd.read_file("xtrack_rms_all.geojson")
xtrack_rms_gdf

Unnamed: 0,x,y,satellites,EOT20,FES2012,FES2014,GOT4.10,HAMTIDE11,TPXO8,TPXO9,start,end,geometry
0,109.279411,-24.989952,S3A,0.088279,0.090760,0.089911,0.091474,0.091911,0.091466,0.090891,2016,2023,POINT (109.27941 -24.98995)
1,109.279429,-42.012947,ERS1+ERS2+ENV+SRL,0.078003,0.079709,0.079178,0.078494,0.078274,0.077973,0.077991,1992,2016,POINT (109.27943 -42.01295)
2,109.279453,-16.823695,TPN+J1N+J2N,0.092276,0.101060,0.100774,0.102176,0.104261,0.103634,0.103553,2002,2017,POINT (109.27945 -16.82370)
3,109.279509,-17.522447,TPN+J1N+J2N,0.102617,0.108923,0.108161,0.109216,0.109169,0.108955,0.109221,2002,2017,POINT (109.27951 -17.52245)
4,109.279765,-41.080309,HY2,0.073656,0.075364,0.076097,0.073511,0.073595,0.072173,0.073694,2014,2016,POINT (109.27976 -41.08031)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
125678,157.044431,-41.477484,HY2,0.067346,0.069065,0.064542,0.064408,0.068801,0.065989,0.067317,2014,2016,POINT (157.04443 -41.47748)
125679,157.044642,-17.872796,TPN+J1N+J2N,0.070519,0.073841,0.073929,0.072561,0.073149,0.072530,0.073222,2002,2017,POINT (157.04464 -17.87280)
125680,157.044746,-41.312958,S3A,0.074224,0.075504,0.075217,0.077962,0.076889,0.076695,0.076525,2016,2023,POINT (157.04475 -41.31296)
125681,157.044817,-32.870982,TP+J1+J2+J3,0.262174,0.266165,0.265773,0.265585,0.266120,0.265434,0.265827,1993,2022,POINT (157.04482 -32.87098)


In [13]:
# Difference between each model's RMS and the mean RMS of all models at the
# same point. Columns 3-9 are the seven tide model columns (EOT20 to TPXO9)
xtrack_rms_gdf.iloc[:, 3:10].pipe(
    lambda model_rms: model_rms.sub(model_rms.mean(axis="columns"), axis="index")
)

Unnamed: 0,EOT20,FES2012,FES2014,GOT4.10,HAMTIDE11,TPXO8,TPXO9
0,-0.002391,0.000089,-0.000759,0.000804,0.001241,0.000796,0.000221
1,-0.000514,0.001192,0.000660,-0.000023,-0.000243,-0.000545,-0.000527
2,-0.008829,-0.000045,-0.000330,0.001071,0.003156,0.002529,0.002448
3,-0.005420,0.000886,0.000123,0.001178,0.001132,0.000917,0.001184
4,-0.000357,0.001351,0.002084,-0.000502,-0.000418,-0.001839,-0.000319
...,...,...,...,...,...,...,...
125678,0.000565,0.002284,-0.002239,-0.002374,0.002020,-0.000792,0.000536
125679,-0.002302,0.001019,0.001107,-0.000261,0.000327,-0.000291,0.000400
125680,-0.001921,-0.000641,-0.000928,0.001817,0.000744,0.000550,0.000380
125681,-0.003123,0.000868,0.000476,0.000288,0.000823,0.000137,0.000530


In [7]:
 # Mean RMS across the seven tide model columns (columns 3-9) at each point
 xtrack_rms_gdf.iloc[:, 3:10].mean(axis="columns")

0         0.090670
1         0.078518
2         0.101105
3         0.108037
4         0.074013
            ...   
125678    0.066781
125679    0.072822
125680    0.076145
125681    0.265297
125682    0.082656
Length: 125683, dtype: float64

In [None]:
# import numpy as np
# import pandas as pd
# from pyTMD import compute_tide_corrections

# compute_tide_corrections(
#     x=np.linspace(155, 160, 10),
#     y=np.linspace(-30, -40, 10),
#     delta_time=pd.date_range("2020-01", "2020-02", periods=10),
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="bilinear",
# )

In [None]:
# compute_tide_corrections(
#     x=np.linspace(155, 160, 10),
#     y=np.linspace(-30, -40, 10),
#     delta_time=pd.date_range("2020-01", "2020-02", periods=10),
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="linear",
# )

In [None]:
# %%time
# from pyTMD import compute_tide_corrections

# tides_fes2014 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/home/jovyan/tide_models_clipped",
#     MODEL="TPXO8-atlas",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     METHOD="spline",
#     CUTOFF=np.inf,
# )

In [None]:
# print("Modelling FES2012")
# tides_fes2012 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/home/jovyan/gdata1/data/tide_models_clipped/",
#     # MODEL="FES2014",
#     DEFINITION_FILE="/home/jovyan/gdata1/data/tide_models_clipped/model_FES2012.def",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )

# print("Modelling FES2014")
# tides_fes2014 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/gdata1/data/tide_models_clipped/",
#     MODEL="FES2014",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )

# print("Modelling TPXO9")
# tides_tpxo9 = compute_tide_corrections(
#     x=df_subset.lon,
#     y=df_subset.lat,
#     delta_time=df_subset.time,
#     DIRECTORY="/gdata1/data/tide_models_clipped/",
#     MODEL="TPXO9-atlas-v5",
#     EPSG=4326,
#     TYPE="drift",
#     TIME="datetime",
#     CUTOFF=np.inf,
# )