# Hydrographic observations from the M120 cruise and from the NEMO model

In [None]:
# parameters

# Data directory; exported further down as the ESM_VFC_DATA_DIR environment variable.
esm_vfc_data_dir = "../esm-vfc-data/"
# URL of the intake catalog (YAML) that is opened for the model data.
catalog_url = "https://raw.githubusercontent.com/ESM-VFC/esm-vfc-catalogs/master/catalogs/NEMO_ORCA05_FOCI_Test_Minimal.yaml"
# DOI of the observational data set that is loaded through PanDataSet.
obs_data_doi = "10.1594/PANGAEA.868654"

## Tech preamble

In [None]:
# set up intake catalog
import intake
from esmvfc_cattools import fetch_zenodo_data
import os

# Export the configured data directory through the process environment.
# NOTE(review): presumably the catalog entries / fetch_zenodo_data resolve
# their local paths from ESM_VFC_DATA_DIR — confirm.
os.environ["ESM_VFC_DATA_DIR"] = esm_vfc_data_dir

In [None]:
# be able to load data directly from Pangaea.de
from pangaeapy.pandataset import PanDataSet

In [None]:
# set up plotting
import hvplot.pandas
import hvplot.xarray
import geoviews.feature as gf
from cartopy import crs

In [None]:
# install and import a tool for looking up nemo indices
%pip install git+https://git.geomar.de/python/xorca_lonlat2ij.git@v1.0.0
import xorca_lonlat2ij as xll2ij

## Get obs data, extract near-surface measurements, plot positions

In [None]:
# Load the data set identified by obs_data_doi through pangaeapy's PanDataSet.
obs_data = PanDataSet(obs_data_doi)
obs_df = obs_data.data  # Pandas dataframe

In [None]:
# Show the data set title and the first rows of the measurement table.
display(obs_data.title)
display(obs_df.head())

In [None]:
# For every "Event" group, find the index label of the row with the smallest
# "Depth water" value ...
shallowest_row_labels = obs_df.groupby("Event")["Depth water"].idxmin()

# ... keep exactly those rows and use the event name as the new index.
near_surface_obs = obs_df.loc[shallowest_row_labels].set_index("Event")
near_surface_obs

In [None]:
# Map of the near-surface observation positions with coastlines on top.
(
    near_surface_obs.hvplot.points("Longitude", "Latitude", geo=True)
    * gf.coastline
)

## Load catalog and fetch data

In [None]:
# Open the intake catalog and run fetch_zenodo_data once for each of the two
# catalog entries used further down (T-grid fields and mesh mask).
model_data_cat = intake.open_catalog(catalog_url)

grid_t_entry = model_data_cat["NEMO_ORCA05_FOCI_Test_Minimal_grid_T"]
fetch_zenodo_data(grid_t_entry)

mesh_mask_entry = model_data_cat["NEMO_ORCA05_FOCI_Test_Minimal_mesh_mask"]
fetch_zenodo_data(mesh_mask_entry)

## Restrict to the Atlantic, calculate mean SST, plot with observation positions

In [None]:
# Open the grid-T catalog entry via to_dask() and mark nav_lat / nav_lon as
# coordinates of the dataset.
model_dataset = (
    model_data_cat["NEMO_ORCA05_FOCI_Test_Minimal_grid_T"]
    .to_dask()
    .set_coords(["nav_lat", "nav_lon"])
    # restrict to the equatorial Atlantic; this also helps to get rid of the
    # non-monotonicity of the coordinate fields
    .isel(x=slice(410, 620), y=slice(150, 350))
)

# Time-mean of sosstsst. The explicit compute() (cast to a numpy array) is
# needed for datashade to work
# (see https://datashader.org/user_guide/Performance.html)
model_mean_sst = model_dataset["sosstsst"].mean(dim="time_counter").compute()

In [None]:
# Datashaded quadmesh of the mean model SST on nav_lon / nav_lat, overlaid
# with the observation positions (red points) and coastlines.
(
    model_mean_sst.hvplot.quadmesh("nav_lon", "nav_lat", geo=True, datashade=True)
    * near_surface_obs.hvplot.points("Longitude", "Latitude", geo=True, color="red")
    * gf.coastline
)

## Extract model data along ship track

In [None]:
# The grid definitions (mesh mask) are needed for the index lookup; cut them
# to the same x / y window that is applied to model_dataset.
model_meshmask = (
    model_data_cat["NEMO_ORCA05_FOCI_Test_Minimal_mesh_mask"]
    .to_dask()
    .isel(x=slice(410, 620), y=slice(150, 350))
)
model_meshmask

In [None]:
# One (Longitude, Latitude) pair per near-surface observation.
# NOTE(review): confirm that get_ij expects points ordered as (lon, lat).
positions = list(zip(
    near_surface_obs["Longitude"],
    near_surface_obs["Latitude"],
))

# Sampling depth of each near-surface observation as an xarray object
# (near_surface_obs is indexed by "Event").
depths = near_surface_obs["Depth water"].to_xarray()
# A bare expression in the middle of a cell is not rendered by the notebook,
# so show the depths explicitly.
display(depths)

# Look up the model grid indices on the 't' grid for every position.
lat_ind, lon_ind = xll2ij.get_ij(
    model_meshmask, positions, 't', xgcm=False, xarray_out=True)
# Rename the "location" dimension so the index arrays use the same dimension
# name ("Event") as `depths`.
lat_ind = lat_ind.rename({"location": "Event"})
lon_ind = lon_ind.rename({"location": "Event"})

In [None]:
# Select the model grid points found for the observation positions; both
# index arrays carry the "Event" dimension.
ship_track_data = model_dataset.isel(y=lat_ind, x=lon_ind)

# Mask every entry where votemper is exactly 0.
# NOTE(review): presumably 0 marks land / below-bottom cells — confirm.
# The condition must reference ship_track_data itself; the previous name
# `track_data` is not defined anywhere in this notebook (NameError).
ship_track_data = ship_track_data.where(ship_track_data.votemper != 0)

display(ship_track_data)

In [None]:
# For every Event, pick the model depth level (deptht) that is nearest to the
# depth of the near-surface observation.
ship_track_data_near_surface = ship_track_data.sel(deptht=depths, method="nearest")
ship_track_data_near_surface

## Create some water mass plots for the obs and model data

In [None]:
# Time-mean model "votemper" along the ship track, overlaid with the observed
# near-surface "Temp".
model_votemper_mean = ship_track_data_near_surface["votemper"].mean(dim="time_counter")
model_votemper_mean.to_pandas().hvplot() * near_surface_obs["Temp"].hvplot()

In [None]:
# Time-mean model "vosaline" along the ship track, overlaid with the observed
# near-surface "Sal".
model_vosaline_mean = ship_track_data_near_surface["vosaline"].mean(dim="time_counter")
model_vosaline_mean.to_pandas().hvplot() * near_surface_obs["Sal"].hvplot()

In [None]:
# Upper panel, layer 1: all observed (Temp, Sal) pairs, datashaded.
obs_all_scatter = obs_df.hvplot.scatter(
    "Temp", "Sal", datashade=True, ylim=[33, 37]
)

# Upper panel, layer 2: near-surface observations only.
obs_surface_scatter = near_surface_obs.hvplot.scatter(
    "Temp", "Sal", color="red", alpha=0.2
)

# Upper panel, layer 3: near-surface model values at time index 10.
model_surface_scatter = (
    ship_track_data_near_surface.isel(time_counter=10)
    .to_dataframe()
    .hvplot.scatter("votemper", "vosaline", color="green", alpha=0.1)
)

# Lower panel: all model values along the ship track at time index 10.
model_all_scatter = (
    ship_track_data.isel(time_counter=10)
    .to_dataframe()
    .hvplot.scatter("votemper", "vosaline", datashade=True, ylim=[33, 37])
)

# `*` overlays the three layers, `+` adds the second panel; one column layout.
(
    obs_all_scatter * obs_surface_scatter * model_surface_scatter
    + model_all_scatter
).cols(1)