# HySwash: A hybrid method for nearshore wave processes

In [22]:
import os
import os.path as op

import numpy as np
import xarray as xr
import pandas as pd

from bluemath_tk.core.io import load_model
from bluemath_tk.datamining.lhs import LHS
from matplotlib import pyplot as plt
import plotly.express as px


import sys

sys.path.insert(0, "..")
from utils.plotting import show_graph_for_different_parameters

# All working folders are resolved relative to the current working directory.
root_dir = os.getcwd()
output_dir, templates_dir, export_dir = (
    op.join(root_dir, folder_name)
    for folder_name in (
        "output_Veggy_Hs_mono",
        "templates",
        "exported_Veggy_Hs_mono",
    )
)
# Only the export folder is created here; an existing folder is left untouched.
os.makedirs(export_dir, exist_ok=True)

# Set the OpenMP thread count seen by this process to one.
os.environ.update(OMP_NUM_THREADS="1")

If you want to run this notebook using the pickle files exported in Part 1, execute the cell below; otherwise, you can skip it. The download sometimes hangs; if that happens, run the cell again.

In [7]:
import requests
import tarfile

# Archive with the exported models and post-processed output used by this notebook.
url = "https://geoocean.sci.unican.es/data/exported_Veggy_Hs_mono.tar.gz"

# Stream the archive straight into tarfile without storing the .tar.gz on disk.
# - timeout=(connect, read): a stalled connection raises instead of hanging forever.
# - raise_for_status(): an HTTP error is reported as such, rather than as a
#   confusing tarfile error while parsing an error page.
# - the context managers close both the HTTP connection and the archive.
with requests.get(url, stream=True, timeout=(10, 300)) as response:
    response.raise_for_status()
    with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: use the "data" filter so members
            # cannot be written outside root_dir, and to avoid the
            # DeprecationWarning raised when no filter is given.
            archive.extractall(path=root_dir, filter="data")
        else:
            # Older Python without extraction filters: plain extraction.
            archive.extractall(path=root_dir)

  file.extractall(path=root_dir)


Load `swash_model` and `mda` from their pickle files, and `postprocessed_output` from its NetCDF file.

In [23]:
# Pickled models stored in the export folder.
mda_pickle = op.join(export_dir, "mda_model.pkl")
swash_pickle = op.join(export_dir, "swash_model.pkl")
mda = load_model(mda_pickle)
swash_model = load_model(swash_pickle)

# Depth file from the templates folder, read as a plain-text numeric array.
depth_file = op.join(templates_dir, "depth.bot")
depth_array = np.loadtxt(depth_file)

# Post-processed simulation output stored as NetCDF.
postprocessed_nc = op.join(export_dir, "output_postprocessed_clean.nc")
postprocessed_output = xr.open_dataset(postprocessed_nc)

## 5. Reconstruction: Principal Component Analysis (PCA) & Radial Basis Functions (RBF)

The reconstruction of wave parameters is carried out with an interpolation technique based on radial basis functions (RBF), a scheme which is very convenient for scattered and multivariate data. The RBF approximation has been applied successfully in many fields, usually with better results than other interpolation methods (Hardy, 1990).
    
Suppose that $f=f(x)$ is the real-valued function that we want to approximate. We are given $M$ scattered data points $\{x_1,..., x_M\}$ of dimension $\textit{n}$ and the associated real function values $\{f_1, ..., f_M\}$, where $f_j = f(x_j), j = 1,...,M$. The RBF interpolation method consists of a weighted sum of radially symmetric basis functions located at the data points. The approximation function is assumed to be of the form:
$$RBF(x) = p(x) + \sum\limits_{j=1}^M a_j\Phi{\large (}{\large \parallel}{x - x_j}{\large \parallel}{\large )}$$

### Hs:  Spatial Reconstruction

In [None]:
from bluemath_tk.datamining.pca import PCA
from bluemath_tk.interpolation.rbf import RBF

# Continue with an independent deep copy of the dataset.
postprocessed_output = postprocessed_output.copy(deep=True)

# PCA on Hs: "Hs" is stacked along "Xp", "case_num" is the row dimension,
# and NaNs in Hs are replaced by 0.0.
pca_settings = dict(
    vars_to_stack=["Hs"],
    coords_to_stack=["Xp"],
    pca_dim_for_rows="case_num",
    value_to_replace_nans={"Hs": 0.0},
)
pca = PCA()
_pcs_ds = pca.fit_transform(data=postprocessed_output, **pca_settings)

# RBF fit: inputs are the MDA centroids selected by case_num, targets are the
# principal components held by the fitted PCA.
simulated_cases = postprocessed_output["case_num"].values
rbf = RBF()
rbf.fit(
    subset_data=mda.centroids.iloc[simulated_cases, :],
    target_data=pca.pcs_df,
)

# Save both fitted models in the export folder (PCA first, then RBF).
for fitted_model, pickle_name in ((pca, "pca_model.pkl"), (rbf, "rbf_model.pkl")):
    fitted_model.save_model(model_path=op.join(export_dir, pickle_name))

The PCA + RBF process takes 2 hours. To avoid running it again, we can load the fitted models from the exported folder.

In [6]:
# Reload the PCA and RBF models (in that order) from the export folder.
pca, rbf = (
    load_model(op.join(export_dir, pickle_name))
    for pickle_name in ("pca_model.pkl", "rbf_model.pkl")
)

Visualize the effect of the plants on Hs

In [None]:
# Metamodel input variables mapped to their (lower, upper) sampling bounds.
variable_bounds = {
    "Hs": (0.5, 3),
    "Hs_L0": (0.003, 0.01),
    "Wv": (0, 200),
    "hv": (0, 1.5),
    "Nv": (0, 1000),
}
variables_to_analyse_in_metamodel = list(variable_bounds)

lhs_parameters = {
    "num_dimensions": len(variables_to_analyse_in_metamodel),
    "num_samples": 11000,
    "dimensions_names": variables_to_analyse_in_metamodel,
    "lower_bounds": [lower for lower, _upper in variable_bounds.values()],
    "upper_bounds": [upper for _lower, upper in variable_bounds.values()],
}

# Silence the PCA and RBF loggers to avoid excessive logging.
for noisy_model in (rbf, pca):
    noisy_model.logger.disabled = True

show_graph_for_different_parameters(
    pca=pca,
    rbf=rbf,
    lhs_parameters=lhs_parameters,
    depthfile=depth_file,
)

interactive(children=(FloatSlider(value=0.6640121680248141, description='Hs', max=3.0, min=0.5, step=0.25), Fl…

<function utils.plotting.show_graph_for_different_parameters.<locals>.update_plot(Hs, Hs_L0, Wv, hv, Nv)>

### RunUp: Singular Reconstruction
We reconstruct the RunUp using the RBF technique for all the LHS combinations. First, we fit the RBF function with the RunUp simulations output.

In [None]:
# Imported here as well so this cell still runs when the long PCA + RBF cell
# (the only other place RBF is imported) has been skipped.
from bluemath_tk.interpolation.rbf import RBF

# Ru2 values of the post-processed output as a numpy array.
Ru2 = postprocessed_output["Ru2"].values
# Convert the Ru2 numpy array to a DataFrame; it is the target passed to fit().
Ru2_df = pd.DataFrame(Ru2, columns=["Ru2"])

# Fit the RBF with the MDA centroids selected by case_num as inputs.
rbf_Ru = RBF()
rbf_Ru.fit(
    subset_data=mda.centroids.iloc[postprocessed_output["case_num"].values, :],
    target_data=Ru2_df,
)

# Save the fitted model, as done for the PCA and RBF models of Hs, so that
# "rbf_Ru_model.pkl" (loaded by the prediction cell) matches this fit.
rbf_Ru.save_model(
    model_path=op.join(export_dir, "rbf_Ru_model.pkl"),
)


We predict the RunUp for fixed values of Hs and Hs_L0. Please choose them:

In [24]:
# Fixed values of Hs and Hs_L0 used for the RunUp prediction.
Hs, Hs_L0 = 1.5, 0.005

In [30]:
# Centroids of the simulated cases (not used further in this cell).
data = mda.centroids.iloc[postprocessed_output.case_num.values].copy()

# RunUp RBF model stored in the export folder.
rbf_Ru = load_model(op.join(export_dir, "rbf_Ru_model.pkl"))

# LHS configuration for the three parameters that are left free; Hs and Hs_L0
# are fixed to the values chosen above.
variables_to_analyse_in_metamodel = ["Wv", "hv", "Nv"]
lhs_3_config = {
    "num_dimensions": 3,
    "num_samples": 500,
    "dimensions_names": variables_to_analyse_in_metamodel,
    "lower_bounds": [0, 0, 0],
    "upper_bounds": [200, 1.5, 1000],
}

# Build the sampler and the samples from the 3-parameter configuration above.
# Previously both read `lhs_parameters` (the 5-dimensional, 11000-sample
# configuration defined in another cell), so the configuration defined here was
# never used and the cell failed unless that other cell had been run first.
# The sampler keeps the name `lhs_3_parameters`, as before.
lhs_3_parameters = LHS(
    num_dimensions=lhs_3_config["num_dimensions"],
)
df_3_parameters = lhs_3_parameters.generate(
    dimensions_names=lhs_3_config["dimensions_names"],
    lower_bounds=lhs_3_config["lower_bounds"],
    upper_bounds=lhs_3_config["upper_bounds"],
    num_samples=lhs_3_config["num_samples"],
)

# Add the fixed Hs and Hs_L0 to the LHS dataset
df_3_parameters["Hs"] = Hs
df_3_parameters["Hs_L0"] = Hs_L0

# Predict with the columns in the order Hs, Hs_L0, Wv, hv, Nv.
RunUp_3_parameters = rbf_Ru.predict(
    df_3_parameters[["Hs", "Hs_L0", "Wv", "hv", "Nv"]]
)
df_3_parameters["RunUp"] = RunUp_3_parameters

# 3D scatter of the sampled parameters, coloured by the predicted RunUp.
fig = px.scatter_3d(
    df_3_parameters,
    x='Nv',
    y='Wv',
    z='hv',
    color='RunUp',
    labels={'Nv': 'Nv', 'Wv': 'Wv', 'hv': 'hv', 'RunUp': 'RunUp'},
    title=f'3D Scatter Plot of RunUp vs Nv, Wv, and hv with Hs: {Hs} and Hs_L0: {Hs_L0}'
)
fig.update_traces(marker=dict(size=3))
fig.show()



The NumPy module was reloaded (imported a second time). This can in some cases result in small but subtle issues and is discouraged.



In [None]:
# Uncomment the following lines if you want to run the prediction on the original lhs data
# lhs_data=mda._data
# lhs_data = lhs_data.reset_index(drop=True)
# RunUp=rbf_Ru.predict(lhs_data[["Hs","Hs_L0","Wv", "hv", "Nv"]])
# lhs_data["RunUp"] = RunUp
# lhs_data
# fig = px.scatter_3d(lhs_data, x='Nv', y='Wv', z='hv', color='RunUp',
#                     labels={'Nv': 'Nv', 'Wv': 'Wv', 'hv': 'hv', 'RunUp': 'RunUp'},
#                     title='3D Scatter Plot of RunUp vs Nv, Wv, and hv',
# )
# fig.update_traces(marker=dict(size=3))
# fig.show()