# Explore SOCIB Model Output
* Access data from the SOCIB THREDDS Server using OPeNDAP
* Visualize and explore with HoloViz
* Compare to observational data at a specific time step

In [None]:
import xarray as xr
import hvplot.xarray
from datetime import datetime
import panel as pn

In [None]:
import pandas as pd

# Length of the analysis window, in days, counted back from "now"
lookback_days = 3

# End of the window: the current date and time.
# NOTE(review): pd.Timestamp.now() is timezone-naive local time -- confirm this
# matches the time convention of the datasets queried with it.
stop_time = pd.Timestamp.now()

# Start of the window: `lookback_days` days before the end
start_time = stop_time - pd.DateOffset(days=lookback_days)

print(f"Start Time: {start_time}")
print(f"Stop Time: {stop_time}")

In [None]:
# OPeNDAP (dodsC) endpoint on the SOCIB THREDDS server for the
# "wmop_surface_best" aggregation; this URL is what xr.open_dataset receives.
dap_url = 'http://thredds.socib.es/thredds/dodsC/operational_models/oceanographical/hydrodynamics/model_run_aggregation/wmop_surface/wmop_surface_best.ncd'

In [None]:
# Open the remote model dataset with xarray. Later cells select a subset and
# call .load() on it explicitly to pull the values they need.
ds = xr.open_dataset(dap_url)

In [None]:
# Lower and upper color limits, passed as clim=(cmin, cmax) to both the model
# maps and the observation map so they share one color scale.
# NOTE(review): presumably temperature in degrees Celsius -- confirm units.
cmin = 18
cmax = 30

In [None]:
%%time
# Restrict the 'temp' variable to the analysis window, then pull it into memory
da = ds['temp'].sel(time=slice(start_time, stop_time))
da = da.load()

# Rasterized map over OSM tiles; the time step is picked from a drop-down
mod_viz = da.hvplot.quadmesh(
    x='lon_rho',
    y='lat_rho',
    geo=True,
    tiles='OSM',
    rasterize=True,
    cmap='turbo',
    clim=(cmin, cmax),
    widgets={'time': pn.widgets.Select},
)

In [None]:
# Display the model map built in the previous cell
mod_viz

## Load sensor data

In [None]:
import copernicusmarine
import hvplot.pandas

In [None]:
# Bounding box of the model grid as [min, max] pairs. The extrema are converted
# to plain Python floats (rather than 0-d NumPy arrays) because they are passed
# on as scalar longitude/latitude limits of the observation query.
lon_range = [float(ds['lon_rho'].min()), float(ds['lon_rho'].max())]
lat_range = [float(ds['lat_rho'].min()), float(ds['lat_rho'].max())]

In [None]:
# Name of the observed variable requested from the in-situ product
var='TEMP'
# Read the observations into a DataFrame, limited to the analysis time window
# and to the bounding box of the model grid (lon_range / lat_range).
df = copernicusmarine.read_dataframe(
    dataset_id="cmems_obs-ins_med_phybgcwav_mynrt_na_irr",
    dataset_part="latest",
    variables=[var],
    start_datetime=start_time,
    end_datetime=stop_time,
    minimum_longitude = lon_range[0],
    maximum_longitude = lon_range[1],
    minimum_latitude = lat_range[0],
    maximum_latitude = lat_range[1],
)

In [None]:
import pandas as pd
import numpy as np

def find_closest_data(df, time, depth, depth_window):
    """
    Return, for each institution, the observation closest in time to `time`.

    Only rows whose depth lies within ``depth - depth_window`` and
    ``depth + depth_window`` (both inclusive) are considered. When an
    institution has several rows at its closest time, the one whose depth is
    nearest to `depth` is kept. The input DataFrame is not modified.

    Args:
        df (pd.DataFrame): Observations with 'time', 'depth' and 'institution'
            columns. 'time' may hold strings or datetimes.
        time (pd.Timestamp, datetime or str): The target time.
        depth (float): The target depth.
        depth_window (float): The window (plus or minus) to filter the data by depth.

    Returns:
        pd.DataFrame: One row per institution, keeping the original index
            labels and columns (with 'time' converted to datetimes), ordered
            by increasing time difference and then by institution. If no row
            falls inside the depth window, an empty DataFrame with the columns
            of `df` is returned.
    """
    target = pd.to_datetime(time)

    # Filter on depth first, and work on a copy so the caller's frame is untouched
    in_window = (df['depth'] >= depth - depth_window) & (df['depth'] <= depth + depth_window)
    candidates = df.loc[in_window].copy()

    # If no data is found within the window, return an empty DataFrame
    if candidates.empty:
        return pd.DataFrame(columns=df.columns)

    candidates['time'] = pd.to_datetime(candidates['time'])

    # Reconcile timezone awareness: whichever side is naive is interpreted in
    # the timezone of the other, so the subtraction below never mixes
    # tz-naive and tz-aware values.
    data_tz = candidates['time'].dt.tz
    if data_tz is not None and target.tz is None:
        target = target.tz_localize(data_tz)
    elif data_tz is None and target.tz is not None:
        target = target.tz_localize(None)

    # Temporary ranking columns
    candidates['time_diff'] = (candidates['time'] - target).abs()
    candidates['depth_diff'] = (candidates['depth'] - depth).abs()

    # A row without a timestamp has no defined distance to the target time
    candidates = candidates.dropna(subset=['time_diff'])

    # After sorting, the first row of each institution is its closest time,
    # with ties on time broken by the smallest depth difference.
    ranked = candidates.sort_values(by=['time_diff', 'institution', 'depth_diff'])
    closest = ranked.drop_duplicates(subset='institution', keep='first')

    # Clean up temporary columns
    return closest.drop(columns=['time_diff', 'depth_diff'])

In [None]:
# Time at which model output and observations are compared
# (defaults to the end of the analysis window)
compare_time = stop_time
# To compare at a fixed time instead, assign a date string, e.g.:
#compare_time = '2025-10-6 00:00'
# Target depth and the +/- window around it accepted by find_closest_data
depth = 0
depth_window = 4
# Print the comparison time that was actually selected
print(compare_time)

In [None]:
# For each institution, keep the observation closest to compare_time within
# depth +/- depth_window
df0  = find_closest_data(df, compare_time, depth, depth_window)

In [None]:
# Display the selected observations
df0

In [None]:
import pandas as pd
import hvplot.pandas  # This registers the .hvplot accessor on pandas DataFrames

def plot_data_on_map_hvplot(df, color_column, clim=None):
    """
    Plots the rows of a DataFrame as colored points on a map using hvplot.

    Args:
        df (pd.DataFrame): The input DataFrame. It must provide 'longitude',
            'latitude', 'institution' and 'time' columns in addition to
            `color_column`.
        color_column (str): The name of the column to use for coloring the dots.
        clim (tuple, optional): (min, max) limits of the color scale. When
            omitted, the notebook-level `cmin` and `cmax` values are used, so
            the points share the color scale of the model maps.

    Returns:
        The HoloViews object produced by ``df.hvplot.points``.
    """
    if clim is None:
        # Fall back to the notebook-wide color limits (original behavior)
        clim = (cmin, cmax)

    map_plot = df.hvplot.points(
        x='longitude',
        y='latitude',
        geo=True,             # Treat x/y as geographic coordinates
        tiles='OSM',          # Use OpenStreetMap as the background tile source
        c=color_column,       # Color the points by the specified column
        hover_cols=[color_column, 'institution', 'time'],  # Extra columns for the hover tool
        title=f"Data Locations by {color_column.capitalize()}",
        size=40,
        clim=clim,
        line_color='black',
        cmap='turbo'
    )

    return map_plot

# Example usage:
# Assuming 'filtered_df' is your DataFrame after filtering and 'value' is the column to color by.
# plot_data_on_map_hvplot(filtered_df, 'value')

In [None]:
# Map of the selected observations, colored by their 'value' column
obs_viz = plot_data_on_map_hvplot(df0, 'value')

In [None]:
# Pick the model time step nearest to the comparison time and pull it into memory
da = ds['temp'].sel(time=compare_time, method='nearest')
da = da.load()

# Rasterized map of that single time step, on the same color scale as before
mod_viz = da.hvplot.quadmesh(
    x='lon_rho',
    y='lat_rho',
    geo=True,
    tiles='OSM',
    rasterize=True,
    cmap='turbo',
    clim=(cmin, cmax),
)

In [None]:
# Overlay the observation points on top of the model map
mod_viz * obs_viz