# Subsetting STOFS-3D-Atl field2d.nc Files 
This notebook subsets and visualizes forecast data from STOFS-3D-Atl.

To begin, activate the environment: `source /nhc/Atieh.Alipour/environment/miniconda3/bin/activate  env_subsetting`

## Modifying Node Connectivity and Dimension Names in field2d.nc Files

The field2d.nc files exhibit different node connectivity, possibly due to some masking of the original node connectivity files. To subset the data with the Thalassa package, we need to adjust the node connectivity and rename certain dimensions.

In [1]:
import dask
import geoviews as gv
import holoviews as hv
import numcodecs
import numpy as np
import pandas as pd
import shapely
import xarray as xr
import matplotlib.pyplot as plt
import s3fs  # Importing the s3fs library for accessing S3 buckets
import time  # Importing the time library for recording execution time
import shapely  # Importing shapely for geometric operations 
import thalassa  # Importing thalassa library for STOFS data analysis
from thalassa import api  # Importing thalassa API for data handling
from thalassa import normalization
from thalassa import utils
from holoviews import opts as hvopts
from holoviews import streams
from holoviews.streams import PointerXY
from holoviews.streams import Tap
import bokeh.plotting as bp



In [2]:
def read_netcdf_from_s3(bucket_name, key):
    """
    Open a NetCDF object stored in an S3 bucket as an xarray Dataset.

    The bucket is accessed anonymously through s3fs, and the 'nvel'
    variable is excluded when the dataset is opened.

    Parameters:
    - bucket_name: Name of the S3 bucket
    - key: Key/path to the NetCDF file in the bucket

    Returns:
    - xarray Dataset backed by the opened S3 object
    """
    filesystem = s3fs.S3FileSystem(anon=True)  # anonymous (unsigned) access
    # The handle is handed to xarray and is not closed here.
    remote_file = filesystem.open(f"s3://{bucket_name}/{key}", 'rb')
    return xr.open_dataset(remote_file, drop_variables=['nvel'])

In [3]:
def normalize_data(ds, bucket_name, base_key, filename, date):
    """
    Function to modify/normalize a dataset using the Thalassa package.

    If the dataset already contains an 'element' variable it is normalized
    as-is. Otherwise the node connectivity is taken from the companion
    'schout_adcirc_{date}.nc' file stored under the same base key, and the
    dimension/coordinate names are renamed before normalizing.

    Parameters:
    - ds: xarray Dataset containing the data
    - bucket_name: Name of the S3 bucket
    - base_key: Base key for the dataset in the S3 bucket
    - filename: Filename of the dataset being normalized (kept for interface
      compatibility; the companion key is derived from base_key and date)
    - date: Date string used in the companion 'schout_adcirc_{date}.nc' filename

    Returns:
    - normalized_ds: Thalassa dataset ready for cropping or visualizing
    """
    if 'element' not in ds:
        # Build the companion key directly. Deriving it with
        # key.replace(filename, ...) would also rewrite any occurrence of the
        # filename text inside base_key (str.replace replaces every match).
        ds_with_element_key = f'{base_key}/schout_adcirc_{date}.nc'
        ds_with_element = read_netcdf_from_s3(bucket_name, ds_with_element_key)  # Read NetCDF data from S3 bucket

        # Modify the field2d.nc file based on schout_adcirc.nc file
        ds = ds.rename({'nSCHISM_hgrid_node': 'node', 'nSCHISM_hgrid_face': 'nele', 'nMaxSCHISM_hgrid_face_nodes': 'nvertex'})
        # Drop the 'nele' dimension together with every variable defined on it,
        # so the connectivity copied in below replaces the original one.
        ds = ds.drop_dims('nele')
        ds['nele'] = ds_with_element['nele']
        ds['nvertex'] = ds_with_element['nvertex']
        ds['element'] = ds_with_element['element']
        ds = ds.rename({'SCHISM_hgrid_node_x': 'x', 'SCHISM_hgrid_node_y': 'y'})

    # Normalize data (single call shared by both paths)
    normalized_ds = thalassa.normalize(ds)
    return normalized_ds

In [4]:
def subset_thalassa(ds, box):
    """
    Crop a thalassa Dataset to a rectangular bounding box.

    Parameters:
    - ds: thalassa Dataset containing the data
    - box: Tuple representing the bounding box (x_min, x_max, y_min, y_max)

    Returns:
    - Subset of the input dataset within the specified bounding box
    """
    x_min, x_max = box[0], box[1]
    y_min, y_max = box[2], box[3]
    # shapely.box takes its corners as (xmin, ymin, xmax, ymax), which is a
    # different ordering from the `box` tuple accepted by this function.
    crop_region = shapely.box(x_min, y_min, x_max, y_max)
    return thalassa.crop(ds, crop_region)


In [5]:

def save_subset_to_netcdf(xarray_ds, output_file):
    """
    Write a dataset subset to disk in NetCDF format.

    Parameters:
    - xarray_ds: Subset of the xarray Dataset
    - output_file: Path to save the output NetCDF file

    Returns nothing; the result of the underlying to_netcdf call is discarded.
    """
    write_netcdf = xarray_ds.to_netcdf
    write_netcdf(output_file)



# 1- Read and Subset Data on the Fly

The following lines of code read data, normalize it, and subset the data. 

You can skip the saving step if you only want to visualize the subset and do not need it on disk.

Change the date and filename to explore different dates

In [6]:
# Reading data: open one field2d file from the S3 bucket and time the read.

# perf_counter() is the clock intended for measuring elapsed durations;
# time.time() is the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()  # Record the start time

bucket_name = 'noaa-nos-stofs3d-pds'
date = '20240325'  # Date string used in the bucket path; change to explore other dates
base_key = f'STOFS-3D-Atl/stofs_3d_atl.{date}'
# The companion connectivity file lives under the same base key:
#   f'{base_key}/schout_adcirc_{date}.nc'
# Possible filenames:
#   'stofs_3d_atl.t12z.f001_024.field2d.nc'
#   'stofs_3d_atl.t12z.f025_048.field2d.nc'
#   'stofs_3d_atl.t12z.n001_024.field2d.nc'
filename = 'stofs_3d_atl.t12z.f001_024.field2d.nc'
key = f'{base_key}/{filename}'
dataset = read_netcdf_from_s3(bucket_name, key)  # Read NetCDF data from S3 bucket

end_time = time.perf_counter()  # Record the end time
execution_time = end_time - start_time  # Elapsed seconds
print(f"Execution time for reading data: {execution_time} seconds")  # Print execution time


Execution time for reading data: 9.974045276641846 seconds


In [7]:
# Prepare data for subsetting and visualization

# perf_counter() is the clock intended for measuring elapsed durations;
# time.time() is the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()  # Record the start time

# Adds the node connectivity when it is missing, then runs thalassa's normalization.
normalize_dataset = normalize_data(dataset, bucket_name, base_key, filename, date)

end_time = time.perf_counter()  # Record the end time
execution_time = end_time - start_time  # Elapsed seconds
print(f"Execution time for normalizing: {execution_time} seconds")  # Print execution time


Execution time for normalizing: 8.274687767028809 seconds


In [8]:
# Subset Data

# perf_counter() is the clock intended for measuring elapsed durations;
# time.time() is the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()  # Record the start time

# Define the bounding box as (x_min, x_max, y_min, y_max)
box = (-76.6, -76.4, 39.1, 39.3)

ds2 = subset_thalassa(normalize_dataset, box)  # Subset the thalassa dataset

end_time = time.perf_counter()  # Record the end time
execution_time = end_time - start_time  # Elapsed seconds
print(f"Execution time for subsetting: {execution_time} seconds")  # Print execution time


Execution time for subsetting: 2.465284585952759 seconds


In [9]:
# Optional: write the subset to a NetCDF file.
# Set SAVE_SUBSET to True if you want to save the subset data; leave it False
# to skip this step and go straight to visualization.
SAVE_SUBSET = False

if SAVE_SUBSET:
    # perf_counter() is the clock intended for measuring elapsed durations.
    start_time = time.perf_counter()  # Record the start time
    output_file = 'stofs3D_subset_test.nc'
    save_subset_to_netcdf(ds2, output_file)  # Save the subset to a NetCDF file
    end_time = time.perf_counter()  # Record the end time
    execution_time = end_time - start_time  # Elapsed seconds
    print(f"Execution time for writing: {execution_time} seconds")  # Print execution time


# 2- Estimate Current Magnitude and Direction

One of the variables of interest is the surface current. We want to visualize both its magnitude and its direction.

Here, we first estimate these values for the subset data and save them in the same dataset.

In [10]:
# Display the subset dataset as produced by the subsetting step.
ds2

In [11]:
# Calculate magnitude using Pythagorean theorem
ds2['uv_mag_surface'] = np.sqrt(ds2['uvel_surface']**2 + ds2['vvel_surface']**2)

# Calculate angle in radians using arctangent (atan2).
# pi/2 - atan2(y, x) converts the mathematical angle (counter-clockwise from
# the positive x-axis) into an angle measured clockwise from the positive
# y-axis. Both velocity components are negated before the conversion.
# NOTE(review): negating the components gives the bearing the flow comes
# from rather than the one it heads toward -- confirm this is the intended
# convention for currents.
angle_rad = np.pi / 2 - np.arctan2(-ds2['vvel_surface'], -ds2['uvel_surface'])

# Convert angle from radians to degrees and wrap into [0, 360).
# Without the wrap the values fall in [-90, 270), because arctan2 returns
# angles in [-pi, pi].
ds2['uv_angle_deg'] = np.degrees(angle_rad) % 360

In [12]:
# Display the subset again; uv_mag_surface and uv_angle_deg have been added to it.
ds2

# 3- Plotting

Plotting different variables of interest.

In [15]:

# If you are reading data from your local machine, open the file directly instead, e.g.:
# ds2 = xr.open_dataset('./20240328/stofs_3d_atl.t12z.f025_048.field2d.nc')

# Load the Bokeh plotting backend for HoloViews.
hv.extension("bokeh")



In [16]:
# Unused draft of a vector-field overlay, kept for reference.
# NOTE(review): it refers to `ds` and `uv_ang_surface`, which are not defined in the cells shown here.
#vectorfield = gv.VectorField((ds['lon'], ds['lon'], 1000 * ds['uv_ang_surface'][1,:], ds['uv_mag_surface'][1,:]))


# Variable to map and the time step to show (index 4 of the subset's time axis).
# `layer` is assigned here but not used further in this cell.
variable, layer, timestamp = "uv_mag_surface", None, ds2.time.values[4]


# The trimesh is the most basic object. This is what you need to create all the other graphs.
# It is on this object that you specify the timestamp and/or the layer.
trimesh = api.create_trimesh(ds2.sel(time=timestamp), variable=variable)

# The wireframe is the representation of the mesh
wireframe = api.get_wireframe(trimesh)

# The tiles use the tiling service from OpenStreetMap
tiles =  api.get_tiles() 

# The raster object is the basic map that visualizes the variable.
# You can specify things like the colorbar limits and/or the extents, e.g.:
#raster = api.get_raster(trimesh, clim_min=0, clim_max=15)
raster = api.get_raster(trimesh)

# The pointer/tap timeseries extract the timeseries of a specific node from the xr.Dataset and visualize it.
# NOTE(review): pointer_dmap is created here but is not added to the layout below.
pointer_dmap = api.get_pointer_timeseries(ds=ds2, variable=variable, source_raster=raster)

# Create and customize the magnitude timeseries plot
tap_dmap_uv_mag = api.get_tap_timeseries(ds=ds2, variable=variable, source_raster=raster)
tap_dmap_uv_mag.opts(
    width=250, height=250, title="Magnitude",
    xlabel="Time",  # Fixed axis label
    ylabel="velocity (m/s)"
)

# Create and customize the angle timeseries plot
tap_dmap_uv_ang = api.get_tap_timeseries(ds=ds2, variable='uv_angle_deg', source_raster=raster)
tap_dmap_uv_ang.opts(
    width=250, height=250, title="Angle",
    xlabel="Time",  # Fixed axis label
    ylabel="Angle (degrees)"
)

# Reference point drawn as a red marker on the map, given as (lon, lat, label).
# Presumably the mid-point of the Francis Scott Key Bridge, judging by the names used -- confirm.
lat_br_mid =  39.217192
lon_br_mid = -76.528318
Key_bridge = ( lon_br_mid,lat_br_mid , 'KeyBrj')
kb = gv.Points([Key_bridge], vdims='Brj.').opts(color="red", marker="circle_dot", size=4)

# Compose the figure: map tiles + raster + mesh wireframe + marker, followed by the two tap timeseries panels.
# The trailing bare `lay` displays the layout as the cell output.
raster_layout = tiles * raster.opts(width=400, height = 400, cmap="viridis", title="Surface Magnitude")
lay = raster_layout * wireframe  * kb + tap_dmap_uv_mag.opts() + tap_dmap_uv_ang.opts()
lay

In [17]:
# Rebuild the same layout with the raster title set to "Surface Currents".
# Relies on tiles, raster, wireframe, kb and the two tap timeseries objects created in the plotting cell.
raster_layout = tiles * raster.opts(width=400, height = 400, cmap="viridis", title="Surface Currents")
lay = raster_layout * wireframe  * kb + tap_dmap_uv_mag.opts() + tap_dmap_uv_ang.opts()
lay

In [20]:
# Convert the interactive plot to a Bokeh figure
bokeh_figure = hv.render(lay, backend='bokeh')

# Specify the output file (written relative to the current working directory)
bp.output_file("FrancisScottKeyBridge_Current.html")

# Save the Bokeh figure.
# NOTE: this writes standalone HTML. As the warning printed by this cell states,
# Python callbacks cannot work in standalone output, so the tap-driven
# timeseries panels presumably will not update in the saved page -- confirm.
bp.save(bokeh_figure)


You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/js_callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



'/nhc/Atieh.Alipour/code/subsetting/Subsetting/Tests/FrancisScottKeyBridge_Current.html'