In [1]:
import os
import re

import fsspec
import matplotlib.pyplot as plt
import numpy as np
import pyproj
import xarray as xr
import xesmf as xe
from dask.distributed import Client, progress
from pyproj import CRS, Transformer

Global Parameters

In [2]:
# Upper limit on a frame's minimum sea-level pressure; frame_processing
# returns None when the frame's minimum `prmsl` exceeds this value.
# NOTE(review): the frame_processing docstring quotes this threshold in mb.
CMP_MAX = 990.0

# Public S3 bucket whose object listing get_links() filters for frame keys.
BUCKET_NAME = 'noaa-nws-hafs-pds'

# Variable names passed as `drop_variables` when opening the
# isobaric-level fields with xarray.
DROP_VARIABLES = [
    'q',
    'w',
    'wz',
    'absv',
    'clwmr',
    'icmr',
    'rwmr',
    'snmr',
    'grle',
    'rare',
]


In [3]:
# Best-effort cleanup so that re-running this cell does not stack a second
# local cluster on top of one left over from a previous run. Any failure here
# (typically: no client exists yet) is deliberately ignored.
try:
    from dask.distributed import get_client
    get_client().close()
except Exception:
    pass

client = Client()  # set up local cluster on your laptop

# Anonymous (unsigned) S3 filesystem handle.
fs = fsspec.filesystem('s3', anon=True)

# Must be the last expression of the cell, otherwise the notebook does not
# render the client summary.
client

Project Functions

In [4]:
def frame_processing(s3_path: str):
    '''
    Open one HAFS-A forecast frame and apply the frame validity checks.

    :param s3_path: Object key of an ``f024`` storm ``atm`` GRIB2 file inside
        the bucket, e.g.
        ``hfsa/20230620/00/03l.2023062000.hfsa.storm.atm.f024.grb2``
    :type s3_path: str

    Checks for frame validity:
        No land within 200km of center (not implemented yet)
        Central minimum pressure no greater than CMP_MAX (990mb)

    If valid returns a Model Dataset consisting of:
        PBL temp, heights, rh, and radial/azimuthal winds
        700 mb temp, heights, rh, and radial/azimuthal winds
        central minimum pressure for the frame,
        maximum wind for the frame

    NOTE: assembling the dataset described above is not implemented yet, so
    the function currently returns None for every frame.
    '''
    header_path = f'simplecache::s3://{BUCKET_NAME}/'
    # Options for a chained fsspec URL are keyed by protocol name, so the
    # cache directory has to be passed under `simplecache`, not `filecache`.
    cache_options = {'cache_storage': '/tmp/files'}

    frame_path = header_path + s3_path
    # Escape the dots and anchor on the 'f': the bare pattern '.024' also
    # swallows the 'f' (yielding '..027') and can match inside 2024 dates.
    nxt_frame_path = header_path + re.sub(r'\.f024\.', '.f027.', s3_path)

    frame_file = fsspec.open_local(frame_path, s3={'anon': True}, simplecache=cache_options)

    # Isobaric-level fields of the current frame (not used further yet).
    ds_atm = xr.open_dataset(
        frame_file,
        drop_variables=DROP_VARIABLES,
        filter_by_keys={'typeOfLevel': 'isobaricInhPa'},
        engine='cfgrib',
    )
    # The engine is given explicitly so xarray does not have to guess the
    # backend from the name of the locally cached file.
    ds_sfc = xr.open_dataset(
        frame_file,
        filter_by_keys={'typeOfLevel': 'meanSea'},
        engine='cfgrib',
    )

    # Storm centre of the current frame = location of the minimum MSLP.
    prmsl = ds_sfc['prmsl']
    c_mslp = prmsl.min()
    center_coords = prmsl.where(prmsl == c_mslp, drop=True).squeeze()

    # CMP_MAX is expressed in mb; convert first when the field is labelled Pa.
    c_mslp_mb = float(c_mslp)
    if prmsl.attrs.get('units') == 'Pa':
        c_mslp_mb /= 100.0
    if c_mslp_mb > CMP_MAX:
        # Rejected frame: skip fetching the next forecast step.
        return None

    nxt_frame_file = fsspec.open_local(nxt_frame_path, s3={'anon': True}, simplecache=cache_options)
    ds_sfc_nxt_step = xr.open_dataset(
        nxt_frame_file,
        filter_by_keys={'typeOfLevel': 'meanSea'},
        engine='cfgrib',
    )

    # Storm centre three forecast hours later (f027).
    prmsl_nxt_step = ds_sfc_nxt_step['prmsl']
    c_mslp_nxt_step = prmsl_nxt_step.min()
    center_coords_nxt_step = prmsl_nxt_step.where(
        prmsl_nxt_step == c_mslp_nxt_step, drop=True
    ).squeeze()

    # TODO: build and return the model dataset described in the docstring.
    
    
def list_s3_files(BUCKET_NAME, prefix=''):
    """
    Lists files in an AWS S3 bucket, optionally filtered by a prefix.

    Requests are sent unsigned, so only publicly listable buckets work.

    Args:
        BUCKET_NAME (str): The name of the S3 bucket.
        prefix (str, optional): An optional prefix to filter files (e.g., 'folder/').

    Returns:
        list: The entries of every result page's 'Contents' list, in listing
        order. Empty when nothing matches, in which case a message is printed.
    """
    # Imported here because the notebook's import cell does not provide these
    # names; without the imports the function fails with NameError.
    import boto3
    from botocore import UNSIGNED
    from botocore.config import Config

    s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

    # Handle pagination for more than 1000 objects
    paginator = s3_client.get_paginator('list_objects_v2')
    file_list = []
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=prefix):
        # A page without matches carries no 'Contents' key.
        file_list.extend(page.get('Contents', []))

    if not file_list:
        print(f"No files found in '{BUCKET_NAME}' with prefix '{prefix}'.")
    return file_list

def get_links():
    """
    Return the object keys of the f024 storm 'atm' GRIB2 frames in the bucket.

    The keys are cached in 'Data/links/link_list.txt', one per line. When that
    file exists it is read back and no S3 request is made. Otherwise the
    bucket is listed, the keys are filtered, and the cache file is written.

    Returns:
        list[str]: Keys that contain 'f024' and 'storm.atm' and do not
        contain 'idx'.
    """
    link_folder = 'Data/links'
    link_file = os.path.join(link_folder, 'link_list.txt')

    # Test for the cache file itself: an unrelated file in the folder must not
    # be mistaken for an existing cache.
    if os.path.isfile(link_file):
        with open(link_file, 'r') as f:
            return [line.strip() for line in f]  # .strip() removes newline characters

    file_list = list_s3_files(BUCKET_NAME=BUCKET_NAME)
    loaded_list = [
        item['Key']
        for item in file_list
        if 'f024' in item['Key']
        and 'idx' not in item['Key']
        and 'storm.atm' in item['Key']
    ]

    # The folder does not exist on a first run; create it before writing.
    os.makedirs(link_folder, exist_ok=True)
    with open(link_file, 'w') as f:
        f.writelines(key + '\n' for key in loaded_list)
    return loaded_list
    
def does_file_exist_in_dir(path):
    """
    Report whether a directory directly contains at least one regular file.

    Args:
        path (str): Directory to inspect. Sub-directories are not searched.

    Returns:
        bool: True if any entry of `path` is a regular file. False if there
        is none or if `path` is not an existing directory.
    """
    # A missing directory simply means "no file yet" rather than an error.
    if not os.path.isdir(path):
        return False
    return any(os.path.isfile(os.path.join(path, entry)) for entry in os.listdir(path))

# TODO: implement near_land(sfc_frame, land_mask) for the "no land within 200km of center" check listed in frame_processing.
def calc_heading(lat1, lon1, lat2, lon2):
    """
    Initial bearing from point 1 towards point 2.

    Args:
        lat1, lon1: Latitude and longitude of the start point, in degrees.
        lat2, lon2: Latitude and longitude of the end point, in degrees.

    Returns:
        The bearing in degrees, wrapped into [0, 360): 0 when point 2 is due
        north of point 1, 90 when it is due east.
    """
    phi1, phi2, lam1, lam2 = (np.deg2rad(v) for v in (lat1, lat2, lon1, lon2))
    delta_lam = lam2 - lam1

    # The two arguments of arctan2 for the bearing.
    east = np.sin(delta_lam) * np.cos(phi2)
    north = (np.cos(phi1) * np.sin(phi2) -
             np.sin(phi1) * np.cos(phi2) * np.cos(delta_lam))

    # arctan2 yields (-180, 180]; shift and wrap into [0, 360).
    return (np.rad2deg(np.arctan2(east, north)) + 360) % 360

def rotate_point_around_origin(x, y, angle):
    """
    Rotates a 2D point (x, y) around the origin (0, 0) by a given angle.

    Args:
        x (float or np.ndarray): The x-coordinate(s) of the point(s).
        y (float or np.ndarray): The y-coordinate(s) of the point(s).
        angle (float): The angle of rotation in degrees (counter-clockwise).

    Returns:
        tuple: A tuple (new_x, new_y) representing the rotated point(s).
    """
    angle_rad = np.deg2rad(angle)
    # numpy trig functions: `math` is not imported in this notebook, and
    # these also accept array-valued input.
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)
    new_x = x * cos_a - y * sin_a
    new_y = x * sin_a + y * cos_a
    return new_x, new_y

def to_polar(
        ds: xr.Dataset,
        origin_lat: float,
        origin_lon: float,
        ref_lat: float,
        ref_lon: float,
        rho_steps: int,
        theta_steps: int
    ) -> xr.Dataset:
    """
    Regrid a dataset onto a polar (angle, radius) grid centred on a point.

    The target grid spans 0-250 km in range and 0-360 degrees in azimuth,
    both in 0.5 steps. Grid points are laid out on a plane tangent at the
    origin, rotated by the bearing from the origin to the reference point,
    converted to longitude/latitude, and `ds` is interpolated bilinearly
    onto them.

    Args:
        ds: Source dataset to regrid.
        origin_lat, origin_lon: Centre of the polar grid, in degrees.
        ref_lat, ref_lon: Reference point, in degrees; the bearing towards it
            sets the grid rotation.
        rho_steps: Currently unused; the range spacing is fixed at 0.5 km.
        theta_steps: Currently unused; the azimuth spacing is fixed at 0.5 deg.

    Returns:
        The regridded dataset with dimensions ("angle", "radius").
    """
    R_max = 250.0 # Max range in km
    R_step = 0.5  # Range step in km
    A_step = 0.5  # Azimuth step in degrees

    new_range = np.arange(0.0, R_max + R_step, R_step)
    new_azimuth = np.arange(0.0, 360.0, A_step)
    # With indexing='ij' both grids have shape (n_azimuth, n_range), matching
    # the ("angle", "radius") dimension order used below.
    azimuth_grid, range_grid = np.meshgrid(new_azimuth, new_range, indexing='ij')

    heading_shift = calc_heading(origin_lat, origin_lon, ref_lat, ref_lon)

    # Polar -> Cartesian offsets from the origin. The trig functions need
    # radians, and the radius (not the azimuth) scales the offset.
    azimuth_rad = np.deg2rad(azimuth_grid)
    X = range_grid * np.cos(azimuth_rad) * 1000 # Convert km to meters
    Y = range_grid * np.sin(azimuth_rad) * 1000

    # NOTE(review): azimuth is measured counter-clockwise from +X here, while
    # heading_shift is a compass bearing - confirm the intended orientation.
    X, Y = rotate_point_around_origin(X, Y, heading_shift)

    # Azimuthal-equidistant projection centred on the origin, so that X/Y are
    # metre offsets from the grid centre.
    local_proj_str = f"+proj=aeqd +lat_0={origin_lat} +lon_0={origin_lon} +units=m"
    transformer = pyproj.Transformer.from_crs(local_proj_str, "EPSG:4326", always_xy=True)
    target_lons, target_lats = transformer.transform(X, Y)

    ds_out = xr.Dataset(
        coords={
            "latitude": (("angle", "radius"), target_lats),
            "longitude": (("angle", "radius"), target_lons),
            "radius": new_range,
            "angle": new_azimuth,
        }
    )
    regridder = xe.Regridder(ds, ds_out, 'bilinear')
    polar_out = regridder(ds)

    return polar_out


         


In [5]:
# Keys of the f024 storm 'atm' frames: read from the local link file when
# get_links() finds one, otherwise listed from the S3 bucket.
frame_links = get_links()

In [7]:
# Preview the first ten keys.
frame_links[:10]

['hfsa/20230620/00/03l.2023062000.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/00/93l.2023062000.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/06/03l.2023062006.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/06/93l.2023062006.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/12/03l.2023062012.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/12/93l.2023062012.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/18/03l.2023062018.hfsa.storm.atm.f024.grb2',
 'hfsa/20230620/18/93l.2023062018.hfsa.storm.atm.f024.grb2',
 'hfsa/20230621/00/03l.2023062100.hfsa.storm.atm.f024.grb2',
 'hfsa/20230621/00/93l.2023062100.hfsa.storm.atm.f024.grb2']

In [None]:

# Ad-hoc check: open the isobaric-level fields of a single f027 frame.
uri = "simplecache::s3://noaa-nws-hafs-pds/hfsa/20230707/12/93e.2023070712.hfsa.storm.atm.f027.grb2"

# Options for a chained fsspec URL are keyed by protocol name, so the cache
# directory has to be passed under `simplecache` to match the URL prefix.
file = fsspec.open_local(uri, s3={'anon': True}, simplecache={'cache_storage':'/tmp/files'})

ds_atm = xr.open_dataset(file,
                         drop_variables = DROP_VARIABLES, 
                         filter_by_keys={'typeOfLevel': 'isobaricInhPa'},
                         engine = 'cfgrib'
                        )   

In [59]:
# Swap the forecast-hour token. The dots are escaped and the 'f' is part of
# the pattern: a bare ".024" would also consume the 'f' (giving "..027") and
# could match inside a 2024 date. This URI already holds f027, so it comes
# back unchanged.
re.sub(r"\.f024\.", ".f027.", uri)

'simplecache::s3://noaa-nws-hafs-pds/hfsa/20230707/12/93e.2023070712.hfsa.storm.atm.f027.grb2'