In [None]:
import xarray as xr
import os
import boto3
from botocore import UNSIGNED
from botocore.client import Config

Global Parameters

In [None]:
# Upper bound on a frame's central minimum pressure: frame_processing returns
# None when the minimum of `prmsl` exceeds this value (its docstring states
# the threshold in mb).
CMP_MAX = 990.0
# Name of the S3 bucket to read from; presumably the value meant to be passed
# to list_s3_files -- confirm against callers.
BUCKET_NAME = 'noaa-nws-hafs-pds'

Project Functions

In [None]:
def frame_processing(ds: xr.Dataset):
    """
    Screen a HAFS-A model frame by its central minimum pressure.

    :param ds: A HAFSA model timestep
    :type ds: xr.Dataset

    NOTE(review): ``ds`` is not read yet. The frame is loaded from a
    hard-coded sample GRIB2 file under ``Data``; confirm whether the
    caller is meant to supply the data or a path instead.

    Intended checks for frame validity:
        No land within 200km of center (not implemented yet)
        Central minimum pressure of at most CMP_MAX (990 mb)

    Intended result for a valid frame (not implemented yet), a Model
    Dataset consisting of:
        PBL temp, heights, rh, and radial/azimuthal winds
        700 mb temp, heights, rh, and radial/azimuthal winds
        central minimum pressure for the frame,
        maximum wind for the frame

    :return: ``None``. Frames whose central minimum pressure exceeds
        ``CMP_MAX`` are rejected with ``None``; the valid-frame result is
        still to be built, so ``None`` is returned in that case as well.
    """
    drop_variables = ['q', 'w', 'wz', 'absv', 'clwmr', 'icmr', 'rwmr', 'snmr', 'grle', 'rare']

    # Build the path with os.path.join: the previous 'Data\92l...' literal
    # relied on the invalid escape sequence '\9' (a SyntaxWarning on recent
    # Python versions) and only resolved to a file inside Data on Windows.
    grib_path = os.path.join('Data', '92l.2023082400.hfsa.storm.atm.f024.grb2')

    # The same file is opened twice, once per level type. The context
    # managers make sure both file handles are released on every exit path.
    with xr.open_dataset(grib_path,
                         drop_variables=drop_variables,
                         filter_by_keys={'typeOfLevel': 'isobaricInhPa'}) as ds_atm, \
            xr.open_dataset(grib_path,
                            drop_variables=drop_variables,
                            filter_by_keys={'typeOfLevel': 'meanSea'}) as ds_sfc:

        # Determines the central minimum pressure for the frame
        prmsl = ds_sfc['prmsl']
        cmp = prmsl.min().item()

        # CMP_MAX is expressed in mb (hPa). When the file metadata says the
        # field is in pascals, convert so the comparison is like-for-like.
        if prmsl.attrs.get('units') == 'Pa':
            cmp = cmp / 100.0

        if cmp > CMP_MAX:
            return None

        # TODO: apply the land check and assemble the model dataset from
        # ds_atm / ds_sfc here, while both files are still open.

    return None

def list_s3_files(BUCKET_NAME, prefix=''):
    """
    Collect the object records of an S3 bucket using unsigned requests.

    Args:
        BUCKET_NAME (str): The name of the S3 bucket to list.
        prefix (str, optional): Only keys starting with this prefix are
            listed (e.g., 'folder/'). Defaults to '' (no filtering).

    Returns:
        list: Every entry found under the 'Contents' key of the
            list_objects_v2 result pages, in page order.
    """
    unsigned_config = Config(signature_version=UNSIGNED)
    client = boto3.client('s3', config=unsigned_config)

    bucket_name = BUCKET_NAME
    # A paginator is used so buckets with more than 1000 objects are fully listed
    page_iterator = client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket_name, Prefix=prefix
    )

    objects = []
    for page in page_iterator:
        if 'Contents' not in page:
            # A page without 'Contents' carries no objects; report and move on
            print(f"No files found in '{bucket_name}' with prefix '{prefix}'.")
            continue
        objects.extend(page['Contents'])
    return objects

def get_links():
    """
    Return the S3 keys of interest, using a text file as an on-disk cache.

    The cache lives at 'Data/links/link_list.txt', one key per line. When it
    exists and holds at least one entry, its contents are returned without
    contacting S3. Otherwise the bucket named by BUCKET_NAME is listed with
    list_s3_files, the keys that contain 'f024' or 'f027' and do not contain
    'idx' are kept, written to the cache file, and returned.

    Returns:
        list[str]: The cached or freshly listed object keys.
    """
    link_folder = 'Data/links'
    link_file = f'{link_folder}/link_list.txt'

    # Test for the cache file itself rather than for "any file in the
    # folder", and tolerate the folder not existing yet.
    if os.path.isfile(link_file):
        with open(link_file, 'r') as f:
            # .strip() removes newline characters; blank lines are skipped
            loaded_list = [line.strip() for line in f if line.strip()]
        # An empty cache carries no information (an interrupted earlier run
        # can leave one behind), so fall through and rebuild it.
        if loaded_list:
            return loaded_list

    # Finish the listing before touching the cache file, so a failed
    # request cannot leave a truncated or empty cache on disk.
    file_list = list_s3_files(BUCKET_NAME)
    loaded_list = [
        item['Key']
        for item in file_list
        if ('f024' in item['Key'] or 'f027' in item['Key']) and 'idx' not in item['Key']
    ]

    os.makedirs(link_folder, exist_ok=True)
    with open(link_file, 'w') as f:
        f.writelines(key + '\n' for key in loaded_list)

    return loaded_list

In [None]:
# Store the result of get_links(); when the link cache is present this is the
# list of lines read from Data/links/link_list.txt.
frame_links = get_links()

In [None]:
# Bare expression: shows frame_links as the cell output when run as a notebook cell.
frame_links