In [1]:
import boto3
from botocore import UNSIGNED
from botocore.config import Config

s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
paginator = s3.get_paginator('list_objects_v2')

for day in range(1,31):
    prefix = f"aqm.202411{day:02d}/aqm.t06z.ave_1hr_o3.227.grib2"
    response = s3.list_objects_v2(Bucket='noaa-aqm-pds', Prefix=prefix)
    if 'Contents' in response:
        print(f"2024-11-{day:02d} exists: {response['Contents'][0]['Size']/1e6:.1f}MB")


NoSuchBucket: An error occurred (NoSuchBucket) when calling the ListObjectsV2 operation: The specified bucket does not exist

In [None]:
import sys
import os
from datetime import datetime
import os
import requests
import xarray as xr
import cfgrib

current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

print("Current Directory:", current_dir)
print("Parent Directory:", parent_dir)

# Add the forked Herbie to the path
herbie_fork_path = os.path.join(parent_dir, 'herbie_fork')
sys.path.insert(0, herbie_fork_path)

# Now you can import from your forked version
from herbie import Herbie


In [None]:
class AQMHerbie:
    """Class for downloading AQM ozone forecast data with a Herbie-like interface"""

    def __init__(self, date, model='aqm', cycle=6, avg_hours=1,
                 bias_corrected=False, fxx=0, resolution="227", save_dir=None):
        """
        Initialize AQM object for accessing Air Quality Model data

        Parameters:
        -----------
        date : str or datetime
            Date for the model run (e.g., '2024-04-08')
        model : str
            Model name (default is 'aqm')
        cycle : int
            Cycle hour (0, 6, 12, 18)
        avg_hours : int
            Averaging period in hours (1 or 8)
        bias_corrected : bool
            Whether to use bias-corrected data
        fxx : int
            Forecast hour (0-72 for 06 and 12 UTC cycles)
        resolution : str
            Resolution identifier (e.g., "227")
        save_dir : str
            Directory to save downloaded files
        """
        # Convert date to datetime if string
        if isinstance(date, str):
            self.date = datetime.strptime(date, "%Y-%m-%d") if len(date) == 10 else \
                datetime.strptime(date, "%Y-%m-%d %H:%M")
        else:
            self.date = date

        # Set attributes
        self.model = model
        self.cycle = cycle
        self.avg_hours = avg_hours
        self.bias_corrected = bias_corrected
        self.fxx = fxx
        self.resolution = resolution
        self.save_dir = save_dir or os.path.join(os.getcwd(), 'data', 'aqm')

        # Construct file details
        self._construct_file_details()

    def _construct_file_details(self):
        """Construct file name and paths based on parameters"""
        # Format strings
        date_str = self.date.strftime("%Y%m%d")
        cycle_str = f"{self.cycle:02d}"
        bc_str = "_bc" if self.bias_corrected else ""

        # File naming based on averaging period
        if self.avg_hours in [1, 8]:
            self.product = f"ave_{self.avg_hours}hr_o3"
            self.file_name = f"aqm.t{cycle_str}z.{self.product}{bc_str}.{self.resolution}.grib2"
        elif self.avg_hours == "max":
            self.product = "max_8hr_o3"
            self.file_name = f"aqm.t{cycle_str}z.{self.product}.{self.resolution}.grib2"
        else:
            raise ValueError("avg_hours must be 1, 8, or 'max'")

        # URLs for different sources
        self.nomads_url = f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/aqm/prod/aqm.{date_str}/{self.file_name}"
        self.aws_url = f"https://noaa-aqm-pds.s3.amazonaws.com/aqm.{date_str}/{self.file_name}"

        # Local path structure similar to Herbie
        self.local_dir = os.path.join(self.save_dir, date_str)
        self.local_path = os.path.join(self.local_dir, self.file_name)

    def inventory(self):
        """Show inventory of the GRIB2 file (variables, levels, etc.)"""
        date_str = self.date.strftime("%Y%m%d")
        inventory_url = f"https://www.nco.ncep.noaa.gov/pmb/products/aqm/{self.file_name}.shtml"

        print(f"✅ Found inventory for {self.file_name}")
        print(f"Inventory URL: {inventory_url}")

        # In a complete implementation, we would:
        # 1. Fetch the inventory page
        # 2. Parse the HTML table
        # 3. Return structured data about variables

        return inventory_url

    def download(self, search_query=None):
        """
        Download the GRIB2 file or a subset

        Parameters:
        -----------
        search_query : str, optional
            Query to subset the data (e.g., "OZCON")

        Returns:
        --------
        str
            Path to the downloaded file
        """
        # Create directory if it doesn't exist
        os.makedirs(self.local_dir, exist_ok=True)

        # Check if file already exists
        if os.path.exists(self.local_path):
            print(f"✅ File already exists at {self.local_path}")
            return self.local_path

        # Try downloading from different sources
        sources = [
            ("AWS", self.aws_url),
            ("NOMADS", self.nomads_url)
        ]

        for source_name, url in sources:
            try:
                print(f"Attempting download from {source_name}: {url}")
                # In a real implementation, use requests, curl, or wget
                # This is a simplified version:
                response = requests.get(url)
                if response.status_code == 200:
                    with open(self.local_path, 'wb') as f:
                        f.write(response.content)
                    print(f"✅ Successfully downloaded to {self.local_path}")

                    # Handle subsetting if search_query is provided
                    if search_query:
                        # This would require implementing GRIB subsetting similar to Herbie
                        # For now, return the full file path
                        pass

                    return self.local_path
            except Exception as e:
                print(f"❌ Failed to download from {source_name}: {e}")

        raise FileNotFoundError(f"Could not download {self.file_name} from any source")

    def xarray(self, search_query=None):
        """
        Load the data into an xarray Dataset

        Parameters:
        -----------
        search_query : str, optional
            Query to subset the data (e.g., "OZCON")

        Returns:
        --------
        xarray.Dataset
        """
        # Download if necessary
        file_path = self.download(search_query)

        # Open with xarray+cfgrib
        try:
            ds = xr.open_dataset(file_path, engine='cfgrib')
            print(f"✅ Loaded {self.file_name} into xarray")

            # Apply filtering based on search_query if provided
            if search_query:
                # This would require implementing filtering similar to Herbie
                pass

            return ds
        except Exception as e:
            raise RuntimeError(f"Failed to open file with xarray: {e}")


In [None]:
# Add forked Herbie to path
# parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# herbie_fork_path = os.path.join(parent_dir, 'herbie_fork')
# sys.path.insert(0, herbie_fork_path)


# Create an AQM instance for 1-hour average ozone
aqm = AQMHerbie(
    date='2024-04-08',
    cycle=6,
    avg_hours=1,
    fxx=0
)

# Check the inventory
aqm.inventory()

# Download the data
file_path = aqm.download()

# Load into xarray
ds = aqm.xarray()

# Plot the data
import matplotlib.pyplot as plt
# Assuming the dataset has the variable 'OZCON'
ds['OZCON'].isel(time=0).plot()
plt.title('Ozone Concentration 1-hour average')
plt.show()


In [None]:
assert 1==0

In [None]:
"""
Example script demonstrating how to use the AQM model template with Herbie
"""

import sys
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

# Add your local Herbie fork to the Python path
# Replace this with the actual path to your Herbie fork
HERBIE_PATH = Path('~/PycharmProjects/herbie_fork').expanduser()
sys.path.insert(0, str(HERBIE_PATH))

# Import Herbie after adding it to the path
from herbie import Herbie

# Example 1: Recent AQM Data
# --------------------------
print("Example 1: Recent AQM Data")
# Use a very recent date to increase chances of finding data
date = pd.Timestamp.now() - pd.Timedelta(days=1)
print(f"Trying to access data for {date}")

# Force using 6Z or 12Z initialization time
hour = 6 if date.hour < 9 else 12
date = date.replace(hour=hour)

H = Herbie(
    date=date,
    model="aqm",
    product="ave_8hr_o3",
    fxx=1,
    grid="227",  # CONUS grid
)

# Check what's available for the model
print(f"Available AQM products: {H.PRODUCTS}")

if H.grib:
    print(f"\nFound GRIB file at source: {H.grib_source}")

    # Try downloading the file first
    print("\nDownloading GRIB file...")
    local_file = H.download()
    print(f"Downloaded to: {local_file}")

    # Now try to get inventory - this might work even without an index file
    try:
        print("\nGRIB file inventory:")
        inv = H.inventory()
        print(inv.head())
    except Exception as e:
        print(f"Couldn't get inventory: {str(e)}")
        print("This is likely because there's no index file. Continuing anyway...")

    # Try opening with xarray - should work even without idx
    try:
        print("\nOpening with xarray...")
        ds = H.xarray()
        print(ds)

        # If successful, try a simple plot
        if isinstance(ds, xr.Dataset):
            print("\nPlotting data...")
            # Get the first variable in the dataset
            var_name = list(ds.data_vars)[0]

            # Basic plot
            fig, ax = plt.subplots(figsize=(12, 8))

            # Plot the data with a colormap appropriate for air quality
            ds[var_name].isel(time=0, step=0).plot(
                ax=ax,
                cmap='YlOrRd',  # Yellow-Orange-Red colormap
                robust=True,    # Use robust color scaling
            )

            plt.title(f"AQM {H.product} - Init: {H.date:%Y-%m-%d %H:%M} F{H.fxx:02d}")
            plt.savefig("aqm_example_plot.png", dpi=150, bbox_inches='tight')
            print("Saved plot to aqm_example_plot.png")
    except Exception as e:
        print(f"Error opening with xarray: {str(e)}")
else:
    print("No GRIB file found - trying alternative dates/products")

    # Try alternative products
    for product in ["pm25", "max_1hr_o3"]:
        print(f"\nTrying product: {product}")
        H_alt = Herbie(
            date=date,
            model="aqm",
            product=product,
            fxx=1,
            grid="227",
        )
        if H_alt.grib:
            print(f"Found GRIB file with product={product} at source: {H_alt.grib_source}")
            break

# Example 2: Historical AQM Data
# -----------------------------
print("\n\nExample 2: Historical AQM Data")
# Try a date that might have data - summer months often have more ozone forecasts
historical_date = pd.Timestamp("2023-07-15 12:00")
print(f"Trying to access historical data for {historical_date}")

H_hist = Herbie(
    date=historical_date,
    model="aqm_archive",  # Use the archive-specific template
    product="pm25",
    fxx=3,
    grid="227",
)

# Try to locate files
if H_hist.grib:
    print(f"Found historical data at source: {H_hist.grib_source}")

    # Download and try to open file
    print("Downloading historical file...")
    hist_file = H_hist.download()
    print(f"Downloaded to: {hist_file}")

    # Try opening with xarray
    try:
        ds_hist = H_hist.xarray()
        print(ds_hist)
    except Exception as e:
        print(f"Error opening historical file: {str(e)}")
else:
    print("No historical data found.")

    # Try other dates systematically
    print("\nTrying additional historical dates:")
    for year in [2022, 2021, 2020]:
        for month in [7, 1]:  # Try summer and winter
            alt_date = pd.Timestamp(f"{year}-{month:02d}-15 12:00")
            print(f"Checking {alt_date}...")

            H_alt = Herbie(
                date=alt_date,
                model="aqm_archive",
                product="pm25",
                fxx=1,
                grid="227",
            )

            if H_alt.grib:
                print(f"Found historical data at {alt_date} from source: {H_alt.grib_source}")

                # Try to download
                try:
                    alt_file = H_alt.download()
                    print(f"Downloaded to: {alt_file}")
                    break
                except Exception as e:
                    print(f"Error downloading: {str(e)}")
            else:
                print(f"No data found for {alt_date}")

In [None]:
assert 1==0

In [None]:
import sys
import os

# Find the absolute path to the herbie package within herbie_fork
current_dir = os.path.dirname(os.path.abspath('__file__'))
fork_path = os.path.abspath(os.path.join(current_dir, '../../', 'herbie_fork'))

# Check if herbie package exists in the fork
herbie_pkg_path = os.path.join(fork_path, 'herbie')
if os.path.isdir(herbie_pkg_path):
    print(f"Found herbie package at: {herbie_pkg_path}")

    # Add the fork path to sys.path so Python can find the herbie package
    if fork_path not in sys.path:
        sys.path.insert(0, fork_path)
        print(f"Added {fork_path} to Python path")
else:
    print(f"ERROR: No 'herbie' package found in {fork_path}")
    print(f"Available contents:")
    for item in os.listdir(fork_path):
        print(f"  {item}")

# Now try importing
try:
    import herbie
    print(f"Successfully imported herbie from {herbie.__file__}")

    # Check if models module exists
    import herbie.models
    print(f"Successfully imported herbie.models from {herbie.models.__file__}")

    # List available models
    print("\nAvailable models:")
    for model in dir(herbie.models):
        if not model.startswith('__'):
            print(f"  {model}")

    # Check for your AQM file
    aqm_file = os.path.join(herbie_pkg_path, 'models', 'aqm.py')
    print(f"\nAQM file exists: {os.path.exists(aqm_file)}")

    # Try importing your AQM model
    try:
        from herbie.models import aqm
        print("Successfully imported the AQM module!")
    except ImportError as e:
        print(f"Failed to import AQM module: {e}")

except ImportError as e:
    print(f"Failed to import herbie: {e}")

In [None]:
from herbie import Herbie
import matplotlib.pyplot as plt

# Initialize Herbie for AQM model
date = "2025-04-08 06:00"
H = Herbie(date=date, model="aqm", product="ave_8hr_o3")

print(f"Model: {H.model.upper()}")
print(f"Product: {H.product} - {H.product_description}")
print(f"Date: {H.date}")
print(f"File exists: {H.grib is not None}")
if H.grib is not None:
    print(f"Source: {H.grib_source}")

    # Download the full file first
    print("\nDownloading the full file...")
    local_file = H.download()
    print(f"Downloaded to: {local_file}")

    # Create a new Herbie object with the local file
    H = Herbie(date=date, model="aqm", product="ave_8hr_o3", fxx=48)

    # Now try to access the inventory
    inventory = H.inventory()
    print("\nAvailable variables:")
    for var in inventory.variable.unique()[:10]:  # Show first 10 variables
        print(f"  {var}")

    # Now try to load specific variables
    print("\nReading data into xarray...")
    try:
        # Try with a common variable name for ozone
        ds = H.xarray(":OZMAX8:")
        var_name = "OZMAX8"
    except Exception as e:
        print(f"Error with specific variable: {e}")
        print("Trying to load all variables instead...")
        ds = H.xarray()
        var_name = list(ds.data_vars)[0]  # Get first variable

    # Display dataset information
    print("\nDataset info:")
    print(ds)

    # Plot the data
    plt.figure(figsize=(12, 8))
    ds[var_name].plot(cmap='viridis')
    plt.title(f"{var_name} - {H.date}")
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.colorbar(label=f"{var_name}")
    plt.grid(True)
    plt.show()

else:
    print("File not found. Try a different date or time.")

In [None]:
import sys
import os

# Find the absolute path to the herbie package within herbie_fork
current_dir = os.path.dirname(os.path.abspath('__file__'))
fork_path = os.path.abspath(os.path.join(current_dir, '../../', 'herbie_fork'))

# Check if herbie package exists in the fork
herbie_pkg_path = os.path.join(fork_path, 'herbie')
if os.path.isdir(herbie_pkg_path):
    print(f"Found herbie package at: {herbie_pkg_path}")

    # Add the fork path to sys.path so Python can find the herbie package
    if fork_path not in sys.path:
        sys.path.insert(0, fork_path)
        print(f"Added {fork_path} to Python path")
else:
    print(f"ERROR: No 'herbie' package found in {fork_path}")
    print(f"Available contents:")
    for item in os.listdir(fork_path):
        print(f"  {item}")

# Now try importing
try:
    import herbie
    print(f"Successfully imported herbie from {herbie.__file__}")

    # Check if models module exists
    import herbie.models
    print(f"Successfully imported herbie.models from {herbie.models.__file__}")

    # List available models
    print("\nAvailable models:")
    for model in dir(herbie.models):
        if not model.startswith('__'):
            print(f"  {model}")

    # Check for your AQM file
    aqm_file = os.path.join(herbie_pkg_path, 'models', 'aqm.py')
    print(f"\nAQM file exists: {os.path.exists(aqm_file)}")

    # Try importing your AQM model
    try:
        from herbie.models import aqm
        print("Successfully imported the AQM module!")
    except ImportError as e:
        print(f"Failed to import AQM module: {e}")

except ImportError as e:
    print(f"Failed to import herbie: {e}")

In [None]:
AQM_GRIDS = {
    'conus': {
        'template': 3.30,
        'proj': 'lcc',
        'params': {'lat_0': 25.0, 'lon_0': -95.0, 'lat_1': 25.0, 'lat_2': 25.0}
    }}

AQM_OZONE_PATTERNS = [
    r':O3Dmax8hr:',
    r':O3_8hr_Back_Average:',
    r':AQS_O3_8hr:'
]



In [None]:
class AQMHerbieTemplate:
    DESCRIPTION = "NOAA Air Quality Model (AQM) ozone forecasts"
    DETAILS = {
        "NOMADS": "https://nomads.ncep.noaa.gov/pub/data/nccr/com/aqm/prod/",
        "DOCS": "https://www.weather.gov/media/notification/scn22-25_aqm_aaqfs.pdf"
    }

    PRODUCTS = {
        "ozone": {
            "6hr": {
                "grib2_codes": ["O3_Dmax_8hr", "O3_8hr_Back_Average"],
                "levels": ["surface"],
                "sources": ["nomads"]
            }
        }
    }

    SOURCES = {
        "nomads": {
            "name": "NCEP NOMADS",
            "path": "https://nomads.ncep.noaa.gov/pub/data/nccr/com/aqm/prod/aqm.{date:%Y%m%d}/aqm.t{date:%H}z.average{product}.grib2",
            "chunks": {"time": 6}  # 6-hour averages
        }
    }

    GRIDS = {
        "conus": {
            "type": "lambert",
            "parameters": {
                "grid": [259, 375],
                "dx": 12000, "dy": 12000,
                "lat_0": 25, "lon_0": -95,
                "lat_1": 25, "lat_2": 25
            }
        }
    }

    IDX_STYLE = "wgrib2"  # AQM uses standard wgrib2 indices

In [None]:
import pandas as pd
import xarray as xr
from pyproj import CRS
from herbie import Herbie
import requests

class AQMHerbie(Herbie):
    """Extension of Herbie to handle NOAA Air Quality Model (AQM) data"""

    MODEL = "aqm"  # Static model name

    def __init__(self, date, product="ave_8hr_o3", grid_region="conus", fxx=1, **kwargs):
        """
        Initialize AQM Herbie

        Parameters
        ----------
        date : str or datetime
            Model initialization date
        product : str
            AQM product type:
            - "ave_8hr_o3" (default): 8-hour average ozone
            - "max_1hr_o3": 1-hour maximum ozone
            - "max_8hr_o3": 8-hour maximum ozone
            - "ave_1hr_o3": 1-hour average ozone
            - "ave_24hr_pm25": 24-hour average PM2.5
            - "max_1hr_pm25": 1-hour maximum PM2.5
            - "ave_1hr_pm25": 1-hour average PM2.5

            Add "_bc" suffix for bias-corrected versions (e.g., "ave_8hr_o3_bc")
        grid_region : str
            'conus' (default) or 'alaska'
        fxx : int
            Forecast hour (default: 1)
        **kwargs :
            Additional arguments for Herbie
        """
        # Define products dictionary before initialization
        self.PRODUCTS = {
            "ave_8hr_o3": "8-hour average ozone concentration",
            "max_8hr_o3": "8-hour maximum ozone concentration",
            "ave_1hr_o3": "1-hour average ozone concentration",
            "max_1hr_o3": "1-hour maximum ozone concentration",
            "ave_24hr_pm25": "24-hour average PM2.5 concentration",
            "ave_1hr_pm25": "1-hour average PM2.5 concentration",
            "max_1hr_pm25": "1-hour maximum PM2.5 concentration",
            # Bias-corrected versions
            "ave_8hr_o3_bc": "8-hour average ozone concentration (bias-corrected)",
            "max_8hr_o3_bc": "8-hour maximum ozone concentration (bias-corrected)",
            "ave_1hr_o3_bc": "1-hour average ozone concentration (bias-corrected)",
            "max_1hr_o3_bc": "1-hour maximum ozone concentration (bias-corrected)",
        }

        # Validate product
        if product not in self.PRODUCTS:
            raise ValueError(f"Product must be one of {list(self.PRODUCTS.keys())}")

        # Set grid ID based on region
        grid_id = "198" if grid_region.lower() == "alaska" else "227"
        self.grid_region = grid_region.lower()
        self.grid_id = grid_id

        # Custom projection handling
        self.proj = CRS.from_cf({
            "grid_mapping_name": "lambert_conformal_conic",
            "standard_parallel": 25.0,
            "longitude_of_central_meridian": -95.0,
            "latitude_of_projection_origin": 25.0
        })

        # Initialize Herbie with base model
        super().__init__(date=date, model=self.MODEL, product=product, fxx=fxx, **kwargs)

        # Build URLs for different sources
        date_fmt = self.date.strftime('%Y%m%d')
        hour_fmt = self.date.strftime('%H')

        # Try different directory structures and naming conventions
        self.SOURCES = {
            # Main NOMADS path - standard location
            "nomads": f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/aqm/prod/aqm.{date_fmt}/aqm.t{hour_fmt}z.{self.product}.{self.grid_id}.grib2",

            # Alternative paths based on hour of day
            "nomads_alt": f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/aqm/prod/aqm.{date_fmt}/{hour_fmt}/aqm.t{hour_fmt}z.{self.product}.{self.grid_id}.grib2",

            # FTPPRD path (similar structure to NOMADS)
            "ftpprd": f"https://ftpprd.ncep.noaa.gov/data/nccf/com/aqm/prod/aqm.{date_fmt}/aqm.t{hour_fmt}z.{self.product}.{self.grid_id}.grib2",

            # Alternative FTPPRD path
            "ftpprd_alt": f"https://ftpprd.ncep.noaa.gov/data/nccf/com/aqm/prod/aqm.{date_fmt}/{hour_fmt}/aqm.t{hour_fmt}z.{self.product}.{self.grid_id}.grib2",

            # NCO path format (specific hourly runs might be available)
            "nco": f"https://www.nco.ncep.noaa.gov/pmb/products/aqm/aqm.t{hour_fmt}z.{self.product}.{self.grid_id}.grib2",
        }

        # Override description for more detail
        self.DESCRIPTION = f"NOAA Air Quality Model (AQM) - {self.PRODUCTS[self.product]} - {self.grid_region.upper()}"

        # Re-check if file exists with updated sources
        self.grib, self.grib_source = self.find_grib()
        self.idx, self.idx_source = self.find_idx()

    def xarray(self, search=None, **kwargs):
        """
        Open AQM GRIB data as xarray DataSet with appropriate handling for AQM-specific fields.

        Parameters
        ----------
        search : str
            Variable search string (e.g., ":OZMAX8:")
        **kwargs :
            Additional arguments for xarray
        """
        # Set default backend kwargs for proper handling of AQM files
        backend_kwargs = kwargs.pop('backend_kwargs', {})

        # For ozone products, we might need special handling
        if 'o3' in self.product.lower():
            # Make sure we have proper filter_by_keys if not specified
            if 'filter_by_keys' not in backend_kwargs:
                backend_kwargs['filter_by_keys'] = {'typeOfLevel': 'surface'}

        # Call parent xarray method with our specialized parameters
        return super().xarray(search=search, backend_kwargs=backend_kwargs, **kwargs)

# Try a function that searches for specific hours
def find_available_aqm_data():
    """Try to find available AQM data by testing various combinations"""
    # Get today's date and a few days back
    today = pd.Timestamp.now().normalize()
    dates = [(today - pd.Timedelta(days=i)).strftime('%Y-%m-%d') for i in range(5)]

    # AQM typically runs 4 times per day
    hours = ["00", "06", "12", "18"]

    # Try different products
    products = ["ave_8hr_o3", "max_1hr_o3", "ave_24hr_pm25"]

    # Track successful combinations
    found_files = []

    print("Searching for available AQM data...")
    for date in dates:
        for hour in hours:
            for product in products:
                datetime_str = f"{date} {hour}:00"
                print(f"\nChecking {datetime_str} with product {product}...")

                H = AQMHerbie(datetime_str, product=product)

                if H.grib:
                    print(f"✅ SUCCESS! Found file at {H.grib_source}")
                    found_files.append((datetime_str, product, H.grib_source, H.grib))

                    # Try to look at inventory
                    try:
                        inventory = H.inventory()
                        print(f"Variables in file: {len(inventory.variable.unique())}")
                        print("First few variables:")
                        for var in list(inventory.variable.unique())[:5]:
                            print(f"  {var}")

                        # If we found a file with variables, we can stop
                        if len(inventory.variable.unique()) > 0:
                            return H
                    except Exception as e:
                        print(f"Error reading inventory: {e}")
                else:
                    print(f"❌ File not found")

    if found_files:
        print("\nFound files:")
        for datetime_str, product, source, url in found_files:
            print(f"  {datetime_str} - {product} from {source}")
            print(f"  URL: {url}")

        # Return a Herbie object for the first successful combination
        datetime_str, product, _, _ = found_files[0]
        return AQMHerbie(datetime_str, product=product)
    else:
        print("\nNo files found with any combination!")
        return None

# Run our finder function
H = find_available_aqm_data()

# If we found data, try to open it
if H:
    print(f"\nAttempting to open data for {H.date}, product {H.product}...")

    # Check inventory again to see variable names
    inventory = H.inventory()
    print("\nVariable names in file:")
    for var in inventory.variable.unique():
        print(f"  {var}")

    # Try to open with xarray using the appropriate search term
    try:
        # Choose a search term based on the inventory
        search_term = None
        if "OZMAX8" in str(inventory.variable.unique()):
            search_term = "OZMAX8"
        elif "PM25" in str(inventory.variable.unique()):
            search_term = "PM25"

        if search_term:
            print(f"\nAttempting to load variable: {search_term}")
            ds = H.xarray(f":{search_term}:")
        else:
            print("\nLoading all variables")
            ds = H.xarray()

        print("\nDataset successfully opened:")
        print(ds)

        # Plot data
        import matplotlib.pyplot as plt
        var_name = list(ds.data_vars)[0]

        plt.figure(figsize=(12, 8))
        ds[var_name].plot(cmap='viridis')
        plt.title(f"{var_name} - {H.date}")
        plt.xlabel("Longitude")
        plt.ylabel("Latitude")
        plt.grid(True)
        plt.show()
    except Exception as e:
        print(f"Error opening dataset: {e}")

In [None]:
import requests
from bs4 import BeautifulSoup
import re

# Let's check one of the SHTML pages to see where the actual data is
url = "https://www.nco.ncep.noaa.gov/pmb/products/aqm/aqm.t06z.ave_8hr_o3.227.grib2.shtml"
response = requests.get(url)
print(f"Status code: {response.status_code}")

# Parse the page to find information about accessing the data
soup = BeautifulSoup(response.text, 'html.parser')
text = soup.get_text()

# Look for patterns that might indicate the data location
print("\nSearching for information about data access...")
access_info = []
for line in text.split('\n'):
    if any(keyword in line.lower() for keyword in ['ftp', 'http', 'nomads', 'com/aqm', 'data']):
        access_info.append(line.strip())

for info in access_info:
    if info:  # Filter out empty lines
        print(f"  {info}")

# Try to extract more information from the page
tables = soup.find_all('table')
if tables:
    print("\nFound tables with potential information:")
    for i, table in enumerate(tables):
        rows = table.find_all('tr')
        if len(rows) > 0:
            headers = [th.text.strip() for th in rows[0].find_all(['th', 'td'])]
            print(f"Table {i+1} headers: {headers}")

In [None]:
# Create AQM Herbie instance for ozone
H = AQMHerbie("2024-04-06 12:00", product="ave_8hr_o3", fxx=6)

# Check if file exists
print(f"Model: {H.model.upper()}")
print(f"Product: {H.product} - {H.product_description}")
print(f"Date: {H.date}")
print(f"File exists: {H.grib is not None}")

if H.grib is not None:
    print(f"Source: {H.grib_source}")

    # Look at inventory to find the right variable name
    inventory = H.inventory()
    print("\nAvailable variables:")
    print(inventory.variable.unique())

    # For ozone data, you might need to find the specific variable name
    # from the inventory, but try a common one first:
    try:
        ds = H.xarray(
            ":O3_8hr_Back_Average:",
            backend_kwargs={'filter_by_keys': {'typeOfLevel': 'surface'}}
        )

        print("\nDataset info:")
        print(ds)

        # Plot data
        import matplotlib.pyplot as plt
        var_name = list(ds.data_vars)[0]  # Get first variable

        plt.figure(figsize=(12, 8))
        ds[var_name].plot(cmap='viridis')
        plt.title(f"{var_name} - {H.date}")
        plt.xlabel("Longitude")
        plt.ylabel("Latitude")
        plt.grid(True)
        plt.show()

    except Exception as e:
        print(f"Error opening dataset: {e}")
        print("Try using a different search term based on the inventory output above.")
else:
    print("File not found. URLs to check:")
    for source, url in H.SOURCES.items():
        print(f"  {source}: {url}")

    # Check if files exist
    import requests
    for source, url in H.SOURCES.items():
        try:
            response = requests.head(url, timeout=5)
            print(f"  {source}: Status {response.status_code}")
        except requests.exceptions.RequestException:
            print(f"  {source}: Connection error")

    # Try with a different date
    print("\nTrying with a different date...")
    H_alt = AQMHerbie("2024-04-01 12:00", product="ave_8hr_o3", fxx=6)
    print(f"File exists for alternate date: {H_alt.grib is not None}")

In [None]:
H = AQMHerbie("2025-04-06 12:00", fxx=6)
ds = H.xarray(":O3_8hr_Back_Average:",
              engine='cfgrib',
              backend_kwargs={'filter_by_keys': {'typeOfLevel': 'surface'}})


In [None]:
H = AQMHerbie(
    '2025-04-06 12:00',
    grid_region='conus',
    product='ozone',
    fxx=6
)
print(H.inventory())



In [None]:
ds = H.xarray(":O3Dmax8hr:", engine='cfgrib')
print(ds['O3Dmax8hr'].attrs)


In [None]:
xyz = H.download(":O3Dmax8hr: & typeOfLevel=surface")
xyz

In [None]:
# from herbie import config
# import os
# config.set_aqm_credentials(username=os.environ['NCEP_USER'],
#                            password=os.environ['NCEP_PWD'])
