# Accessing Global Forecast System (GFS) data and simple visualization

### References:
- https://www.nco.ncep.noaa.gov/pmb/products/gfs/#GFS

#### Define a function `file_path()` to build the URL of a GFS file in the public Azure container

### Run the following cell to install the dependencies

In [None]:
# Optional installs: uncomment whichever packages are missing from your environment.
# %pip install --upgrade pip
# %pip install --upgrade xarray[complete]
# %pip install eccodes
# %pip install cfgrib
# %pip install ecmwflibs
# %pip install numpy==1.23.0
# Only the numpy upgrade below is active. Later cells also import matplotlib,
# alive_progress and cartopy, which are not installed by this cell.
%pip install numpy --upgrade

In [None]:
import os

def file_path_JL(cycle_runtime: str, forecast_hour: str, year: int, month: str, day: str, resolution_degree: float) -> str:
    """Build the URL of a GFS ``pgrb2`` file in the ``noaagfs`` Azure container.

    The string parameters may also be given as integers; every value is
    converted with ``str()`` before it is padded.

    Args:
        cycle_runtime: Cycle run time as it appears in the path (``t{cycle}z``);
            left-padded with zeros to two characters.
        forecast_hour: Forecast hour; left-padded with zeros to three characters.
        year: Year, inserted into the path as-is.
        month: Month; left-padded with zeros to two characters.
        day: Day of the month; left-padded with zeros to two characters.
        resolution_degree: Grid resolution in degrees. ``0.5`` is rendered as
            ``0p50`` and ``1`` / ``1.0`` as ``1p00``.

    Returns:
        The complete URL of the requested file.
    """
    prefix_path = "https://noaagfs.blob.core.windows.net/"
    product_name = "gfs"

    # str.rjust only pads values that are shorter than the target width, so
    # no explicit length checks are needed.
    cycle = str(cycle_runtime).rjust(2, "0")
    hour = str(forecast_hour).rjust(3, "0")
    month_part = str(month).rjust(2, "0")
    day_part = str(day).rjust(2, "0")

    # Going through float() guarantees a "." in the text, so an integer
    # resolution such as 1 no longer fails when the fraction is extracted.
    whole, _, fraction = str(float(resolution_degree)).partition(".")
    fraction = fraction.ljust(2, "0")

    relative_path = (
        f"{product_name}/{product_name}.{year}{month_part}{day_part}/"
        f"{cycle}/atmos/{product_name}.t{cycle}z."
        f"pgrb2.{whole}p{fraction}.f{hour}"
    )

    # A URL always uses "/", so concatenate directly instead of relying on the
    # platform-specific os.path.join.
    return prefix_path + relative_path

In [None]:
import os


# KJW: make the parameters have consistent types.  e.g. month should be an int, not a string
def file_path(cycle_runtime: int, forecast_hour: int, year: int, month: int, day: int, resolution_degree: float) -> str:
    """Build the URL of a GFS ``pgrb2`` file in the ``noaagfs`` Azure container.

    Args:
        cycle_runtime: Cycle run time as it appears in the path (``t{cycle}z``);
            zero-padded to two digits.
        forecast_hour: Forecast hour; zero-padded to three digits.
        year: Year, inserted into the path as-is.
        month: Month; zero-padded to two digits.
        day: Day of the month; zero-padded to two digits.
        resolution_degree: Grid resolution in degrees. ``0.5`` is rendered as
            ``0p50`` and ``1`` / ``1.0`` as ``1p00``.

    Returns:
        The complete URL of the requested file.
    """
    prefix_path = "https://noaagfs.blob.core.windows.net/"
    product_name = "gfs"

    # Going through float() guarantees a "." in the text, so an integer
    # resolution such as 1 no longer fails when the fraction is extracted.
    whole, _, fraction = str(float(resolution_degree)).partition(".")

    # ">02" / ">03" left-pad with zeros; "<02" right-pads the fraction
    # ("5" -> "50") so the resolution always has two decimals.
    relative_path = (
        f"{product_name}/{product_name}.{year}{month:>02}{day:>02}/"
        f"{cycle_runtime:>02}/atmos/{product_name}.t{cycle_runtime:>02}z."
        f"pgrb2.{whole}p{fraction:<02}.f{forecast_hour:>03}"
    )

    # A URL always uses "/", so concatenate directly instead of relying on the
    # platform-specific os.path.join.
    return prefix_path + relative_path

#### Define a function `read_into_xarray_dataset()` to read the file at a given URL into an xarray dataset

References for different keywords:

        filter_by_keys={'typeOfLevel': 'meanSea'}
        filter_by_keys={'typeOfLevel': 'hybrid'}
        filter_by_keys={'typeOfLevel': 'atmosphere', 'stepType': 'instant'}
        filter_by_keys={'typeOfLevel': 'atmosphere', 'stepType': 'avg'}
        filter_by_keys={'typeOfLevel': 'surface', 'stepType': 'instant'}
        filter_by_keys={'typeOfLevel': 'surface', 'stepType': 'avg'}
        filter_by_keys={'typeOfLevel': 'surface', 'stepType': 'accum'}
        filter_by_keys={'typeOfLevel': 'planetaryBoundaryLayer'}
        filter_by_keys={'typeOfLevel': 'isobaricInPa'}
        filter_by_keys={'typeOfLevel': 'isobaricInhPa'}
        filter_by_keys={'typeOfLevel': 'heightAboveGround'}
        filter_by_keys={'typeOfLevel': 'depthBelowLandLayer'}
        filter_by_keys={'typeOfLevel': 'heightAboveSea'}
        filter_by_keys={'typeOfLevel': 'atmosphereSingleLayer'}
        filter_by_keys={'typeOfLevel': 'lowCloudLayer', 'stepType': 'instant'}
        filter_by_keys={'typeOfLevel': 'lowCloudLayer', 'stepType': 'avg'}
        filter_by_keys={'typeOfLevel': 'middleCloudLayer', 'stepType': 'instant'}
        filter_by_keys={'typeOfLevel': 'middleCloudLayer', 'stepType': 'avg'}
        filter_by_keys={'typeOfLevel': 'highCloudLayer', 'stepType': 'instant'}
        filter_by_keys={'typeOfLevel': 'highCloudLayer', 'stepType': 'avg'}
        filter_by_keys={'typeOfLevel': 'cloudCeiling'}
        filter_by_keys={'typeOfLevel': 'heightAboveGroundLayer'}
        filter_by_keys={'typeOfLevel': 'tropopause'}
        filter_by_keys={'typeOfLevel': 'maxWind'}
        filter_by_keys={'typeOfLevel': 'isothermZero'}
        filter_by_keys={'typeOfLevel': 'highestTroposphericFreezing'}
        filter_by_keys={'typeOfLevel': 'pressureFromGroundLayer'}
        filter_by_keys={'typeOfLevel': 'sigmaLayer'}
        filter_by_keys={'typeOfLevel': 'sigma'}
        filter_by_keys={'typeOfLevel': 'potentialVorticity'}
 

In [None]:
import xarray as xr
import urllib.request
from urllib.error import HTTPError
from typing import Optional


def read_into_xarray_dataset(URL: str, level: str, step: Optional[str] = None) -> "Optional[xr.Dataset]":
    """Download a GRIB file and open one of its level types with xarray/cfgrib.

    Args:
        URL: Address of the GRIB file to download.
        level: Value used for the cfgrib ``typeOfLevel`` filter.
        step: Value used for the cfgrib ``stepType`` filter. It is only applied
            for the level types listed in ``step_key`` and only when given.

    Returns:
        The dataset returned by ``xr.open_dataset``, or ``None`` when the
        server answers 404 for ``URL``.

    Raises:
        HTTPError: For any HTTP failure other than 404.
    """
    # Level types for which the stepType filter is applied.
    step_key = ("atmosphere", "surface", "lowCloudLayer", "middleCloudLayer", "highCloudLayer")

    # Only the download can raise HTTPError, so the try body is limited to it.
    try:
        # Without a target name, urlretrieve stores the download in a temporary
        # file and returns its path.
        # NOTE(review): that file is never removed; it presumably has to outlive
        # this call because the dataset is opened from it - confirm before
        # adding any cleanup.
        filename, _ = urllib.request.urlretrieve(URL)
    except HTTPError as err:
        if err.code == 404:
            print(f"{URL} does not exist. Please check the parameters again.")
            return None
        # Any other HTTP status is unexpected; surface it instead of silently
        # returning None.
        raise

    filter_by_keys = {"typeOfLevel": level}
    # Add stepType only when the caller supplied one; otherwise the filter
    # would contain a literal None, which presumably matches no message.
    if level in step_key and step is not None:
        filter_by_keys["stepType"] = step

    return xr.open_dataset(
        filename,
        engine="cfgrib",
        filter_by_keys=filter_by_keys,
        backend_kwargs={"errors": "ignore"},
    )

In [None]:
# Example: the 12z cycle of 2024-06-10, forecast hour 102, on the 0.5 degree grid.
URL = file_path(
    cycle_runtime=12,
    forecast_hour=102,
    year=2024,
    month=6,
    day=10,
    resolution_degree=0.5,
)

# Open the surface fields whose stepType is "accum".
ds = read_into_xarray_dataset(URL, level="surface", step="accum")

# Leaving the dataset as the last expression makes the notebook display it.
ds

#### Simple visualization of GFS data

In [None]:
import matplotlib.pyplot as plt
from alive_progress import alive_bar

# Forecast hours to plot: every 6 hours from 6 up to and including 72.
forecast_hours = list(range(6, 72 + 1, 6))

# Start from an empty figure; the panels are added one at a time with
# plt.subplot() in the loop. plt.subplots() would return a (figure, axes) tuple
# and add an extra full-size axes underneath the grid of panels.
fig = plt.figure(figsize=(15, 12))

# Two panels per row; round the number of rows up so every hour gets a panel.
cols = 2
rows = -(-len(forecast_hours) // cols)
print(rows)

with alive_bar(len(forecast_hours), force_tty=True, title='Running', length=20, bar='smooth') as bar:

    for n, forecast_hour in enumerate(forecast_hours):

        URL = file_path(12, forecast_hour, 2024, 6, 12, 1.)

        ds = read_into_xarray_dataset(URL, 'pressureFromGroundLayer')

        # The reader can return None when the download fails; skip that panel
        # instead of crashing on ds['t'].
        if ds is None:
            print(f"Skipping forecast hour {forecast_hour}: no dataset was returned.")
            bar()
            continue

        ax = plt.subplot(rows, cols, n + 1)

        ds['t'].plot(ax=ax)

        # Set the title on this panel explicitly rather than on whichever axes
        # happens to be current after plotting.
        ax.set_title(f"forecast {ds.step.values.astype('timedelta64[h]')} from {ds.time.values.astype('datetime64[s]')}")

        bar()

plt.suptitle('Three days forecast of Temperature with 1.00 degree resolution', size=18)
plt.tight_layout()
# Leave room above the panels for the figure-level title.
plt.subplots_adjust(top=0.92)

plt.show()


In [None]:
import cartopy.crs as ccrs

# Map axes in a plate carree projection centred on longitude 180.
fig, ax = plt.subplots(figsize=(16, 8), subplot_kw={'projection': ccrs.PlateCarree(central_longitude=180)})

# Declare the coordinate system of the data with transform=. Without it the
# longitudes are interpreted in the axes projection (centred on 180) and the
# field is drawn shifted.
# NOTE(review): assumes ds uses plain latitude/longitude coordinates - confirm.
ds['r'].plot(ax=ax, transform=ccrs.PlateCarree())

# Set the title on the map axes explicitly rather than on whichever axes
# happens to be current after plotting.
ax.set_title(f"forecast {ds.step.values.astype('timedelta64[h]')} from {ds.time.values.astype('datetime64[s]')}")
plt.show()