# Test the speed of reading GFS data at 0.25-degree and 1.00-degree resolution into an xarray `Dataset` from four sources: `NOMADS server`, `Azure Blob Storage`, `AWS S3 Bucket`, and `Google Cloud Storage`, in a local environment.

In [None]:
import xarray as xr
import urllib.request

### 0.25-degree resolution = 515 MB / file
- NOMADS: 45.4s
- Azure Blob Storage: 57.4s
- AWS S3 Bucket: 43.1s
- Google Cloud Storage: 45.3s

In [None]:
# NOMADS server, 0.25-degree file: download it, then open it with cfgrib.
URL = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20240801/12/atmos/gfs.t12z.pgrb2.0p25.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# Azure Blob Storage, 0.25-degree file: download it, then open it with cfgrib.
URL = "https://noaagfs.blob.core.windows.net/gfs/gfs.20240801/12/atmos/gfs.t12z.pgrb2.0p25.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# AWS S3 bucket, 0.25-degree file: download it, then open it with cfgrib.
URL = "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.20240801/12/atmos/gfs.t12z.pgrb2.0p25.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# Google Cloud Storage, 0.25-degree file: download it, then open it with cfgrib.
URL = "https://storage.googleapis.com/global-forecast-system/gfs.20240801/12/atmos/gfs.t12z.pgrb2.0p25.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

### 1.00-degree resolution = 42.5 MB / file
- NOMADS: 6.2s
- Azure Blob Storage: 5.2s
- AWS S3 Bucket: 4.3s
- Google Cloud Storage: 4.4s

In [None]:
# NOMADS server, 1.00-degree file: download it, then open it with cfgrib.
URL = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20240729/12/atmos/gfs.t12z.pgrb2.1p00.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# Azure Blob Storage, 1.00-degree file: download it, then open it with cfgrib.
GFS_URL = 'https://noaagfs.blob.core.windows.net/gfs/gfs.20240729/12/atmos/gfs.t12z.pgrb2.1p00.f108'

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(GFS_URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# AWS S3 bucket, 1.00-degree file: download it, then open it with cfgrib.
URL = "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.20240729/12/atmos/gfs.t12z.pgrb2.1p00.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

In [None]:
# Google Cloud Storage, 1.00-degree file: download it, then open it with cfgrib.
URL = "https://storage.googleapis.com/global-forecast-system/gfs.20240729/12/atmos/gfs.t12z.pgrb2.1p00.f108"

# With no target path given, urlretrieve stores the download in a temporary
# file and returns (local_path, headers); only the path is needed here.
filename, _ = urllib.request.urlretrieve(URL)

# Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
ds = xr.open_dataset(
    filename,
    engine="cfgrib",
    backend_kwargs=dict(errors="ignore"),
    filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
)

ds

### Concatenate 37 GFS files at 0.25-degree resolution (steps 0 to 36)
- NOMADS: 31m 2s
- Azure blob: 31m 46s

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')

def file_path_azure(cycle_runtime: int, forecast_hour: int, year: int, month: int, day: int, resolution_degree: float) -> str:
    """Build the Azure Blob Storage URL of a single GFS ``pgrb2`` file.

    The result has the form
    ``https://noaagfs.blob.core.windows.net/gfs/gfs.YYYYMMDD/CC/atmos/gfs.tCCz.pgrb2.XpYY.fHHH``.

    Args:
        cycle_runtime: Value used for the ``CC`` directory and the ``tCCz``
            part of the file name; zero-padded to two digits.
        forecast_hour: Value used for the ``fHHH`` suffix; zero-padded to
            three digits.
        year: Year component of the ``YYYYMMDD`` directory name.
        month: Month component; zero-padded to two digits.
        day: Day component; zero-padded to two digits.
        resolution_degree: Resolution rendered as ``XpYY`` (``0.25`` ->
            ``0p25``, ``0.5`` -> ``0p50``, ``1.0`` or ``1`` -> ``1p00``).

    Returns:
        The complete URL as a string.
    """
    prefix_path = "https://noaagfs.blob.core.windows.net/"
    product_name = "gfs"

    # Normalise through float() so an integer resolution such as 1 renders as
    # "1.0" and still has a fractional part; str(1) has no "." and used to
    # raise IndexError when the fractional part was indexed.
    whole_part, _, fraction_part = str(float(resolution_degree)).partition(".")

    file_path = (
        f"{product_name}/{product_name}.{year}{month:>02}{day:>02}/"
        f"{cycle_runtime:>02}/atmos/{product_name}.t{cycle_runtime:>02}z."
        f"pgrb2.{whole_part}p{fraction_part:<02}.f{forecast_hour:>03}"
    )

    # A URL is not a filesystem path, so concatenate directly instead of using
    # os.path.join; prefix_path already ends with "/".
    return prefix_path + file_path

In [None]:
def file_path_nomads(cycle_runtime: int, forecast_hour: int, year: int, month: int, day: int, resolution_degree: float) -> str:
    """Build the NOMADS URL of a single GFS ``pgrb2`` file.

    The result has the form
    ``https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.YYYYMMDD/CC/atmos/gfs.tCCz.pgrb2.XpYY.fHHH``.

    Args:
        cycle_runtime: Value used for the ``CC`` directory and the ``tCCz``
            part of the file name; zero-padded to two digits.
        forecast_hour: Value used for the ``fHHH`` suffix; zero-padded to
            three digits.
        year: Year component of the ``YYYYMMDD`` directory name.
        month: Month component; zero-padded to two digits.
        day: Day component; zero-padded to two digits.
        resolution_degree: Resolution rendered as ``XpYY`` (``0.25`` ->
            ``0p25``, ``0.5`` -> ``0p50``, ``1.0`` or ``1`` -> ``1p00``).

    Returns:
        The complete URL as a string.
    """
    prefix_path = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/"
    product_name = "gfs"
    version = "prod"

    # Normalise through float() so an integer resolution such as 1 renders as
    # "1.0" and still has a fractional part; str(1) has no "." and used to
    # raise IndexError when the fractional part was indexed.
    whole_part, _, fraction_part = str(float(resolution_degree)).partition(".")

    file_path = (
        f"{product_name}/{version}/{product_name}.{year}{month:>02}{day:>02}/"
        f"{cycle_runtime:>02}/atmos/{product_name}.t{cycle_runtime:>02}z."
        f"pgrb2.{whole_part}p{fraction_part:<02}.f{forecast_hour:>03}"
    )

    # A URL is not a filesystem path, so concatenate directly instead of using
    # os.path.join; prefix_path already ends with "/".
    return prefix_path + file_path

In [None]:
# Download forecast hours 0..36 (37 files) from Azure Blob Storage, open each
# one with cfgrib, and concatenate the datasets along the 'step' dimension.
ds_list = []
for i in range(37):
    URL = file_path_azure(
        cycle_runtime=12,
        forecast_hour=i,
        year=2024,
        month=7,
        day=30,
        resolution_degree=.25,
    )
    # urlretrieve returns (local_path, headers); only the path is needed.
    filename, _ = urllib.request.urlretrieve(URL)
    # Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
    ds = xr.open_dataset(
        filename,
        engine="cfgrib",
        backend_kwargs=dict(errors="ignore"),
        filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
    )
    ds_list.append(ds)

ds_merged = xr.concat(ds_list, dim='step')

ds_merged

In [None]:
# Download forecast hours 0..36 (37 files) from the NOMADS server, open each
# one with cfgrib, and concatenate the datasets along the 'step' dimension.
ds_list = []
for i in range(37):
    URL = file_path_nomads(
        cycle_runtime=12,
        forecast_hour=i,
        year=2024,
        month=7,
        day=30,
        resolution_degree=.25,
    )
    # urlretrieve returns (local_path, headers); only the path is needed.
    filename, _ = urllib.request.urlretrieve(URL)
    # Keep only the GRIB messages whose typeOfLevel is 'pressureFromGroundLayer'.
    ds = xr.open_dataset(
        filename,
        engine="cfgrib",
        backend_kwargs=dict(errors="ignore"),
        filter_by_keys=dict(typeOfLevel="pressureFromGroundLayer"),
    )
    ds_list.append(ds)

ds_merged = xr.concat(ds_list, dim='step')

ds_merged