# Tutorial: Downloading, Visualizing, and Interpreting GFS Data

The GFS (Global Forecast System) is a global weather model developed by NOAA (the National Oceanic and Atmospheric Administration) in the United States.
It provides forecasts for the entire world, with data updated every 6 hours. It is a free and widely used source for weather analysis and forecasting.

# Step 1: Import the Required Python Libraries

In [None]:
import datetime
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import xarray as xr
from matplotlib.colors import BoundaryNorm, ListedColormap
from matplotlib.colorbar import ColorbarBase
from ipyleaflet import Map, DrawControl
import ipywidgets as widgets
from IPython.display import display
from ipyleaflet import Map, DrawControl, Rectangle
import ipywidgets as widgets
from IPython.display import display
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from matplotlib.colorbar import ColorbarBase

# Reached only if every import above succeeded; a failed import stops the cell first.
print("All libraries have been successfully imported!")

# Step 2: Select an Area of Interest Using an Interactive Map
Using the function below, we will create an interactive map.
On this map, you can select the region for which you want to download and analyze GFS forecasts.

In [None]:
def interactive_bbox_ipy(center=(5,5), zoom=5):
    """
    Show an interactive map on which the user draws a rectangular area of interest.

    Parameters
    ----------
    center : tuple
        Initial map center, passed unchanged to ``Map``.
    zoom : int
        Initial zoom level, passed unchanged to ``Map``.

    Returns
    -------
    dict
        Dictionary with the keys ``lat_min``, ``lat_max``, ``lon_min`` and
        ``lon_max``. Every value is ``None`` until a rectangle has been drawn;
        the draw callback then updates this same dictionary in place.
    """
    # Shared result object, filled in by the draw callback below
    bbox_coords = dict.fromkeys(("lat_min", "lat_max", "lon_min", "lon_max"))
    # Holder for the rectangle layer currently shown on the map (None = nothing drawn yet)
    overlay = {"rect": None}

    coords_panel = widgets.Output()
    leaflet_map = Map(center=center, zoom=zoom,
                      layout=widgets.Layout(width='70%', height='500px'))

    # Draw control: the rectangle gets a style, all other shapes get empty options
    other_shapes = {shape: {} for shape in ("polygon", "circle", "polyline", "marker")}
    draw_control = DrawControl(
        rectangle={"shapeOptions": {"color": "#0000FF"}},
        **other_shapes
    )

    def handle_draw(self, action, geo_json):
        # First ring of the drawn geometry; each point is read as (lon, lat)
        ring = geo_json['geometry']['coordinates'][0]
        lon_values, lat_values = [], []
        for point in ring:
            lon_values.append(point[0])
            lat_values.append(point[1])

        # Store the extremes of the drawn shape in the shared dictionary
        bbox_coords.update(
            lon_min=min(lon_values), lon_max=max(lon_values),
            lat_min=min(lat_values), lat_max=max(lat_values),
        )

        # Drop the rectangle layer from the previous draw, if any
        previous = overlay["rect"]
        if previous:
            leaflet_map.remove_layer(previous)

        # Show the new selection as a rectangle layer
        lower_corner = [bbox_coords["lat_min"], bbox_coords["lon_min"]]
        upper_corner = [bbox_coords["lat_max"], bbox_coords["lon_max"]]
        overlay["rect"] = Rectangle(
            bounds=[lower_corner, upper_corner],
            color="blue",
            fill_opacity=0.1
        )
        leaflet_map.add_layer(overlay["rect"])

        # Refresh the coordinate table next to the map
        with coords_panel:
            coords_panel.clear_output()
            table = widgets.HTML(
                value=f"""
                <table style="border:1px solid black; border-collapse: collapse;">
                <tr><th style="border:1px solid black; padding:5px">Parameter</th>
                    <th style="border:1px solid black; padding:5px">Value</th></tr>
                <tr><td style="border:1px solid black; padding:5px">lat_min</td><td style="border:1px solid black; padding:5px">{bbox_coords['lat_min']:.4f}</td></tr>
                <tr><td style="border:1px solid black; padding:5px">lat_max</td><td style="border:1px solid black; padding:5px">{bbox_coords['lat_max']:.4f}</td></tr>
                <tr><td style="border:1px solid black; padding:5px">lon_min</td><td style="border:1px solid black; padding:5px">{bbox_coords['lon_min']:.4f}</td></tr>
                <tr><td style="border:1px solid black; padding:5px">lon_max</td><td style="border:1px solid black; padding:5px">{bbox_coords['lon_max']:.4f}</td></tr>
                </table>
                """
            )
            display(table)

    draw_control.on_draw(handle_draw)
    leaflet_map.add_control(draw_control)

    # Page layout: title on top, then map and coordinate table side by side
    display(widgets.VBox([
        widgets.HTML("<h3>Select the Area of Interest</h3>"),
        widgets.HBox([leaflet_map, coords_panel]),
    ]))

    return bbox_coords  # stays accessible (and gets updated) after drawing

# Launch the selector; the returned dictionary is filled in once a rectangle is drawn
bbox_coords = interactive_bbox_ipy(
    zoom=5,
    center=(7, 0),
)


### User Explanation:

On the left side of the map, you will see a small black square. By clicking on it, you can draw a rectangle on the map to select your area of interest.
The latitude (lat) and longitude (lon) coordinates of this area will automatically appear in a table on the right side of the map.
If you want to select a new region, simply rerun the cell above.

# Step 3: Import GFS Data for the Selected Area

With this function, we will plot GFS model precipitation forecasts:
- On the left: a map covering the full extent set by the `lat_min`, `lat_max`, `lon_min`, and `lon_max` arguments.
- On the right: a zoomed-in view of the area of interest (Bounding Box) you selected on the interactive map.

In [None]:
# Echo the bounding box chosen on the map, in the order lat_min, lat_max, lon_min, lon_max
print('Selected lat/lon coordinates from the interactive map:')
print(*(bbox_coords[key] for key in ("lat_min", "lat_max", "lon_min", "lon_max")))

def plot_gfs_forecast(hindasting, hours_ahead,
                      lat_min=4, lat_max=16, lon_min=-6, lon_max=3,
                      bbox_coords=None):
    """
    Plot one GFS precipitation forecast as two side-by-side maps.

    - Left: map limited to ``lon_min``/``lon_max``/``lat_min``/``lat_max``
    - Right: zoomed-in view on a user-defined bounding box (BBox)

    Parameters
    ----------
    hindasting : datetime.datetime
        Date/time used to pick the model run. The date selects the day and the
        hour is floored to the latest of 00, 06, 12 or 18.
    hours_ahead : int
        Forecast lead time in hours; floored to a multiple of 3.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the left panel.
    bbox_coords : dict, optional
        Dictionary with the keys ``lat_min``, ``lat_max``, ``lon_min`` and
        ``lon_max`` giving the extent of the right panel. When it is ``None``,
        or any of its values is still ``None`` (no rectangle drawn yet), the
        right panel falls back to the left panel's extent.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. Problems while opening or
        plotting the dataset are printed instead of raised.
    """
    # Latest initialization hour (0, 6, 12 or 18 UTC) not after the requested hour
    init_hour = (hindasting.hour // 6) * 6
    init_time_str = f"{init_hour:02d}"
    date_str = hindasting.strftime("%Y%m%d")

    # Floor the lead time to a multiple of 3 (the files are addressed in 3-hour steps);
    # int() lets a value such as 12.0 pass the integer format below
    fh_rounded = (int(hours_ahead) // 3) * 3
    fh_str = f"f{fh_rounded:03d}"

    # Build GFS dataset URL
    dataset_url = (
        f"https://thredds.rda.ucar.edu/thredds/dodsC/files/g/d084001/"
        f"{date_str[:4]}/{date_str}/gfs.0p25.{date_str}{init_time_str}.{fh_str}.grib2"
    )

    # Resolve the extent of each panel up front so that a bounding box that was
    # never drawn is not later misreported as a dataset access error
    full_extent = [lon_min, lon_max, lat_min, lat_max]
    bbox_keys = ("lon_min", "lon_max", "lat_min", "lat_max")
    if bbox_coords is None:
        zoom_extent = full_extent
    elif any(bbox_coords.get(key) is None for key in bbox_keys):
        print("No complete bounding box selected; the right panel shows the full extent.")
        zoom_extent = full_extent
    else:
        zoom_extent = [bbox_coords[key] for key in bbox_keys]

    # Broad handler kept on purpose: in the notebook any failure is reported
    # as a message instead of a traceback
    try:
        # The context manager closes the remote dataset even when we return early
        with xr.open_dataset(dataset_url, engine="netcdf4") as dataset:
            # Check if precipitation variable exists
            if "Precipitation_rate_surface" not in dataset.data_vars:
                print(f"'Precipitation_rate_surface' not available in {dataset_url}")
                return

            # Convert precipitation to mm/h (original in kg/m²/s)
            precip = dataset["Precipitation_rate_surface"] * 3600
            precip.attrs["units"] = "mm/h"

            # Check if data is empty
            if precip.size == 0 or precip.isnull().all():
                print(f"No precipitation data available in {dataset_url}")
                return

            # Discrete color scale: one color per interval between consecutive bounds
            bounds = [0, 0.5, 2, 5, 10, 15, 25, 40, 100]
            colors = ["none", "#add8e6", "#0000ff", "#00ff00",
                      "#ffff00", "#ffa500", "#ff0000", "#ff69b4"]
            cmap = ListedColormap(colors)
            norm = BoundaryNorm(bounds, cmap.N)

            # Two side-by-side panels that differ only in extent and title
            fig = plt.figure(figsize=(16, 6))
            gs = fig.add_gridspec(1, 2, width_ratios=[1, 1], wspace=0.3)
            panels = [
                (full_extent,
                 f"Full Map\nGFS {date_str} init {init_time_str} UTC +{fh_rounded}h"),
                (zoom_extent, "Zoom on Selected Area (BBox)"),
            ]
            for index, (extent, title) in enumerate(panels):
                ax = fig.add_subplot(gs[index], projection=ccrs.PlateCarree())
                precip.plot(ax=ax, transform=ccrs.PlateCarree(),
                            cmap=cmap, norm=norm, add_colorbar=False)
                ax.coastlines()
                ax.add_feature(cfeature.BORDERS)
                ax.set_extent(extent)
                ax.set_title(title)

                # Lat/lon gridlines, labelled on the left and bottom only
                gl = ax.gridlines(draw_labels=True, linewidth=1, color='gray',
                                  alpha=0.5, linestyle='--')
                gl.top_labels = False
                gl.right_labels = False

            # Shared discrete colorbar in its own axes to the right of both panels
            cax = fig.add_axes([0.92, 0.25, 0.02, 0.5])
            cbar = ColorbarBase(cax, cmap=cmap, norm=norm,
                                boundaries=bounds, spacing="uniform",
                                ticks=bounds[:-1])
            # One label per tick; the last interval (40-100) is labelled as open-ended
            cbar.set_ticklabels(["0", "0.5", "2", "5", "10", "15", "25", ">40"])
            cbar.set_label("Precipitation [mm/h]")

            plt.show()

    except Exception as e:
        print(f"Error accessing dataset: {dataset_url}\n{e}")


In [None]:
# Example call of plot_gfs_forecast
hindasting = datetime.datetime(year=2024, month=10, day=6, hour=0)  # a date/time when the GFS forecast was available
hours_ahead = 12  # forecast lead time in hours

plot_gfs_forecast(
    hindasting,
    hours_ahead,
    lat_min=4,
    lat_max=16,
    lon_min=-6,
    lon_max=3,
    bbox_coords=bbox_coords,
)

# Step 4: Import GFS Data for Multiple Forecast Hours

In this step, we will retrieve and visualize GFS forecasts for multiple hours ahead from the chosen initialization time.
You can select a list of forecast lead times, for example [3, 6, 12, 18, 24], to see the forecast 3h, 6h, 12h, 18h, and 24h after initialization.

The map will be zoomed in on the area you selected using the interactive Bounding Box.

Colors indicate precipitation intensity in mm/h.

In [None]:
def plot_gfs_forecast_series(hindasting, lead_hours_list, bbox_coords):
    """
    Plot one precipitation map per forecast lead time, zoomed on the bounding box.

    Parameters
    ----------
    hindasting : datetime.datetime
        Date/time used to pick the model run. The date selects the day and the
        hour is floored to the latest of 00, 06, 12 or 18.
    lead_hours_list : sequence of int
        Forecast lead times in hours; each one is floored to a multiple of 3.
    bbox_coords : dict
        Dictionary with the keys ``lat_min``, ``lat_max``, ``lon_min`` and
        ``lon_max`` giving the extent shown in every panel.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If there is nothing to plot
        (no lead times, or no bounding box drawn yet) a message is printed
        and no figure is created.
    """
    n_panels = len(lead_hours_list)
    if n_panels == 0:
        # plt.subplots cannot build a grid with zero columns
        print("No forecast lead times provided; nothing to plot.")
        return

    bbox_keys = ("lon_min", "lon_max", "lat_min", "lat_max")
    if any(bbox_coords.get(key) is None for key in bbox_keys):
        print("No bounding box selected; draw a rectangle on the interactive map first.")
        return
    extent = [bbox_coords[key] for key in bbox_keys]

    # Discrete color scale: one color per interval between consecutive bounds
    bounds = [0, 0.5, 2, 5, 10, 15, 25, 40, 100]
    colors = ["none", "#add8e6", "#0000ff", "#00ff00",
              "#ffff00", "#ffa500", "#ff0000", "#ff69b4"]
    cmap = ListedColormap(colors)
    norm = BoundaryNorm(bounds, cmap.N)

    # squeeze=False always returns a 2-D array, so a single panel needs no special case
    fig, axes = plt.subplots(1, n_panels, figsize=(4*n_panels, 4),
                             subplot_kw={'projection': ccrs.PlateCarree()},
                             squeeze=False)
    axes = axes.ravel()

    # The model run is the same for every lead time, so resolve it once
    init_hour = (hindasting.hour // 6) * 6
    init_time_str = f"{init_hour:02d}"
    date_str = hindasting.strftime("%Y%m%d")

    print("Retrieving GFS forecasts...")

    for fh, ax in zip(tqdm(lead_hours_list, desc="Forecast Steps"), axes):
        # Floor the lead time to a multiple of 3 (the files are addressed in 3-hour steps)
        fh_rounded = (int(fh) // 3) * 3
        fh_str = f"f{fh_rounded:03d}"

        # Build URL for GFS GRIB file
        dataset_url = (
            f"https://thredds.rda.ucar.edu/thredds/dodsC/files/g/d084001/"
            f"{date_str[:4]}/{date_str}/gfs.0p25.{date_str}{init_time_str}.{fh_str}.grib2"
        )

        # `ax` is bound before the try block so the error handler always hides
        # the panel that belongs to the failing lead time
        try:
            # The context manager closes the remote dataset after each panel
            with xr.open_dataset(dataset_url, engine="netcdf4") as ds:
                if "Precipitation_rate_surface" not in ds.data_vars:
                    print(f"Precipitation variable not available in {dataset_url}")
                    ax.set_visible(False)  # avoid leaving an empty frame
                    continue

                # Extract and convert precipitation to mm/h
                precip = ds["Precipitation_rate_surface"] * 3600

                precip.plot(ax=ax, transform=ccrs.PlateCarree(),
                            cmap=cmap, norm=norm, add_colorbar=False)
                ax.coastlines()
                ax.add_feature(cfeature.BORDERS)

                # Set zoom area according to selected BBox
                ax.set_extent(extent)
                ax.set_title(f"+{fh_rounded}h")

                # Add gridlines with lat/lon labels on the left and bottom only
                gl = ax.gridlines(draw_labels=True, linewidth=0.5, color='gray',
                                  alpha=0.5, linestyle='--')
                gl.top_labels = False
                gl.right_labels = False

        except Exception as e:
            # Best effort: report the failing step and keep the other panels
            print(f"Error accessing {dataset_url}: {e}")
            ax.set_visible(False)

    # Shared colorbar for all panels
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
    cbar = ColorbarBase(cbar_ax, cmap=cmap, norm=norm, boundaries=bounds,
                        spacing='uniform', ticks=bounds[:-1])
    # One label per tick; the last interval (40-100) is labelled as open-ended
    cbar.set_ticklabels(["0", "0.5", "2", "5", "10", "15", "25", ">40"])
    cbar.set_label("Precipitation [mm/h]")

    # Leave the right-hand 10% of the figure free for the colorbar
    plt.tight_layout(rect=[0, 0, 0.9, 1])
    plt.show()


In [None]:
# Example call of plot_gfs_forecast_series: one panel per lead time of the same model run
hindasting = datetime.datetime(year=2024, month=10, day=6, hour=0)  # a date/time when the GFS forecast was available
lead_hours_list = [3, 12, 24, 36]  # forecast lead times (in hours) to visualize

plot_gfs_forecast_series(
    hindasting=hindasting,
    lead_hours_list=lead_hours_list,
    bbox_coords=bbox_coords,
)

# Step 5: Create a Graph of GFS Forecasted Precipitation

In this graph, the mean precipitation intensity (in mm/h) is calculated over the selected area for each GFS forecast step.
Additionally, using linear interpolation between the forecast steps, we obtain the cumulative mean precipitation (in mm) over time.

Note: Linear interpolation provides a general idea of the forecasted precipitation but does not accurately reflect the variations or uncertainties in the forecasts.

In [None]:
def plot_gfs_forecast_bars_cumulative(hindasting, lead_hours_list, bbox_coords):
    """
    Plot mean precipitation intensity over the BBox as bars, with cumulative precipitation.

    Axis labels and the title are in English; the legend entry of the bars is in French.

    Parameters
    ----------
    hindasting : datetime.datetime
        Date/time used to pick the model run. The date selects the day and the
        hour is floored to the latest of 00, 06, 12 or 18.
    lead_hours_list : sequence of int
        Forecast lead times in hours; each one is floored to a multiple of 3
        when building the file name.
    bbox_coords : dict
        Dictionary with the keys ``lat_min``, ``lat_max``, ``lon_min`` and
        ``lon_max``. Longitudes are wrapped into the 0-360 range before selecting.

    Returns
    -------
    pandas.DataFrame
        Columns ``leadtime_h``, ``avg_precip_mmph`` and ``cum_precip_mm``, one
        row per lead time. A lead time whose data could not be read gets NaN.
        An empty frame is returned (and nothing is plotted) when
        ``lead_hours_list`` is empty.

    Raises
    ------
    TypeError
        If a bounding-box value is still ``None`` (no rectangle drawn yet).
    """
    bbox_keys = ("lat_min", "lat_max", "lon_min", "lon_max")
    missing = [key for key in bbox_keys if bbox_coords.get(key) is None]
    if missing:
        raise TypeError(
            f"bbox_coords has no value for {missing}; "
            "draw a rectangle on the interactive map first."
        )

    lead_hours = list(lead_hours_list)
    if not lead_hours:
        print("No forecast lead times provided; nothing to plot.")
        return pd.DataFrame(columns=["leadtime_h", "avg_precip_mmph", "cum_precip_mm"])

    # Wrap longitudes into 0-360 to match the dataset's longitude coordinate
    lon_min = bbox_coords["lon_min"] % 360
    lon_max = bbox_coords["lon_max"] % 360
    lat_min = bbox_coords["lat_min"]
    lat_max = bbox_coords["lat_max"]

    def _select_bbox(field):
        """Return the part of *field* that lies inside the bounding box."""
        # Latitude slice runs from lat_max down to lat_min
        # (assumes latitude is stored north-to-south - TODO confirm)
        rows = field.sel(lat=slice(lat_max, lat_min))
        if lon_min <= lon_max:
            return rows.sel(lon=slice(lon_min, lon_max))
        # After wrapping, a box that crosses the prime meridian has
        # lon_min > lon_max (e.g. 354 and 3). A single slice would be empty,
        # so join the part east of lon_min with the part west of lon_max.
        return xr.concat(
            [rows.sel(lon=slice(lon_min, None)), rows.sel(lon=slice(None, lon_max))],
            dim="lon",
        )

    # The model run is the same for every lead time, so resolve it once
    init_hour = (hindasting.hour // 6) * 6
    init_time_str = f"{init_hour:02d}"
    date_str = hindasting.strftime("%Y%m%d")

    avg_precip_list = []
    for fh in tqdm(lead_hours, desc="Leadtimes"):
        # Floor the lead time to a multiple of 3 for the file name
        fh_rounded = (int(fh) // 3) * 3
        fh_str = f"f{fh_rounded:03d}"

        dataset_url = (
            f"https://thredds.rda.ucar.edu/thredds/dodsC/files/g/d084001/"
            f"{date_str[:4]}/{date_str}/gfs.0p25.{date_str}{init_time_str}.{fh_str}.grib2"
        )

        try:
            # The context manager closes the remote dataset after each step
            with xr.open_dataset(dataset_url, engine="netcdf4") as ds:
                if "Precipitation_rate_surface" not in ds.data_vars:
                    avg_precip_list.append(np.nan)
                    continue

                # Subset first, then convert to mm/h, so only the box is scaled
                bbox_precip = _select_bbox(ds["Precipitation_rate_surface"]) * 3600
                avg_precip_list.append(float(bbox_precip.mean().values))

        except Exception as e:
            # Best effort: keep the row, mark it as missing
            print(f"Error accessing {dataset_url}: {e}")
            avg_precip_list.append(np.nan)

    # DataFrame
    df = pd.DataFrame({
        "leadtime_h": lead_hours,
        "avg_precip_mmph": avg_precip_list
    })

    # Cumulative precipitation (mm): each step's mean intensity is applied until
    # the next lead time, starting from 0 at the first lead time. A NaN intensity
    # makes every later cumulative value NaN as well.
    cum_precip = [0.0]
    for start_h, end_h, rate in zip(lead_hours, lead_hours[1:], avg_precip_list):
        cum_precip.append(cum_precip[-1] + rate * (end_h - start_h))
    df["cum_precip_mm"] = cum_precip

    # Plot
    fig, ax1 = plt.subplots(figsize=(10, 5))
    ax1.bar(df["leadtime_h"], df["avg_precip_mmph"], color='skyblue', label="Intensité moyenne [mm/h]")
    ax1.set_xlabel("Forecast Hour")
    ax1.set_ylabel("Mean Precipitation Intensity [mm/h]")
    ax1.grid(axis='y', linestyle='--', alpha=0.7)
    ax1.set_xticks(lead_hours)
    ax1.set_xticklabels([f"+{h}h" for h in lead_hours])

    # Secondary y-axis for cumulative precipitation
    ax2 = ax1.twinx()
    if len(lead_hours) >= 2:
        # Linear interpolation for a smooth-looking cumulative curve
        interp_func = interp1d(df["leadtime_h"], df["cum_precip_mm"], kind="linear")
        leadtime_fine = np.linspace(min(lead_hours), max(lead_hours), 200)
        ax2.plot(leadtime_fine, interp_func(leadtime_fine), color='darkblue',
                 label="Cumulative Precipitation [mm]")
    else:
        # interp1d needs at least two points; show the single value as a marker
        ax2.plot(df["leadtime_h"], df["cum_precip_mm"], marker='o', color='darkblue',
                 label="Cumulative Precipitation [mm]")
    ax2.set_ylabel("Cumulative Precipitation [mm]")

    # Combined legend
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines + lines2, labels + labels2, loc="upper left")

    plt.title(f"GFS Forecast: Mean and Cumulative Precipitation over BBox for Initialization {hindasting}")
    plt.show()

    return df


In [None]:
# Example call of plot_gfs_forecast_bars_cumulative; the returned table is kept in df_GFS
hindasting = datetime.datetime(year=2024, month=10, day=6, hour=0)
lead_hours_list = [3, 12, 24, 36]

df_GFS = plot_gfs_forecast_bars_cumulative(
    hindasting=hindasting,
    lead_hours_list=lead_hours_list,
    bbox_coords=bbox_coords,
)

# Congratulations! You have completed the tutorial and are now able to import, visualize, and quantify GFS data.