“Je télécharge ERA5 sur une zone et une période définies par quelques cyclones représentatifs, puis j’extrais tous les cyclones qui passent dans cette zone.”

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

import plotly.io as pio
pio.renderers.default = "notebook"

import sys
sys.path.append("../")
from building_era5 import load_IBTrACS

import matplotlib.pyplot as plt

# IBTrACS

In [None]:
# Directory holding the processed CSV files; it is created when missing.
PROCESSED_DIR = Path("../../data/processed")
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

path_ibtracs = PROCESSED_DIR / "ibtracs_usa_20251216.csv"
df_ibtracs = load_IBTrACS(path_ibtracs, years=[2022, 2024])

# Time stamps that cannot be parsed become NaT and their rows are discarded.
df_ibtracs["time_stamp"] = pd.to_datetime(df_ibtracs["time_stamp"], errors="coerce")
df_ibtracs = df_ibtracs.dropna(subset=["time_stamp"])

# Calendar year of every remaining row, computed once and reused below.
obs_year = df_ibtracs["time_stamp"].dt.year
years = sorted(obs_year.unique())

# One bar chart per year: number of rows in each basin, largest first.
for year in years:
    df_y = df_ibtracs[obs_year == year]
    counts = df_y.groupby("basin").size().sort_values(ascending=False)

    fig, ax = plt.subplots(figsize=(8, 4))
    counts.plot(kind="bar", ax=ax)
    ax.set_title(f"Number of IBTrACS observations per basin – {year}")
    ax.set_ylabel("Number of observations")
    ax.set_xlabel("Basin")
    ax.grid(axis="y")
    fig.tight_layout()
    plt.show()


# ERA5

In [None]:
# CSV file read into `df` below.
PATH = "../../data/processed/ibtracs_era5_20251216.csv"

# Read the table, parsing the `time_stamp` column as datetimes.
df = pd.read_csv(PATH, parse_dates=["time_stamp"])

# Quick overview: table dimensions and the calendar years it covers.
available_years = sorted(df["time_stamp"].dt.year.unique())
print("Dataset shape:", df.shape)
print("Years:", available_years)

In [None]:
# Column names of the meteorological variables that summarize_year()
# describes and check_nans_year() scans for missing values.
# Kept as a list: it is used directly as a DataFrame column selector.
METEO_VARS = [
    "10m_u_component_of_wind",
    "10m_v_component_of_wind",
    "2m_temperature",
    "mean_sea_level_pressure",
]

def summarize_year(df, year):
    """Print a textual summary of the rows of ``df`` that fall in ``year``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a datetime ``time_stamp`` column, a ``sid`` column and
        the columns listed in ``METEO_VARS``.
    year : int
        Calendar year to summarise.

    Returns
    -------
    None
        Everything is printed (or shown with ``display``); when the year
        has no rows only the header and ``No data`` are printed.
    """
    subset = df.loc[df["time_stamp"].dt.year == year]

    # The header is identical whether or not the year has data.
    print(f"\n===== SUMMARY {year} =====")
    if subset.empty:
        print("No data")
        return

    stamps = subset["time_stamp"]
    print("Observations:", len(subset))
    print("Cyclones:", subset["sid"].nunique())
    print("Period:", stamps.min(), "→", stamps.max())

    # Distribution of the number of rows recorded for each cyclone id.
    rows_per_cyclone = subset.groupby("sid").size()
    print("\nObservations per cyclone:")
    print(rows_per_cyclone.describe())

    print("\nMeteorological variables:")
    display(subset[METEO_VARS].describe())


In [None]:
# Print the summary for every calendar year present in the table, in
# ascending order.
years_present = sorted(df["time_stamp"].dt.year.unique())
for year in years_present:
    summarize_year(df, year)

In [None]:
def check_nans_year(df, year):
    """Print the fraction of missing values per meteorological column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a datetime ``time_stamp`` column and the columns listed
        in ``METEO_VARS``.
    year : int
        Calendar year to inspect.

    Returns
    -------
    None
        The per-column NaN fraction is printed; ``No data`` is printed
        instead when the year has no rows.
    """
    in_year = df["time_stamp"].dt.year == year
    rows = df.loc[in_year]

    print(f"\nNaN check {year}")
    if rows.empty:
        print("No data")
        return

    # Mean of the boolean NaN mask = share of missing values per column.
    missing_share = rows[METEO_VARS].isna().mean()
    print(missing_share)


# Run the NaN check for every calendar year present in the table, in
# ascending order.
years_to_check = sorted(df["time_stamp"].dt.year.unique())
for year in years_to_check:
    check_nans_year(df, year)


In [None]:
def plot_cyclone_tracks_year(df, year):
    """Show the cyclone trajectories of one calendar year on a world map.

    One line is drawn per ``sid``. Hovering over a point shows the values
    of that same row: cyclone name, time stamp, position, the ``wind`` and
    ``pressure`` columns (labelled IBTrACS) and the four meteorological
    columns (labelled ERA5).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a datetime ``time_stamp`` column plus ``lon``, ``lat``,
        ``sid``, ``name``, ``wind``, ``pressure``, ``2m_temperature``,
        ``mean_sea_level_pressure``, ``10m_u_component_of_wind`` and
        ``10m_v_component_of_wind``.
    year : int
        Calendar year to plot.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``. When the year has no
        rows a message is printed and nothing is drawn.
    """
    # Order matters: the hover template addresses these by position
    # (customdata[0] .. customdata[7]).
    hover_columns = [
        "name",
        "time_stamp",
        "wind",
        "pressure",
        "2m_temperature",
        "mean_sea_level_pressure",
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
    ]

    df_y = (
        df[df["time_stamp"].dt.year == year]
        .sort_values("time_stamp")
        .copy()
    )

    if df_y.empty:
        print(f"No data for {year}")
        return

    # `color="sid"` makes plotly express build one trace per cyclone. The
    # hover columns are passed through `custom_data` so that each trace
    # receives only its own rows. Setting `customdata` afterwards with
    # `update_traces` would attach the full-year array to every trace and
    # show values belonging to other cyclones.
    fig = px.line_geo(
        df_y,
        lon="lon",
        lat="lat",
        color="sid",
        hover_name="name",
        custom_data=hover_columns,
        projection="natural earth",
        title=f"Cyclone trajectories – {year}",
    )

    fig.update_traces(
        line=dict(width=2),
        hovertemplate=
        "<b>Cyclone:</b> %{customdata[0]}<br>"
        "<b>Date:</b> %{customdata[1]}<br>"
        "<b>Lat:</b> %{lat:.2f}°<br>"
        "<b>Lon:</b> %{lon:.2f}°<br><br>"
        "<b>IBTrACS</b><br>"
        "Wind: %{customdata[2]} kt<br>"
        "Pressure: %{customdata[3]} hPa<br><br>"
        "<b>ERA5</b><br>"
        "2m Temp: %{customdata[4]:.1f} K<br>"
        "MSLP: %{customdata[5]:.0f} Pa<br>"
        "U10: %{customdata[6]:.1f} m/s<br>"
        "V10: %{customdata[7]:.1f} m/s"
        "<extra></extra>",
    )

    fig.update_layout(
        legend_title_text="Cyclone ID",
        margin=dict(l=0, r=0, t=40, b=0),
        hovermode="closest",
    )

    fig.show()


In [None]:
# Draw one trajectory map per year, in chronological order.
for year in (2022, 2023, 2024):
    plot_cyclone_tracks_year(df, year)

Apparent name changes along some cyclone trajectories are not inconsistencies but result from IBTrACS aggregating multiple operational agencies and basin-specific naming conventions. Cyclone names are reused and may change when a system crosses basin boundaries, while the SID remains the unique identifier.