In [None]:
%matplotlib inline

import os
from os.path import join as pjoin
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn as sns

from metpy.units import units
from metpy.calc import wind_direction, wind_speed

import pyproj

# Shared geodesic calculator on the WGS84 ellipsoid; load_ibtracs_df() uses
# its inverse solution to get the azimuth and distance between track points.
geodesic = pyproj.Geod(ellps="WGS84")


# Directory holding the IBTrACS input file. The default is machine-specific,
# so it can be overridden with the TC_DATA_DIR environment variable instead
# of editing the notebook.
DATA_DIR = os.environ.get("TC_DATA_DIR", r"C:\WorkSpace\data\tc")
# Output directory, relative to the working directory. Built with
# os.path.join so the path separator is correct on every platform (the
# resulting string is unchanged on Windows).
OUTPUT_DIR = os.path.join("..", "data", "frequency")
# Landing page of the source dataset (provenance).
SOURCE = "https://www.ncei.noaa.gov/products/international-best-track-archive"

In [None]:
def load_ibtracs_df():
    """
    Helper function to load the IBTrACS database and derive storm motion.

    Column names are mapped to the same as the BoM dataset to minimise
    the changes elsewhere in the code.

    Processing steps:

    * read selected columns of ``ibtracs.since1980.list.v04r00.csv`` from
      ``DATA_DIR``;
    * keep only records with a parseable timestamp at 00, 06, 12 or 18
      hours that belong to a ``"main"`` track;
    * scale ``MAX_WIND_SPD`` by 0.5144 (conversion to m/s);
    * compute the forward-difference translation velocity between each
      record and the next record of the same storm.

    :returns: :class:`pandas.DataFrame` holding the renamed IBTrACS columns
        (plus the pre-filter row number in ``index``) and three derived
        columns: ``u`` and ``v`` (eastward / northward translation speed in
        m/s) and ``dt`` (hours to the following record). Records with
        ``u == 0`` are removed, which includes the final record of every
        storm.
    """
    dataFile = os.path.join(DATA_DIR, "ibtracs.since1980.list.v04r00.csv")
    # skiprows=[1] drops the second line of the file (presumably the units
    # row that follows the header - confirm against the file).
    # keep_default_na=False with na_values=[" "] means only a single blank
    # is read as missing (presumably so the "NA" basin code survives).
    # NOTE(review): parse_dates=[1] looks like it targets the second file
    # column rather than ISO_TIME - confirm. TM is parsed explicitly below,
    # so the result does not depend on it.
    df = pd.read_csv(
        dataFile,
        skiprows=[1],
        usecols=[0, 1, 3, 5, 6, 8, 9, 11, 13, 23],
        keep_default_na=False,
        na_values=[" "],
        parse_dates=[1],
        date_format="%Y-%m-%d %H:%M:%S",
    )
    df = df.rename(
        columns={
            "SID": "DISTURBANCE_ID",
            "ISO_TIME": "TM",
            "WMO_WIND": "MAX_WIND_SPD",
            "WMO_PRES": "CENTRAL_PRES",
            "USA_WIND": "MAX_WIND_SPD",
        }
    )

    # Unparseable timestamps become NaT and are filtered out below.
    df["TM"] = pd.to_datetime(
        df["TM"], format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Keep valid timestamps on the 6-hourly synoptic times (to match
    # sub-daily ERA data). IBTrACS also includes spur tracks (bits of tracks
    # that are different to the official) - only "main" records are kept.
    keep = (
        df["TM"].notna()
        & df["TM"].dt.hour.isin([0, 6, 12, 18])
        & (df["TRACK_TYPE"] == "main")
    )
    # A single boolean selection followed by reset_index() yields a fresh
    # frame, so the column assignments below never write to a slice. The old
    # row labels are retained in an "index" column, as before.
    df = df.loc[keep].reset_index()
    df["SEASON"] = df["SEASON"].astype(int)

    # Convert max wind speed to m/s for consistency
    df["MAX_WIND_SPD"] = df["MAX_WIND_SPD"] * 0.5144

    n_records = len(df)
    lon = df["LON"].to_numpy()
    lat = df["LAT"].to_numpy()

    # Forward azimuth (degrees) and distance (metres) from each record to
    # the next one in the table.
    fwd_azimuth, _, distances = geodesic.inv(
        lon[:-1], lat[:-1], lon[1:], lat[1:])
    fwd_azimuth = np.asarray(fwd_azimuth, dtype=float)
    distances = np.asarray(distances, dtype=float)

    # Hours to the next record. Dividing by a timedelta64 unit keeps the
    # result correct whatever resolution the datetime column is stored in.
    hours = np.zeros(n_records)
    hours[:-1] = np.diff(df["TM"].to_numpy()) / np.timedelta64(1, "h")

    # metres / seconds -> m/s, split into eastward (u) and northward (v)
    # components using the forward azimuth (clockwise from north).
    speed = distances / (hours[:-1] * 3600.0)
    heading = np.deg2rad(fwd_azimuth)
    u = np.zeros(n_records)
    v = np.zeros(n_records)
    u[:-1] = np.sin(heading) * speed
    v[:-1] = np.cos(heading) * speed

    # A record whose successor belongs to a different storm (or that has no
    # successor) has no valid forward difference, so zero its velocity.
    # Comparing neighbouring IDs also covers storms whose rows are not
    # contiguous in the table.
    storm_id = df["DISTURBANCE_ID"]
    is_last_of_storm = (storm_id != storm_id.shift(-1)).to_numpy()
    u[is_last_of_storm] = 0
    v[is_last_of_storm] = 0

    df["u"] = u
    df["v"] = v
    df["dt"] = hours

    # Drop the zeroed end-of-storm records.
    # NOTE(review): this also drops any record whose zonal speed is exactly
    # zero (e.g. a stationary storm) - confirm that is intended.
    df = df[df.u != 0].copy()
    print(f"Number of records: {len(df)}")
    return df


In [None]:
# Build the filtered track table, then collect the basin codes present in it.
df = load_ibtracs_df()
basins = pd.unique(df["BASIN"])

In [None]:
# Attach units once so both MetPy calls share the same inputs.
u_ms = df["u"].to_numpy() * units("m/s")
v_ms = df["v"].to_numpy() * units("m/s")

# Store plain float magnitudes: assigning a pint Quantity straight into a
# DataFrame column relies on implicit unit stripping.
df["speed"] = wind_speed(u_ms, v_ms).m_as("m/s")
# NOTE(review): wind_direction defaults to the meteorological "from"
# convention as far as I can tell - confirm that is what is wanted for a
# storm's direction of motion.
df["direction"] = wind_direction(u_ms, v_ms).m_as("degree")

In [None]:
# One density histogram of zonal translation speed per basin, wrapped onto
# rows of three panels, each with a background grid.
g = sns.displot(data=df, x="u", col="BASIN", col_wrap=3, stat="density")
for ax in g.axes.flat:
    ax.grid(True)

In [None]:
# Summary statistics of zonal translation speed for each basin.
df["u"].groupby(df["BASIN"]).describe()

In [None]:
# Two stacked panels sharing the x axis: per-basin box plots on top and
# kernel density estimates (not extended past the data, cut=0) below.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 6))

sns.boxplot(data=df, x="u", y="BASIN", hue="BASIN", orient="h", ax=axes[0])
sns.kdeplot(data=df, x="u", hue="BASIN", cut=0, ax=axes[1])

# Same grid and axis label on both panels.
for ax in axes:
    ax.grid()
    ax.set_xlabel("Zonal speed [m/s]")
fig.tight_layout()

The South Pacific basin is the only basin where the mean zonal translation speed is positive (i.e. eastwards). Most basins are right-skewed, consistent with moderate westward motion at low latitudes and rapid eastward acceleration in the mid-latitude westerlies at higher latitudes.

In [None]:
# Number of non-missing values in every column, per basin.
df.groupby(by="BASIN").count()

In [None]:
# Two stacked panels sharing the x axis: per-basin box plots on top and
# kernel density estimates (not extended past the data, cut=0) below.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 6))

sns.boxplot(data=df, x="v", y="BASIN", hue="BASIN", orient="h", ax=axes[0])
sns.kdeplot(data=df, x="v", hue="BASIN", cut=0, ax=axes[1])

# Same grid and axis label on both panels.
for ax in axes:
    ax.grid()
    ax.set_xlabel("Meridional speed [m/s]")
fig.tight_layout()

The distribution of meridional translation speed in the South Pacific is left-skewed, more so than in the South Indian or any other basin.