In [None]:
%matplotlib inline

from os.path import join as pjoin
import pandas as pd
import numpy as np
import geopandas as gpd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from datetime import datetime
import seaborn as sns

from matplotlib import patheffects
# White 3-point stroke path effect; passed to the line plots in `plotEvent`
# through their `path_effects` argument.
pe = patheffects.withStroke(foreground="white", linewidth=3)
# Use Arial for sans-serif text in every figure.
matplotlib.rcParams['font.sans-serif'] = 'Arial'
# Select seaborn's 'talk' plotting context for all subsequent figures.
sns.set_context('talk')

Load the classified storm data. This is *only* the visually classified events. The full event set is plotted elsewhere.

In [None]:
# Build the data directories with `pjoin` rather than hard-coding the Windows
# separator: a literal "..\data" is not a usable relative path on POSIX
# systems, whereas joining the components yields the right form everywhere
# (and the identical string on Windows).
BASEDIR = pjoin("..", "data")
DATADIR = pjoin(BASEDIR, "training")
eventFile = pjoin(DATADIR, "visual_storm_types.csv")

# Only the columns at positions 1-3 of the file are read. They must include
# `stnNum`, `date` and `stormType`, which are typed/parsed here by name.
stormdf = pd.read_csv(
    eventFile,
    usecols=[1, 2, 3],
    parse_dates=['date'],
    dtype={
        'stnNum': int,
        'stormType': 'category',
    },
)

Plot a bar chart of the visually classified storm counts. The columns are re-ordered to match the grouping of non-convective, convective and error storm types.

In [None]:
# Display order of the storm types along the x-axis of the bar chart.
order = [
    "Synoptic storm",
    "Synoptic front",
    "Storm-burst",
    "Thunderstorm",
    "Front up",
    "Front down",
    "Spike",
    "Unclassified",
]
colors = sns.color_palette("viridis", n_colors=8)

# Count the events of each type, arrange them in display order and plot.
stormCounts = stormdf.stormType.value_counts().loc[order]
stormCounts.plot(kind='bar', color=colors)

countsPlotFile = pjoin(DATADIR, "plots", "visual_storm_counts.jpg")
plt.savefig(countsPlotFile, bbox_inches="tight", dpi=600)

In [None]:
# Show the number of visually classified events per storm type, listed in the
# same order as the bar chart.
stormdf.stormType.value_counts().loc[order]

Load all the observed data for all HQ stations and concatenate it into a single dataframe. We then create a new attribute (called `idx`) that is a tuple of the MultiIndex of the dataframe (based on station and date).

In [None]:
# Station table indexed by station number; its index supplies the station
# numbers that are iterated over when loading the per-station event files.
stndf = pd.read_csv(pjoin("../data", 'hqstations.csv'), index_col="stnNum")

def loadData(stnNum):
    """
    Read the pickled event data for one station from `DATADIR/events`.

    :param stnNum: Station number; zero-padded to six digits to form the file name
    :returns: DataFrame indexed by (`stnNum`, `date`), with the index the
        data had on disk kept as an ordinary column
    """
    eventFile = pjoin(DATADIR, "events", f"{stnNum:06d}.pkl")
    events = pd.read_pickle(eventFile)
    events = events.assign(date=pd.to_datetime(events['date']), stnNum=stnNum)
    # Move the stored index into a column, then key each row by station
    # number and storm date:
    return events.reset_index().set_index(['stnNum', 'date'])

# One dataframe per station, stacked into a single frame.
dflist = [loadData(stn) for stn in stndf.index]
alldf = pd.concat(dflist)

# Keep the (stnNum, date) index value as a column so events can be matched
# against the classified storm list.
alldf['idx'] = alldf.index

We create a similar attribute in the list of storm types, then create lists of indices for each storm type.

In [None]:
# (stnNum, date) tuple for every classified storm.
stormdf['idx'] = stormdf[['stnNum', 'date']].apply(tuple, axis=1)

# Event keys for each visually classified storm type.
synidx, syfidx, sbidx = (
    stormdf.loc[stormdf['stormType'] == stormType, 'idx']
    for stormType in ("Synoptic storm", "Synoptic front", "Storm-burst")
)

tsidx, fuidx, fdidx = (
    stormdf.loc[stormdf['stormType'] == stormType, 'idx']
    for stormType in ("Thunderstorm", "Front up", "Front down")
)

ucidx, spidx = (
    stormdf.loc[stormdf['stormType'] == stormType, 'idx']
    for stormType in ("Unclassified", "Spike")
)

Then we can select the time series of each event that is classified as the selected storm type. We then group by the time difference (i.e. the time before/after the peak gust) and take the mean of all the values. This creates eight separate dataframes - one for each storm type - which hold the mean time series for each storm type. 

In [None]:
# Rows of the observed data whose (stnNum, date) key belongs to each storm type.
synevents, syfevents, sbevents = (
    alldf[alldf.idx.isin(keys.values)] for keys in (synidx, syfidx, sbidx)
)

tsevents, fuevents, fdevents = (
    alldf[alldf.idx.isin(keys.values)] for keys in (tsidx, fuidx, fdidx)
)

ucevents, spevents = (
    alldf[alldf.idx.isin(keys.values)] for keys in (ucidx, spidx)
)


In [None]:

# Mean of every numeric column at each `tdiff` value, one frame per storm type.
meansyn, meansyf, meansb = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (synevents, syfevents, sbevents)
)

meants, meanfu, meanfd = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (tsevents, fuevents, fdevents)
)

meanuc, meansp = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (ucevents, spevents)
)

The following code plots a multipanel figure with the mean time series of the eight storm types. There's some stuffing around to get multiple y-axes on each panel, and the labelling is done across multiple axes.

In [None]:
def plotEvent(df, ax, wlims=(0, 100), tlims=(-4, 4), plims=(-2, 2), pos='l'):
    """
    Plot the time series of one event (or a mean event) onto an axis.

    The wind gust is drawn on `ax` itself. Two twinned y-axes are created on
    top of it: one shared by the temperature and dew point anomalies, and one
    for the station pressure anomaly. The x-axis is fixed to -60..60 in the
    units of `tdiff`.

    :param df: DataFrame with the columns `tdiff`, `windgust`, `tempanom`,
        `dpanom` and `stnpanom`
    :param ax: `matplotlib.Axes` on which the wind gust is drawn
    :param wlims: y-limits of the wind gust axis
    :param tlims: y-limits of the temperature / dew point anomaly axis
    :param plims: y-limits of the pressure anomaly axis
    :param pos: `'l'` labels the wind gust axis and hides the right-hand
        ticks of the twinned axes; any other value labels and colours the
        twinned axes and offsets the pressure axis to the right
    """
    axt = ax.twinx()
    axp = ax.twinx()
    # Raise the wind gust axes above the twinned axes and hide its background
    # patch so the lines drawn on the twins remain visible underneath.
    ax.set_zorder(1)
    ax.patch.set_visible(False)

    axt.plot(df.tdiff, df.tempanom, label=r"Temperature [$^o$C]",
             color='r', marker='^', markerfacecolor="None", lw=1,
             path_effects=[pe], zorder=1, markevery=5)
    axt.plot(df.tdiff, df.dpanom, label=r"Dew point [$^o$C]",
             color='orangered', marker='.', markerfacecolor="None", lw=1,
             path_effects=[pe], zorder=1, markevery=5)
    ax.plot(df.tdiff, df.windgust, label="Wind gust [km/h]", color="#4C72B0",
            lw=2, path_effects=[pe], markerfacecolor="None", zorder=100)
    axp.plot(df.tdiff, df.stnpanom, label="Pressure [hPa]", color='purple',
             lw=2, path_effects=[pe], ls='--')

    if pos == 'l':
        ax.set_ylabel("Gust wind speed [km/h]")
        axp.tick_params(labelright=False, right=False)
        axt.tick_params(labelright=False, right=False)
    else:
        axt.spines[['right']].set_color('r')
        axt.yaxis.label.set_color('r')
        axt.tick_params(axis='y', colors='r')
        axt.set_ylabel(r"Temperature anomaly [$^o$C]")
        # Push the pressure axis outwards so it does not overlap the
        # temperature axis on the same side.
        axp.spines[['right']].set_position(('axes', 1.15))
        axp.spines[['right']].set_color('purple')
        axp.yaxis.label.set_color('purple')
        axp.tick_params(axis='y', colors='purple')
        axp.set_ylabel("Pressure anomaly [hPa]")

    ax.set_ylim(wlims)
    ax.set_xlim((-60, 60))
    axp.set_ylim(plims)
    axt.set_ylim(tlims)

    # Only the wind gust axes carries grid lines.
    ax.grid(True)
    axt.grid(False)
    axp.grid(False)

def identify_axes(axdict, labels):
    """
    Write "<key> <label>" in the top-left corner of each axis.

    Keys of `axdict` are paired with `labels` in order; pairing stops at the
    shorter of the two.

    :param axdict: Mapping of panel key to `matplotlib.Axes`
    :param labels: Sequence of label strings, one per panel
    """
    boxStyle = {"boxstyle": 'square', "ec": "white", "fc": "white", "alpha": 0.7}
    textStyle = {"ha": "left", "va": "top", "fontsize": 12, "color": 'black',
                 "bbox": boxStyle}
    for key, label in zip(axdict, labels):
        panel = axdict[key]
        panel.text(.02, 0.95, f"{key} {label}",
                   transform=panel.transAxes, **textStyle)

In [None]:
fig, axes = plt.subplot_mosaic(
    [["(a)", "(b)"],
     ["(c)", "(d)"],
     ["(e)", "(f)"],
     ["(g)", "(h)"]],
    sharex=True, sharey=True,
    figsize=(20, 16)
)

# (panel key, mean profile, side): left-column panels are drawn with pos='l',
# right-column panels with pos='r'.
panelSpecs = [
    ("(a)", meansyn, 'l'), ("(b)", meansyf, 'r'),
    ("(c)", meansb, 'l'), ("(d)", meants, 'r'),
    ("(e)", meanfu, 'l'), ("(f)", meanfd, 'r'),
    ("(g)", meansp, 'l'), ("(h)", meanuc, 'r'),
]
for panel, profile, side in panelSpecs:
    plotEvent(profile, axes[panel],
              wlims=(0, 120), tlims=(-6, 6), plims=(-4, 4), pos=side)

panelLabels = ["Synoptic storm", "Synoptic front",
               "Storm-burst", "Thunderstorm",
               "Front-up", "Front-down",
               "Spike", "Unclassified"]
identify_axes(axes, labels=panelLabels)

# Proxy artists for a single legend covering the lines on all three y-axes.
tlegendline = mlines.Line2D([], [], color='red', marker='^',
                            markerfacecolor="None",
                            label=r"Temperature [$^o$C]")
dlegendline = mlines.Line2D([], [], color='orangered',
                            marker='.', markerfacecolor="None",
                            label=r"Dew point [$^o$C]")
plegendline = mlines.Line2D([], [], color='purple',
                            ls='--', label=r"Pressure [hPa]")
wlegendline = mlines.Line2D([], [], color='#4C72B0',
                            label=r"Wind speed [km/h]")
legendHandles = [wlegendline, plegendline, tlegendline, dlegendline]
axes['(a)'].legend(handles=legendHandles, loc=3, fontsize='small')

# Only the bottom row carries an x-axis label.
for panel in ("(g)", "(h)"):
    axes[panel].set_xlabel("Time since gust [min]")
plt.tight_layout()

profilePlotFile = pjoin(DATADIR, "plots", "mean_visual_storm_profile.jpg")
fig.savefig(profilePlotFile, bbox_inches="tight", dpi=600)

Now load the events classified using the El Rafei _et al._ (2023) criteria, which provides a binary classification based on the ratio of the maximum gust wind speed to the gust wind speeds preceding and following the maximum gust. 

In [None]:
def loadGustRatioData(stnNum):
    """
    Read the pickled gust-ratio data for one station from `DATADIR/gustratio`.

    :param stnNum: Station number; zero-padded to six digits to form the file name
    :returns: DataFrame indexed by (`stnNum`, `date`), with the index the
        data had on disk kept as an ordinary column
    """
    ratioFile = pjoin(DATADIR, "gustratio", f"{stnNum:06d}.pkl")
    ratios = pd.read_pickle(ratioFile)
    ratios = ratios.assign(date=pd.to_datetime(ratios['date']), stnNum=stnNum)
    # Move the stored index into a column, then key each row by station
    # number and storm date.
    return ratios.reset_index().set_index(['stnNum', 'date'])

In [None]:
# One gust-ratio dataframe per station, stacked into a single frame.
grdflist = [loadGustRatioData(stn) for stn in stndf.index]
allgrdf = pd.concat(grdflist)

# Keep the (stnNum, date) index value as a column, as for the observed data.
allgrdf['idx'] = allgrdf.index

Here we join the manually classified set of events with the automatically classified events (based on the gust ratio). We set the index of the dataframe holding the manually classified data to match the index of the automatically classified events and provide a suffix to separate the overlapping column names. A cross-tabulation then gives the comparison of events with different classifications.

In [None]:
# Join the visually classified events to the gust-ratio classification.
# NOTE(review): no `on=` / `left_index=` / `right_index=` is given, so pandas
# joins on whatever columns the two frames have in common (both appear to
# carry `idx`) rather than on the (stnNum, date) index, and the index is not
# carried into the result — confirm this is the intended join key.
compdf = pd.merge(stormdf.set_index(['stnNum', 'date']), allgrdf, suffixes=('_s', '_g'))

In [None]:
# Order in which the storm types (the rows of the cross-tabulation) are shown.
colorder = [
    'Synoptic storm',
    'Synoptic front',
    'Storm-burst',
    'Thunderstorm',
    'Front up',
    'Front down',
    'Spike',
    'Unclassified',
]
typeByCategory = pd.crosstab(compdf['stormType'], compdf['category'])
typeByCategory.reindex(colorder).style.background_gradient(cmap='viridis_r')

In [None]:

def selectEvent(stnNum, date):
    """
    Return the rows of `alldf` recorded for one event.

    :param stnNum: Station number of the event
    :param date: `datetime` of the event day
    :returns: Subset of `alldf` whose `idx` equals `(stnNum, date)`
    """
    return alldf[alldf.idx == (stnNum, date)]


# One example event per storm type. The former `.rename({"datetime": "tdiff"})`
# call is dropped: without `axis`/`columns` it targeted index labels and so
# changed nothing, and `tdiff` is already a column of `alldf`.
syndf = selectEvent(4032, datetime(2017, 3, 23))
syfdf = selectEvent(66037, datetime(2001, 1, 15))
stbdf = selectEvent(3003, datetime(2012, 3, 17))
frudf = selectEvent(12038, datetime(2018, 12, 10))
frddf = selectEvent(3003, datetime(2017, 2, 23))
tstdf = selectEvent(36031, datetime(2020, 11, 12))
spkdf = selectEvent(15590, datetime(2010, 12, 26))

This plots selected events from the visually classified event set, used to demonstrate the range of storm types.

In [None]:
fig, axes = plt.subplot_mosaic(
    [["(a)", "(b)"],
     ["(c)", "(d)"],
     ["(e)", "(f)"]],
    sharex=True, sharey=True,
    figsize=(20, 12)
)

# (panel key, example event, side): left-column panels are drawn with
# pos='l', right-column panels with pos='r'.
exampleSpecs = [
    ("(a)", syndf, 'l'), ("(b)", syfdf, 'r'),
    ("(c)", stbdf, 'l'), ("(d)", tstdf, 'r'),
    ("(e)", frudf, 'l'), ("(f)", frddf, 'r'),
]
for panel, event, side in exampleSpecs:
    plotEvent(event, axes[panel],
              wlims=(0, 120), tlims=(-6, 6), plims=(-4, 4), pos=side)

# The label list is longer than the number of panels; labels beyond the
# last panel are not used.
panelLabels = ["Synoptic storm", "Synoptic front",
               "Storm-burst", "Thunderstorm",
               "Front-up", "Front-down",
               "Spike", "Unclassified"]
identify_axes(axes, labels=panelLabels)

# Proxy artists for a single legend covering the lines on all three y-axes.
tlegendline = mlines.Line2D([], [], color='red', marker='^',
                            markerfacecolor="None",
                            label=r"Temperature [$^o$C]")
dlegendline = mlines.Line2D([], [], color='orangered',
                            marker='.', markerfacecolor="None",
                            label=r"Dew point [$^o$C]")
plegendline = mlines.Line2D([], [], color='purple',
                            ls='--', label=r"Pressure [hPa]")
wlegendline = mlines.Line2D([], [], color='#4C72B0',
                            label=r"Wind speed [km/h]")
legendHandles = [wlegendline, plegendline, tlegendline, dlegendline]
axes['(a)'].legend(handles=legendHandles, loc=3, fontsize='small')

for panel in ("(c)", "(e)"):
    axes[panel].spines['right'].set_visible(False)

# Only the bottom row carries an x-axis label.
for panel in ("(e)", "(f)"):
    axes[panel].set_xlabel("Time since gust [min]")
plt.tight_layout()

examplePlotFile = pjoin(DATADIR, "plots", "example_visual_storm_profile.jpg")
fig.savefig(examplePlotFile, bbox_inches="tight", dpi=600)

In [None]:
# Locations of the classifier output and of the full station details file.
OUTPUTPATH = pjoin(BASEDIR, "allevents", "results")
stormClassFile = pjoin(OUTPUTPATH, "stormclass.pkl")
fullStationFile = pjoin(BASEDIR, "StationDetails.geojson")

outputstormdf = pd.read_pickle(stormClassFile)

# Station details keyed by station number, with the WMO index stored as a
# nullable integer.
allstndf = gpd.read_file(fullStationFile).set_index("stnNum")
allstndf['stnWMOIndex'] = allstndf['stnWMOIndex'].astype('Int64')

In [None]:
def loadAllData(stnNum, datapath):
    """
    Read the pickled event data for one station from `<datapath>/events`.

    :param stnNum: Station number; zero-padded to six digits to form the file name
    :param datapath: Directory that contains the `events` folder
    :returns: DataFrame indexed by (`stnNum`, `date`), with the index the
        data had on disk kept as an ordinary column
    """
    eventFile = pjoin(datapath, "events", f"{stnNum:06d}.pkl")
    events = pd.read_pickle(eventFile)
    events = events.assign(date=pd.to_datetime(events['date']), stnNum=stnNum)
    # Move the stored index into a column, then key each row by station
    # number and storm date:
    return events.reset_index().set_index(['stnNum', 'date'])

In [None]:
# NOTE(review): the events are read from the "training" directory although
# the classifications loaded for this section come from "allevents" —
# confirm this is the intended source.
eventDataPath = pjoin(BASEDIR, "training")

alldatadflist = []
n = 0  # number of stations without an event file
for stn in allstndf.index:
    try:
        alldatadflist.append(loadAllData(stn, eventDataPath))
    except FileNotFoundError:
        n += 1
print(n)

alldatadf = pd.concat(alldatadflist)
alldatadf["idx"] = alldatadf.index

In [None]:
# (stnNum, date) tuple for every automatically classified storm.
outputstormdf['idx'] = outputstormdf[['stnNum', 'date']].apply(tuple, axis=1)

# Event keys for each automatically assigned storm type.
allsynidx, allsyfidx, allsbidx = (
    outputstormdf.loc[outputstormdf['stormType'] == stormType, 'idx']
    for stormType in ("Synoptic storm", "Synoptic front", "Storm-burst")
)

alltsidx, allfuidx, allfdidx = (
    outputstormdf.loc[outputstormdf['stormType'] == stormType, 'idx']
    for stormType in ("Thunderstorm", "Front up", "Front down")
)

allucidx, allspidx = (
    outputstormdf.loc[outputstormdf['stormType'] == stormType, 'idx']
    for stormType in ("Unclassified", "Spike")
)



In [None]:
# Rows of the loaded event data whose (stnNum, date) key belongs to each
# automatically assigned storm type.
allsynevents, allsyfevents, allsbevents = (
    alldatadf[alldatadf.idx.isin(keys.values)]
    for keys in (allsynidx, allsyfidx, allsbidx)
)

alltsevents, allfuevents, allfdevents = (
    alldatadf[alldatadf.idx.isin(keys.values)]
    for keys in (alltsidx, allfuidx, allfdidx)
)

allucevents, allspevents = (
    alldatadf[alldatadf.idx.isin(keys.values)]
    for keys in (allucidx, allspidx)
)

In [None]:
# Mean of every numeric column at each `tdiff` value, one frame per storm type.
allmeansyn, allmeansyf, allmeansb = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (allsynevents, allsyfevents, allsbevents)
)

allmeants, allmeanfu, allmeanfd = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (alltsevents, allfuevents, allfdevents)
)

allmeanuc, allmeansp = (
    events.reset_index().groupby('tdiff').mean(numeric_only=True).reset_index()
    for events in (allucevents, allspevents)
)

In [None]:
fig, axes = plt.subplot_mosaic(
    [["(a)", "(b)"],
     ["(c)", "(d)"],
     ["(e)", "(f)"],
     ["(g)", "(h)"]],
    sharex=True, sharey=True,
    figsize=(20, 16)
)

# (panel key, mean profile, side): left-column panels are drawn with pos='l',
# right-column panels with pos='r'.
panelSpecs = [
    ("(a)", allmeansyn, 'l'), ("(b)", allmeansyf, 'r'),
    ("(c)", allmeansb, 'l'), ("(d)", allmeants, 'r'),
    ("(e)", allmeanfu, 'l'), ("(f)", allmeanfd, 'r'),
    ("(g)", allmeansp, 'l'), ("(h)", allmeanuc, 'r'),
]
for panel, profile, side in panelSpecs:
    plotEvent(profile, axes[panel],
              wlims=(0, 120), tlims=(-6, 6), plims=(-4, 4), pos=side)

panelLabels = ["Synoptic storm", "Synoptic front",
               "Storm-burst", "Thunderstorm",
               "Front-up", "Front-down",
               "Spike", "Unclassified"]
identify_axes(axes, labels=panelLabels)

# Proxy artists for a single legend covering the lines on all three y-axes.
tlegendline = mlines.Line2D([], [], color='red', marker='^',
                            markerfacecolor="None",
                            label=r"Temperature [$^o$C]")
dlegendline = mlines.Line2D([], [], color='orangered',
                            marker='.', markerfacecolor="None",
                            label=r"Dew point [$^o$C]")
plegendline = mlines.Line2D([], [], color='purple',
                            ls='--', label=r"Pressure [hPa]")
wlegendline = mlines.Line2D([], [], color='#4C72B0',
                            label=r"Wind speed [km/h]")
legendHandles = [wlegendline, plegendline, tlegendline, dlegendline]
axes['(a)'].legend(handles=legendHandles, loc=3, fontsize='small')

# Only the bottom row carries an x-axis label.
for panel in ("(g)", "(h)"):
    axes[panel].set_xlabel("Time since gust [min]")
plt.tight_layout()

autoPlotFile = pjoin(DATADIR, "plots", "mean_auto_storm_profile.jpg")
fig.savefig(autoPlotFile, dpi=600, bbox_inches="tight")