In [4]:
%matplotlib inline

import os
import pandas as pd
import geopandas as gpd

In [2]:
# Root of the data directory, relative to the notebook's working directory.
# Built with os.path.join so the separator is right on every platform: the
# former literal r"..\data" only resolves on Windows, where this expression
# still yields the identical string.
BASEDIR = os.path.join("..", "data")
# Directory holding the all-events results (the storm classification pickle).
ALLEVENTPATH = os.path.join(BASEDIR, "allevents", "results")
# Directory holding one gust-ratio pickle per station.
GUSTEVENTPATH = os.path.join(BASEDIR, "training", "gustratio")
# GeoJSON file with the station details.
fullStationFile = os.path.join(BASEDIR, "StationDetails.geojson")

In [5]:
# Station details table, keyed by station number.
allstndf = gpd.read_file(fullStationFile).set_index("stnNum")
# Cast the WMO index to pandas' nullable integer dtype ('Int64'), which can
# hold integers alongside missing values.
allstndf['stnWMOIndex'] = allstndf['stnWMOIndex'].astype('Int64')

In [6]:
# Load the saved storm classification table from the all-events results
# directory. Despite its name, `outputFile` is only read from in this cell.
# The frame provides at least the `stnNum`, `date` and `stormType` columns
# (see the preview in the next cell).
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files from a trusted source.
outputFile = os.path.join(ALLEVENTPATH, "stormclass.pkl")
classifieddf = pd.read_pickle(outputFile)

In [14]:
# Preview the classifications indexed by (stnNum, date). set_index is called
# without inplace and the result is not assigned, so classifieddf itself is
# left unchanged by this cell.
classifieddf.set_index(['stnNum', 'date'])

Unnamed: 0_level_0,Unnamed: 1_level_0,stormType
stnNum,date,Unnamed: 2_level_1
300000,2002-09-08,Synoptic storm
300000,2002-09-09,Synoptic storm
300000,2002-09-10,Synoptic storm
300000,2002-09-20,Synoptic storm
300000,2002-09-21,Synoptic storm
...,...,...
109521,2023-07-20,Storm-burst
109521,2023-08-01,Front up
109521,2023-08-08,Storm-burst
109521,2023-09-02,Front up


In [8]:
def loadGustRatioData(stnNum):
    """
    Read the gust-ratio pickle for a single station.

    The file is looked up in GUSTEVENTPATH under the station number
    zero-padded to six digits (``<stnNum:06d>.pkl``).

    :param stnNum: Station number. It is formatted with ``:06d``, so it
        must be an integer.

    :returns: `pd.DataFrame` indexed by ``(stnNum, date)``, with ``date``
        converted by `pd.to_datetime`. The index the pickled frame was
        stored with is kept as an ordinary column.
    :raises FileNotFoundError: propagated from `pd.read_pickle` when the
        station has no file.
    """
    pklpath = os.path.join(GUSTEVENTPATH, f"{stnNum:06d}.pkl")
    stored = pd.read_pickle(pklpath)
    # `date` is replaced in position and `stnNum` is appended, matching
    # plain column assignment.
    tagged = stored.assign(
        date=pd.to_datetime(stored['date']),
        stnNum=stnNum,
    )
    # reset_index first so the stored index survives as a column.
    return tagged.reset_index().set_index(['stnNum', 'date'])

In [10]:
# Collect the gust-ratio table of every station listed in allstndf.
grdflist = []
# Stations without a gust-ratio file are still skipped, but their numbers are
# recorded so the gaps can be inspected instead of disappearing silently.
missingstns = []
for stn in allstndf.index:
    try:
        df = loadGustRatioData(stn)
    except FileNotFoundError:
        missingstns.append(stn)
    else:
        grdflist.append(df)

# pd.concat on an empty list raises an opaque "No objects to concatenate";
# fail with a message that points at the directory that was searched.
if not grdflist:
    raise ValueError(
        f"No gust ratio files found in {GUSTEVENTPATH} "
        f"({len(missingstns)} stations checked)"
    )

allgrdf = pd.concat(grdflist)
# Keep the (stnNum, date) index tuple available as a regular column too.
allgrdf['idx'] = allgrdf.index

In [13]:
# Display the combined gust-ratio table; the rendered output below is
# truncated (rows and columns elided with "...").
allgrdf

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,id,YYYY,MM,DD,HH,MI,rainfall,rainq,rain_duration,...,v2,r1,r2,category,wbtemp,wbtempq,rhq,windsd,windsdq,idx
stnNum,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
300000,2002-08-28,2002-08-28 19:51:00+00:00,hd,2002,8,28,19,51,0.0,Y,1.0,...,20.157851,1.183194,1.195564,synoptic,,,,,,"(300000, 2002-08-28 00:00:00)"
300000,2002-08-29,2002-08-29 20:15:00+00:00,hd,2002,8,29,20,15,0.0,Y,1.0,...,24.258678,1.471959,1.21606,synoptic,,,,,,"(300000, 2002-08-29 00:00:00)"
300000,2002-08-30,2002-08-30 05:28:00+00:00,hd,2002,8,30,5,28,0.0,Y,1.0,...,18.935537,1.301366,1.272739,synoptic,,,,,,"(300000, 2002-08-30 00:00:00)"
300000,2002-08-31,2002-08-31 09:05:00+00:00,hd,2002,8,31,9,5,0.0,Y,1.0,...,20.690909,1.682377,1.425747,synoptic,,,,,,"(300000, 2002-08-31 00:00:00)"
300000,2002-09-01,2002-09-01 05:05:00+00:00,hd,2002,9,1,5,5,0.0,Y,1.0,...,19.671901,1.303782,1.316599,synoptic,,,,,,"(300000, 2002-09-01 00:00:00)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140010,2023-09-06,2023-09-06 05:41:00+00:00,hd,2023,9,6,15,41,0.0,Y,1.0,...,16.455372,1.457616,1.488875,synoptic,,,,,,"(140010, 2023-09-06 00:00:00)"
140010,2023-09-07,2023-09-07 04:55:00+00:00,hd,2023,9,7,14,55,0.0,Y,1.0,...,15.245455,1.569197,1.797257,synoptic,,,,,,"(140010, 2023-09-07 00:00:00)"
140010,2023-09-08,2023-09-08 23:57:00+00:00,hd,2023,9,9,9,57,0.0,Y,1.0,...,23.92314,2.306537,1.655301,convective,,,,,,"(140010, 2023-09-08 00:00:00)"
140010,2023-09-09,2023-09-09 00:01:00+00:00,hd,2023,9,9,10,1,0.0,Y,1.0,...,23.560331,1.960841,1.498281,synoptic,,,,,,"(140010, 2023-09-09 00:00:00)"


In [15]:
# Pair each storm classification with the gust-ratio record for the same
# station and date. Both sides are matched on their (stnNum, date) index with
# merge's default inner join, so only keys present in both frames are kept.
# Overlapping column names get '_s' (classification) / '_g' (gust ratio).
# NOTE(review): in the allgrdf preview the row dated 2023-09-08 has DD=9 with
# a 23:57+00:00 timestamp, which suggests `date` follows the UTC timestamp
# rather than the DD field -- confirm classifieddf's dates use the same
# convention.
compdf = classifieddf.set_index(['stnNum', 'date']).merge(
    allgrdf,
    left_index=True,
    right_index=True,
    suffixes=('_s', '_g'),
)

In [18]:
# Row order for the comparison table.
colorder = [
    'Synoptic storm',
    'Synoptic front',
    'Storm-burst',
    'Thunderstorm',
    'Front up',
    'Front down',
    'Spike',
    'Unclassified',
]
# Count events per (stormType, category) pair, arrange the rows in colorder
# and shade the cells with the reversed viridis colormap.
(
    pd.crosstab(index=compdf['stormType'], columns=compdf['category'])
    .reindex(colorder)
    .style
    .background_gradient(cmap='viridis_r')
)

category,convective,synoptic
stormType,Unnamed: 1_level_1,Unnamed: 2_level_1
Synoptic storm,14709,133010
Synoptic front,8550,3369
Storm-burst,5444,19415
Thunderstorm,6124,1158
Front up,10883,1721
Front down,2470,317
Spike,405,8
Unclassified,24,0
