# Comparison of observed and simulated ARI wind speeds

This notebook plots the average recurrence interval (ARI) wind speeds based on observed wind speeds corresponding to the passage of TCs (within 200 km of a station). It adds a plot of the fitted ARI wind speeds from a TCRM simulation.

In [1]:
%matplotlib inline

import os
import io
import sys
from os.path import join as pjoin
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd
import geopandas as gpd
from datetime import datetime



from extremes import returnLevels, empReturnPeriod
from distributions import fittedPDF

# Import widgets for interactive notebook
from ipywidgets import interact, fixed
import ipywidgets as widgets

import seaborn as sns
# Apply seaborn's "poster" context and "whitegrid" style to every figure
# drawn later in the notebook.
sns.set_context("poster")
sns.set_style("whitegrid")

In [2]:
def loadObservations(stnId):
    """
    Load the observed gust records for a single station.

    The file is read from ``obsPath`` and is named ``bom_<stnId>.csv``, with
    the station id zero-padded to six digits. The first row of the file is
    skipped and the columns are named ``recid``, ``stnId``, ``datetime``,
    ``gust``, ``direction``, ``quality`` and ``cycName``; the ``datetime``
    column is parsed into timestamps.

    :param int stnId: Station identifier used to build the file name.

    :returns: :class:`pandas.DataFrame` of observations, or `None` (after
              printing a message) when no file exists for the station.
    """
    names = ['recid', 'stnId', 'datetime', 'gust',
             'direction', 'quality', 'cycName']

    filename = pjoin(obsPath, "bom_{0:06d}.csv".format(stnId))
    try:
        # `infer_datetime_format` is intentionally not passed: it has been
        # deprecated since pandas 2.0, where it has no effect and only
        # triggers a warning.
        obsdf = pd.read_csv(filename, skiprows=1, names=names,
                            parse_dates=['datetime'])
    except FileNotFoundError:
        print("No data file for stnId: {0}".format(stnId))
        return None
    return obsdf

def getStationDates(stnId):
    """
    Return the number of years of data recorded for a station.

    Despite the name, no dates are returned: the result is the inclusive
    span ``stnDataEnd - stnDataStart + 1`` taken from the station's row in
    the station details table ``stndf``.

    :param stnId: Station identifier (an index label of ``stndf``).

    :returns: Number of years covered by the station's data.
    """
    station = stndf.loc[stnId]
    return station['stnDataEnd'] - station['stnDataStart'] + 1

In [3]:
# (field name, numpy type code) for each column of the station details file,
# in file order. The trailing comment gives the zero-based column position.
STNTYPES = [
    ('st', 'S2'),               # 0
    ('stnId', 'i'),             # 1
    ('stnDistCode', 'S4'),      # 2
    ('stnName', 'S'),           # 3
    ('stnDateOpen', 'S10'),     # 4
    ('stnDateClosed', 'S10'),   # 5
    ('stnLat', 'f8'),           # 6
    ('stnLon', 'f8'),           # 7
    ('method', 'S15'),          # 8
    ('state', 'S3'),            # 9
    ('stnElevation', 'f8'),     # 10
    ('baroElev', 'i'),          # 11
    ('stnWMONumber', 'i'),      # 12
    ('stnDataStart', 'i'),      # 13
    ('stnDataEnd', 'i'),        # 14
    ('blank', 'S3'),            # 15
    ('percentcomplete', 'f8'),  # 16
    ('pcqualy', 'f8'),          # 17
    ('pcqualn', 'f8'),          # 18
    ('pcqualw', 'f8'),          # 19
    ('pcquals', 'f8'),          # 20
    ('pcquali', 'f8'),          # 21
    ('end', 'S1'),              # 22
]

# Per-column converters: strip trailing whitespace from the station name.
STNCONVERT = {'stnName': str.rstrip}

Start by loading the observation station information from the station details file. This is part of the daily maximum wind gust dataset (Geoscience Australia eCat #110561).

In [4]:
# NOTE(review): absolute, machine-specific paths - adjust these for your
# environment before running the notebook.
# Directory holding the per-station observation files (bom_<stnId>.csv).
obsPath = "C:/WorkSpace/data/derived/tcobs/daily"
# Directory holding the raw daily maximum wind gust dataset.
stationFilePath = "C:/WorkSpace/data/raw/daily_max_wind_gust/"
stnfile = pjoin(stationFilePath, "DC02D_StnDet_999999999425050.txt")

# Read the station details table. Column names are taken from STNTYPES, only
# the column positions listed in `usecols` are kept, the first row of the
# file is skipped, and the table is indexed by station id.
stndf = pd.read_csv(stnfile, parse_dates=[4, 5],
                        usecols=(1,2,3,4,5,6,7,9,10,12,13,14,16), 
                        names = np.dtype(STNTYPES).names,
                        skiprows=1, engine='python', index_col='stnId', 
                        converters=STNCONVERT)
# Station names in table order.
stationNameList = list(stndf['stnName'])
stndf.head(10)

Unnamed: 0_level_0,stnDistCode,stnName,stnDateOpen,stnDateClosed,stnLat,stnLon,state,stnElevation,stnWMONumber,stnDataStart,stnDataEnd,percentcomplete
stnId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1006,1,WYNDHAM AERO,01/1951,,-15.51,128.1503,WA,3.8,95214.0,2003,2017,96
1007,1,TROUGHTON ISLAND,09/1956,,-13.7542,126.1485,WA,6.0,94102.0,2008,2017,94
1009,1,KURI BAY,08/1961,09/2012,-15.4875,124.5222,WA,12.0,,2001,2001,100
1019,1,KALUMBURU,11/1997,,-14.2964,126.6453,WA,23.0,94100.0,2003,2017,93
1020,1,TRUSCOTT,01/1944,,-14.09,126.3867,WA,51.0,95101.0,2004,2017,93
2012,2,HALLS CREEK METEOROLOGICAL OFFICE,01/1944,,-18.2292,127.6636,WA,422.0,99201.0,1962,2017,94
2056,2,KUNUNURRA AERO,09/1971,,-15.7814,128.71,WA,44.0,94216.0,1994,2017,97
2064,2,ARGYLE AERODROME,01/1986,,-16.6381,128.4517,WA,164.0,94217.0,2002,2017,99
3003,3,BROOME AIRPORT,01/1939,,-17.9475,122.2353,WA,7.4,94203.0,1941,2017,99
3025,3,COCKATOO ISLAND,01/1948,09/2014,-16.0992,123.6161,WA,91.0,,1965,1982,100


Now load a shape file that contains the observed stations joined with the TCRM simulation locations. Note in this dataframe, we need to add an index, and so we index by both the location id number (TCRM simulation locations) *and* the station number (observations).

In [5]:
# NOTE(review): absolute, machine-specific path - adjust for your environment.
locationFilePath = "C:/WorkSpace/data/derived/tcobs/merged.shp"
locdf = gpd.read_file(locationFilePath)
# Two-level index: (locId, stnId).
locdf = locdf.set_index(["locId", 'stnId'])
# Station names in row order; loadParameters uses the position of a name in
# this list to look up the matching row of `locdf.index`.
locationNameList = list(locdf['stnName'])
locdf.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cou,Elevation,ICAO,Latitude,Longitude,Place,St,WMO,geometry,percentcom,...,stnDataEnd,stnDataSta,stnDateClo,stnDateOpe,stnDistCod,stnElevati,stnLat,stnLon,stnName,stnWMONumb
locId,stnId,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10210,300001,AAR,10,----,-67.6,62.86667,Mawson,--,89564,POINT (62.86667 -67.59999999999999),0,...,2017,1996,,01/1954,300,9.9,-67.6017,62.8753,MAWSON,89564
10213,300000,AAR,18,----,-68.58333,77.96667,Davis,--,89571,POINT (77.96666999999999 -68.58333),0,...,2017,1994,,01/1957,300,18.0,-68.5744,77.9672,DAVIS,89571
10219,300017,AAR,40,----,-66.28333,110.51667,Casey,--,89611,POINT (110.51667 -66.28333000000001),0,...,2017,1998,,02/1989,300,40.0,-66.2825,110.5231,CASEY,89611
10626,1007,AUS,6,----,-13.75,126.15,Troughton Island W. A.,--,94102,POINT (126.15 -13.75),0,...,2017,1990,,09/1956,01,6.0,-13.7542,126.1485,TROUGHTON ISLAND,94102
10627,200784,AUS,4,----,-14.11667,123.53333,Browse Island Aws,--,94103,POINT (123.53333 -14.11667),0,...,2017,2013,,01/1969,200,3.7,-14.1089,123.5472,BROWSE ISLAND,94103
10628,14314,AUS,3,----,-14.03333,121.75,Scott Reef Aws,--,94105,POINT (121.75 -14.03333),0,...,2017,2013,,06/2013,14GA,17.5,-12.6099,131.0474,NOONAMAH AIRSTRIP,94105
10630,14277,AUS,4,----,-12.63333,130.36667,Dum In Mirrie Aws,--,94116,POINT (130.36667 -12.63333),0,...,2017,2000,,04/1994,14GA,3.5,-12.635,130.3725,DUM IN MIRRIE AIRSTRIP,94116
10632,14142,AUS,12,----,-11.4,130.41667,Garden Point,--,94119,POINT (130.41667 -11.4),0,...,2017,2006,,01/1963,14GA,18.3,-11.4021,130.4217,PIRLANGIMPI AIRPORT,94119
10633,14015,AUS,31,YPDN,-12.4,130.86667,Darwin Airport,--,94120,POINT (130.86667 -12.4),0,...,2017,1985,,01/1941,14GA,30.4,-12.4239,130.8925,DARWIN AIRPORT,94120
10635,200731,AUS,14,----,-11.78333,130.01667,Bathurst Island Aws Cape Fourcroy,--,94122,POINT (130.01667 -11.78333),0,...,2017,2000,,07/1971,14GA,6.5,-11.7628,130.03,POINT FAWCETT,94122


Indexing by `locId` alone with `.loc` returns the rows for that location. The remaining index level of the result holds the `stnId` values, so `.index[0]` gives the first station id, which is used to load the observed data.

In [6]:
# Rows for locId 10635 are indexed by stnId only; take the first stnId.
locdf.loc[10635].index[0]

200731

Using multiple indexes means you can directly access the attribute using the DataFrame.`loc` method

In [7]:
# Select a single row by its full (locId, stnId) key, then read one attribute.
locdf.loc[10635, 200731]['stnName']

'POINT FAWCETT'

The parameters of the fitted distribution are contained in another data file, and this is indexed using the TCRM location id number.

In [8]:
# NOTE(review): absolute, machine-specific path - adjust for your environment.
paramFile = "C:/WorkSpace/data/derived/tc/tcha/parameters.csv"
# Column names applied to the parameter file (its own first row is skipped).
# NOTE(review): in the table displayed below, `it_scale` holds small negative
# values and `it_shape` values of roughly 25-40, so the `it_*` labels may be
# permuted - confirm against the header row of the file. Only the `gpd_*`
# columns are used by the plotting code in this notebook.
paramNames = ['locId', "locName", "it_scale", "it_shape", "it_thresh", 
              "it_rate", "gpd_rate", "gpd_shape", "gpd_thresh", "gpd_scale"]
# Index by the TCRM location id so parameters can be looked up with .loc[locId].
gpddf = pd.read_csv(paramFile, names=paramNames, skiprows=1, index_col='locId')
gpddf.head(10)

Unnamed: 0_level_0,locName,it_scale,it_shape,it_thresh,it_rate,gpd_rate,gpd_shape,gpd_thresh,gpd_scale
locId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10606,Nadzab,0.0,0.0,0.0,0.001023,0.000285,-0.064924,20.561479,5.074588
10594,Madang,-0.057574,28.696775,4.071263,3.4e-05,0.000285,-0.119735,18.843449,5.356575
10589,Koinambe,-0.061601,34.191239,3.639061,8e-06,0.000285,-0.116861,20.04178,5.002796
10588,Mount Hagen,-0.066907,26.584459,4.366419,8.3e-05,0.000285,-0.062477,21.41161,4.436294
10607,Erave,-0.041564,25.78578,4.228511,0.000161,0.000285,-0.059708,23.213079,4.51352
10590,Goroka,-0.098329,39.170346,4.277844,4e-06,0.000285,-0.101515,21.397353,5.185086
10591,Kundiawa,-0.095997,33.728931,4.53902,1.6e-05,0.000285,-0.075342,21.635276,4.814428
10595,Aiyura,-0.170982,39.65623,4.918647,4e-06,0.000285,-0.086113,21.925409,5.221558
10599,Bulolo,-0.041065,31.714521,4.435643,7e-05,0.000285,-0.10441,24.169427,5.716989
10601,Lake Kutubu,-0.043292,31.362851,3.613429,3.6e-05,0.000285,-0.099597,22.941998,4.522194


In [9]:
def plotObservedHazard(locId, ax, gustFactor=1.114):
    """
    Add empirical ARI wind speeds from station observations to an axes.

    The observed gusts are scaled by `gustFactor`, sorted, and placed at the
    top of a zero-filled array with one entry per day of the station's
    record (``int(numYears * 365.25)`` entries). Empirical return periods
    are computed for that array with `empReturnPeriod`, and only points with
    a return period greater than 1 are plotted.

    :param locId: Identifier passed to `loadObservations` and
                  `getStationDates`. Despite the name, both of those
                  functions treat it as a station id.
    :param ax: Axes-like object providing a ``scatter`` method.
    :param float gustFactor: Multiplier applied to the observed gusts. The
                             default (1.114) is the conversion to a
                             0.2 second wind gust.

    :returns: `ax`, unchanged if there is nothing to plot (no observation
              file, no finite gust values, or a record of less than one day).
    """
    obsdf = loadObservations(locId)
    if obsdf is None:
        return ax

    numYears = getStationDates(locId)
    numDays = int(numYears * 365.25)

    # Drop missing gusts: NaN sorts to the end of the array and would
    # otherwise take the highest ranks (the longest return periods).
    gusts = np.asarray(obsdf['gust'], dtype=float)
    gusts = np.sort(gusts[np.isfinite(gusts)]) * gustFactor
    if gusts.size == 0 or numDays <= 0:
        return ax

    # If there are more observations than days in the record, keep only the
    # largest `numDays` values so they fit in the padded array.
    gusts = gusts[-numDays:]

    data = np.zeros(numDays)
    # Use an explicit start index: `data[-len(gusts):]` would address the
    # whole array when `gusts` is empty.
    data[numDays - gusts.size:] = gusts
    emprp = empReturnPeriod(data)

    plotted = emprp > 1
    ax.scatter(emprp[plotted], data[plotted], s=50,
               color='k', marker='x', label="Empirical ARI")
    return ax

In [10]:
def plotFittedHazard(gpd_params, ax):
    """
    Draw the fitted hazard curve on an axes with a logarithmic x-axis.

    Return levels are evaluated with `returnLevels` at a fixed set of
    return periods between 1 and 10000 and plotted against those periods.

    :param gpd_params: Sequence of four values unpacked, in order, as
                       ``(mu, xi, sigma, rate)`` and passed to
                       `returnLevels` in that order after the return periods.
    :param ax: Axes-like object providing a ``semilogx`` method.

    :returns: `ax`, with the curve labelled "Fitted hazard curve" added.
    """
    threshold, shape, scale, rate = gpd_params

    periods = np.array([1, 2, 5, 10, 20, 50, 100, 200,
                        500, 1000, 2000, 5000, 10000])
    levels = returnLevels(periods, threshold, shape, scale, rate)

    ax.semilogx(periods, levels, label="Fitted hazard curve")
    return ax


    

In [11]:
def _formatHazardAxes(ax, title):
    """Apply the title, limits, labels, grid, reference lines and legend to `ax`."""
    ax.set_title(title)
    ax.set_ylim((0, 100))
    ax.set_yticks(np.arange(0, 101, 10))
    ax.set_xlim((1, 10000))
    ax.set_ylabel('Wind speed (m/s)')
    ax.set_xlabel('Average recurrence interval (years)')
    ax.grid(which='major', linestyle='-')
    ax.grid(which='minor', linestyle='--', linewidth=1)
    # Horizontal reference lines at fixed wind speeds, each with a text label.
    for speed, colour, label in ((45.6, 'lime', 'Cat 3'),
                                 (62.5, 'darkorange', 'Cat 4'),
                                 (77.8, 'darkred', 'Cat 5')):
        ax.axhline(speed, c=colour, linestyle='--', linewidth=2)
        # x=20000 lies beyond the x-axis limit set above, so the label is
        # positioned to the right of the plotting area.
        ax.text(20000, speed, label, ha='center')
    ax.legend(loc=2)


def loadParameters(locationName):
    """
    Plot fitted and observed ARI wind speeds for a named station.

    The station name is located in `locationNameList`, and the row of
    `locdf` at the same position supplies both the TCRM location id and the
    station id. A message is printed saying whether an observation file
    exists for the station. If the location id has an entry in `gpddf`, the
    fitted hazard curve and the empirical ARI points are drawn on a new
    figure; otherwise a message is printed and nothing is plotted.

    :param str locationName: Station name, as listed in `locationNameList`.

    :raises ValueError: If `locationName` is not in `locationNameList`.
    """
    # Take locId *and* stnId from the selected row. Re-deriving stnId from
    # locId alone would return the first station at that location, which is
    # the wrong one when several stations share a location id.
    position = locationNameList.index(locationName)
    locId, stnId = locdf.index[position]
    stnName = locdf['stnName'].iloc[position]

    stnObsFile = pjoin(obsPath, "bom_{0:06d}.csv".format(stnId))
    if os.path.exists(stnObsFile):
        print("Observation file exists for {0}".format(stnName))
    else:
        print("No observations for {0}".format(stnName))

    if locId not in gpddf.index:
        print("No index in GPD parameter file for {0}".format(locId))
        return

    # Single lookup of the fitted parameters for this location.
    params = gpddf.loc[locId]
    gpd_params = (params['gpd_thresh'], params['gpd_shape'],
                  params['gpd_scale'], params['gpd_rate'])

    _, ax = plt.subplots(1, 1, figsize=(9, 7))
    plotFittedHazard(gpd_params, ax)
    plotObservedHazard(stnId, ax)
    _formatHazardAxes(ax, stnName)
    plt.show()
        
    

In [12]:
# Offer the station names as a selection widget; each choice is passed to
# loadParameters as `locationName`.
interact(loadParameters, locationName=locationNameList)

<function __main__.loadParameters>