# Comparison of observed and simulated ARI wind speeds

This notebook plots the average recurrence interval (ARI) wind speeds based on observed wind speeds corresponding to the passage of TCs (within 200 km of a station). It adds a plot of the fitted ARI wind speeds from a TCRM simulation.

In [None]:
%matplotlib inline

import os
import io
import sys
from os.path import join as pjoin
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd
import geopandas as gpd
from datetime import datetime



from extremes import returnLevels, empReturnPeriod
from distributions import fittedPDF

# Import widgets for interactive notebook
from ipywidgets import interact, fixed
import ipywidgets as widgets

import seaborn as sns
# Apply seaborn's "notebook" plotting context and "whitegrid" style to every
# figure drawn later in this notebook.
sns.set_context("notebook")
sns.set_style("whitegrid")

In [None]:
def loadObservations(stnId):
    """
    Load the observations from file for a given BoM station, where the observations have
    been selected from the complete digital history of daily maximum wind speeds, where a
    cyclone has passed within 200 km of the station, and the station was open at the time
    of passage.

    The file is looked up as ``bom_<stnId zero-padded to 6 digits>.csv`` inside the
    module-level ``obsPath`` directory. The first line of the file is skipped and the
    column names below are applied instead.

    :param int stnId: Bureau of Meteorology Station identification number

    :returns: :class:`pandas.DataFrame` containing the gust wind speed, direction and
              cyclone name based on passage of cyclones near the selected station, with
              the ``datetime`` column parsed as dates. If no observation file is found
              (`FileNotFoundError`), a message is printed and `None` is returned.
    """

    names = ['recid', 'stnId', 'datetime', 'gust',
             'direction', 'quality', 'cycName', 'cycId']

    filename = pjoin(obsPath, "bom_{0:06d}.csv".format(stnId))
    try:
        # `infer_datetime_format` is deprecated since pandas 2.0 (format inference
        # is now the default behaviour), so it is no longer passed. The date
        # column is selected by name rather than by position for readability.
        obsdf = pd.read_csv(filename, skiprows=1, names=names,
                            parse_dates=['datetime'])
    except FileNotFoundError:
        print("No data file for stnId: {0}".format(stnId))
        return None
    return obsdf

def getStationDates(stnId):
    """
    Compute the length of the observed record for a station, in years.

    The start and end years are read from the ``stnDataStart`` and ``stnDataEnd``
    columns of the module-level station details table ``stndf``, and both
    years are counted (hence the ``+ 1``).

    :param int stnId: Bureau of Meteorology Station identification number

    :returns: number of years between the start and end of the data on file.
    """
    station = stndf.loc[stnId]
    return station['stnDataEnd'] - station['stnDataStart'] + 1

In [None]:
# (field name, numpy type code) pairs describing the columns of the station
# details file. Only the field names are used when the file is read (via
# `np.dtype(STNTYPES).names`).
STNTYPES = [
    ('st', 'S2'),
    ('stnId', 'i'),
    ('stnDistCode', 'S4'),
    ('stnName', 'S'),
    ('stnDateOpen', 'S10'),
    ('stnDateClosed', 'S10'),
    ('stnLat', 'f8'),
    ('stnLon', 'f8'),
    ('method', 'S15'),
    ('state', 'S3'),
    ('stnElevation', 'f8'),
    ('baroElev', 'i'),
    ('stnWMONumber', 'i'),
    ('stnDataStart', 'i'),
    ('stnDataEnd', 'i'),
    ('blank', 'S3'),
    ('percentcomplete', 'f8'),
    ('pcqualy', 'f8'),
    ('pcqualn', 'f8'),
    ('pcqualw', 'f8'),
    ('pcquals', 'f8'),
    ('pcquali', 'f8'),
    ('end', 'S1'),
]

# Per-column converters applied while reading the station file: strip the
# trailing whitespace from station names.
STNCONVERT = {'stnName': str.rstrip}

Start by loading the observation station information. This is from the daily maximum wind gust dataset (Geoscience Australia eCat #110561), starting with the station details file.

In [None]:
# Directory holding the per-station observation files (bom_<stnId>.csv)
obsPath = "C:/WorkSpace/data/derived/tcobs/daily"

# Station details file from the daily maximum wind gust dataset
stationFilePath = "C:/WorkSpace/data/raw/daily_max_wind_gust/"
stnfile = pjoin(stationFilePath, "DC02D_StnDet_999999999425050.txt")

# Read a subset of the columns, indexed by station number. The header line
# is skipped and the field names from STNTYPES are applied instead; columns
# 4 and 5 are parsed as dates.
stndf = pd.read_csv(
    stnfile,
    names=np.dtype(STNTYPES).names,
    usecols=(1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 16),
    skiprows=1,
    index_col='stnId',
    parse_dates=[4, 5],
    converters=STNCONVERT,
    engine='python',
)
stationNameList = stndf['stnName'].tolist()

Now load a shape file that contains the observed stations joined with the TCRM simulation locations. Note in this dataframe, we need to add an index, and so we index by both the location id number (TCRM simulation locations) *and* the station number (observations).

In [None]:
# Shape file of observing stations joined with the TCRM simulation locations
locationFilePath = "C:/temp/merged2.shp"

# Index by (TCRM location id, BoM station number) so rows can be looked up
# from either side of the join.
locdf = gpd.read_file(locationFilePath).set_index(["locId", "stnId"])
locationNameList = locdf['Place'].tolist()

The parameters of the fitted distribution are contained in another data file, and this is indexed using the TCRM location id number.

In [None]:
# Fitted distribution parameters, one row per TCRM location id
paramFile = "C:/WorkSpace/data/derived/tc/tcha2.1/parameters.csv"

# Column names applied in place of the file's header line: the "it_*"
# columns feed the "Iterative threshold fit" curve and the "gpd_*" columns
# feed the "Percentile threshold fit" curve.
paramNames = [
    'locId', "locName",
    "it_shape", "it_thresh", "it_scale", "it_rate",
    "gpd_rate", "gpd_shape", "gpd_thresh", "gpd_scale",
]
gpddf = pd.read_csv(paramFile, skiprows=1, names=paramNames, index_col='locId')

In [None]:
def plotObservedHazard(locId, ax):
    """
    Add the empirical ARI wind speeds observed at a station to an existing plot.

    The observed gusts are sorted and placed at the top end of an otherwise
    zero-filled array with one element per day of the station's record
    (``numYears * 365.25``). That array is passed to `empReturnPeriod`, and
    only points with an empirical return period greater than 1 are plotted.

    :param int locId: Bureau of Meteorology station identification number.
                      Despite the name, this value is passed straight to
                      `loadObservations` and `getStationDates`, which both
                      expect a station number (the name is kept so existing
                      keyword callers keep working).
    :param ax: :class:`matplotlib.axes.Axes` instance to draw on.

    :returns: the same `ax`. It is returned unchanged when there is no
              observation file for the station, or the file holds no valid
              gust values.
    """
    obsdf = loadObservations(locId)
    if obsdf is None:
        return ax

    # Missing gusts would otherwise be sorted to the top of the array and be
    # treated as the largest observations.
    gusts = np.array(obsdf['gust'].dropna())
    if len(gusts) == 0:
        # With no values, `data[-0:]` would address the whole array and the
        # assignment below would raise a ValueError.
        return ax

    numYears = getStationDates(locId)
    data = np.zeros(int(numYears * 365.25))
    wspd = np.sort(gusts) * 1.114  # Include conversion to 0.2 second wind gust
    data[-len(wspd):] = wspd
    emprp = empReturnPeriod(data)

    plotted = emprp > 1
    ax.scatter(emprp[plotted], data[plotted], s=50,
               color='k', marker='x', label="Empirical ARI")
    return ax

In [None]:
def plotFittedHazard(gpd_params, ax, label):
    """
    Draw the return level curve of a fitted distribution on a log-scaled
    x axis.

    Return levels are evaluated with `returnLevels` at a fixed set of
    recurrence intervals from 1 to 10,000 years.

    :param gpd_params: sequence of four values, unpacked in the order
                       (threshold, shape, scale, rate) and passed to
                       `returnLevels` in that order.
    :param ax: axes object to draw on; its `semilogx` method is used.
    :param str label: legend label for the plotted line.

    :returns: the same `ax`, with the curve added.
    """
    threshold, shape, scale, rate = gpd_params
    intervals = np.array([1, 2, 5, 10, 20, 50, 100, 200,
                          500, 1000, 2000, 5000, 10000])
    levels = returnLevels(intervals, threshold, shape, scale, rate)
    ax.semilogx(intervals, levels, label=label)
    return ax


    

In [None]:
def _designWindSpeeds(rp):
    """
    Evaluate the AS/NZS 1170.2 regional wind speed curves for Regions C and D.

    :param rp: array of average recurrence intervals (years).

    :returns: tuple ``(regionC, regionD)`` of wind speed arrays.
    """
    decay = np.power(np.asarray(rp, dtype=float), -0.1)
    regionC = 122 - 104 * decay
    regionD = 156 - 142 * decay

    # AS/NZS 1170.2:2011 cl. 3.4: for R >= 50 years the additional factors
    # are F_C = 1.05 and F_D = 1.1 (1.0 below that). The previous code
    # applied 1.1 to both regions.
    longARI = np.asarray(rp) >= 50
    regionC[longARI] *= 1.05
    regionD[longARI] *= 1.1
    return regionC, regionD


def loadParameters(locationName):
    """
    Plot fitted, design-standard and observed ARI wind speeds for a location.

    The location name is resolved to a TCRM location id and a BoM station
    number through `locdf`. The two fitted distributions held in `gpddf`
    ("Percentile threshold fit" and "Iterative threshold fit") are plotted
    together with the AS/NZS 1170.2 Region C and D curves and, where an
    observation file exists, the empirical ARI values for the station. The
    figure is saved as ``ari_<stnId>.png`` and displayed.

    :param str locationName: a value from `locationNameList` (the ``Place``
                             column of `locdf`).

    :returns: `None`. Messages are printed when the location has no station
              index or no entry in the parameter file; no plot is produced in
              those cases.
    """
    locId = locdf.index[locationNameList.index(locationName)][0]
    try:
        # NOTE(review): this takes the first station listed for the location
        # id, which is not necessarily the row matching `locationName` when
        # several stations share a location id -- confirm this is intended.
        stnId = locdf.loc[locId].index[0]
    except KeyError:
        print("No index for given location id: {0}".format(locId))
        # Without a station there is nothing to title or save the plot by.
        return

    # An explicit (row tuple, column) lookup avoids the ambiguous
    # `locdf.loc[locId, stnId]` form on a MultiIndex.
    stnName = locdf.loc[(locId, stnId), 'Place']

    stnObsFile = pjoin(obsPath, "bom_{0:06d}.csv".format(stnId))
    if os.path.exists(stnObsFile):
        print("Observation file exists for {0} ({1}, {2})".format(stnName, stnId, locId))
    else:
        print("No observations for {0}".format(stnName))

    if locId not in gpddf.index:
        print("No index in GPD parameter file for {0}".format(locId))
        return

    params = gpddf.loc[locId]
    fig, ax = plt.subplots(1, 1, figsize=(9, 7))
    plotFittedHazard((params['gpd_thresh'], params['gpd_shape'],
                      params['gpd_scale'], params['gpd_rate']),
                     ax, "Percentile threshold fit")
    plotFittedHazard((params['it_thresh'], params['it_shape'],
                      params['it_scale'], params['it_rate']),
                     ax, "Iterative threshold fit")

    rp = np.array([1, 2, 5, 10, 20, 50, 100, 200,
                   500, 1000, 2000, 5000, 10000])
    rpc, rpd = _designWindSpeeds(rp)
    # The design curves are only drawn from the 5-year ARI upwards.
    ax.semilogx(rp[2:], rpd[2:], color='0.75', linestyle="--", label='AS/NZS 1170.2 Region D')
    ax.semilogx(rp[2:], rpc[2:], color='0.5', linestyle="--", label='AS/NZS 1170.2 Region C')

    plotObservedHazard(stnId, ax)

    if stnId in stndf.index:
        startYear = stndf.loc[stnId, 'stnDataStart']
        endYear = stndf.loc[stnId, 'stnDataEnd']
        title_str = "{0} ({1}-{2})".format(stnName, startYear, endYear)
    else:
        # Station missing from the station details table: no record period
        # is available, so title the plot with the name only.
        title_str = "{0}".format(stnName)

    ax.set_title(title_str)
    ax.set_ylim((0, 100))
    ax.set_yticks(np.arange(0, 101, 10))
    ax.set_xlim((1, 10000))
    ax.set_ylabel('Wind speed (m/s)')
    ax.set_xlabel('Average recurrence interval (years)')
    ax.grid(which='major', linestyle='-')
    ax.grid(which='minor', linestyle='--', linewidth=1)
    ax.legend(loc=2)
    fig.savefig(pjoin("C:/WorkSpace/data/derived/tc/tcha2.1/", "ari_{0}.png".format(stnId)), bbox_inches="tight")
    plt.show()
    # Release the figure so repeated selections do not accumulate open figures.
    plt.close(fig)
        
    

In [None]:
# Build an interactive selector from the list of location names; each
# selection calls loadParameters with the chosen name and redraws the plot.
interact(loadParameters, locationName=locationNameList)

In [None]:
from scipy.stats import scoreatpercentile

# 99.95th percentile of the observed gusts at station 14015.
# loadObservations returns None (after printing a message) when the station
# has no observation file, so guard against that instead of raising TypeError.
obsdf = loadObservations(14015)
scoreatpercentile(obsdf["gust"], 99.95) if obsdf is not None else None