diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst
index b5cb2374..661312ce 100644
--- a/CODE_OF_CONDUCT.rst
+++ b/CODE_OF_CONDUCT.rst
@@ -63,7 +63,7 @@ Enforcement
 -----------
 
 Instances of abusive, harassing, or otherwise unacceptable behavior may
-be reported by contacting the project team at craig.arthur@ga.gov.au.
+be reported by contacting the project team at hazards@ga.gov.au.
 The project team will review and investigate all complaints, and will
 respond in a way that it deems appropriate to the circumstances. The
 project team is obligated to maintain confidentiality with regard to the
diff --git a/DataProcess/DataProcess.py b/DataProcess/DataProcess.py
index 42f8bfe6..a869570e 100644
--- a/DataProcess/DataProcess.py
+++ b/DataProcess/DataProcess.py
@@ -194,8 +194,8 @@ def processData(self, restrictToWindfieldDomain=False):
         startSeason = config.getint('DataProcess', 'StartSeason')
 
         indicator = loadData.getInitialPositions(inputData)
-        lat = np.array(inputData['lat'], 'd')
-        lon = np.mod(np.array(inputData['lon'], 'd'), 360)
+        lat = np.array(inputData['lat'], 'float32')
+        lon = np.mod(np.array(inputData['lon'], 'float32'), 360.)
 
         if restrictToWindfieldDomain:
             # Filter the input arrays to only retain the tracks that
diff --git a/Evaluate/locationLandfallIntensity.py b/Evaluate/locationLandfallIntensity.py
new file mode 100644
index 00000000..04b1dd5e
--- /dev/null
+++ b/Evaluate/locationLandfallIntensity.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # Landfall rates for simulated tropical cyclones
+#
+# This notebook looks at the rate of landfalling tropical cyclones simulated in
+# the TCHA. "Landfall" is defined loosely: we set out a series of gates around
+# the coastline, 50 km off the coast, with each gate 200 km wide.
+#
+# We look at landfall because this is where the impacts of TCs are felt. We
+# will compare to historical rates at a later point.
+#
+# The gates are defined in a vector shapefile as line segments. We look at the
+# counts, and the mean intensity at landfall, to see how well they correspond
+# to historical landfall rates and intensity.
+#
+# Note, however, that we would expect the historical record to differ somewhat
+# from the mean values determined here. Arguably, the historical record should
+# be (statistically) indistinguishable from the range of scenarios: it should
+# appear to be like any other member of the distribution.
+#
+# The simulations used here are 35-year simulations of TC activity in the
+# Australian region. This corresponds to the length of the historical record
+# used as input to the TCHA (1981-2016). The TCs are simulated with a 3-hour
+# timestep.
+
+import os
+import sys
+from os import walk
+from os.path import join as pjoin
+import logging
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+import seaborn as sns
+from shapely.geometry import LineString, Point
+
+from Utilities import track
+
+sns.set_style('whitegrid')
+
+LOGGER = logging.getLogger(__name__)
+
+# Start with reading in the gates into a `GeoDataFrame`, and adding some
+# additional attributes. This `GeoDataFrame` will be duplicated for each
+# simulation, then aggregated for the summary statistics.
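+#
+# A minimal sketch of that duplicate-then-aggregate pattern (illustrative
+# only; `nsims` and the aggregation spec are placeholders, and the real
+# columns and grouping key are set up below):
+#
+#     frames = [gates.copy().assign(sim=i) for i in range(nsims)]
+#     summary = pd.concat(frames).groupby('LGA_CODE19').agg({'count': 'sum'})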
+
+def loadGateFile(gateFile):
+    try:
+        gates = gpd.read_file(gateFile)
+    except Exception:
+        LOGGER.error(f"Cannot open {gateFile}")
+        LOGGER.error("Check the file exists and is a valid shapefile")
+        sys.exit()
+
+    # Add some extra attributes
+    gates['sim'] = 0
+    gates['count'] = 0
+    gates['meanlfintensity'] = np.nan
+    gates['minlfintensity'] = np.nan
+    gates['cat1'] = 0
+    gates['cat2'] = 0
+    gates['cat3'] = 0
+    gates['cat4'] = 0
+    gates['cat5'] = 0
+
+    return gates
+
+# Define a function to read in the track files. The track files are
+# TCRM-format files: netCDF files with a hierarchical structure. The
+# `track.ncReadTrackData` function does the reading, but we still need to
+# convert each track to a `GeoDataFrame` and add a `geometry` attribute that
+# represents the line of each time step in the track history. We also assign
+# a category attribute to each segment, which at this point is simply based
+# on categorising the central pressure according to the Bureau of
+# Meteorology's TC intensity scale.
+
+def readTracks(trackFile):
+    """
+    Read a track file and create a `GeoPandas.GeoDataFrame` from the data,
+    with separate polyline features for each unique track in the file. Also
+    adds a nominal intensity category based on the central pressure value.
+
+    :param str trackFile: Path to a TCRM-format track file
+
+    :returns: `GeoPandas.GeoDataFrame` of tracks
+    """
+
+    LOGGER.debug(f"Loading track data from {trackFile}")
+    tracks = track.ncReadTrackData(trackFile)
+    trackgdf = []
+    for t in tracks:
+        segments = []
+        for n in range(len(t.data) - 1):
+            segment = LineString([[t.Longitude[n], t.Latitude[n]],
+                                  [t.Longitude[n+1], t.Latitude[n+1]]])
+            segments.append(segment)
+        gdf = gpd.GeoDataFrame.from_records(t.data[:-1])
+        gdf['geometry'] = segments
+        # Lower central pressure corresponds to a higher category
+        gdf['category'] = pd.cut(gdf['CentralPressure'],
+                                 bins=[0, 930, 955, 970, 985, 990, 1020],
+                                 labels=[5, 4, 3, 2, 1, 0])
+        trackgdf.append(gdf)
+    trackgdf = pd.concat(trackgdf)
+    return trackgdf
+
+def isLeft(line, point):
+    """
+    Test whether a point is to the left of a (directed) line segment.
+
+    :param line: :class:`Shapely.geometry.LineString` of the line feature
+        being tested
+    :param point: :class:`Shapely.geometry.Point` being tested
+
+    :returns: `True` if the point is to the left of the line segment,
+        `False` otherwise
+    """
+    start = Point(line.coords[0])
+    end = Point(line.coords[1])
+
+    det = (end.x - start.x) * (point.y - start.y) - \
+          (end.y - start.y) * (point.x - start.x)
+    return det > 0
+
+def enters(feature, line):
+    """
+    Test whether a geometry starts outside the given feature, i.e.
+    whether a track crossing the feature is entering it.
+    """
+    start = Point(line.coords[0])
+    return not start.within(feature)
+
+def isLandfall(feat, tracks):
+    """
+    Determine the track segments that make landfall through a gate segment.
+
+    :param feat: gate feature, with a `geometry` attribute
+    :param tracks: `GeoPandas.GeoDataFrame` of tracks, where the `geometry`
+        field is a `LineString` segment
+
+    :returns: the subset of track segments that cross the gate while
+        entering it
+    """
+    LOGGER.debug("Determining landfalls for track collection")
+
+    crossings = tracks.crosses(feat.geometry)
+    landfall = []
+    for t in tracks[crossings].itertuples():
+        if enters(feat.geometry, Point(t.geometry.coords[0])):
+            landfall.append(True)
+        else:
+            landfall.append(False)
+
+    return tracks[crossings][landfall]
+
+
+# This function counts the number of track segments that cross the coastal
+# gates.
+
+def countCrossings(gdf, tracks, sim):
+    """
+    Count the crossing rate of all gates for all tracks in a given
+    simulation.
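+
+    A sketch of intended use (the file names here are illustrative)::
+
+        gates = loadGateFile("gates.shp")
+        tracks = readTracks("tracks.00001.nc")
+        gates = countCrossings(gates.copy(), tracks, sim=0)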
+
+    :param gdf: `GeoDataFrame` containing the landfall gates
+    :param tracks: `GeoDataFrame` of track segments
+    :param int sim: Ordinal simulation number
+
+    """
+    gdf['sim'] = sim
+    for i, feat in enumerate(gdf.itertuples(index=False)):
+        lf = isLandfall(feat, tracks)
+        ncrossings = len(lf)
+        if ncrossings > 0:
+            gdf['count'].iloc[i] = ncrossings
+            gdf['meanlfintensity'].iloc[i] = lf['CentralPressure'].mean()
+            gdf['minlfintensity'].iloc[i] = lf['CentralPressure'].min()
+            # cathist[k] holds the count of category-k segments (k = 0-5);
+            # category 0 (weaker than Cat 1) is not tallied
+            cathist, _ = np.histogram(lf['category'].values,
+                                      bins=[0, 1, 2, 3, 4, 5, 6])
+            gdf['cat1'].iloc[i] = cathist[1]
+            gdf['cat2'].iloc[i] = cathist[2]
+            gdf['cat3'].iloc[i] = cathist[3]
+            gdf['cat4'].iloc[i] = cathist[4]
+            gdf['cat5'].iloc[i] = cathist[5]
+        else:
+            gdf['count'].iloc[i] = 0
+            gdf['meanlfintensity'].iloc[i] = np.nan
+            gdf['minlfintensity'].iloc[i] = np.nan
+            gdf['cat1'].iloc[i] = 0
+            gdf['cat2'].iloc[i] = 0
+            gdf['cat3'].iloc[i] = 0
+            gdf['cat4'].iloc[i] = 0
+            gdf['cat5'].iloc[i] = 0
+
+    return gdf
+
+# A bunch of helper functions to return quantiles
+def q10(x): return x.quantile(0.1)
+def q90(x): return x.quantile(0.9)
+def q25(x): return x.quantile(0.25)
+def q75(x): return x.quantile(0.75)
+
+
+
+# Get the list of track files from the input data path (this will become a
+# config option in a scripted version of this notebook). Just find all
+# files in the directory that end with "nc" - assume that you're pointing
+# to a directory that only has track files.
+
+def loadLandfallRates(datapath, gdf):
+
+    if not os.path.isdir(datapath):
+        LOGGER.error(f"{datapath} is not a valid directory")
+        raise IOError(f"{datapath} is not a valid directory")
+    filelist = []
+    for (dirpath, dirnames, filenames) in walk(datapath):
+        filelist.extend([fn for fn in filenames if fn.endswith('nc')])
+        break
+    nfiles = len(filelist)
+    if nfiles == 0:
+        LOGGER.error(f"There are no valid track files in the input path {datapath}")
+        raise IOError(f"There are no valid track files in the input path {datapath}")
+    LOGGER.info(f"There are {nfiles} track files in {datapath}")
+
+
+    # Now we loop through all the gates and determine the landfall rates
+    # for each simulation.
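+
+    # Each simulation gets its own copy of the gates GeoDataFrame, so the
+    # per-simulation landfall statistics can be aggregated afterwards. The
+    # divmod test below logs progress roughly every 10% of files (e.g. with
+    # 1000 track files it logs at sim = 0, 100, 200, ...).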
+    gatedflist = []
+    for sim, f in enumerate(filelist):
+        q, r = np.divmod(sim * 10, len(filelist))
+        if r == 0:
+            LOGGER.info(f"Processed {sim} track files ({q*10}%)")
+        gatedf = gdf.copy()
+        tracks = readTracks(pjoin(datapath, f))
+        gatedf = countCrossings(gatedf, tracks, sim)
+        gatedflist.append(gatedf)
+
+    gatesummary = pd.concat(gatedflist)
+    LOGGER.info("Grouping data")
+
+
+    gs = gatesummary.groupby('LGA_CODE19').agg(
+        {'count': ['sum', np.nanmean, np.nanstd, 'min', 'max', q10, q90],
+         'cat1': ['sum', np.nanmean, np.nanstd],
+         'cat2': ['sum', np.nanmean, np.nanstd],
+         'cat3': ['sum', np.nanmean, np.nanstd],
+         'cat4': ['sum', np.nanmean, np.nanstd],
+         'cat5': ['sum', np.nanmean, np.nanstd],
+         'meanlfintensity': [np.nanmean, q10, q25, q75, q90],
+         'minlfintensity': [np.nanmean, 'min', np.nanstd]})
+
+    # Flatten the hierarchical column index; the grouping key stays in the
+    # index so the join below can use it
+    gs.columns = ['_'.join(col).strip() for col in gs.columns.values]
+
+    gatedata = gdf[['LGA_CODE19', 'LGA_NAME19', 'geometry']].join(gs, on='LGA_CODE19')
+
+    gatedata.rename(columns={
+        'count_sum': 'sum',
+        'count_nanmean': 'count_mean',
+        'count_nanstd': 'count_std',
+        'cat1_nanmean': 'cat1_mean',
+        'cat2_nanmean': 'cat2_mean',
+        'cat3_nanmean': 'cat3_mean',
+        'cat4_nanmean': 'cat4_mean',
+        'cat5_nanmean': 'cat5_mean',
+        'meanlfintensity_nanmean': 'mlfimean',
+        'meanlfintensity_q10': 'mlfiq10',
+        'meanlfintensity_q25': 'mlfiq25',
+        'meanlfintensity_q75': 'mlfiq75',
+        'meanlfintensity_q90': 'mlfiq90',
+        'minlfintensity_nanmean': 'minlfi',
+        'minlfintensity_min': 'minlfim'
+        }, inplace=True)
+
+    return gatedata
+
+def plotLandfallIntensity(df, plotpath):
+
+    LOGGER.info("Plotting landfall intensity")
+    fig, ax = plt.subplots(3, 1, figsize=(12, 16), sharex=True)
+    cat12 = np.add(df['cat1_mean'], df['cat2_mean']).tolist()
+    cat123 = np.add(cat12, df['cat3_mean']).tolist()
+    cat1234 = np.add(cat123, df['cat4_mean']).tolist()
+    ax[0].bar(df['LGA_CODE19'], df['cat1_mean'], color='b', label="Cat 1")
+    ax[0].bar(df['LGA_CODE19'], df['cat2_mean'], bottom=df['cat1_mean'], color='g', label='Cat 2')
+    ax[0].bar(df['LGA_CODE19'], df['cat3_mean'], bottom=cat12, color='y', label='Cat 3')
+    ax[0].bar(df['LGA_CODE19'], df['cat4_mean'], bottom=cat123, color='orange', label='Cat 4')
+    ax[0].bar(df['LGA_CODE19'], df['cat5_mean'], bottom=cat1234, color='r', label='Cat 5')
+
+    ax[0].legend()
+    ax[0].set_ylabel("Mean number of TCs")
+    ax[1].plot(df['LGA_CODE19'], df['minlfi'],
+               label='Minimum landfall intensity')
+    ax[1].plot(df['LGA_CODE19'], df['mlfimean'], color='r',
+               label='Mean landfall intensity')
+    ax[1].fill_between(df['LGA_CODE19'], df['mlfiq10'],
+                       df['mlfiq90'], color='r', alpha=0.25)
+
+    ax[1].legend(loc=2)
+    ax[1].set_ylim((900, 1020))
+    ax[1].set_ylabel("Pressure (hPa)")
+    # Normalise by the number of simulations (10,000 in these runs)
+    ax[2].plot(df['LGA_CODE19'], df['sum'] / 10000)
+    ax[2].fill_between(df['LGA_CODE19'], df['count_q90'] / 10000,
+                       df['count_q10'] / 10000, alpha=0.25)
+    ax[2].set_xlim((0, 81))
+    ax[2].set_xticks(np.arange(0, 81, 2))
+    ax[2].set_yticks(np.arange(0, 5.1, .5))
+    ax[2].set_xticklabels(df['LGA_NAME19'][::2], rotation='vertical')
+    ax[2].set_ylabel("Mean proportion of landfall")
+    plt.savefig(os.path.join(plotpath, "landfall_intensity_lga.png"), bbox_inches='tight')
+
+def plotIntensityDistribution(df, plotpath):
+    """
+    Plot a distribution of intensity at landfall.
+
+    :param df: `DataFrame` containing the gate data
+    :param str plotpath: Path to where the plots will be saved.
+    """
+    LOGGER.info("Plotting landfall intensity distribution")
+    fig, ax = plt.subplots(1, 1, figsize=(12, 6), sharex=True)
+    cat12 = np.add(df['cat1_mean'], df['cat2_mean']).tolist()
+    cat123 = np.add(cat12, df['cat3_mean']).tolist()
+    cat1234 = np.add(cat123, df['cat4_mean']).tolist()
+    ax.bar(df['LGA_CODE19'], df['cat1_mean'], color='b', label="Cat 1")
+    ax.bar(df['LGA_CODE19'], df['cat2_mean'], bottom=df['cat1_mean'], color='g', label='Cat 2')
+    ax.bar(df['LGA_CODE19'], df['cat3_mean'], bottom=cat12, color='y', label='Cat 3')
+    ax.bar(df['LGA_CODE19'], df['cat4_mean'], bottom=cat123, color='orange', label='Cat 4')
+    ax.bar(df['LGA_CODE19'], df['cat5_mean'], bottom=cat1234, color='r', label='Cat 5')
+
+    ax.legend()
+    ax.set_xlim((0, 81))
+    ax.set_xticks(np.arange(0, 81))
+    ax.set_yticks(np.arange(0, 2.5, .2))
+    ax.set_xticklabels(df['LGA_NAME19'], rotation='vertical')
+    ax.set_ylabel("Mean rate of landfall")
+    plt.savefig(os.path.join(plotpath, "mean_landfall_rate_intensity_lga.png"), bbox_inches='tight')
+
+def plotLandfallMap(df, plotpath):
+
+    bins = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.25, 1.5, 1.75, 2.0]
+    ax = df.plot(column='count_mean',
+                 figsize=(9, 13.5),
+                 cmap='OrRd',
+                 legend=True,
+                 edgecolor='0.5',
+                 scheme='UserDefined',
+                 classification_kwds={'bins': bins})
+    ax.set_xlabel("Longitude")
+    ax.set_ylabel("Latitude")
+    ax.set_title("Mean TC occurrence (TC/year)")
+    plt.savefig(os.path.join(plotpath, "mean_landfall_rate_intensity_lga_map.png"),
+                bbox_inches='tight')
+
+if __name__ == "__main__":
+    import argparse
+
+    logFormat = "%(asctime)s: %(funcName)s: %(message)s"
+    logging.basicConfig(level='INFO',
+                        format=logFormat,
+                        filename='landfallIntensity.log',
+                        filemode='w',
+                        datefmt="%Y-%m-%d %H:%M:%S")
+    console = logging.StreamHandler(sys.stdout)
+    console.setLevel(logging.INFO)
+    formatter = logging.Formatter(logFormat, datefmt='%H:%M:%S')
+    console.setFormatter(formatter)
+    LOGGER.addHandler(console)
+    LOGGER.info(f"Started {sys.argv[0]} (pid {os.getpid()})")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-p", "--path")
+    args = parser.parse_args()
+
+    basepath = args.path
+
+    gatefile = "/g/data/w85/QFES_SWHA/hazard/input/QLD_LGA_2019.shp"
+    gates = loadGateFile(gatefile)
+
+    datapath = pjoin(basepath, "tracks")
+    plotpath = pjoin(basepath, "plots")
+    processpath = pjoin(basepath, "process")
+    gatedata = loadLandfallRates(datapath, gates)
+    plotIntensityDistribution(gatedata, plotpath)
+    plotLandfallIntensity(gatedata, plotpath)
+    plotLandfallMap(gatedata, plotpath)
+    gatedata_nogeom = pd.DataFrame(gatedata.drop(columns='geometry'))
+    gatedata_nogeom.to_csv(os.path.join(processpath, "simulated_landfall_rates.csv"), index=False)
+    gatedata.to_file(os.path.join(processpath, "simulated_landfall_rates.shp"))
diff --git a/PlotInterface/AutoPlotHazard.py b/PlotInterface/AutoPlotHazard.py
index 6a1aa4c1..25af30ca 100644
--- a/PlotInterface/AutoPlotHazard.py
+++ b/PlotInterface/AutoPlotHazard.py
@@ -86,6 +86,7 @@ def __init__(self, configFile, progressbar=None):
         self.plotUnits = PlotUnits(config.get('Hazard', 'PlotSpeedUnits'))
         self.ciBounds = config.getboolean('Hazard', 'CalculateCI')
         self.fit = config.get('Hazard', 'ExtremeValueDistribution')
+        self.numsimulations = config.getint("TrackGenerator", "NumSimulations")
 
         self.progressbar = progressbar
 
@@ -217,7 +218,7 @@ def plotHazardCurves(self, inputFile, plotPath):
                 wspd = ncobj.variables['wspd'][:, j, i]
 
                 recs = database.locationRecords(self.db, pID)
-                data = np.zeros(int(10000 * 365.25))
+                data = np.zeros(int(self.numsimulations * 365.25))
                 if len(recs) > 0:
                     data[-len(recs):] = recs['wspd']
diff --git a/PlotInterface/curves.py b/PlotInterface/curves.py
index f250cfde..e38e0bb2 100755
--- a/PlotInterface/curves.py
+++ b/PlotInterface/curves.py
@@ -580,7 +580,7 @@ def saveFigure(figure, filename):
     from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
     canvas = FigureCanvas(figure)
-    canvas.print_figure(filename, bbox_inches='tight')
+    canvas.print_figure(filename, bbox_inches='tight', dpi=300)
 
 def saveHazardCurve(years, events, wspd, wspdupper, wspdlower,
                     xlabel, ylabel, title, filename, fit):
diff --git a/README.rst b/README.rst
index 94fbb395..c7586d25 100644
--- a/README.rst
+++ b/README.rst
@@ -44,37 +44,39 @@ Dependencies
 
 TCRM requires:
 
- * `Python 2.7 `_;
+ * `Python 3.7 `_;
  * `numpy `_;
  * `scipy `_;
- * `matplotlib 1.4.3 `_;
- * `Basemap 1.0.8 `_;
- * `netcdf4-python >=1.4.1 `_;
+ * `matplotlib `_;
+ * `Basemap `_;
+ * `netcdf4-python `_;
+ * `cftime `_;
  * `pandas `_;
- * `Shapely `_;
- * `seaborn 0.6 `_;
- * `statsmodels 0.6.1 `_;
+ * `Shapely `_;
+ * `seaborn `_;
+ * `statsmodels `_;
  * `GitPython `_;
  * `GDAL/OGR `_;
+ * `mpi4py `_;
  * and `gcc`.
 
-For parallel execution, `Pypar `_ is required;
+
 
 Status
 ======
 
-.. image:: https://travis-ci.org/GeoscienceAustralia/tcrm.svg?branch=v2.1
+.. image:: https://travis-ci.org/GeoscienceAustralia/tcrm.svg?branch=develop
     :target: https://travis-ci.org/GeoscienceAustralia/tcrm
     :alt: Build status
 
-.. image:: https://coveralls.io/repos/GeoscienceAustralia/tcrm/badge.svg?branch=v2.1
-  :target: https://coveralls.io/r/GeoscienceAustralia/tcrm?branch=v2.1
+.. image:: https://coveralls.io/repos/GeoscienceAustralia/tcrm/badge.svg?branch=develop
+  :target: https://coveralls.io/r/GeoscienceAustralia/tcrm?branch=develop
   :alt: Test coverage
 
 .. image:: https://landscape.io/github/GeoscienceAustralia/tcrm/develop/landscape.svg?style=flat
-  :target: https://landscape.io/github/GeoscienceAustralia/tcrm/v2.1
+  :target: https://landscape.io/github/GeoscienceAustralia/tcrm/develop
  :alt: Code Health
 
 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3741493.svg
@@ -101,8 +103,8 @@ for usage, and a DISCLAIMER OF ALL WARRANTIES.
 
 Contacts
 ========
-Craig Arthur
+Community Safety Branch
 Geoscience Australia
-craig.arthur@ga.gov.au
+hazards@ga.gov.au
 
diff --git a/StatInterface/KDEParameters.py b/StatInterface/KDEParameters.py
index 9f0cb2b3..ef01d607 100644
--- a/StatInterface/KDEParameters.py
+++ b/StatInterface/KDEParameters.py
@@ -60,11 +60,13 @@ def __init__(self, kdeType):
         LOG.info("Initialising KDEParameters")
         kernels = kernel_switch.keys()
         if kdeType in kernels:
-            LOG.debug("Using {0} to generate distribution".format(kdeType))
+            LOG.debug(f"Using {kdeType} to generate distribution")
             self.kdeType = kdeType
         else:
-            LOG.error("Invalid kernel type: {0}".format(kdeType))
-            raise NotImplementedError("Invalid kernel type: {0}".format(kdeType))
+            msg = (f"Invalid kernel type: {kdeType}\n"
+                   f"Valid kernels are {repr(list(kernels))}")
+            LOG.error(msg)
+            raise NotImplementedError(msg)
 
     def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                     cdfParameters=None, angular=False, periodic=False,
diff --git a/StatInterface/generateStats.py b/StatInterface/generateStats.py
index e336d9e7..fa7de694 100644
--- a/StatInterface/generateStats.py
+++ b/StatInterface/generateStats.py
@@ -32,10 +32,11 @@ def acf(p, nlags=1):
 
     :type p: 1-d :class:`numpy.ndarray`
     """
-    ar = np.correlate(p, p, 'full')
-    n = len(p)
-    # Grab only the lag-one autocorrelation coeff.
-    ar = ar[n-1:(n+nlags)]/ar.max()
+    # Autocorrelation at lags 0..nlags, using the lagged correlation
+    # coefficient rather than the (unnormalised) full convolution
+    ar = np.array([1] + [np.corrcoef(p[:-i], p[i:])[0, 1]
+                         for i in range(1, nlags + 1)])
     return ar
 
diff --git a/Utilities/grid.py b/Utilities/grid.py
index 889a58df..afe3e4d8 100644
--- a/Utilities/grid.py
+++ b/Utilities/grid.py
@@ -256,7 +256,14 @@ def sampleGrid(self, lon, lat):
 
         :returns: Value of the nearest grid point to the given lon/lat point.
         """
-        indi = self.lon.searchsorted(lon)
+        indi = self.lon.searchsorted(numpy.mod(lon, 360.))
         indj = self.lat.searchsorted(lat)
-        return self.grid[indj, indi]
+        try:
+            value = self.grid[indj, indi]
+        except IndexError:
+            log.exception(f"Index is out of bounds for point ({lon}, {lat})")
+            log.exception(f"Bounds of grid are ({self.lon.min()} - {self.lon.max()}, "
+                          f"{self.lat.min()} - {self.lat.max()})")
+            raise
+        return value
diff --git a/Utilities/parallel.py b/Utilities/parallel.py
index 69094e59..72a82648 100755
--- a/Utilities/parallel.py
+++ b/Utilities/parallel.py
@@ -31,6 +31,9 @@ def __init__(self):
         self.tag = -1
         self.error = 0
 
+    def __call__(self):
+        pass
+
 class DummyCommWorld(object):
     """
     A dummy COMM_WORLD class that provides the bare
diff --git a/convergenceTest.py b/convergenceTest.py
index 91170a71..30d7d87d 100644
--- a/convergenceTest.py
+++ b/convergenceTest.py
@@ -30,7 +30,7 @@
 import sys
 import matplotlib
-matplotlib.use('Agg', warn=False) # Use matplotlib backend
+#matplotlib.use('Agg', warn=False) # Use matplotlib backend
 
 import database
 import numpy as np
@@ -46,7 +46,8 @@
 import random
 import seaborn as sns
-sns.set_context("notebook")
+sns.set_context("paper")
+figsize = (6.5, 4.5)
 sns.set_style("whitegrid")
 
@@ -81,7 +82,6 @@ def addARIGrid(axes):
     Add a logarithmic graticule to the subplot axes.
 
     :param axes: :class:`axes` instance.
""" - axes.xaxis.set_major_locator(LogLocator()) axes.xaxis.set_major_formatter(FormatStrFormatter('%d')) axes.xaxis.set_minor_locator(LogLocator(subs=[.1, .2, .3, .4, .5, .6, .7, .8, .9])) @@ -100,39 +100,42 @@ def addAEPGrid(axes): axes.grid(True, which='major', linestyle='-') axes.grid(True, which='minor', linestyle='--', linewidth=0.5) - +def calculateARI(data, years): + emprp = empReturnPeriod(np.sort(data)) + return np.sort(data)[-years:], emprp[-years:] + +def bootstrap(data, n=1000, q=[5, 95], years=10000): + d = np.empty((years, n)) + r = np.empty((years, n)) + for i in range(n): + subset = np.random.choice(data, int(len(data/2))) + d[:, i], r[:, i] = calculateARI(subset, years=10000) + return np.percentile(d, q, axis=1), np.percentile(r, q, axis=1) + def plotConvergenceTest(locName): locId = locations['locId'][locations['locName']==locName][0] locLon = locations['locLon'][locations['locId']==locId][0] locLat = locations['locLat'][locations['locId']==locId][0] - records = database.locationRecords(db, locId) + records = database.locationRecords(db, str(locId)) recs = records['wspd'][records['wspd'] > 0] data = np.zeros(int(NumSimulations*365.25)) data[-len(recs):] = recs sortedmax = np.sort(data) emprp = empReturnPeriod(data) - random.shuffle(data) - d1 = data[:int(len(data)/2)] - d2 = data[int(len(data)/2):] - sortedmax1 = np.sort(d1) - sortedmax2 = np.sort(d2) - emprp1 = empReturnPeriod(d1) - emprp2 = empReturnPeriod(d2) + dd, rr = bootstrap(data, n=100) + ep = 1./emprp - ep1 = 1./emprp1 - ep2 = 1./emprp2 - mn = (sortedmax1[emprp1 > 1] + sortedmax2[emprp2 > 1])/2. - delta = np.abs(sortedmax1[emprp1 > 1] - sortedmax2[emprp2 > 1]) + ep1 = 1./rr[0,:] + ep2 = 1./rr[1,:] + mn = dd.mean(axis=0) + delta = np.abs(np.diff(dd, axis=0)) fdelta = delta/mn - fig, ax1 = plt.subplots(1, 1) - ax1.semilogx(emprp[emprp > 1], sortedmax[emprp > 1], color='k', - label="Mean ARI") - ax1.semilogx(emprp2[emprp2> 1], sortedmax2[emprp2 > 1], color="#006983", - label="Convergence check 1") - ax1.semilogx(emprp1[emprp1> 1], sortedmax1[emprp1 > 1], color="#A33F1F", - label="Convergence check 2") + fig, ax1 = plt.subplots(1, 1, figsize=figsize) + + ax1.fill_between(rr[0,:], dd[1,:], dd[0,:], alpha=0.5, label="95th percentile") + ax1.plot(emprp[-10000:], data[-10000:], color='k', label="Mean ARI") ax1.set_xscale('log') xlabel = 'Average recurrence interval (years)' @@ -149,14 +152,9 @@ def plotConvergenceTest(locName): bbox_inches='tight') plt.close() - fig2, ax2 = plt.subplots(1, 1) - ax2.semilogy(sortedmax[emprp > 1], ep[emprp > 1], color="k", - label="Mean AEP") - - ax2.semilogy(sortedmax1[emprp1 > 1], ep1[emprp1 > 1], color="#006983", - label="Convergence check 1") - ax2.semilogy(sortedmax2[emprp2 > 1], ep2[emprp2 > 1], color="#A33F1F", - label="Convergence check 2") + fig2, ax2 = plt.subplots(1, 1, figsize=figsize) + ax2.semilogy(sortedmax[-10000:], ep[-10000:], color="k", label="Mean AEP") + ax2.fill_betweenx(1./rr[0,:], dd[0,:], dd[1,:], alpha=0.5, label="95th percentile") ax2.set_xlabel(ylabel) title = "AEP wind speeds at " + locName + \ " \n(%5.2f,%5.2f, n=%d)"%(locLon, locLat, len(recs)) @@ -169,7 +167,7 @@ def plotConvergenceTest(locName): plt.savefig(os.path.join(plotPath, "{0:05d}_AEP.png".format(locId)), bbox_inches='tight') plt.close() - +""" fig3, (ax3, ax4) = plt.subplots(2, 1, sharex=True) ax3.fill_between(emprp[emprp > 1][0:-1:2], fdelta, color="#006983", alpha=0.5) @@ -190,6 +188,7 @@ def plotConvergenceTest(locName): plt.savefig(os.path.join(plotPath, 
"{0:05d}_ARI_delta.png".format(locId)), bbox_inches='tight') plt.close() +""" # Run the next cell, then select a location from the dropdown list and # click the `"Run plotConvergenceTest"` button. This will take a # minute or so to run, as it needs to extract all the values from the @@ -205,48 +204,34 @@ def plotConvergenceTest(locName): 'Cairns Airport', 'Townsville Amo', 'Rockhampton Airport', 'Willis Island'] -for locName in locList: - #print(locName) - plotConvergenceTest(locName) +#for locName in locList: +# print(locName) +# plotConvergenceTest(locName) def plotConvergence(ax, locName): locId = locations['locId'][locations['locName']==locName][0] locLon = locations['locLon'][locations['locId']==locId][0] locLat = locations['locLat'][locations['locId']==locId][0] - records = database.locationRecords(db, locId) + records = database.locationRecords(db, str(locId)) recs = records['wspd'][records['wspd'] > 0] data = np.zeros(int(NumSimulations*365.25)) data[-len(recs):] = recs sortedmax = np.sort(data) emprp = empReturnPeriod(data) - random.shuffle(data) - d1 = data[:int(len(data)/2)] - d2 = data[int(len(data)/2+1):] - sortedmax1 = np.sort(d1) - sortedmax2 = np.sort(d2) - emprp1 = empReturnPeriod(d1) - emprp2 = empReturnPeriod(d2) - ep = 1./emprp - ep1 = 1./emprp1 - ep2 = 1./emprp2 - ax.semilogx(emprp[emprp > 1], sortedmax[emprp > 1], color='k', - label="Mean ARI") - ax.semilogx(emprp2[emprp2> 1], sortedmax2[emprp2 > 1], color="#006983", - label="Convergence check 1", ls='--') - ax.semilogx(emprp1[emprp1> 1], sortedmax1[emprp1 > 1], color="#A33F1F", - label="Convergence check 2", ls='--') + dd, rr = bootstrap(data, n=100) + ax.plot(emprp[-10000:], data[-10000:], color='k', label="Mean ARI") + ax.fill_between(rr[0,:], dd[1,:], dd[0,:], alpha=0.5, label="95th percentile") ax.set_xscale('log') + #xlabel = 'Average recurrence interval (years)' + #ylabel = 'Wind speed (m/s)' + title = "{0} (n={1:d})".format(locName, len(recs)) - xlabel = 'Average recurrence interval (years)' - ylabel = 'Wind speed (m/s)' - title = "{0} ({1:5.2f}E, {2:5.2f}S, n={3:d})".format(locName, locLon, locLat, len(recs)) - #ax.set_xlabel(xlabel) - #ax.set_ylabel(ylabel) ax.set_title(title) addARIGrid(ax) -fig, axes = plt.subplots(4, 2, figsize=(12,16), sharex=True, sharey=True) + +fig, axes = plt.subplots(4, 2, figsize=(6,8), sharex=True, sharey=True) axlist = axes.flatten() for i, loc in enumerate(locList): plotConvergence(axlist[i], loc) @@ -260,7 +245,6 @@ def plotConvergence(ax, locName): axlist[6].set_xlabel('Average recurrence interval (years)') axlist[7].set_xlabel('Average recurrence interval (years)') - fig.tight_layout() plt.savefig(os.path.join(plotPath, "ARI_convergence.png"), bbox_inches='tight') diff --git a/database/__init__.py b/database/__init__.py index 0dcc4c45..ca703441 100644 --- a/database/__init__.py +++ b/database/__init__.py @@ -55,6 +55,9 @@ from Utilities.parallel import attemptParallel, disableOnWorkers from Utilities.process import pAlreadyProcessed, pGetProcessedFiles from functools import reduce + +sqlite3.register_adapter(np.int64, lambda val: int(val)) +sqlite3.register_adapter(np.int32, lambda val: int(val)) log = logging.getLogger(__name__) log.addHandler(logging.NullHandler()) diff --git a/setup.py b/setup.py index 06b45669..64bd0a6b 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name = "TCRM", - version = '3.1.2', + version = '3.1.3', packages=find_packages(), scripts=['tcrm.py', 'tcevent.py'], include_package_data=True, @@ -50,7 +50,7 @@ # metadata: author = "Craig 
Arthur", - author_email = "craig.arthur@ga.gov.au", + author_email = "hazards@ga.gov.au", description = "Tropical Cyclone Risk Model", keywords = "Tropical cyclone risk hazard", url = "https://geoscienceaustralia.github.io/tcrm", diff --git a/tests/test_generateStats.py b/tests/test_generateStats.py index e7ff15f9..4d35d7f0 100644 --- a/tests/test_generateStats.py +++ b/tests/test_generateStats.py @@ -75,13 +75,13 @@ def test_generateStats(self): self.numpyAssertAlmostEqual(coeffs_sig_sample, coeffs_sig_test) coeffs_alpha_sample = wP.coeffs.alpha[0:10] - coeffs_alpha_test = numpy.array([0.8338471, 0.83574986, 0.87892804, 0.85010785, 0.85010785, - 0.85786992, 0.8830241, 0.86208779, 0.87815064, 0.88345714]) + coeffs_alpha_test = numpy.array([0.51818004, 0.55450803, 0.66634809, 0.61266186, 0.61266186, + 0.63192755, 0.70984709, 0.64836016, 0.69168147, 0.70101634]) self.numpyAssertAlmostEqual(coeffs_alpha_sample, coeffs_alpha_test) coeffs_phi_sample = wP.coeffs.phi[0:10] - coeffs_phi_test = numpy.array([0.55199548, 0.54911034, 0.4769544, 0.52660862, 0.52660862, - 0.51386691, 0.46932765, 0.50675895, 0.47838421, 0.46851199]) + coeffs_phi_test = numpy.array([0.85527156, 0.83217838, 0.74564081, 0.79034514, 0.79034514, + 0.77502747, 0.70435581, 0.76133376, 0.7222027 , 0.71314521]) self.numpyAssertAlmostEqual(coeffs_phi_sample, coeffs_phi_test) coeffs_min_sample = wP.coeffs.min[0:20] diff --git a/wind/__init__.py b/wind/__init__.py index 11b80a6b..f0c91972 100644 --- a/wind/__init__.py +++ b/wind/__init__.py @@ -390,8 +390,8 @@ def setGridLimit(self, track): """ - track_limits = {'xMin': 9999, 'xMax': - - 9999, 'yMin': 9999, 'yMax': -9999} + track_limits = {'xMin': 9999, 'xMax': -9999, + 'yMin': 9999, 'yMax': -9999} track_limits['xMin'] = min(track_limits['xMin'], track.Longitude.min()) track_limits['xMax'] = max(track_limits['xMax'], track.Longitude.max()) track_limits['yMin'] = min(track_limits['yMin'], track.Latitude.min()) @@ -698,8 +698,7 @@ def dumpGustsFromTrackfiles(self, trackfiles, windfieldPath, """ - tracks = loadTracksFromFiles(sorted(trackfiles)) - + tracks = loadTracksFromFiles(sorted(trackfiles), self.gridLimit, self.margin) self.dumpGustsFromTracks(tracks, windfieldPath, timeStepCallback=timeStepCallback) @@ -712,7 +711,7 @@ def calcLocalWindfield(self, results): # Load a multiplier file to determine the projection: # m4_max_file = pjoin(self.multipliers, 'm4_max.img') - log.info("Using M4 data from {0}".format(self.multipliers)) + log.info(f"Using M4 data from {self.multipliers}") for track, result in results: log.debug("Doing Multiplier for track {0:03d}-{1:05d}" @@ -728,8 +727,28 @@ def calcLocalWindfield(self, results): pM.processMult(gust, Vx, Vy, lon, lat, self.windfieldPath, self.multipliers) +def inRegion(t, gridLimit, margin): + """ + :param t: :class:`Track` object + + :returns: True if the track enters the simulation grid, False otherwise -def loadTracksFromFiles(trackfiles): + """ + xMin = gridLimit['xMin'] - margin + xMax = gridLimit['xMax'] + margin + yMin = gridLimit['yMin'] - margin + yMax = gridLimit['yMax'] + margin + return ((xMin <= t.Longitude.max()) and + (t.Longitude.min() <= xMax) and + (yMin <= t.Latitude.max()) and + (t.Latitude.min() <= yMax)) + +def filterTracks(tracks, gridLimit, margin): + if not (gridLimit is None): + validTracks = [t for t in tracks if inRegion(t, gridLimit, margin)] + return validTracks + +def loadTracksFromFiles(trackfiles, gridLimit, margin): """ Generator that yields :class:`Track` objects from a list of track filenames. 
@@ -746,9 +765,9 @@ def loadTracksFromFiles(trackfiles):
     path to the file.
     """
 
     for f in balanced(trackfiles):
-        msg = 'Calculating wind fields for tracks in %s' % f
+        msg = f'Calculating wind fields for tracks in {f}'
         log.info(msg)
-        tracks = loadTracks(f)
+        tracks = filterTracks(loadTracks(f), gridLimit, margin)
         for track in tracks:
             yield track
 
@@ -784,7 +803,7 @@ def loadTracksFromPath(path):
     """
     files = os.listdir(path)
     trackfiles = [pjoin(path, f) for f in files if f.startswith('tracks')]
-    msg = 'Processing {0} track files in {1}'.format(len(trackfiles), path)
+    msg = f'Processing {len(trackfiles)} track files in {path}'
     log.info(msg)
-    return loadTracksFromFiles(sorted(trackfiles))
+    return loadTracksFromFiles(sorted(trackfiles), None, 0)
 
@@ -873,7 +892,7 @@ def run(configFile, callback=None):
                            multipliers=multipliers,
                            windfieldPath=windfieldPath)
 
-    log.info('Dumping gusts to {0}'.format(windfieldPath))
+    log.info(f'Dumping gusts to {windfieldPath}')
 
     # Get the trackfile names and count
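
For reference, a minimal sketch of how the new track-filtering path fits together, assuming `loadTracks` and `filterTracks` are importable from the `wind` package as used above (the domain, margin, and track file name are illustrative assumptions, not values from this change set):

    from wind import loadTracks, filterTracks

    # Assumed example domain (degrees); the margin widens it slightly
    gridLimit = {'xMin': 90., 'xMax': 180., 'yMin': -40., 'yMax': 0.}
    tracks = filterTracks(loadTracks('tracks.00001.nc'), gridLimit, margin=2.0)
    for t in tracks:
        # Every retained track touches the (buffered) domain
        print(t.Longitude.min(), t.Longitude.max(),
              t.Latitude.min(), t.Latitude.max())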