# Select events from catalogue and save to individual file

__This is a Python3 notebook__

__Use the `py3` branch of [TCRM](https://github.com/geoscienceAustralia/tcrm/tree/py3) for the supporting modules__

The TCRM stochastic event catalogue contains records of the wind speed, track and distance of closest point of approach (CPA) for a set of over 400 locations, for all events in the event set. These records are stored in a relational database, which we can query using SQL statements and then map out. 

Using this database, we can identify events that pass within a given distance of a location, and which generate wind speeds at that location that meet some threshold (e.g. a category 3 wind speed). Usually, this will result in more than one event (depending on the distance and intensity criteria). 

This notebook provides a way to interact with the event database, select individual events from the catalogue and then save out the track file for subsequent, more detailed modelling. 

We start with the required modules for interrogating the database, plotting the data on maps, and saving the selected events to track files.

In [None]:
%matplotlib notebook
import database
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd
from math import cos, asin, sqrt
import os
import pickle
from copy import copy

from matplotlib.collections import LineCollection
from matplotlib.colors import BoundaryNorm, ListedColormap
from matplotlib.lines import Line2D

import cartopy.crs as ccrs
import cartopy.feature as feature
import shapely.geometry as sg

from netCDF4 import Dataset
from cftime import num2date

from Utilities.config import ConfigParser
from Utilities.track import ncReadTrackData, ncSaveTracks

import Utilities.shapefile as shapefile

# Import widgets for interactive notebook
from ipywidgets import interact, fixed, FloatSlider, Dropdown, interact_manual, Button, Layout
import ipywidgets as widgets

from IPython.display import display

import seaborn as sns
sns.set_context("poster")
sns.set_style("whitegrid")


In [None]:
def makeSegments(xx, yy):
    """
    Pair up consecutive (x, y) points into line segments.

    :param xx: sequence of x-coordinates
    :param yy: sequence of y-coordinates (same length as `xx`)

    :returns: array in which entry `i` holds the two end points
              `[[x_i, y_i], [x_i+1, y_i+1]]` of the i-th segment; for
              1-d inputs of length n the shape is (n - 1, 2, 2).
    """
    # One row per vertex, with an extra axis so that the start and end
    # vertices can be placed side by side below.
    vertices = np.array([xx, yy]).T[:, np.newaxis, :]
    starts, ends = vertices[:-1], vertices[1:]
    return np.hstack((starts, ends))

def colorline(ax, xdata, ydata, zdata=None, alpha=0.9):
    """
    Given a collection of x,y points and optionally magnitude
    values for each point, plot the data as a collection of
    coloured line segments. Line segments are added to the given
    :class:`matplotlib.axes` instance, and a legend describing the
    colour classes is (re)drawn on the same axes.

    .. note:: Currently, intervals are hard-coded
              for plotting the central pressure of TCs.
              [800, 920, 935, 950, 970, 985, 1050]

    :param ax: :class:`matplotlib.axes` instance on which to plot the line segments
    :param xdata: array of x-coordinates of points to plot
    :param ydata: array of y-coordinates of points to plot
    :param zdata: (optional) array of magnitude values of the points to inform colouring
    :param alpha: transparency of the lines (default=0.9)

    """
    colours = ['0.75', '#0FABF6', '#0000FF',
               '#00FF00', '#FF8100', '#ff0000']
    labels = ['No data', 'Category 1', 'Category 2',
              'Category 3', 'Category 4', 'Category 5']
    # Class boundaries for central pressure. The colour list is reversed
    # below so that the lowest pressure class gets the last colour/label
    # ('Category 5') and the highest pressure class gets 'No data'.
    intervals = [800, 920, 935, 950, 970, 985, 1050]

    cmap = ListedColormap(colours[::-1])
    norm = BoundaryNorm(intervals, cmap.N)
    lc = LineCollection(makeSegments(xdata, ydata), array=zdata,
                        cmap=cmap, norm=norm, alpha=alpha)

    # Legend entries follow the (reversed) colour map order.
    legend_labels = labels[::-1]
    handles = [Line2D([0], [0], color=colour, label=label)
               for colour, label in zip(cmap.colors, legend_labels)]

    ax.add_collection(lc)
    ax.legend(handles, legend_labels, loc=2, frameon=True, prop={'size': 10})

def distance(lat1, lon1, lat2, lon2):
    """
    Calculate the great-circle distance between two points on the
    Earth's surface using the haversine formula.

    :param lat1: latitude of the first point (degrees)
    :param lon1: longitude of the first point (degrees)
    :param lat2: latitude of the second point (degrees)
    :param lon2: longitude of the second point (degrees)

    :returns: distance between the points, scaled by the constant 12742
              (presumably the Earth's mean diameter in km, i.e. the
              result is in kilometres).
    """
    deg2rad = 0.017453292519943295  # pi / 180
    dlat = (lat2 - lat1) * deg2rad
    dlon = (lon2 - lon1) * deg2rad
    hav = (0.5 - np.cos(dlat) / 2
           + np.cos(lat1 * deg2rad) * np.cos(lat2 * deg2rad)
           * (1 - np.cos(dlon)) / 2)
    return 12742 * np.arcsin(np.sqrt(hav))

def loadTrack(trackId):
    """
    Load a single track from the track file that contains it.

    The second component of the id selects the file
    (`<outputPath>/tracks/tracks.NNNNN.nc`); the track whose `trackId`
    attribute equals the (number, year) pair is then picked from that file.

    :param str trackId: A track id code that looks like "xxx-xxxxx"

    :returns: A :class:`Track` object containing the track data
    :raises IndexError: if the file holds no track with a matching id
    """
    parts = trackId.split('-')
    key = (int(parts[0]), int(parts[1]))

    fileName = 'tracks.{0:05d}.nc'.format(key[1])
    trackFile = os.path.join(outputPath, 'tracks', fileName)

    matches = [trk for trk in ncReadTrackData(trackFile)
               if trk.trackId == key]
    return matches[0]

def getTracks(recs):
    """
    Load the track for every event record in `recs`.

    :param recs: An iterable of records, each with an 'eventId' field
                 holding the track id code

    :returns: A list of :class:`Track` objects, in the same order as `recs`.
    """
    return [loadTrack(rec['eventId']) for rec in recs]

def plot_tracks(gridLimit, tracks):
    """
    Draw every track in `tracks` on a new map figure, coloured by
    central pressure.

    .. note:: When the eastern edge of the domain lies beyond 180E the
              map limits and the track longitudes are all shifted by
              -360 degrees, to work around problems in the `cartopy`
              routines for areas that cross the dateline.

    :param dict gridLimit: Extent of the map figure to plot, with keys
                           'xMin', 'xMax', 'yMin' and 'yMax'.
    :param list tracks: a list of :class:`Track` objects to plot.

    :returns: the figure and the map axes, as a `(fig, ax)` tuple.
    """
    fig = plt.figure(figsize=(8,6))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines(resolution='10m', color='black', linewidth=1)
    ax.add_feature(feature.BORDERS)

    # NOTE(review): newer cartopy releases appear to name these attributes
    # `top_labels` / `right_labels` - confirm against the installed version.
    gl = ax.gridlines(linestyle=":", draw_labels=True)
    gl.xlabels_top = False
    gl.ylabels_right = False

    crossesDateline = gridLimit['xMax'] > 180
    if crossesDateline:
        xlim = (gridLimit['xMin']-360, gridLimit['xMax']-360)
    else:
        xlim = (gridLimit['xMin'], gridLimit['xMax'])
    ax.set_xlim(xlim)
    ax.set_ylim((gridLimit['yMin'], gridLimit['yMax']))

    for track in tracks:
        lons = track.Longitude-360. if crossesDateline else track.Longitude
        colorline(ax, lons, track.Latitude, track.CentralPressure)
    return fig, ax


def filter_events(events, category, distance, locpt, locName):
    """
    Filter a set of events based on intensity and distance from the location.

    :param events: structured array of event records with a 'wspd' field
                   (and the 'eventId' field needed to load the tracks)
    :param category: wind speed threshold; only events with 'wspd'
                     strictly greater than this are kept
    :param distance: maximum distance of a track from `locpt`
    :param locpt: point geometry of the location
    :param str locName: name of the location (used in the printed messages)

    :returns: tuple of (data frame of all events exceeding the wind speed
              threshold, list of :class:`Track` objects that also pass
              within `distance` of the location). Note that the data frame
              is not reduced by the distance criterion.
    """
    extreme_events = []
    for event in events:
        if event['wspd'] > category:
            extreme_events.append(event)

    windMessage = ("There are {0} events that generate wind "
                   "speeds greater than {1} m/s at {2}")
    print(windMessage.format(len(extreme_events), category, locName))

    nearby = []
    for trk in getTracks(extreme_events):
        if trk.minimumDistance([locpt]) < distance:
            nearby.append(trk)

    extremedf = pd.DataFrame.from_records(extreme_events,
                                          columns=events.dtype.names)
    distMessage = "There are {0} events that pass within {1} km of {2}"
    print(distMessage.format(len(nearby), distance, locName))
    return extremedf, nearby

# Attribute table layout used when writing track data to a shape file.
# Each entry of TCRMFIELDS is a [name, type, width, precision] list, built
# from the four parallel tuples below (one element per field).
TCRMFIELD_NAMES = (
    'CycloneNumber', 'TimeElapsed',
    'Longitude', 'Latitude', 'Speed', 'Bearing',
    'pCentre', 'pEnv', 'rMax',
    'Year', 'Month', 'Day', 'Hour', 'Minute',
)
# Every field uses the same type code.
TCRMFIELD_TYPES = tuple("N" for _ in TCRMFIELD_NAMES)
TCRMFIELD_WIDTH = (2, 6, 7, 7, 6, 6, 7, 7, 6, 4, 2, 2, 2, 2)
TCRMFIELD_PRECS = (0, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)

TCRMFIELDS = [list(spec) for spec in zip(TCRMFIELD_NAMES,
                                         TCRMFIELD_TYPES,
                                         TCRMFIELD_WIDTH,
                                         TCRMFIELD_PRECS)]

def recdropfields(rec, names):
    """
    Return a copy of a structured array without the named fields.

    :param rec: structured :class:`numpy.ndarray` (or record array)
    :param names: iterable of field names to leave out; names that are
                  not fields of `rec` are ignored

    :returns: a new :class:`numpy.recarray` with the same shape as `rec`
              holding the remaining fields, in their original order.
    """
    unwanted = frozenset(names)
    keptFields = [(name, rec.dtype[name]) for name in rec.dtype.names
                  if name not in unwanted]

    trimmed = np.recarray(rec.shape, dtype=np.dtype(keptFields))
    for name in trimmed.dtype.names:
        trimmed[name] = rec[name]

    return trimmed

def tracks2line(tracks, outputFile, dissolve=False):
    """
    Writes tracks to a shapefile as a collection of line features

    If dissolve==True, then each track feature is written as a
    single polyline feature, otherwise each track segment is
    stored as a separate feature.

    .. note:: This function modifies its input: the `data` attribute of
              every track is replaced by a copy without the 'Datetime'
              field.

    :type  tracks: list of :class:`Track` objects
    :param tracks: :class:`Track` features to store in a shape file

    :type  outputFile: str
    :param outputFile: Path to output file destination

    :type  dissolve: boolean
    :param dissolve: Store track features or track segments.

    :raises: :mod:`shapefile.ShapefileException` if there is an error
             when attempting to save the file.
    """
    sf = shapefile.Writer(shapefile.POLYLINE)
    sf.fields = TCRMFIELDS

    for track in tracks:
        # 'Datetime' is not one of the TCRMFIELDS attributes, so it is
        # removed before whole data rows are written as records below.
        track.data = recdropfields(track.data, ['Datetime'])
        if dissolve:
            # One polyline feature (possibly multi-part) per track.
            if len(track.data) > 1:
                dlon = np.diff(track.Longitude)
                if dlon.min() < -180:
                    # Track crosses 0E longitude - split track
                    # into multiple parts:
                    # The split is made at the largest negative jump in
                    # longitude. The point at index `idx` itself ends up
                    # in neither part ([:idx] and [idx+1:]).
                    idx = np.argmin(dlon)
                    parts = []
                    lines = zip(track.Longitude[:idx],
                                 track.Latitude[:idx])

                    parts.append(lines)
                    lines = zip(track.Longitude[idx+1:],
                                 track.Latitude[idx+1:])

                    parts.append(lines)
                    sf.line(parts)
                else:
                    lines = zip(track.Longitude, track.Latitude)
                    sf.line([lines])
            else:
                lines = zip(track.Longitude, track.Latitude)
                sf.line([lines])


            # Summary attributes for the whole track.
            minPressure = track.trackMinPressure
            maxWind = track.trackMaxWind

            age = track.TimeElapsed.max()

            startYear = track.Year[0]
            startMonth = track.Month[0]
            startDay = track.Day[0]
            startHour = track.Hour[0]
            startMin = track.Minute[0]
            # NOTE(review): this record has 9 values, while `sf.fields`
            # (TCRMFIELDS) declares 14 per-point fields - the dissolve
            # branch looks inconsistent with the field definition; confirm
            # before using dissolve=True.
            record = [track.CycloneNumber[0], startYear, startMonth, startDay,
                      startHour, startMin, age, minPressure, maxWind]
            sf.record(*record)

        else:
            # One feature per track segment, each carrying the data row of
            # the segment's starting point as its record.
            if len(track.data) == 1:
                # Single-point track: a line from the point to itself.
                line = [[[track.Longitude, track.Latitude],
                        [track.Longitude, track.Latitude]]]
                sf.line(line)
                sf.record(*track.data[0])
            else:
                for n in range(len(track.data) - 1):
                    dlon = track.Longitude[n + 1] - track.Longitude[n]
                    if dlon < -180.:
                        # case where the track crosses 0E:
                        # write a zero-length segment at point n rather
                        # than a line spanning the whole map.
                        segment = [[[track.Longitude[n], track.Latitude[n]],
                                    [track.Longitude[n], track.Latitude[n]]]]
                    else:
                        segment = [[[track.Longitude[n],
                                     track.Latitude[n]],
                                    [track.Longitude[n + 1],
                                     track.Latitude[n + 1]]]]
                    sf.line(segment)
                    sf.record(*track.data[n])

                # Last point in the track:
                # (zero-length segment so the final data row is also stored)
                sf.line([[[track.Longitude[n + 1],
                           track.Latitude[n + 1]],
                              [track.Longitude[n + 1],
                               track.Latitude[n + 1]]]])
                sf.record(*track.data[n+1])

    try:
        sf.save(outputFile)
    except shapefile.ShapefileException:
        # Report which file failed, then let the caller see the error.
        print("Cannot save shape file: {0}".format(outputFile))
        raise
        
        
def add_field(a, descr):
    """
    Return a copy of a structured array with extra fields appended.

    :param a: structured :class:`numpy.ndarray`
    :param list descr: dtype description of the fields to add, as a list
                       of `(name, type)` tuples

    :returns: a new array with the shape of `a`, holding all fields of
              `a` followed by the new fields. The values of the new
              fields are uninitialised.
    :raises ValueError: if `a` is not a structured array
    """
    if a.dtype.fields is None:
        raise ValueError( "`A' must be a structured numpy array")

    extended = np.empty(a.shape, dtype=a.dtype.descr + descr)
    for existing in a.dtype.names:
        extended[existing] = a[existing]
    return extended

def selectTrack(tracks, Lon, Lat):
    """
    Pick the track that passes closest to a clicked map position and
    return its record(s) from the table of selected events.

    Closeness is the smallest sum of absolute longitude and latitude
    differences (in degrees) between the clicked position and any point
    of the track. When the map domain extends beyond 180E (global
    `gridLimit`), track longitudes are shifted by -360 degrees to match
    the coordinates used when the tracks were plotted.

    .. note:: Relies on the notebook globals `gridLimit` and `extremedf`.

    :param list tracks: :class:`Track` objects shown on the map
    :param float Lon: longitude of the clicked position (map coordinates)
    :param float Lat: latitude of the clicked position

    :returns: the rows of `extremedf` whose 'eventId' matches the
              selected track.
    :raises ValueError: if `tracks` is empty
    """
    if len(tracks) == 0:
        raise ValueError("No tracks to select from - run the track "
                         "selection query first")

    # The domain check does not depend on the track, so evaluate it once.
    crossesDateline = gridLimit['xMax'] > 180

    diffs = []
    for track in tracks:
        lons = track.Longitude - 360. if crossesDateline else track.Longitude
        diffs.append(min(abs(Lon - lons) + abs(Lat - track.Latitude)))
    ix = diffs.index(min(diffs))

    # Rebuild the "xxx-xxxxx" event id used in the database records.
    trackNum, trackYear = tracks[ix].trackId
    eventid = '{0}-{1}'.format(str(trackNum).zfill(3),
                               str(trackYear).zfill(5))

    return extremedf[extremedf['eventId'] == eventid]

### Configuring the notebook

The configuration file is needed to point to the location of the database file, and the corresponding collection of track files (in their default netCDF format). 

The configuration file will have a section `[Output]` and an option `Path`. This value needs to point to the location that you've stored both the database file, and the tracks as a subfolder. 

Edit the `working_dir` variable to point to the location where you have stored the configuration file used. 

In [None]:
# Folder that holds the TCRM configuration file - edit this to suit your
# own machine.
working_dir = 'C:/WorkSpace/data/tcha'
# Full path of the configuration file read by the following cells.
configFile = os.path.join(working_dir, 'tcrm2.1.ini')

Now we open a connection to the database, and retrieve a list of locations, as well as get the output path from the configuration file. 

You may get a warning "Couldn't open dat file". This message can be safely ignored.

In [None]:
# Read the TCRM configuration file.
config = ConfigParser()
config.read(configFile)

# Open the hazard database described by the same configuration file and
# fetch the table of locations it holds.
db = database.HazardDatabase(configFile)
locations = db.getLocations()
# Location names, used to populate the location drop-down list.
locNameList = list(locations['locName'])
# Base output folder; track files are read from its 'tracks' subfolder.
outputPath = config.get('Output', 'Path')
# Model domain; used below as a dict with 'xMin', 'xMax', 'yMin', 'yMax'.
gridLimit = config.geteval('Region', 'gridLimit')

In [None]:
# Minimum wind speed (m/s) an event must exceed at the location to be
# selected for each TC category; keys run from 'Category 1' to 'Category 5'.
TC_thresholds = {
    'Category {0}'.format(rank): speed
    for rank, speed in enumerate((30, 40, 50, 70, 88), start=1)
}

In [None]:
# Widgets for choosing the location, search distance and minimum intensity.
style = {'description_width': 'initial'}
locDropDown = Dropdown(options=locNameList, description="Location")
distSlider = FloatSlider(value=50, min=5, max=200, step=5, description="Distance (km)", style=style)
catDropDown = Dropdown(options=sorted(TC_thresholds.keys()), description="Intensity")

# Output area shown beneath the button; the query messages are written here.
selectorOut = widgets.Output()

def selTrackButton(_):
    """
    Button callback: query the database for the chosen location and keep
    the events that meet the intensity and distance criteria.

    The results are stored in the notebook globals `tracks` and
    `extremedf`, together with the selection itself (`locpt`, `locName`,
    `distance`, `category`), for use by the following cells.

    :param _: the button instance passed in by ipywidgets (unused)
    """
    # NOTE(review): the global `distance` set here replaces the
    # `distance()` helper function defined earlier in the notebook. The
    # name is kept because later cells read it.
    global tracks, extremedf, locpt, locName, distance,category

    # Run inside the output widget so that the messages printed by
    # filter_events (and any error traceback) appear beneath the button
    # instead of being lost.
    with selectorOut:
        selectorOut.clear_output()

        locName = locDropDown.value
        distance = distSlider.value
        category = catDropDown.value

        locId = locations['locId'][locations['locName']==locName][0]
        locLon = locations['locLon'][locations['locId']==locId][0]
        locLat = locations['locLat'][locations['locId']==locId][0]

        events = database.locationRecords(db, str(locId))
        locpt = sg.Point(locLon, locLat)

        extremedf, tracks = filter_events(events, TC_thresholds[category],
                                          distance, locpt, locName)

selectorButton = Button(description="Select tracks from database",
                        tooltip=("Select a location, distance and minimum category, "
                                 "then run the query"),
                        layout=Layout(width='40%'))

selectorButton.on_click(selTrackButton)
# display  button and its output together
widgets.VBox([locDropDown, distSlider, catDropDown, selectorButton, selectorOut])

### Selecting one track from the collection of all events that pass close to the location 

On running this next cell, a map will be displayed with the tracks of all events that meet the criteria set above. The map is interactive, so you can zoom in and move the map around easily. 

In [None]:
# Map of all selected tracks, with the chosen location marked in black.
fig, ax = plot_tracks(gridLimit, tracks)  
ax.scatter(locpt.x, locpt.y, color='k', s=20)
ax.set_title("{0} events passing within {1} km of {2}".format(category.capitalize(),
                                                           distance, locName), fontsize='small')
w = widgets.HTML("Zoom in and click on the track you would like to identify")
x = widgets.Output()

def onclick(event):
    """
    Mouse click callback: remember the clicked map position in the
    globals `Lon` and `Lat` and show it beneath the figure.

    :param event: the matplotlib mouse event
    """
    global Lon, Lat
    # Clicks outside the map axes carry no data coordinates; ignore them
    # so a previously selected position is not overwritten with None.
    if event.xdata is None or event.ydata is None:
        return
    Lon, Lat = event.xdata, event.ydata
    w.value = 'Lon: {}, Lat: {}'.format(Lon, Lat) 

cid = fig.canvas.mpl_connect('button_press_event', onclick)
display(w)

Using the interactive map, zoom in and click on a track. The coordinates of where you clicked will appear beneath the figure, and these will be used to select the track from all those in the figure, by running the next cell. This will display the location details, and the event id number.

In [None]:
# Look up the event whose track passes closest to the last clicked
# position (Lon, Lat are set by clicking on the map above).
mytrack = selectTrack(tracks, Lon, Lat)
# Display the selected record(s) as the cell output.
mytrack

### Plotting the regional wind field

Now we have the track identifier, we can load up the wind field from the remote server, using the services made available on the NCI. All we need to provide is the file path, as a URL, to the netCDF4-python `Dataset` constructor, and we can access the data directly. 

[Note: There can be problems accessing these services from behind firewalls. This notebook has been tested in situations where the machine is behind a firewall, but not all situations have been fully tested. Please let the authors know of any issues, so we can document them.]

In [None]:
remote_dir = "http://dapds00.nci.org.au/thredds/dodsC/fj6/TCRM/TCHA18/"
# Strip the trailing slash so the URL does not contain '//' before 'windfield'.
gust_filename = '{0}/windfield/gust.{1}.nc'.format(remote_dir.rstrip('/'),
                                                   str(mytrack['eventId'].values[0]))
try:
    nc = Dataset(gust_filename)
except IOError as err:
    print(err)
    print("{0} does not exist".format(gust_filename))
    # Nothing below can work without the dataset, so stop here rather than
    # failing later with an unrelated NameError.
    raise

# Pull the arrays into memory, then release the remote connection.
try:
    lat = nc.variables['lat'][:]
    lon = nc.variables['lon'][:]
    vmax = nc.variables['vmax'][:]
finally:
    nc.close()

# Track of the selected event, drawn on top of the wind field.
eventTrack = getTracks(mytrack.to_records())[0]

palette = [(1.000, 1.000, 1.000), (0.000, 0.627, 0.235), (0.412, 0.627, 0.235), 
           (0.663, 0.780, 0.282), (0.957, 0.812, 0.000), (0.925, 0.643, 0.016), 
           (0.835, 0.314, 0.118), (0.780, 0.086, 0.118)]
cmap = sns.blend_palette(palette, as_cmap=True)

fig = plt.figure(figsize=(10,6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines(resolution='10m', color='grey', linewidth=1)
ax.add_feature(feature.BORDERS)
plt.scatter(locpt.x, locpt.y, color='k', s=100, zorder=100)
colorline(ax, eventTrack.Longitude, eventTrack.Latitude, eventTrack.CentralPressure)
gl = ax.gridlines(linestyle=":", draw_labels=True)
gl.xlabels_top = False
gl.ylabels_right = False
# Filled contours every 5 m/s from 10 to 75 m/s; higher values share the
# top colour (extend='max').
levels = np.arange(10,80,5)
ax.set_xlim((gridLimit['xMin'], gridLimit['xMax']))
ax.set_ylim((gridLimit['yMin'], gridLimit['yMax']))
plt.contourf(lon, lat, vmax, levels, cmap=cmap, transform=ccrs.PlateCarree(), extend='max')
plt.title('Modelled max gust windfield for {0} event {1}, near {2}'.\
          format(category, str(mytrack['eventId'].values[0]), 
                 str(mytrack['locName'].values[0])), size=12)
ax.set_xlabel('Longitude', fontsize = 10)
xlabel = ax.get_xticklabels()
ax.set_xticklabels(xlabel,fontsize='x-small')
ax.set_ylabel('Latitude', fontsize = 10)
ax.tick_params(labelsize=16)
cl = plt.colorbar(ax=ax, label="Windspeed (m/s)")
cl.ax.tick_params(labelsize=14)

plt.show()

### Saving track data

Now we want to save the individual event so we can do more detailed modelling. First, we save the track to a TCRM track file. This is a netCDF format file, which we can read back into the `tcevent.py` script to run at high resolution. The track file is normally saved to the local working directory (i.e. where you saved this notebook). 

The next step is to save the track to a shape file so we can display in GIS applications (QGIS, ArcMAP, etc). This is a customised version of the code I've got in the TCRM code base.

__Only click the button once!__ In the process of saving the file, the underlying data is altered, and some of the fields that are expected in the function to save to a netCDF file are removed when we save to a shape file.

In [None]:
button = Button(description="Save track file",
                tooltip=("Save track data to a netCDF file for use in TCRM, "
                         "and a shape file for use in GIS applications"),
                layout=Layout(width='40%'))
# Output area shown beneath the button; progress messages are written here.
out = widgets.Output()

def button_click(_):
    """
    Button callback: save the selected event track to disk.

    Three files are written to the current working directory, named after
    the event id: a TCRM netCDF track file (`track.<id>.nc`), a line
    shape file (`track.<id>.line.shp`) and the matching projection file
    (`track.<id>.line.prj`).

    Preparing the shape file adds date fields to, and removes the
    'Datetime' field from, `eventTrack.data`. The original data array is
    put back when the function finishes, so the button can be used again.

    :param _: the button instance passed in by ipywidgets (unused)
    """
    eventId = str(mytrack['eventId'].values[0])
    workDir = os.getcwd()
    timeUnits = 'hours since 1900-01-01 00:00'
    originalData = eventTrack.data

    # Run inside the output widget so that the messages (and any error
    # traceback) appear beneath the button.
    with out:
        try:
            scenarioTrackFile = os.path.join(workDir,
                                             'track.{0}.nc'.format(eventId))
            print("Saving track data to {0}".format(scenarioTrackFile))
            # NOTE(review): the track file is written with a 'julian'
            # calendar while the date fields below are decoded with the
            # 'standard' calendar - confirm this is intended.
            ncSaveTracks(scenarioTrackFile, [eventTrack], calendar='julian')

            # Add date/time fields if not already present in the track data
            if 'Year' not in eventTrack.data.dtype.names:
                eventTrack.data = add_field(eventTrack.data,
                                            [('Year', int), ('Month', int),
                                             ('Day', int), ('Hour', int),
                                             ('Minute', int)])
            for rec in eventTrack.data:
                # Decode the time stamp once per record.
                when = num2date(rec['Datetime'], timeUnits, 'standard')
                rec['Year'] = when.year
                rec['Month'] = when.month
                rec['Day'] = when.day
                rec['Hour'] = when.hour
                rec['Minute'] = when.minute

            line_shpfile = os.path.join(workDir,
                                        'track.{0}.line.shp'.format(eventId))
            line_prjfile = os.path.join(workDir,
                                        'track.{0}.line.prj'.format(eventId))
            print("Saving track data to {0}".format(line_shpfile))
            tracks2line([eventTrack], line_shpfile)

            # Well-known text strings must be quoted with double quotes
            # to be recognised by GIS applications.
            prjwkt = ('GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",'
                      'SPHEROID["GRS_1980",6378137.0,298.257222101]],'
                      'PRIMEM["Greenwich",0.0],'
                      'UNIT["Degree",0.0174532925199433]]')
            with open(line_prjfile, 'w') as prjfh:
                prjfh.write(prjwkt)
        finally:
            # Undo the changes made for the shape file export.
            eventTrack.data = originalData

button.on_click(button_click)
# displaying button and its output together
widgets.VBox([button,out])