In [None]:
# packages
import os
import sys
import numpy as np
from datetime import datetime
from dask.distributed import Client

In [None]:
# link to self-written packages
# The project checkout is not an installed package, so its root is added to
# sys.path. The membership check keeps the cell idempotent: re-running it does
# not pile up duplicate sys.path entries.
repo_dir = "/g/data/gb02/cj0591/hk25-AusCyclones" # change to your directory
if repo_dir not in sys.path:
    sys.path.append(repo_dir)
from utils.tools import write_to_filelist, clear_dir
from utils.nci_utils import get_GADI_ERA5_filename
from tempestextremes_utils.node_utils import run_detectNodes, run_stitchNodes

TempestExtremes allows parallel running with `mpi`

In [3]:
# Start a dask.distributed client with a fixed number of workers.
# NOTE(review): none of the visible cells use `client` directly - confirm it is
# needed alongside the MPI-based TempestExtremes runs below.
n_dask_workers = 108
client = Client(n_workers=n_dask_workers)

**Set directories**  
`csv_dir` for TempestExtremes stitchNode final output  
`log_dir` for log files  
`input_dir` for input era5 variable filelist  
`output_dir` for TempestExtremes detectNode output filelist  
`output_temp_dir` for temporary TempestExtremes detectNode output

In [None]:
# Root of the project checkout; every working directory below hangs off it
# (change to your directory).
base_dir = '/g/data/gb02/cj0591/hk25-AusCyclones'

# Working directories used by the rest of the notebook.
# NOTE(review): csv_dir is neither cleared nor created in the visible cells -
# confirm it exists before the StitchNodes step writes into it.
csv_dir = base_dir + '/csv'           # final StitchNodes output
log_dir = base_dir + '/log'           # log files
input_dir = base_dir + '/in'          # input filelist
output_dir = base_dir + '/out'        # output filelist
output_temp_dir = base_dir + '/temp'  # temporary DetectNodes output files

# Location of the TempestExtremes binaries, exported through the environment.
os.environ['TEMPESTEXTREMESDIR'] = '/g/data/gb02/tempestextremes/bin'

In [5]:
# WARNING: destructive - per the original author's note, this deletes everything
# in each of the listed directories. Double-check the paths before running.
for scratch_dir in (log_dir, input_dir, output_dir, output_temp_dir):
    clear_dir(scratch_dir)

**Required variables for TC detection**  

| Variable Name                 | Level (hPa)                       |
|-------------------------------|-----------------------------------|
| Elevation (zs)                | Surface                           |
| 10-m U-component Wind (u10)   | Surface                           |
| 10-m V-component Wind (v10)   | Surface                           |
| Mean Sea Level Pressure (msl) | Surface                           |
| Geopotential (z)              | 500, 300                          |

**Create lists for inputfile and outputfile**

Each line of the input file list consists of several files, separated by semicolons, that contain the geopotential (z) on pressure surfaces, mean sea level pressure (msl), 10-m zonal and meridional wind speeds (u10 and v10), and surface elevation (zs). Note that the surface elevation data can be found at `~/data/zs_era5_oper_sfc_invariant.nc`.

In [None]:
# Period to process; both bounds are inclusive.
year_sta = 1980
year_end = 2020

month_sta = 1
month_end = 12

# ZS: surface elevation is time-invariant, so the same file is used for every month.
zsfile = '/g/data/gb02/cj0591/hk25-AusCyclones/data/zs_era5_oper_sfc_invariant.nc' # change to your directory

# One entry per (year, month): a semicolon-separated group of input files and
# the matching DetectNodes output file.
infilenames_list = []
outfilenames_list = []
for year in range(year_sta, year_end+1):
    for month in range(month_sta, month_end+1):
        # Monthly ERA5 files are looked up by the first day of the month.
        month_start = datetime(year, month, 1)

        # u10
        u10file = get_GADI_ERA5_filename('10u',month_start,
                                         stream='hourly',level_type='single-levels')
        # v10
        v10file = get_GADI_ERA5_filename('10v',month_start,
                                         stream='hourly',level_type='single-levels')
        # msl
        mslfile = get_GADI_ERA5_filename('msl',month_start,
                                         stream='hourly',level_type='single-levels')
        # z
        zfile = get_GADI_ERA5_filename('z',month_start,
                                       stream='hourly',level_type='pressure-levels')

        infilenames_list.append(f"{zfile};{zsfile};{mslfile};{u10file};{v10file}")
        # zfile[-20:-3] takes the 17 characters before the last three of the name.
        # NOTE(review): presumably the 'YYYYMMDD-YYYYMMDD' stamp before '.nc' -
        # verify against the names returned by get_GADI_ERA5_filename.
        outfilenames_list.append(f"{output_temp_dir}/detectNode_TC_era5_{zfile[-20:-3]}.txt")

# Write each list once, after it is complete. Previously the writes sat inside
# the month loop, so both files were rewritten on every iteration.
write_to_filelist(infilenames_list,f'{input_dir}/detectNode_TC_input_era5_{str(year_sta)}-{str(year_end)}.txt')
write_to_filelist(outfilenames_list,f'{output_dir}/detectNode_TC_output_era5_{str(year_sta)}-{str(year_end)}.txt')

**Run TempestExtremes DetectNode**

DetectNode detects nodes  


Thresholds (`closedcontour_commands`) are applied: 

(a) `msl,200.0,5.5,0` requires that the mean sea level pressure increases by at least 200 Pa within a 5.5° great-circle distance (GCD) of the detected node;  


(b) `_DIFF(z(300millibars),z(500millibars)),-58.8,6.5,1.0` requires that the difference between the geopotential (Z) on the 300 and 500 hPa surfaces decreases by at least 58.8 m$^2$ s$^{-2}$ within a 6.5° GCD, using the maximum value of this field within 1° GCD as the reference. This ensures a coherent upper-level warm core attached to the detected surface low.


More details can be found in [Ullrich et al., 2021](https://gmd.copernicus.org/articles/14/5023/2021/)

In [8]:
# File lists for the DetectNodes run, named after the processed year range.
detect_input_list = f'{input_dir}/detectNode_TC_input_era5_{year_sta}-{year_end}.txt'
detect_output_list = f'{output_dir}/detectNode_TC_output_era5_{year_sta}-{year_end}.txt'

run_detectNodes(
    detect_input_list,   # inputfile list
    detect_output_list,  # outputfile list
    108,                 # cores used for mpi parallel running
    detect_var="msl",    # variable used to detect nodes
    merge_dist=6.0,      # merge distance (in GCD) for detected nodes that lie close to each other
    # closed-contour thresholds; see the markdown cell above for (a) and (b)
    closedcontour_commands="msl,200.0,5.5,0;_DIFF(z(300millibars),z(500millibars)),-58.8,6.5,1.0",
    # fields written out for each detected node
    output_commands="msl,min,0;_VECMAG(u10,v10),max,2.0;zs,min,0",
    timeinterval="6hr",
    lonname="longitude",
    latname="latitude",
    logdir=f"{log_dir}",
    quiet=True,
)

We can monitor the algorithm's progress through the log files under `log_dir`; the 1980–2020 TC detection took around 1.5 hrs with 108 cores.

**Run TempestExtremes StitchNode**

StitchNode connects detected nodes in time.  


Thresholds (`threshold_condition`) are applied:  

(a) `wind,>=,10.0,10` requires that the wind magnitude is greater than or equal to 10 m/s for at least 10 timesteps;  

(b) `lat,<=,50.0,10;lat,>=,-50.0,10` requires that the latitude of the detected nodes lies between 50°S and 50°N for at least 10 timesteps;  

(c) `zs,<,150,10` requires that the detected node exists below 150 m surface elevation for at least 10 timesteps.  


More details can be found in [Ullrich et al., 2021](https://gmd.copernicus.org/articles/14/5023/2021/)

In [9]:
# Run TempestExtremes StitchNode
# Input is the list of DetectNodes output files; output is a single CSV of tracks.
stitch_input_list = f"{output_dir}/detectNode_TC_output_era5_{year_sta}-{year_end}.txt"
stitch_output_csv = f"{csv_dir}/stitchNode_TC_output_era5_{year_sta}-{year_end}.csv"

run_stitchNodes(
    stitch_input_list,   # inputfile list
    stitch_output_csv,   # output file
    1,                   # cores used for mpi; StitchNode runs very fast with only one core
    output_filefmt="csv",                   # output format
    in_fmt_commands="lon,lat,msl,wind,zs",  # input format of the detectnode output
    range_dist=8.0,      # the maximum distance (in GCD) that a node can move between two timesteps
    minim_time="54h",    # the minimum lifetime of each track
    maxgap_time="24h",   # the maximum duration between two timesteps
    # the total distance from the start to the end of the trajectory
    # NOTE(review): passed as an empty string here - presumably disables this criterion; confirm
    min_endpoint_dist="",
    threshold_condition="wind,>=,10.0,10;lat,<=,50.0,10;lat,>=,-50.0,10;zs,<,150,10",  # threshold
    quiet=True,
)