In [None]:
# packages
import os
import sys
import numpy as np
from datetime import datetime
from dask.distributed import Client

In [None]:
# link to self-written packages
# The project root is appended to the import path before importing the
# project-local `utils` and `tempestextremes_utils` packages below.
sys.path.append("/g/data/gb02/cj0591/hk25-AusCyclones") # change to your directory
from utils.tools import write_to_filelist, clear_dir
from utils.nci_utils import get_GADI_ERA5_filename
from tempestextremes_utils.node_utils import run_detectNodes, run_stitchNodes

TempestExtremes allows parallel running with `mpi`

In [3]:
# Start the Dask distributed client; the worker count lives in one named
# value so it can be adjusted to the resources of the current job.
n_dask_workers = 56
client = Client(n_workers=n_dask_workers)

**Set directories**  
`csv_dir` for TempestExtremes stitchNode final output  
`log_dir` for log files  
`input_dir` for input era5 variable filelist  
`output_dir` for TempestExtremes detectNode output filelist  
`output_temp_dir` for temporary TempestExtremes detectNode output

In [None]:
# Root of the project on disk (change to your directory)
base_dir = "/g/data/gb02/cj0591/hk25-AusCyclones"

# Working directories, all located directly under base_dir
# final StitchNode output
csv_dir = f"{base_dir}/csv"
# log files
log_dir = f"{base_dir}/log"
# input filelist
input_dir = f"{base_dir}/in"
# output filelist
output_dir = f"{base_dir}/out"
# temporary location for the output files
output_temp_dir = f"{base_dir}/temp"

# Location of the TempestExtremes binaries, exported as an environment variable
os.environ["TEMPESTEXTREMESDIR"] = "/g/data/gb02/tempestextremes/bin"

In [17]:
# DANGER: be very careful with this cell - everything inside each of the
# directories listed below will be deleted!!!
# NOTE(review): output_temp_dir is not in this list - confirm that is intentional.
for dir_to_clear in (csv_dir, log_dir, input_dir, output_dir):
    clear_dir(dir_to_clear)

**Required variables for ETC detection**  

| Variable Name                 | Level (hPa)                       |
|-------------------------------|-----------------------------------|
| Mean Sea Level Pressure (msl) | Surface                           |

**Create lists for inputfile and outputfile**

The input file list consists of several ERA5 files (one per month) containing mean sea level pressure (msl).

In [5]:
# Period to process (the loops below include both end points)
year_sta = 1980
year_end = 2020

month_sta = 1
month_end = 12

# Filelists handed to DetectNode: one input entry and one matching temporary
# output entry per month
input_filelist = f'{input_dir}/detectNode_ETC_input_era5_{year_sta}-{year_end}.txt'
output_filelist = f'{output_dir}/detectNode_ETC_output_era5_{year_sta}-{year_end}.txt'

infilenames_list = []
outfilenames_list = []
for year in range(year_sta, year_end + 1):
    for month in range(month_sta, month_end + 1):
        # msl
        mslfile = get_GADI_ERA5_filename('msl', datetime(year, month, 1),
                                         stream='hourly', level_type='single-levels')

        infilenames_list.append(f"{mslfile}")
        # mslfile[-20:-3] drops the last 3 characters and keeps the 17 before
        # them - presumably the file's date span without the extension;
        # TODO confirm against get_GADI_ERA5_filename.
        outfilenames_list.append(f"{output_temp_dir}/detectNode_ETC_era5_{mslfile[-20:-3]}.txt")

# Write each filelist exactly once, after every month has been collected.
# Writing inside the loop would re-emit the whole (growing) list on every
# iteration.
write_to_filelist(infilenames_list, input_filelist)
write_to_filelist(outfilenames_list, output_filelist)

**Run TempestExtremes DetectNode**

DetectNode detects candidate cyclone centres (nodes) in the `msl` field.  


Thresholds (`closedcontour_commands`) are applied: 

(a) `msl,200.0,5.5,0` means that mean sea level pressure must increase by 200 Pa within a 5.5° great circle distance (GCD) of the detected node;  


More details can be found in [Ullrich et al., 2021](https://gmd.copernicus.org/articles/14/5023/2021/)

In [6]:
# Filelists for this DetectNode run
detect_in_list = f'{input_dir}/detectNode_ETC_input_era5_{year_sta}-{year_end}.txt'
detect_out_list = f'{output_dir}/detectNode_ETC_output_era5_{year_sta}-{year_end}.txt'

run_detectNodes(
    detect_in_list,   # inputfile list
    detect_out_list,  # outputfile list
    56,               # cores used for the mpi parallel run
    # variable used to detect nodes
    detect_var="msl",
    # detected nodes within 6.0 great circle distance (GCD) of each other are merged
    merge_dist=6.0,
    # closed-contour threshold: msl must increase by 200.0 over a GCD of 5.5
    closedcontour_commands="msl,200.0,5.5,0",
    output_commands="msl,min,0",
    timeinterval="6hr",
    lonname="longitude",
    latname="latitude",
    logdir=f"{log_dir}",
    quiet=True,
)

We can monitor the algorithm's progress through the log files under `log_dir`.

**Run TempestExtremes StitchNode**

StitchNode connects detected nodes in time.  


No thresholds (`threshold_condition`) are applied here.  


More details can be found in [Ullrich et al., 2021](https://gmd.copernicus.org/articles/14/5023/2021/)

In [7]:
# Run TempestExtremes StitchNode
stitch_in_list = f"{output_dir}/detectNode_ETC_output_era5_{year_sta}-{year_end}.txt"
stitch_out_csv = f"{csv_dir}/stitchNode_ETC_output_era5_{year_sta}-{year_end}.csv"

run_stitchNodes(
    stitch_in_list,  # inputfile list
    stitch_out_csv,  # output file
    1,               # cores for mpi; StitchNode runs very fast with only one core
    # output format
    output_filefmt="csv",
    # input format of the DetectNode output
    in_fmt_commands="lon,lat,msl",
    # the maximum distance (in GCD) that a node can move between two timesteps
    range_dist=6.0,
    # the minimum lifetime of each track
    minim_time="60h",
    # the maximum duration between two timesteps
    maxgap_time="18h",
    # the total distance from the start to the end of the trajectory
    min_endpoint_dist=12.0,
    # threshold (empty: none applied)
    threshold_condition="",
    quiet=True,
)