# Tempest Extreme experiment with NextGEMS data

This is trying to work with the intake catalog and using the regridder. Open the catalog and check that everything works correctly. Import functions we created ad-hoc for TCs


In [1]:
import sys
sys.path.append('../../')
import os
from datetime import datetime, timedelta
from glob import glob

from functionsTCs import *

from aqua.reader import catalogue
catalogue(catalog_file='../../config/catalog.yaml');

IFS	tco3999-ng5	2.8km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
	- interpolated_sp_ci	
IFS	tco2559-ng5	4km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
IFS	tco1279-orca025	9km baseline, coupled to NEMO, deep conv ON
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	

FESOM	tco3999-ng5	2.5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- np	nearest-neighbor interpolation to lat-lon grid
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	original 2d output
	- original_3d	original 3d output
FESOM	tco2559-ng5	5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	orig

Load the data from the intake catalog - regridding to a regular grid for data to be tracked (r100) and for high-resolution (r010, since we cannot easily work with gaussian reduced) and the apply the detection from TempestExtremes in order to filter the high resolution data with the regions in the surroundings of the cyclone

In [2]:

# path to input directory
regdir='/home/b/b382216/scratch/regrid_intake'
tmpdir='/home/b/b382216/scratch/tmpdir_intake'
fulldir='/home/b/b382216/scratch/fullres'

# dimension of the box to be saved
boxdim=10

# resolution for high and low data
lowgrid='r100'
highgrid='r100'

# variables to be stored
varlist = ['psl', 'uas', 'vas', 'pr']
#varlist = ['pr']

# dicitonary with the original filenames
original_dictionary = {'psl': 'msl', 'uas': '10u', 'vas': '10v', 'pr': 'tp'}

# ndays to be saved
ndays = 90

#initial year and month
init_year=2020
init_month=7
init_day=1

# timestep to run on
t1=0
t2=6*4*ndays

# initial date from which start detection/tracking
initial_date=datetime(init_year, init_month, init_day, 0, 0, 0)

# loop on timerecords
for t in range(t1, t2, 6): 

    tttt = initial_date + timedelta(hours=t)
    tstep = tttt.strftime('%Y%m%dT%H')
    print(tstep)
    # read from catalog, interpolate, write to disk and create a dictionary with useful information
    tempest_dictionary = readwrite_from_intake(model='IFS', exp = 'tco2559-ng5', timestep=tttt, grid=lowgrid, tgtdir=regdir)

    # define the tempest detect nodes output
    txt_file = os.path.join(tmpdir, 'tempest_output_' + tstep + '.txt')

    # run the node detection on the low res files
    tempest_command = run_detect_nodes(tempest_dictionary, tempest_dictionary['regrid_file'], txt_file)

    # remove the low res files
    clean_files([tempest_dictionary['regrid_file']])

    # identify the nodes
    tempest_nodes = read_lonlat_nodes(txt_file)

    # load the highres files
    #reader2d = Reader(model='IFS', exp = 'tco2559-ng5', source="ICMGG_atm2d")
    reader2d = Reader(model='IFS', exp = 'tco2559-ng5', source="ICMGG_atm2d", regrid=highgrid)
    fulldata = reader2d.retrieve().sel(time=tstep)
    
    # loop on variables to write to disk only the subset of high res files
    for var in varlist : 

        varfile = original_dictionary[var]

        data = reader2d.regrid(fulldata[varfile])
        data.name = var
        xfield = store_fullres_field(0, data, tempest_nodes, boxdim)

        store_file = os.path.join(tmpdir, f'TC_{var}_{tstep}.nc')
        write_fullres_field(xfield, store_file)
        


20200701T00
Arguments:
  --in_data <string> ["/home/b/b382216/scratch/regrid_intake/regrid_20200701T00.nc"] 
  --in_data_list <string> [""] 
  --in_connect <string> [""] 
  --diag_connect <bool> [false] 
  --out <string> ["/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T00.txt"] 
  --out_file_list <string> [""] 
  --searchbymin <string> ["msl"] (default PSL)
  --searchbymax <string> [""] 
  --searchbythreshold <string> [""] 
  --minlon <double> [0.000000] (degrees)
  --maxlon <double> [0.000000] (degrees)
  --minlat <double> [0.000000] (degrees)
  --maxlat <double> [0.000000] (degrees)
  --minabslat <double> [0.000000] (degrees)
  --mergedist <double> [6.000000] (degrees)
  --closedcontourcmd <string> ["msl,200.0,5.5,0;_DIFF(z(30000Pa),z(50000Pa)),-58.8,6.5,1.0"] [var,delta,dist,minmaxdist;...]
  --noclosedcontourcmd <string> [""] [var,delta,dist,minmaxdist;...]
  --thresholdcmd <string> [""] [var,op,value,dist;...]
  --outputcmd <string> ["msl,min,0;_VECMAG(u10m,v10m),ma

Put together all the tracks from the detect nodes and run the stich nodes to define the final tracks

In [10]:
#now run stitch nodes only every month

import pandas as pd
tmpdir='/home/b/b382216/scratch/tmpdir_intake'

# initial year and month
init_year=2020
init_month=7

# final year and month
end_year=2020
end_month=9

yrmonths = pd.period_range(start=(str(init_year)+str(init_month).zfill(2)),end=(str(end_year)+str(end_month).zfill(2)), freq='M').strftime('%Y%m')
print (yrmonths)

for yrm in yrmonths:
    # output from detect nodes
    filenames = sorted(glob(os.path.join(tmpdir,f'tempest_output_{yrm}*')))
    print (filenames)
    # create output file with output from stitch nodes 
    track_file = os.path.join(tmpdir, f'tempest_track_{yrm}.txt')

    # run stitch nodes, MAXGAP set to 6h to match the input files res
    stitch_string = run_stitch_nodes(filenames, track_file, maxgap = '6h')

Index(['202007', '202008', '202009'], dtype='object')
['/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200704T00.txt', '/home/b/

Version with +- n days buffer

In [5]:
import pandas as pd
import os
import glob

tmpdir = '/home/b/b382216/scratch/tmpdir_intake'

# initial year, month, and day
init_year = 2020
init_month = 7
init_day = 1

# final year, month, and day
end_year = 2020
end_month = 9
end_day = 30

#number of days in which each month is extended at the beginning and at the end
n_days_ext = 10

for month in pd.date_range(start=f'{init_year}-{init_month}', end=f'{end_year}-{end_month}', freq='M').strftime('%m'):
    if month == "02":
        end_day=28
        # create DatetimeIndex with daily frequency
        dates = pd.date_range(start=f'{init_year}-{month}-{init_day}', end=f'{end_year}-{month}-{end_day}', freq='D')
        # get indexes of yrm and 10 days before and after
        yrm_index = dates[dates.year == init_year][dates.month == month].strftime('%Y%m%d')
        before_index = dates[dates.year == init_year][dates.month == month].shift(-n_days_ext, freq='D').strftime('%Y%m%d')
        after_index = dates[dates.year == end_year][dates.month == month].shift(n_days_ext, freq='D').strftime('%Y%m%d')

        # concatenate the indexes to create a single index
        date_index = before_index.append(yrm_index).append(after_index)

        # create list of file paths to include in glob pattern
        file_paths = [os.path.join(tmpdir, f'tempest_output_{date}*') for date in date_index]

        # use glob to get list of filenames that match the pattern
        filenames = []
        for file_path in file_paths:
            filenames.extend(glob.glob(file_path))
        print(filenames)

    else:
        # create DatetimeIndex with daily frequency
        dates = pd.date_range(start=f'{init_year}-{month}-{init_day}', end=f'{end_year}-{month}-{end_day}', freq='D')
        # get indexes of yrm and 10 days before and after
        yrm_index = dates[dates.year == init_year][dates.month == month].strftime('%Y%m%d')
        before_index = dates[dates.year == init_year][dates.month == month].shift(-n_days_ext, freq='D').strftime('%Y%m%d')
        after_index = dates[dates.year == end_year][dates.month == month].shift(n_days_ext, freq='D').strftime('%Y%m%d')

        # concatenate the indexes to create a single index
        date_index = before_index.append(yrm_index).append(after_index)
        # create list of file paths to include in glob pattern
        file_paths = [os.path.join(tmpdir, f'tempest_output_{date}*') for date in date_index]
        # use glob to get list of filenames that match the pattern
        filenames = []
        for file_path in file_paths:
            filenames.extend(glob.glob(file_path))
        print(filenames)


07
[]
08
[]


Load the full res field and apply further cleanign according to the stich nodes

In [25]:
# variables to be stored
varlist = ['psl', 'uas', 'vas', 'pr']
boxdim = 10

for yrm in yrmonths:
    # create output file with output from stitch nodes 
    track_file = os.path.join(tmpdir, f'tempest_track_{yrm}.txt')
    # reordered_tracks is a dict containing the concatenated (in time) tracks
    # at eatch time step are associated all lons/lats

    reordered_tracks = reorder_tracks(track_file)

    # initialise full_res fields at 0 before the loop

    for var in varlist : 
        print(var)

        xfield = 0
        for idx in reordered_tracks.keys():

            timestep = datetime.strptime(idx, '%Y%m%d%H').strftime('%Y%m%dT%H')
            
            fullres_file = os.path.join(tmpdir, f'TC_{var}_{timestep}.nc')
            fullres_field = xr.open_mfdataset(fullres_file)[var]

            # get the full res field and store the required values around the Nodes
            xfield = store_fullres_field(xfield, fullres_field, reordered_tracks[idx], boxdim)

        print('Storing output')

        # store the file
        store_file = os.path.join(tmpdir, f'tempest_tracks_{var}_{yrm}.nc')
        write_fullres_field(xfield, store_file)

psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output
psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output
psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output


IOStream.flush timed out
