# Tempest Extreme experiment with NextGEMS data

This is trying to work with the intake catalog and using the regridder. Open the catalog and check that everything works correctly. Import functions we created ad-hoc for TCs


In [1]:
import sys
sys.path.append('../../')
import os
from datetime import datetime, timedelta
from glob import glob

from functionsTCs import *

from aqua.reader import catalogue
catalogue(catalog_file='../../config/catalog.yaml');

IFS	tco3999-ng5	2.8km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
	- interpolated_sp_ci	
IFS	tco2559-ng5	4km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
IFS	tco1279-orca025	9km baseline, coupled to NEMO, deep conv ON
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	

FESOM	tco3999-ng5	2.5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- np	nearest-neighbor interpolation to lat-lon grid
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	original 2d output
	- original_3d	original 3d output
FESOM	tco2559-ng5	5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	orig

Load the data from the intake catalog - regridding to a regular grid for data to be tracked (r100) and for high-resolution (r010, since we cannot easily work with gaussian reduced) and the apply the detection from TempestExtremes in order to filter the high resolution data with the regions in the surroundings of the cyclone

In [14]:

# path to input directory
regdir='/home/b/b382216/scratch/regrid_intake'
tmpdir='/home/b/b382216/scratch/tmpdir_intake'
fulldir='/home/b/b382216/scratch/fullres'

# dimension of the box to be saved
boxdim=10

# resolution for high and low data
lowgrid='r100'
highgrid='r100'

# variables to be stored
varlist = ['psl', 'uas', 'vas', 'pr']
#varlist = ['pr']

# dicitonary with the original filenames
original_dictionary = {'psl': 'msl', 'uas': '10u', 'vas': '10v', 'pr': 'tp'}

# ndays to be saved
ndays = 90

#initial year and month
init_year=2020
init_month=7
init_day=1

# timestep to run on
t1=0
t2=6*4*ndays

# initial date from which start detection/tracking
initial_date=datetime(init_year, init_month, init_day, 0, 0, 0)

# loop on timerecords
for t in range(t1, t2, 6): 

    tttt = initial_date + timedelta(hours=t)
    tstep = tttt.strftime('%Y%m%dT%H')
    print(tstep)
    # read from catalog, interpolate, write to disk and create a dictionary with useful information
    tempest_dictionary = readwrite_from_intake(model='IFS', exp = 'tco2559-ng5', timestep=tttt, grid=lowgrid, tgtdir=regdir)

    # define the tempest detect nodes output
    txt_file = os.path.join(tmpdir, 'tempest_output_' + tstep + '.txt')

    # run the node detection on the low res files
    tempest_command = run_detect_nodes(tempest_dictionary, tempest_dictionary['regrid_file'], txt_file)

    # remove the low res files
    clean_files([tempest_dictionary['regrid_file']])
    
    # identify the nodes
    tempest_nodes = read_lonlat_nodes(txt_file)

    # load the highres files
    #reader2d = Reader(model='IFS', exp = 'tco2559-ng5', source="ICMGG_atm2d")
    reader2d = Reader(model='IFS', exp = 'tco2559-ng5', source="ICMGG_atm2d", regrid=highgrid)
    fulldata = reader2d.retrieve().sel(time=tstep)
    
    # loop on variables to write to disk only the subset of high res files
    for var in varlist : 

        varfile = original_dictionary[var]

        data = reader2d.regrid(fulldata[varfile])
        data.name = var
        xfield = store_fullres_field(0, data, tempest_nodes, boxdim)

        store_file = os.path.join(tmpdir, f'TC_{var}_{tstep}.nc')
        write_fullres_field(xfield, store_file)
    


20200701T00
20200701T06
20200701T12
20200701T18
20200702T00
20200702T06
20200702T12
20200702T18
20200703T00
20200703T06
20200703T12
20200703T18
20200704T00
20200704T06
20200704T12
20200704T18
20200705T00
20200705T06
20200705T12
20200705T18
20200706T00
20200706T06
20200706T12
20200706T18
20200707T00
20200707T06
20200707T12
20200707T18
20200708T00
20200708T06
20200708T12
20200708T18
20200709T00
20200709T06
20200709T12
20200709T18
20200710T00
20200710T06
20200710T12
20200710T18
20200711T00
20200711T06
20200711T12
20200711T18
20200712T00
20200712T06
20200712T12
20200712T18
20200713T00
20200713T06
20200713T12
20200713T18
20200714T00
20200714T06
20200714T12
20200714T18
20200715T00
20200715T06
20200715T12
20200715T18
20200716T00
20200716T06
20200716T12
20200716T18
20200717T00
20200717T06
20200717T12
20200717T18
20200718T00
20200718T06
20200718T12
20200718T18
20200719T00
20200719T06
20200719T12
20200719T18
20200720T00
20200720T06
20200720T12
20200720T18
20200721T00
20200721T06
20200721T12
2020

Put together all the tracks from the detect nodes and run the stich nodes to define the final tracks

In [10]:
#now run stitch nodes only every month

import pandas as pd
tmpdir='/home/b/b382216/scratch/tmpdir_intake'

# initial year and month
init_year=2020
init_month=7

# final year and month
end_year=2020
end_month=9

yrmonths = pd.period_range(start=(str(init_year)+str(init_month).zfill(2)),end=(str(end_year)+str(end_month).zfill(2)), freq='M').strftime('%Y%m')

for yrm in yrmonths:
    # output from detect nodes
    filenames = sorted(glob(os.path.join(tmpdir,f'tempest_output_{yrm}*')))
    print (filenames)
    # create output file with output from stitch nodes 
    track_file = os.path.join(tmpdir, f'tempest_track_{yrm}.txt')

    # run stitch nodes, MAXGAP set to 6h to match the input files res
    stitch_string = run_stitch_nodes(filenames, track_file, maxgap = '6h')

Index(['202007', '202008', '202009'], dtype='object')
['/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200704T00.txt', '/home/b/

Version with +- n extended days at the beginning/end of a month to take into account tracks which may begin before or end after the beggining or end of a month

In [2]:
import pandas as pd
import calendar
import os
import glob

tmpdir = '/home/b/b382216/scratch/tmpdir_intake'

# initial year, month, and day
init_year = 2020
init_month = 7
init_day = 1

# final year, month, and day
end_year = 2020
end_month = 8

#number of days in which each month is extended at the beginning and at the end
n_days_ext = 10
n_days_freq = 30

# support variables
frequency = str(n_days_freq)+'D'
real_end_month = end_month + 1

# loop
for block in pd.date_range(start=f'{init_year}-{init_month}-{init_day}', end=f'{end_year}-{real_end_month}', freq=frequency):

    # create DatetimeIndex with daily frequency
    end_day = calendar.monthrange(block.year, block.month)[1]
    dates = pd.date_range(start=block, periods=n_days_freq, freq='D')

    before = dates.shift(-n_days_ext, freq='D')[0:n_days_ext]
    after = dates.shift(+n_days_ext, freq='D')[-n_days_ext:]

    # concatenate the indexes to create a single index
    date_index = before.append(dates).append(after)

    # create list of file paths to include in glob pattern
    file_paths = [os.path.join(tmpdir, f'tempest_output_{date}T??.txt') for date in date_index.strftime('%Y%m%d')]
    # use glob to get list of filenames that match the pattern
    filenames = []
    for file_path in file_paths:
        filenames.extend(sorted(glob.glob(file_path)))
    print(filenames)

    track_file = os.path.join(tmpdir, f'tempest_track_{block.strftime("%Y%m%d")}-{dates[-1].strftime("%Y%m%d")}.txt')

    # run stitch nodes, MAXGAP set to 6h to match the input files res
    stitch_string = run_stitch_nodes(filenames, track_file, maxgap = '6h')


['/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200701T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200702T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T06.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T12.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200703T18.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200704T00.txt', '/home/b/b382216/scratch/tmpdir_intake/tempest_output_20200704T

Load the full res field and apply further cleanign according to the stich nodes

In [4]:
# variables to be stored
varlist = ['psl', 'uas', 'vas', 'pr']
boxdim = 10

for block in pd.date_range(start=f'{init_year}-{init_month}-{init_day}', end=f'{end_year}-{real_end_month}', freq=frequency):

    # create DatetimeIndex with daily frequency
    end_day = calendar.monthrange(block.year, block.month)[1]
    dates = pd.date_range(start=block, periods=n_days_freq, freq='D')
    
    # create output file with output from stitch nodes 
    track_file = os.path.join(tmpdir, f'tempest_track_{block.strftime("%Y%m%d")}-{dates[-1].strftime("%Y%m%d")}.txt')

    # reordered_tracks is a dict containing the concatenated (in time) tracks
    # at eatch time step are associated all lons/lats

    reordered_tracks = reorder_tracks(track_file)

    # initialise full_res fields at 0 before the loop
    
    for var in varlist : 
        print(var)

        xfield = 0
        for idx in reordered_tracks.keys():
            #print(datetime.strptime(idx, '%Y%m%d%H').strftime('%Y%m%d'))
            #print (dates.strftime('%Y%m%d'))
            if datetime.strptime(idx, '%Y%m%d%H').strftime('%Y%m%d') in dates.strftime('%Y%m%d'):

                timestep = datetime.strptime(idx, '%Y%m%d%H').strftime('%Y%m%dT%H')
                
                fullres_file = os.path.join(tmpdir, f'TC_{var}_{timestep}.nc')
                fullres_field = xr.open_mfdataset(fullres_file)[var]

                # get the full res field and store the required values around the Nodes
                xfield = store_fullres_field(xfield, fullres_field, reordered_tracks[idx], boxdim)

        print('Storing output')

        # store the file
        store_file = os.path.join(tmpdir, f'tempest_tracks_{var}_{block.strftime("%Y%m%d")}-{dates[-1].strftime("%Y%m%d")}.nc')
        write_fullres_field(xfield, store_file)
 

psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output
psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output
psl
Storing output
uas
Storing output
vas
Storing output
pr
Storing output
