# Tempest Extreme experiment with NextGEMS data

This notebook works with the intake catalog and the regridder. We open the catalog and check that everything works correctly, then import the functions we created ad hoc for TCs.


In [1]:
# Add the directory two levels up to the module search path so that
# project code can be imported (presumably needed for `aqua` -- confirm).
import sys
sys.path.append('../../')
import os
from datetime import datetime, timedelta
from glob import glob

# Ad-hoc helper functions for the TC workflow.
# NOTE(review): the star import hides where names come from. The helpers
# called in later cells (readwrite_from_intake, run_detect_nodes, clean_files,
# read_lonlat_nodes, store_fullres_field, write_fullres_field,
# run_stitch_nodes, reorder_tracks) are presumably defined there; `Reader`
# and `xr` are also used later without an explicit import, so they
# presumably arrive through this import as well -- confirm and import
# them explicitly.
from functionsTCs import *

from aqua.reader import catalogue
# Print the content of the catalog (models, experiments and sources);
# the trailing semicolon suppresses the display of the return value.
catalogue(catalog_file='../../config/catalog.yaml');

IFS	tco3999-ng5	2.8km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
	- interpolated_sp_ci	
IFS	tco2559-ng5	4km experiment, coupled with FESOM
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	
	- interpolated_global	
	- interpolated_np	
	- interpolated_sp	
IFS	tco1279-orca025	9km baseline, coupled to NEMO, deep conv ON
	- ICMGG_atm2d	
	- ICMU_atm2d	
	- ICMU_atm3d	

FESOM	tco3999-ng5	2.5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- np	nearest-neighbor interpolation to lat-lon grid
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	original 2d output
	- original_3d	original 3d output
FESOM	tco2559-ng5	5km experiment, coupuled with IFS
	- elem_grid	
	- node_grid	
	- interpolated_global2d	
	- interpolated_global_TS	
	- interpolated_global_UV	
	- interpolated_np	
	- interpolated_sp	
	- original_2d	orig

Load the data from the intake catalog, regridding to a regular grid both the data to be tracked (r100) and the high-resolution data (r010, since we cannot easily work with the reduced Gaussian grid), and then apply the detection from TempestExtremes in order to filter the high-resolution data to the regions surrounding the cyclone.

In [2]:

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# working directories: `regdir` receives the regridded files written by
# readwrite_from_intake, `tmpdir` receives the DetectNodes text output and the
# per-timestep TC_<var>_<timestep>.nc files.
# NOTE(review): `fulldir` is not used anywhere in this cell -- confirm whether
# the TC_* files were meant to be written there instead of `tmpdir`.
regdir='/home/b/b382216/scratch/regrid_intake'
tmpdir='/home/b/b382216/scratch/tmpdir_intake'
fulldir='/home/b/b382216/scratch/fullres'

# dimension of the box to be saved around each detected node
boxdim=10

# target grids for the low-resolution (tracking) and high-resolution data
# NOTE(review): the notebook text mentions r010 for the high-resolution data,
# but both grids are currently 'r100' -- confirm this is intended.
lowgrid='r100'
highgrid='r100'

# variables to be stored (reduce to a single entry, e.g. ['pr'], for a quick test)
varlist = ['psl', 'uas', 'vas', 'pr']

# dictionary mapping each stored variable name to its name in the source dataset
original_dictionary = {'psl': 'msl', 'uas': '10u', 'vas': '10v', 'pr': 'tp'}

# number of days to be processed
ndays = 345

# initial year, month and day
init_year=2020
init_month=1
init_day=20

# time window, expressed in hours since the initial date
t1=0
t2=24*ndays

# initial date from which start detection/tracking
initial_date=datetime(init_year, init_month, init_day, 0, 0, 0)

# ---------------------------------------------------------------------------
# Detection loop
# ---------------------------------------------------------------------------

# Reader for the high-resolution data. It is built once, before the loop,
# because none of its arguments change from one timestep to the next.
reader2d = Reader(model='IFS', exp = 'tco2559-ng5', source="ICMGG_atm2d", regrid=highgrid)

# loop on time records, one every 6 hours
for t in range(t1, t2, 6):

    tttt = initial_date + timedelta(hours=t)
    tstep = tttt.strftime('%Y%m%dT%H')
    print(tstep)

    # read from catalog, interpolate, write to disk and create a dictionary with useful information
    tempest_dictionary = readwrite_from_intake(model='IFS', exp = 'tco2559-ng5', timestep=tttt, grid=lowgrid, tgtdir=regdir)

    # define the tempest detect nodes output
    txt_file = os.path.join(tmpdir, 'tempest_output_' + tstep + '.txt')

    # run the node detection on the low res files
    tempest_command = run_detect_nodes(tempest_dictionary, tempest_dictionary['regrid_file'], txt_file)

    # remove the low res files, no longer needed once the nodes are detected
    clean_files([tempest_dictionary['regrid_file']])

    # identify the nodes
    tempest_nodes = read_lonlat_nodes(txt_file)

    # load the highres data for the current timestep
    fulldata = reader2d.retrieve().sel(time=tstep)

    # loop on variables to write to disk only the subset of high res files
    for var in varlist :

        # name of the variable in the source dataset
        varfile = original_dictionary[var]

        # regrid and rename to the output variable name
        data = reader2d.regrid(fulldata[varfile])
        data.name = var

        # start from 0 (no previous field) and keep the data around the nodes
        xfield = store_fullres_field(0, data, tempest_nodes, boxdim)

        store_file = os.path.join(tmpdir, f'TC_{var}_{tstep}.nc')
        write_fullres_field(xfield, store_file)
        


20200120T00


FileNotFoundError: [Errno 2] No such file or directory: 'DetectNodes'

Put together all the tracks from the detect nodes and run the stitch nodes to define the final tracks

In [5]:
# Run StitchNodes once per month on the DetectNodes output of that month

import pandas as pd
tmpdir='/home/b/b382216/scratch/tmpdir_intake'

# 'YYYYMM' labels for every month of 2020
yrmonths = pd.period_range(start='202001',end='202012', freq='M').strftime('%Y%m')
print (yrmonths)

# file receiving the stitched tracks
# NOTE(review): the same path is used for every month, so if run_stitch_nodes
# writes to it each month replaces the previous one -- confirm whether a
# per-month file name is wanted.
track_file = os.path.join(tmpdir, 'tempest_track.txt')

for yrm in yrmonths:
    # output from detect nodes for the current month only; the pattern has to
    # be an f-string built from the loop variable to match the
    # tempest_output_<YYYYMMDD>T<HH>.txt files
    filenames = sorted(glob(os.path.join(tmpdir, f'tempest_output_{yrm}*')))

    # nothing to stitch if no detect nodes output exists for this month
    if not filenames:
        print(f'No detect nodes output found for {yrm}, skipping')
        continue

    # MAXGAP set to 6h to match the input files res
    stitch_string = run_stitch_nodes(filenames, track_file, maxgap = '6h')

Index(['202001', '202002', '202003', '202004', '202005', '202006', '202007',
       '202008', '202009', '202010', '202011', '202012'],
      dtype='object')


NameError: name 'glob' is not defined

Load the full-res field and apply further cleaning according to the stitch nodes

In [26]:
# Build the dictionary of tracks from the stitch nodes output: the tracks are
# concatenated in time and every time step (key) carries all its lons/lats
reordered_tracks = reorder_tracks(track_file)

for var in varlist:
    print(var)

    # the accumulated field starts from 0 for every variable
    xfield = 0

    for idx, nodes in reordered_tracks.items():

        # convert the track key (YYYYMMDDHH) to the stamp used in the
        # per-timestep file names (YYYYMMDDTHH)
        stamp = datetime.strptime(idx, '%Y%m%d%H')
        timestep = stamp.strftime('%Y%m%dT%H')

        # open the full res field saved for this variable and time step
        fullres_file = os.path.join(tmpdir, f'TC_{var}_{timestep}.nc')
        fullres_field = xr.open_mfdataset(fullres_file)[var]

        # add to the accumulated field the values around the nodes of this step
        xfield = store_fullres_field(xfield, fullres_field, nodes, boxdim)

    print('Storing output')

    # write the accumulated field of this variable to disk
    store_file = os.path.join(tmpdir, f'tempest_tracks_{var}.nc')
    write_fullres_field(xfield, store_file)

psl
<xarray.DataArray 'psl' (lat: 900, lon: 1800)>
dask.array<open_dataset-4d4e7b8191eba17209a86d2532cf87a2psl, shape=(900, 1800), dtype=float64, chunksize=(900, 1800), chunktype=numpy.ndarray>
Coordinates:
    time     datetime64[ns] ...
  * lat      (lat) float64 -89.9 -89.7 -89.5 -89.3 -89.1 ... 89.3 89.5 89.7 89.9
  * lon      (lon) float64 0.0 0.2 0.4 0.6 0.8 ... 359.0 359.2 359.4 359.6 359.8
<xarray.DataArray 'psl' (lat: 900, lon: 1800)>
dask.array<open_dataset-74058bb7b8e39edc0db22b325a8e6ddcpsl, shape=(900, 1800), dtype=float64, chunksize=(900, 1800), chunktype=numpy.ndarray>
Coordinates:
    time     datetime64[ns] ...
  * lat      (lat) float64 -89.9 -89.7 -89.5 -89.3 -89.1 ... 89.3 89.5 89.7 89.9
  * lon      (lon) float64 0.0 0.2 0.4 0.6 0.8 ... 359.0 359.2 359.4 359.6 359.8
<xarray.DataArray 'psl' (lat: 900, lon: 1800)>
dask.array<open_dataset-78f050845c409eb8f43bbedc5cbf0ccbpsl, shape=(900, 1800), dtype=float64, chunksize=(900, 1800), chunktype=numpy.ndarray>
Coordinates: