In [1]:
import os
import glob
import netCDF4 as nc
import numpy as np
from datetime import datetime
import xarray as xr

from aqua.util import load_yaml
#from plotting_hist import plot_hist_cat, plot_press_wind

Get the variables and other parameters from the configuration file and store them in a dictionary.

In [2]:
# Load the config file. The path is relative, so it is resolved against the
# directory the notebook kernel is running in.
tdict = load_yaml('../cli/config_tcs.yaml')
# Bare expression: the notebook shows the loaded object as this cell's output.
tdict



In [4]:
# --- Input folders ---------------------------------------------------------
# Full-resolution NetCDF files
fullres_data_directory = '/home/b/b382216/work/tc/fullres/IFS/tco2559-ng5-cycle3/'
# Tempest (Stitch nodes) output track files
tempest_data_directory = '/home/b/b382216/work/tc/tmpdir/IFS/tco2559-ng5-cycle3/'

# --- File name patterns ----------------------------------------------------
# NetCDF files
fullres_file_pattern = 'tempest_tracks_*.nc'
# Tempest txt files
tempest_file_pattern = 'tempest_track_*.txt'

# --- Output folder ---------------------------------------------------------
# Destination of the new .txt files carrying the fullres data
output_directory = '/home/b/b382216/work/tc/fullres/IFS/tco2559-ng5-cycle3/tempest_fullres_txt'

# --- File discovery --------------------------------------------------------
# Collect every matching path, then order each list in place
fullres_file_paths = glob.glob(os.path.join(fullres_data_directory, fullres_file_pattern))
fullres_file_paths.sort()
tempest_file_paths = glob.glob(os.path.join(tempest_data_directory, tempest_file_pattern))
tempest_file_paths.sort()

# --- Empty containers ------------------------------------------------------
# One list for data arrays, one for track headers
data_arrays, track_headers = [], []

# Define a function to parse the text data and separate tracks
def parse_text_file(file_path):
    """Parse a tempest track text file and split it into separate tracks.

    The file is read line by line and each line is split on whitespace:

    * a line whose first token is ``start`` opens a new track; tokens 1-5 are
      read as track length, year, month, date and hour;
    * any other non-empty line following a header is one track point with
      tokens (0-based) 0: lon index, 1: lat index, 2: longitude, 3: latitude,
      4: MSLP, 5: wind module, 6: year, 7: month, 8: date, 9: hour;
    * empty lines, and data lines that appear before the first header, are
      skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of ``(track_header, track_data)`` tuples in file order, where
        ``track_header`` is a dict with the keys ``track_length``, ``year``,
        ``month``, ``date`` and ``hour``, and ``track_data`` is a list of
        per-point dicts (possibly empty).

    Raises:
        IndexError: If a header or data line has fewer tokens than expected.
        ValueError: If a token cannot be converted to ``int``/``float``.
    """
    data = []  # Completed (header, points) tuples
    track_data = []  # Points of the track currently being read
    track_header = None  # Header of the track currently being read

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                continue  # Skip empty lines

            if parts[0] == 'start':
                # A new header closes the previous track, if there was one
                if track_header is not None:
                    data.append((track_header, track_data))
                track_header = {
                    'track_length': int(parts[1]),
                    'year': int(parts[2]),
                    'month': int(parts[3]),
                    'date': int(parts[4]),
                    'hour': int(parts[5])
                }
                track_data = []
            elif track_header is not None:
                track_data.append({
                    'lon_index': int(parts[0]),
                    'lat_index': int(parts[1]),
                    'longitude': float(parts[2]),
                    'latitude': float(parts[3]),
                    'year': int(parts[6]),
                    'month': int(parts[7]),
                    'date': int(parts[8]),
                    'hour': int(parts[9]),
                    'mslp': float(parts[4]),  # Token index 4 is taken as MSLP
                    'wind_module': float(parts[5])  # Token index 5 is taken as wind module
                })

    # Close the last track on the same condition used for all earlier ones
    # (a header was seen), so a trailing header without points is not dropped.
    if track_header is not None:
        data.append((track_header, track_data))

    return data

# Loop through the NetCDF files and the corresponding text files.
# Files are paired by the period string that follows the last '_' in their
# names instead of by position in the two sorted lists: with positional
# pairing a single missing file would silently shift every later pair.
tempest_by_period = {
    os.path.splitext(os.path.basename(path))[0].split('_')[-1]: path
    for path in tempest_file_paths
}

# Half-width of the lat/lon box searched around each track point
# (expressed in the units of the dataset's lat/lon coordinates)
box_half_width = 2

for fullres_file_path in fullres_file_paths:
    period = os.path.splitext(os.path.basename(fullres_file_path))[0].split('_')[-1]
    tempest_file_path = tempest_by_period.get(period)
    if tempest_file_path is None:
        print(f"No tempest file matches {fullres_file_path}, skipping it")
        continue

    updated_lines = []

    # Open the NetCDF file; the context manager closes it even when the loop
    # is interrupted or a lookup fails
    with xr.open_dataset(fullres_file_path) as dataset:
        mslp = dataset['msl']
        u10 = dataset['10u']
        v10 = dataset['10v']
        time_nc = dataset['time']
        print("Opened tempest file:", tempest_file_path)
        print("Opened NetCDF file:", fullres_file_path)

        # compute wind module
        wind_mod = (u10**2 + v10**2)**0.5
        # Parse the corresponding text file
        parsed_results = parse_text_file(tempest_file_path)

        # Loop through parsed results and print variables at each time step
        for i, (track_header, result) in enumerate(parsed_results):
            print(f"Track {i + 1}:")

            # Print the track header information
            print(f"Track Length: {track_header['track_length']}, Year: {track_header['year']}, Month: {track_header['month']}, Date: {track_header['date']}, Hour: {track_header['hour']}")

            # Write the track header information to the new .txt file
            header_line = f"start {track_header['track_length']} {track_header['year']} {track_header['month']} {track_header['date']} {track_header['hour']}\n"
            updated_lines.append(header_line)

            for entry in result:
                lon = entry['longitude']
                lat = entry['latitude']
                year = entry['year']
                month = entry['month']
                date = entry['date']
                hour = entry['hour']
                mslp_tempest = entry['mslp']
                wind_mod_tempest = entry['wind_module']

                # create a time stamp with the date in the tempest file/dict
                time_stamp = datetime(year, month, date, hour, 0, 0)

                # Box around the track point; the same box is used for both fields
                lat_box = slice(lat - box_half_width, lat + box_half_width)
                lon_box = slice(lon - box_half_width, lon + box_half_width)

                # Take the NetCDF time step nearest to the track time, then the
                # pressure minimum and the wind maximum inside the box
                mslp_time = mslp.sel(time=time_stamp, method="nearest")
                mslp_value = np.min(mslp_time.sel(lat=lat_box, lon=lon_box).values)
                wind_mod_time = wind_mod.sel(time=time_stamp, method="nearest")
                wind_mod_value = np.max(wind_mod_time.sel(lat=lat_box, lon=lon_box).values)

                print(f"Time tempest dict: {time_stamp}, lat tempest {lat}, lon tempest {lon},  MSLP tempest: {mslp_tempest}, wind_mod tempest: {wind_mod_tempest}")
                print(f"Time netcdf: {time_nc.sel(time=time_stamp, method='nearest').values}, MSLP netcdf: {mslp_value}, wind_mod netcdf: {wind_mod_value}\n")

                # Replace the tempest values with the ones from the NetCDF file
                entry['mslp'] = mslp_value
                entry['wind_module'] = wind_mod_value

                entry_line = f"{entry['lon_index']} {entry['lat_index']} {entry['longitude']} {entry['latitude']} {entry['mslp']} {entry['wind_module']} {entry['year']} {entry['month']} {entry['date']} {entry['hour']}\n"
                updated_lines.append(entry_line)

    filename = os.path.basename(tempest_file_path)
    new_filename = f"fullres_{filename}"  # Add "fullres_" as a prefix to the original filename
    new_file_path = os.path.join(output_directory, new_filename)
    # Create the output directory on first use so the write below cannot fail
    # just because the folder is missing
    os.makedirs(output_directory, exist_ok=True)
    with open(new_file_path, 'w') as new_file:
        new_file.writelines(updated_lines)
    print(f"Created {new_file_path}")


Opened tempest file: /home/b/b382216/work/tc/tmpdir/IFS/tco2559-ng5-cycle3/tempest_track_20200120-20200218.txt
Opened NetCDF file: /home/b/b382216/work/tc/fullres/IFS/tco2559-ng5-cycle3/tempest_tracks_20200120-20200218.nc
Track 1:
Track Length: 12, Year: 2020, Month: 1, Date: 22, Hour: 0
Time tempest dict: 2020-01-22 00:00:00, lat tempest -18.5, lon tempest 68.0,  MSLP tempest: 100396.6, wind_mod tempest: 13.97
Time netcdf: 2020-01-22T00:00:00.000000000, MSLP netcdf: 100157.99193397671, wind_mod netcdf: 20.411855938955014

Time tempest dict: 2020-01-22 06:00:00, lat tempest -19.5, lon tempest 68.0,  MSLP tempest: 100515.3, wind_mod tempest: 14.42783
Time netcdf: 2020-01-22T06:00:00.000000000, MSLP netcdf: 100397.98145165609, wind_mod netcdf: 20.11255899356146

Time tempest dict: 2020-01-22 12:00:00, lat tempest -20.5, lon tempest 69.0,  MSLP tempest: 100361.3, wind_mod tempest: 12.97528
Time netcdf: 2020-01-22T12:00:00.000000000, MSLP netcdf: 100268.65367267071, wind_mod netcdf: 17.014

KeyboardInterrupt: 

In [3]:
# Where the inputs live: full-resolution NetCDF files and the tempest
# (Stitch nodes) output track files
fullres_data_directory = '/home/b/b382216/work/tc/fullres/IFS/tco1279-orca025-cycle3/'
tempest_data_directory = '/home/b/b382216/work/tc/tmpdir/IFS/tco1279-orca025-cycle3/'

# How the inputs are named: NetCDF files and tempest txt files
fullres_file_pattern = 'tempest_tracks_*.nc'
tempest_file_pattern = 'tempest_track_*.txt'

# Where the new .txt files with fullres data are written
output_directory = '/home/b/b382216/work/tc/fullres/IFS/tco1279-orca025-cycle3/tempest_fullres_txt'

# Gather all matching files, ordering each list in place after the glob
fullres_file_paths = glob.glob(os.path.join(fullres_data_directory, fullres_file_pattern))
fullres_file_paths.sort()
tempest_file_paths = glob.glob(os.path.join(tempest_data_directory, tempest_file_pattern))
tempest_file_paths.sort()

# Start with empty lists for the data arrays and the track headers
data_arrays = list()
track_headers = list()

# Define a function to parse the text data and separate tracks
def parse_text_file(file_path):
    """Parse a tempest track text file and split it into separate tracks.

    The file is read line by line and each line is split on whitespace:

    * a line whose first token is ``start`` opens a new track; tokens 1-5 are
      read as track length, year, month, date and hour;
    * any other non-empty line following a header is one track point with
      tokens (0-based) 0: lon index, 1: lat index, 2: longitude, 3: latitude,
      4: MSLP, 5: wind module, 6: year, 7: month, 8: date, 9: hour;
    * empty lines, and data lines that appear before the first header, are
      skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of ``(track_header, track_data)`` tuples in file order, where
        ``track_header`` is a dict with the keys ``track_length``, ``year``,
        ``month``, ``date`` and ``hour``, and ``track_data`` is a list of
        per-point dicts (possibly empty).

    Raises:
        IndexError: If a header or data line has fewer tokens than expected.
        ValueError: If a token cannot be converted to ``int``/``float``.
    """
    data = []  # Completed (header, points) tuples
    track_data = []  # Points of the track currently being read
    track_header = None  # Header of the track currently being read

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                continue  # Skip empty lines

            if parts[0] == 'start':
                # A new header closes the previous track, if there was one
                if track_header is not None:
                    data.append((track_header, track_data))
                track_header = {
                    'track_length': int(parts[1]),
                    'year': int(parts[2]),
                    'month': int(parts[3]),
                    'date': int(parts[4]),
                    'hour': int(parts[5])
                }
                track_data = []
            elif track_header is not None:
                track_data.append({
                    'lon_index': int(parts[0]),
                    'lat_index': int(parts[1]),
                    'longitude': float(parts[2]),
                    'latitude': float(parts[3]),
                    'year': int(parts[6]),
                    'month': int(parts[7]),
                    'date': int(parts[8]),
                    'hour': int(parts[9]),
                    'mslp': float(parts[4]),  # Token index 4 is taken as MSLP
                    'wind_module': float(parts[5])  # Token index 5 is taken as wind module
                })

    # Close the last track on the same condition used for all earlier ones
    # (a header was seen), so a trailing header without points is not dropped.
    if track_header is not None:
        data.append((track_header, track_data))

    return data

# Loop through the NetCDF files and the corresponding text files.
# The tempest file belonging to a NetCDF file is found through the period
# string that follows the last '_' in the NetCDF file name.

# Half-width of the lat/lon box searched around each track point
# (expressed in the units of the dataset's lat/lon coordinates)
box_half_width = 2

for fullres_file_path in fullres_file_paths:
    # Extract the part after the last '_' and before the extension of the file name
    date_string = os.path.splitext(os.path.basename(fullres_file_path))[0].split('_')[-1]
    # os.path.join works whether or not the directory ends with a separator
    tempest_file_path = os.path.join(tempest_data_directory, "tempest_track_" + date_string + ".txt")
    if not os.path.isfile(tempest_file_path):
        # Report and move on instead of aborting the whole run on one missing file
        print(f"Tempest file {tempest_file_path} not found, skipping {fullres_file_path}")
        continue

    updated_lines = []

    # Open the NetCDF file; the context manager closes it even when the loop
    # is interrupted or a lookup fails
    with xr.open_dataset(fullres_file_path) as dataset:
        mslp = dataset['msl']
        u10 = dataset['10u']
        v10 = dataset['10v']
        time_nc = dataset['time']
        print("Opened tempest file:", tempest_file_path)
        print("Opened NetCDF file:", fullres_file_path)

        # compute wind module
        wind_mod = (u10**2 + v10**2)**0.5
        # Parse the corresponding text file
        parsed_results = parse_text_file(tempest_file_path)

        # Loop through parsed results and print variables at each time step
        for i, (track_header, result) in enumerate(parsed_results):
            print(f"Track {i + 1}:")

            # Print the track header information
            print(f"Track Length: {track_header['track_length']}, Year: {track_header['year']}, Month: {track_header['month']}, Date: {track_header['date']}, Hour: {track_header['hour']}")

            # Write the track header information to the new .txt file
            header_line = f"start {track_header['track_length']} {track_header['year']} {track_header['month']} {track_header['date']} {track_header['hour']}\n"
            updated_lines.append(header_line)

            for entry in result:
                lon = entry['longitude']
                lat = entry['latitude']
                year = entry['year']
                month = entry['month']
                date = entry['date']
                hour = entry['hour']
                mslp_tempest = entry['mslp']
                wind_mod_tempest = entry['wind_module']

                # create a time stamp with the date in the tempest file/dict
                time_stamp = datetime(year, month, date, hour, 0, 0)

                # Box around the track point; the same box is used for both fields
                lat_box = slice(lat - box_half_width, lat + box_half_width)
                lon_box = slice(lon - box_half_width, lon + box_half_width)

                # Take the NetCDF time step nearest to the track time, then the
                # pressure minimum and the wind maximum inside the box
                mslp_time = mslp.sel(time=time_stamp, method="nearest")
                mslp_value = np.min(mslp_time.sel(lat=lat_box, lon=lon_box).values)
                wind_mod_time = wind_mod.sel(time=time_stamp, method="nearest")
                wind_mod_value = np.max(wind_mod_time.sel(lat=lat_box, lon=lon_box).values)

                print(f"Time tempest dict: {time_stamp}, lat tempest {lat}, lon tempest {lon},  MSLP tempest: {mslp_tempest}, wind_mod tempest: {wind_mod_tempest}")
                print(f"Time netcdf: {time_nc.sel(time=time_stamp, method='nearest').values}, MSLP netcdf: {mslp_value}, wind_mod netcdf: {wind_mod_value}\n")

                # Replace the tempest values with the ones from the NetCDF file
                entry['mslp'] = mslp_value
                entry['wind_module'] = wind_mod_value

                entry_line = f"{entry['lon_index']} {entry['lat_index']} {entry['longitude']} {entry['latitude']} {entry['mslp']} {entry['wind_module']} {entry['year']} {entry['month']} {entry['date']} {entry['hour']}\n"
                updated_lines.append(entry_line)

    filename = os.path.basename(tempest_file_path)
    new_filename = f"fullres_{filename}"  # Add "fullres_" as a prefix to the original filename
    new_file_path = os.path.join(output_directory, new_filename)
    # Create the output directory on first use so the write below cannot fail
    # just because the folder is missing
    os.makedirs(output_directory, exist_ok=True)
    with open(new_file_path, 'w') as new_file:
        new_file.writelines(updated_lines)
    print(f"Created {new_file_path}")

Opened tempest file: /home/b/b382216/work/tc/tmpdir/IFS/tco1279-orca025-cycle3/tempest_track_20200120-20200218.txt
Opened NetCDF file: /home/b/b382216/work/tc/fullres/IFS/tco1279-orca025-cycle3/tempest_tracks_20200120-20200218.nc
Track 1:
Track Length: 15, Year: 2020, Month: 1, Date: 23, Hour: 18
Time tempest dict: 2020-01-23 18:00:00, lat tempest -20.5, lon tempest 60.0,  MSLP tempest: 100381.9, wind_mod tempest: 13.46387
Time netcdf: 2020-01-23T18:00:00.000000000, MSLP netcdf: 100308.1419771099, wind_mod netcdf: 15.815569921179678

Time tempest dict: 2020-01-24 00:00:00, lat tempest -20.5, lon tempest 62.0,  MSLP tempest: 100146.6, wind_mod tempest: 14.5857
Time netcdf: 2020-01-24T00:00:00.000000000, MSLP netcdf: 99956.2009060352, wind_mod netcdf: 16.218624408233445

Time tempest dict: 2020-01-24 06:00:00, lat tempest -21.5, lon tempest 63.0,  MSLP tempest: 100399.2, wind_mod tempest: 13.37549
Time netcdf: 2020-01-24T06:00:00.000000000, MSLP netcdf: 100201.03295385485, wind_mod netcd