In [7]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  5 15:56:07 2023
This code takes a .csv file of identified cyclones from the Melbourne Uni 
tracking scheme, and outputs a .txt file presenting the cyclone information in
a format consistent with the Grosfeld et al. 2021 tracking scheme. 
Output columns (in order; no header row is written) are TrackID, Year, Month,
Day, Hour, Tstep, Lat, Lon, CV, Depth, Radius. 
@author: nick
"""

# ======================== Import Modules ======================================
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.dates as dt

In [8]:
# ========================== Define Utility Functions =========================

def netcdf_time_scale(y,m,d,h,y0):
    
    '''Return the number of hours between 00:00 on 1 January of year y0 and the
    date/time given by y, m, d, h (the time convention of reanalysis).

    Inputs
    ------
    y, m, d, h : int
        Year, month, day and hour of the date being converted.
    y0 : int
        Baseline year; the time origin is 00:00 on 1 January of this year.

    Output
    ------
    float
        Hours elapsed since the origin (negative for dates before it).

    Raises
    ------
    ValueError
        If the inputs do not form a valid calendar date/time (raised by the
        datetime constructor).
    '''
    
    origin = datetime(y0,1,1,0) # 12am, jan 1 of the baseline year of the reanalysis
    target = datetime(y,m,d,h) # the day and time being converted
    
    # Subtracting datetimes gives an exact timedelta, so whole-hour inputs give
    # exactly whole-hour results. This avoids the float round-off that comes
    # from differencing fractional day-numbers and multiplying by 24.
    return (target - origin).total_seconds() / 3600.0

In [3]:
# ========================== Set Initial Parameters ===========================

# --- Data I/O parameters ---
# The input .csv is read from data_path and the output .txt is written there too.
data_path = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/Acacia_Lows/'
input_filename = 'UM_lows_ERA5_500hPa_proj100_rad2cv2_19592022_fixes.csv'
output_filename = 'closed_lows_mu_era5_2009_sea.txt'

# --- Reanalysis settings ---
dataset = 'era5'
y0 = 1900  # Baseline year used to compute the timescale value.

# --- Time and space boundaries for the subset ---
# Dates are bounded as yyyymmdd integers, because that is how the MU tracking
# scheme records them.
#   (19790400, 20091100) to compare against P12
#   (19790000, 20180000) otherwise
years = (19790000, 20091100)

# Latitude bounds, given as (northern limit, southern limit).
#   (-30, -45) to compare against P12, and for the months and years plots
#   (-25, -55) otherwise
lats = (-30, -45)

# Longitude bounds, given as (western limit, eastern limit).
#   (125, 147.5) to compare against P12
#   (100, 160) otherwise
#   (-125, -147.5) was noted for the months and years plots
#   NOTE(review): the negative signs look like a typo for (125, 147.5) - confirm.
lons = (125, 147.5)


In [4]:
# ========================== Read and Extract Data ============================
# Load the full table of tracked systems; the first row of the file is the header.
input_lows_pd = pd.read_csv(data_path + input_filename, delimiter = ',', header = 0)

# Replace the header names from the file with short, fixed column labels.
input_lows_pd.columns = [
    ' ', 'ID', 'Fix', 'Date', 'Time', 'Open',
    'Lon', 'Lat', 'MSLP', 'CV', 'Depth', 'Radius',
    'Up', 'Vp', 'Location',
]

# Build one boolean mask per selection criterion.
is_closed = input_lows_pd['Open'] == 0  # drops open troughs
# Dates are yyyymmdd numbers, so strict < and > against the bounds is enough.
in_period = (input_lows_pd['Date'] > years[0]) & (input_lows_pd['Date'] < years[1])
# lats is (northern limit, southern limit); lons is (western limit, eastern limit).
in_lat_band = (input_lows_pd['Lat'] <= lats[0]) & (input_lows_pd['Lat'] >= lats[1])
in_lon_band = (input_lows_pd['Lon'] >= lons[0]) & (input_lows_pd['Lon'] <= lons[1])

# Keep only closed lows inside the requested period and region.
closed_lows_pd = input_lows_pd.loc[is_closed & in_period & in_lat_band & in_lon_band]

# These columns are already numerical, so they are pulled out directly as
# numpy arrays, ready to be written into the output array.
track_data = closed_lows_pd['ID'].to_numpy()
lat_data = closed_lows_pd['Lat'].to_numpy()
lon_data = closed_lows_pd['Lon'].to_numpy()
cv_data = closed_lows_pd['CV'].to_numpy()
depth_data = closed_lows_pd['Depth'].to_numpy()
radius_data = closed_lows_pd['Radius'].to_numpy()

# Date and Time are kept as plain Python lists, because the conversion loop
# parses them one element at a time.
date_um = list(closed_lows_pd['Date'])
hour_um = list(closed_lows_pd['Time'])

In [5]:
# ======================== Convert Date and Time Data =========================

# This section walks through the date_um and hour_um lists in step, splits each
# entry into y,m,d,h values, and converts those to a reanalysis timestep with
# the netcdf_time_scale function defined at the beginning of the program.

n_rows = closed_lows_pd.shape[0]

# Blank output array, one row per closed low and 11 columns. This cell fills
# columns 1-5 (year, month, day, hour, timestep); the other columns stay zero.
new_array = np.zeros((n_rows, 11))

for row, (date_value, time_value) in enumerate(zip(date_um, hour_um)):

    # The date is laid out as yyyymmdd, so slice it by character position.
    date_text = str(date_value)
    year = int(date_text[:4])
    month = int(date_text[4:6])
    day = int(date_text[6:])

    # The hour is whatever remains after dropping the last three characters of
    # the Time string (presumably a ':MM' minutes suffix - confirm against the
    # input file).
    hour = int(time_value[:-3])

    # Write the processed data into the new array.
    new_array[row, 1:5] = (year, month, day, hour)

    # Compute a reanalysis timestep relative to the baseline year y0.
    new_array[row, 5] = netcdf_time_scale(year, month, day, hour, y0)

In [6]:
# ======================== Add Final Data and Save Output =====================
   
# Copy the already-numeric columns into their slots in the output array:
# column 0 is the track ID, and columns 6-10 are lat, lon, cv, depth and radius.
column_sources = (
    (0, track_data),
    (6, lat_data),
    (7, lon_data),
    (8, cv_data),
    (9, depth_data),
    (10, radius_data),
)
for column_index, column_values in column_sources:
    new_array[:, column_index] = column_values

# Save output as comma-separated text (no header row is written).
fname = data_path + output_filename
np.savetxt(fname, new_array, delimiter = ',')


In [6]:
# This cell saves a subset of new_array chosen by month. (Not used for the final paper)

months = (4,10) # (4, 10) to compare against P012, (1,12) for else

# Data I/O parameters
save_path = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/Acacia_Lows/'

# NOTE(review): this rebinds the output_filename set in the initial-parameters
# cell, so re-running the earlier save cell after this one would write to this
# file name instead.
output_filename = 'closed_lows_mu_era5_2017_nov-mar.txt'

# Column 2 of new_array holds the month of each record.
month_column = new_array[:, 2]
inside_window = (month_column >= months[0]) & (month_column <= months[1])

# Select the records whose month falls OUTSIDE months[0]..months[1].
# (Use `im = inside_window` instead to keep the months inside the window.)
im = ~inside_window

# Save output
output_array = new_array[im, :]
np.savetxt(save_path + output_filename, output_array, delimiter = ',')