In [1]:
"""
Created on Tue Jan  9, 2024

THIS PROGRAM TAKES THE SYNOPTIC CLASSIFICATION FROM MIKE POOK'S 2012 PAPER
(IN MATLAB FORM), ISOLATES ALL THE ENTRIES OF 'cutoff low', SPLITS UP 
THE DATE SO THE YEAR, MONTH AND DAY ARE IN SEPARATE COLUMNS, AND OUTPUTS THE 
RESULTING DATA TO A .txt FILE.

@author: Nick
"""

# ========================== Import Modules ===============================
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.dates as dt
from scipy.io import loadmat

In [2]:
# ========================= Define Utility Functions ======================
def datefrompook(num):
    '''Convert a day number from Mike Pook's 2012 dataset to (year, month, day).

    The day numbers were computed in Matlab (serial "datenum" values). The date returned here is
    one day EARLIER than the calendar date encoded by ``num``: the dataset is indexed by the day on
    which the rain was recorded, and the one-day offset makes the result refer to the synoptic
    weather that delivered the rain.

    Parameters
    ----------
    num : int or float
        Matlab serial day number. Any fractional (time-of-day) part is ignored.

    Returns
    -------
    tuple of int
        (year, month, day). The first number in the dataset, 714506, gives (1956, 3, 31).
    '''
    # A Matlab datenum is 366 larger than Python's proleptic Gregorian ordinal, and one further day
    # is removed for the rain-day offset described above. Using the standard library here (rather
    # than matplotlib's num2date) keeps the result independent of matplotlib's configurable date
    # epoch, which changed default in matplotlib 3.3.
    date_obj = datetime.fromordinal(int(num) - 367)

    y = date_obj.year
    m = date_obj.month
    d = date_obj.day

    return y,m,d

In [3]:
# ========================== I/O Parameters ================================
# Name of the Matlab file holding the synoptic classification, and the folder it lives in
# (the folder string ends with '/' because the two are joined by plain concatenation).
pookfile = 'synclassification_se.mat'
data_directory = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/P12/'

# Selection settings: first year to keep, and the 'syntype' value that is treated as a cutoff low.
startyear, cutofflow_code = 1979, 1

In [4]:
# Load the .mat file with scipy; loadmat returns a dict keyed by the Matlab variable names.
pook_path = data_directory + pookfile
pook_data = loadmat(pook_path)
print(pook_data)

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Mon Dec 13 09:47:57 2010', '__version__': '1.0', '__globals__': [], 'dntype': array([[714506],
       [714507],
       [714508],
       ...,
       [734075],
       [734076],
       [734077]], dtype=int32), 'syntype': array([[1],
       [0],
       [0],
       ...,
       [0],
       [3],
       [0]], dtype=int16), 'raintype': array([[12.25  ],
       [ 0.    ],
       [ 0.6   ],
       ...,
       [ 0.    ],
       [ 0.0375],
       [ 4.75  ]]), 'strtype': array([[array(['cutoff low'], dtype='<U10')],
       [array([], dtype='<U1')],
       [array(["E'ly trough"], dtype='<U11')],
       ...,
       [array([], dtype='<U1')],
       [array(['N stream'], dtype='<U8')],
       [array(["E'ly trough"], dtype='<U11')]], dtype=object)}


In [5]:
# Extract the useful columns and flatten them to 1d arrays. loadmat returns each Matlab column vector as an
# (N, 1) array; ravel() always gives shape (N,), whereas squeeze() would collapse a single-record file to a 
# 0-d array that cannot be iterated over.
datecol = np.asarray(pook_data['dntype'])
datecol_1d = datecol.ravel()

# Convert each day number exactly once with datefrompook, then split the resulting (year, month, day) tuples into 
# separate lists. 
date_tuples = [datefrompook(x) for x in datecol_1d]
year_list = [ymd[0] for ymd in date_tuples]
month_list = [ymd[1] for ymd in date_tuples]
day_list = [ymd[2] for ymd in date_tuples]

# The rain and synoptic columns can just go as-is. 
raincol = np.asarray(pook_data['raintype'])
raincol_1d = raincol.ravel()

syncol = np.asarray(pook_data['syntype'])
syncol_1d = syncol.ravel()

# Arrange the columns into a dictionary, which is easier to convert into a pandas dataframe.
pook_dict = {'year':year_list, 'month':month_list, 'day':day_list, 'synoptic':syncol_1d, 'rain':raincol_1d}

# Recombine the useful columns into a pandas dataframe (one row per record in the .mat file) and display it.
pook_pd = pd.DataFrame(pook_dict)
pook_pd


Unnamed: 0,year,month,day,synoptic,rain
0,1956,3,31,1,12.2500
1,1956,4,1,0,0.0000
2,1956,4,2,0,0.6000
3,1956,4,3,0,0.1875
4,1956,4,4,0,0.0000
...,...,...,...,...,...
19567,2009,10,26,0,0.0000
19568,2009,10,27,0,0.0000
19569,2009,10,28,0,0.0000
19570,2009,10,29,3,0.0375


In [6]:
# Keep only the rows from startyear onwards whose synoptic code matches cutofflow_code
from_startyear = pook_pd['year'] >= startyear
is_cutoff_low = pook_pd['synoptic'] == cutofflow_code
output_pd = pook_pd.loc[from_startyear & is_cutoff_low]
output_pd

Unnamed: 0,year,month,day,synoptic,rain
8404,1979,4,4,1,0.4000
8405,1979,4,5,1,0.4250
8418,1979,4,18,1,0.4500
8419,1979,4,19,1,0.1250
8435,1979,5,5,1,0.7750
...,...,...,...,...,...
19552,2009,10,11,1,0.0125
19553,2009,10,12,1,1.7875
19554,2009,10,13,1,6.5750
19556,2009,10,15,1,0.5750


In [7]:
# Write the selected rows as comma-separated text, with no header line and no index column.
# Any existing file of the same name is overwritten.
output_path = data_directory + 'pook12lows.txt'
output_pd.to_csv(output_path, sep=',', header=False, index=False, mode='w')