In [1]:
%matplotlib inline
import os
from os.path import join as pjoin
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from glob import glob
from Utilities.dynarray import DynamicRecArray as recarray
from Utilities.metutils import convert

# Import widgets for interactive notebook
from ipywidgets import interact, fixed, interactive
from IPython.html import widgets
from IPython.display import display



## Set up data formats for reading csv files

In [22]:
def parseTime(datestr, timestr):
    return datetime.strptime("{0} {1}".format(datestr, timestr), "%Y%m%d %H%M")

METAR_DTYPE=[("stnWMO", '|S5'), ("stnCode", "|S4"), ("dtDate", "|S16"), ("dtTime", "|S8"), 
       ("stnLat", "f8"), ("stnLon", "f8"), ("winddir", "f8"), ("windspeed", "f8"), 
       ("tempDB", "f8"), ("dewpt", "f8"), ("QNH", "f8"), ("RF9am", "f8"), ("RF10min", "f8"),
       ("vis", "f8"), ("Avis", "f8"), ("gust", "f8")]
# METAR_DTYPE=[ ("dtDateTime", '|S5'),("stnWMO", '|S5'), ("stnCode", "|S4"), 
#        ("stnLat", "f8"), ("stnLon", "f8"), ("winddir", "f8"), ("windspeed", "f8"), 
#        ("tempDB", "f8"), ("dewpt", "f8"), ("QNH", "f8"), ("RF9am", "f8"), ("RF10min", "f8"),
#        ("vis", "f8"), ("Avis", "f8"), ("gust", "f8")]
METAR_NAMES = [field[0] for field in METAR_DTYPE]
METAR_NAMES_MOD = ['stnWMO', 'dtDateTime', 'stnCode',  'stnLat', 'stnLon', 'winddir', 'windspeed', 'tempDB', 
                   'dewpt', 'QNH', 'RF9am', 'RF10min', 'vis', 'Avis', 'gust']
METAR_COLS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
METAR_CONV = {'stnCode': lambda s: s.strip('"'),
              'windspeed': lambda s: convert(s, "kts", "mps"),
              'gust': lambda s: convert(s, "kts", "mps")}

SYNOP_DTYPE = [('stnWMO', '|S5'), ('stnName', '|S31'), ('stnCode', '|S4'), ('dtDate', '|S16'), 
               ('dtTime', '|S16'), ('stnLat', 'f8'), ('stnLon', 'f8'), ('winddir', 'f8'), 
               ('windspeed', 'f8'), ('vis', 'f8'), ('presentWxCode', '|S8'), ('pastWxCode', 'f8'),
               ('mslp', 'f8'), ('tempDB', 'f8'), ('dewpt', 'f8')]
SYNOP_NAMES = [field[0] for field in SYNOP_DTYPE]
SYNOP_NAMES_MOD = ['stnWMO', 'dtDateTime', 'stnName', 'stnCode', 'stnLat', 'stnLon', 'winddir', 
                   'windspeed', 'vis', 'presentWxCode', 'pastWxCode', 'mslp', 'tempDB', 'dewpt']
SYNOP_COLS = [0, 1, 2, 3, 4, 5, 6, 10, 11, 12, 13, 14, 16, 21, 22]
SYNOP_CONV = {'stnName': lambda s: s.strip('"').rstrip(' '),
              'stnCode': lambda s: s.strip('"').rstrip(' '),
              'presentWxCode': lambda s: s.strip('"').rstrip(' ')}

inputPath = "B:/CHARS/B_Wind/data/raw/obs/axf"
outputPath = "B:/CHARS/B_Wind/data/derived/obs/metar/TCDebbie"

In [3]:
Debbie_stns = ['94294', '94356', '94360', '94365', '94366', '19367', '94368', '94369', 
               '94371', '95295', '95297', '95367']
Debbie_stns_dict = {94294:'Townsville Amo', 94356:'Charters Towers Airport', 94360:'Collinsville', 
                94365:'Proserpine Airport', 94366:'Bowen Airport', 19367:'Mackay Mo', 94368:'Hamilton Island', 
                94369:'St Lawrence Post Office', 94371:'Creal Reef', 95295:'Ayr Dpi Research Station', 
                95297:'Hook Reef Aws', 95367:'Mackay Airport'}
# Stations in the area of TC Debbie's landfall. Taken from ArcGIS map. Approximately cover the extent of the TCRM
# genreated wind field

In [70]:
metarbasename = {"IDY03101.2017032*.axf":"3101", "IDY03100.2017032*.axf":"3100"}

for db in metarbasename:
    metardata = {}

    filelist = glob(pjoin(inputPath, db))
    for f in filelist:
        arr = pd.read_csv(f, skiprows = 2, header=None, usecols=METAR_COLS, names = METAR_NAMES, 
                          dtype = METAR_DTYPE, parse_dates = {'dtDateTime':[2, 3]}, na_values = -9999.0)
        cols = arr.columns.tolist()
        cols[0], cols[1] = cols[1], cols[0]
        arr = arr[cols]
        arr = arr.to_records()
        for i in range(len(arr)):
            key = "{0}".format(arr["stnWMO"][i])
            if metardata.has_key(key):
                metardata[key] = np.append(metardata[key], arr[i])
            else:
                metardata[key] = recarray(METAR_DTYPE)
                metardata[key] = arr[i]
    metarfmt = ['%s', '%s', '%s', '%6.2f', '%6.2f', '%5.1f', 
               '%5.1f', '%5.1f', '%5.1f', '%6.1f', '%5.1f', '%5.1f', 
               '%7.1f', '%7.1f', '%6.1f']
    for key in metardata.keys():
        if key in Debbie_stns:
            fname = pjoin(outputPath, "{0}_{1}.csv".format(key, metarbasename[db]))
            df = pd.DataFrame(metardata[key])
            del df['index']
            #dfdup = df.drop_duplicates(subset = ['dtDateTime'])
            da = df.to_records(index=False)
            np.savetxt(fname, da, fmt=metarfmt, delimiter=',') # NB, the numpy indexer is being written out?!?!

In [12]:
synopbasename = "IDY03000.2017032*.axf"
synopdata = {}

filelist = glob(pjoin(inputPath, db))
for f in filelist:
    arr = pd.read_csv(f, skiprows = 2, header=None, usecols = SYNOP_COLS, names = SYNOP_NAMES, 
                      dtype = SYNOP_DTYPE, na_values = -9999.0, parse_dates = {'dtDateTime':[3, 4]})
    print arr
    cols = arr.columns.tolist()
    cols[0], cols[1] = cols[1], cols[0]
    arr = arr[cols]
    arr = arr.to_records()
    for i in range(len(arr)):
        key = "{0}".format(arr["stnWMO"][i])
        if synopdata.has_key(key):
            synopdata[key] = np.append(synopdata[key], arr[i])
        else:
            synopdata[key] = recarray(SYNOP_DTYPE)
            synopdata[key] = arr[i]
synopfmt = ['%s', '%s', '%s', '%s', '%6.2f', '%6.2f', '%5.1f', 
           '%5.1f', '%5.1f', '%5.1f', '%6.1f', '%5.1f', '%5.1f', 
           '%7.1f', '%7.1f', '%6.1f']
for key in synopdata.keys():
    if key in Debbie_stns:
        fname = pjoin(outputPath, "{0}_3000.csv".format(key))
        df = pd.DataFrame(synopdata[key])
        #dfdup = df.drop_duplicates(subset = ['dtDateTime'])
        da = df.to_records(index=False)
        np.savetxt(fname, da, fmt=synopfmt, delimiter=',') # NB, the numpy indexer is being written out?!?!

IndexError: list index out of range

In [None]:
# synopbasename = "IDY03000.2017032*.axf"
# synopdata = {}

# filelist = glob(pjoin(inputPath, synopbasename))
# for f in filelist:
#     try:
#         arr = np.genfromtxt(f, dtype=SYNOP_DTYPE, delimiter=',', skip_header=2,
#                             skip_footer=1, usecols=SYNOP_COLS, names=SYNOP_NAMES,
#                             autostrip=True, converters=SYNOP_CONV)
#     except IndexError:
#         print f
#     if arr.size == 1:
#         key = "{0}".format(arr["stnWMO"])
#         if synopdata.has_key(key):
#             synopdata[key] = np.append(synopdata[key], arr)
#         else:
#             synopdata[key] = recarray(SYNOP_DTYPE)
#             synopdata[key] = arr
#     else:
#         for i in range(len(arr)):
#             key = "{0}".format(arr["stnWMO"][i])
#             if synopdata.has_key(key):
#                 synopdata[key] = np.append(synopdata[key], arr[i])
#             else:
#                 synopdata[key] = recarray(SYNOP_DTYPE)
#                 synopdata[key] = arr[i]
# synopfmt = ['%s', '%s', '%s', '%s', '%s', '%6.2f', '%6.2f', '%5.1f', 
#             '%5.1f', '%d', '%s', '%d', '%6.1f', '%4.1f', '%4.1f']
# for key in synopdata.keys():
#     if key in Debbie_stns:
#         fname = pjoin(outputPath, "{0}_3000.csv".format(key))
#         try:
#             df = pd.DataFrame(synopdata[key])
#             dfdup = df.drop_duplicates(['dtDate', 'dtTime'])
#             da = dfdup.to_records(index=False)
#             np.savetxt(fname, da, fmt=synopfmt, delimiter=',')
#         except IndexError:
#             print key

In [23]:
def determineFormat(file):
    '''
    Uses the csv file name of the input file to determine which format (metar or synop), the file is, and therefore, 
    where the various variables are located within the file.
    '''
    name_parts = file.split('_')
    name = name_parts[2]
    name = name[:-4]
    return name
def getHeaders(file):
    name = determineFormat(file)
    if name == '3100' or name == '3101':
        column_names = METAR_NAMES_MOD
    elif name == '3000':
        column_names = SYNOP_NAMES_MOD
    return column_names

In [24]:
def plotcurve(file):
    column_names = getHeaders(file)
    data = pd.read_csv(file, header=None, names = column_names)
    data['dtDateTime'] = pd.to_datetime(data['dtDateTime'])

    # Set up the figure
    fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(16,12), sharex=True)
    
#     if name == '3100' or name == '3101':  
#         ax0.plot(data[7])# wind speed
#         ax1.plot(data[10]) # Pressure
#     elif name == '3000':
#         ax0.plot(data[8])# wind speed
#         ax1.plot(data[12]) # Pressure
    ax0.plot(data['dtDateTime'], data['windspeed'])
    ax1.plot(data['dtDateTime'], data['QNH'])
    ax1.set_ylim((950,1020))
    fig.tight_layout() 

In [None]:
avail_files = glob(outputPath + '/*')

In [None]:
w = interactive(plotcurve, file=widgets.Dropdown(options=avail_files))
display(w)

## Read in formatted csv files, and find the maximum gust for each file

We will use this data to correct the local wind field from TCRM

In [80]:
filelist = glob(outputPath + '\*')
max_gust_obs = pd.DataFrame()
for f in filelist:
    name = determineFormat(f)
    if name == '3100': # or name == '3101':
        data = pd.read_csv(f, names = METAR_NAMES_MOD)
        max_gust = data['gust'].idxmax()
        max_gust_obs = max_gust_obs.append(data.ix[max_gust], ignore_index = True)

In [81]:
max_gust_obs.sort_values('stnLat')

Unnamed: 0,Avis,QNH,RF10min,RF9am,dewpt,dtDateTime,gust,stnCode,stnLat,stnLon,stnWMO,tempDB,vis,winddir,windspeed
3,1300.0,996.5,1.8,82.2,24.9,20170327 1728,50.0,"""YB",-21.17,149.18,95367.0,25.4,,130.0,38.0
0,,,,,,20170328 0531,80.0,"""CR",-20.53,150.38,94371.0,26.0,,20.0,23.0
4,900.0,969.2,2.0,76.2,24.5,20170328 0300,89.0,"""YB",-20.49,148.56,94365.0,24.7,,130.0,59.0
5,200.0,968.1,1.6,15.4,36.9,20170328 0025,142.0,"""YB",-20.37,148.95,94368.0,36.9,,310.0,97.0
2,,,0.0,0.2,20.9,20170328 0430,44.0,"""AY",-19.62,147.38,95295.0,28.6,,250.0,26.0
1,10000.0,997.7,0.0,3.2,21.6,20170328 0800,38.0,"""YB",-19.25,146.77,94294.0,29.0,,260.0,22.0


### Convert BoM wind field

kts to m/s AND 3s gust to 0.2s gust

In [96]:
max_gust_obs['gust'] = max_gust_obs['gust'] * 0.514444 # kts to m/s
max_gust_obs['gust_conv'] = max_gust_obs['gust'] * (0.0006 * max_gust_obs['gust'] + 1.1105) # 3s to 0.2 s

In [97]:
max_gust_obs

Unnamed: 0,Avis,QNH,RF10min,RF9am,dewpt,dtDateTime,gust,stnCode,stnLat,stnLon,stnWMO,tempDB,vis,winddir,windspeed,gust_conv
0,,,,,,20170328 0531,41.15552,"""CR",-20.53,150.38,94371.0,26.0,,20.0,23.0,46.719471
1,10000.0,997.7,0.0,3.2,21.6,20170328 0800,19.548872,"""YB",-19.25,146.77,94294.0,29.0,,260.0,22.0,21.938317
2,,,0.0,0.2,20.9,20170328 0430,22.635536,"""AY",-19.62,147.38,95295.0,28.6,,250.0,26.0,25.444183
3,1300.0,996.5,1.8,82.2,24.9,20170327 1728,25.7222,"""YB",-21.17,149.18,95367.0,25.4,,130.0,38.0,28.961482
4,900.0,969.2,2.0,76.2,24.5,20170328 0300,45.785516,"""YB",-20.49,148.56,94365.0,24.7,,130.0,59.0,52.102604
5,200.0,968.1,1.6,15.4,36.9,20170328 0025,73.051048,"""YB",-20.37,148.95,94368.0,36.9,,310.0,97.0,84.325062


In [98]:
outputPath_obs = "B:/CHARS/B_Wind/data/derived/tc/events/bsh132016/Observations/BoM_max_gust.csv"
max_gust_obs.to_csv(outputPath_obs, sep = ',', columns = ['dtDateTime', 'stnLat', 'stnLon', 'stnWMO', 'gust_conv'],
                   index = False)