In [None]:
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from pathlib import Path
import re

import metpy.calc as mcalc
import metpy.units as metunits
# local module
import mypaths

import json

from ipywidgets import interact
from tqdm import tqdm_notebook as tqdm

In [None]:
# Load the alias table: each short variable code (e.g. 'ws', 'wd') maps to the
# candidate column names that are looked up in the WeatherPack data frames.
vrbl_aliases = json.loads(Path('weatherpack_variable_aliases.json').read_text())
vrbl_aliases

In [None]:
# Table indexed by date with one column per WeatherPack (wpk2, wpk3, wpk4 are
# read further down). 'NA' entries are parsed as missing and then replaced by
# empty strings, so an empty (falsy) cell can be tested with a plain `if`.
wpk_usage = pd.read_csv('weatherpack_usage.txt',
                        sep=r'\s+',  # raw string: '\s' is not a valid escape in a normal string literal
                        na_values='NA',
                        parse_dates=['date'],
                        index_col='date',
#                         dtype=dict(wpk2=str),
                        ).fillna('')
wpk_usage.head()

In [None]:
def wpk_latlon_parser(s):
    """
    Convert a WeatherPack ship-position string to signed decimal degrees.

    The expected layout is ``<N|S> DD MM.mmm <E|W> DDD MM.mmm``: hemisphere
    letter, whole degrees and decimal minutes (three decimals), optionally
    separated by whitespace.

    Parameters
    ----------
    s : str
        Raw position string.

    Returns
    -------
    tuple of float
        ``(lat, lon)`` in decimal degrees, negative in the southern and
        western hemispheres. ``(nan, nan)`` if `s` does not match the layout.
    """
    latlon_re = re.compile(r'''
# Latitude part
(?P<lat_hem>[NS])\s*
(?P<lat_deg>[0-9]{2})\s*
(?P<lat_min>[0-9]{1,2}\.[0-9]{3})\s*

# Longitude part
(?P<lon_hem>[EW])\s*
(?P<lon_deg>[0-9]{3})\s*
(?P<lon_min>[0-9]{1,2}\.[0-9]{3})''', re.X)

    m = latlon_re.match(s)
    if m is None:
        # NaN is the fallback for unparseable input only; it must not
        # overwrite a successfully parsed position.
        return np.nan, np.nan

    lat_factor = -1 if m.group('lat_hem') == 'S' else 1
    lon_factor = -1 if m.group('lon_hem') == 'W' else 1

    # degrees + minutes / 60, signed by hemisphere
    lat = lat_factor * (float(m.group('lat_deg')) + float(m.group('lat_min')) / 60)
    lon = lon_factor * (float(m.group('lon_deg')) + float(m.group('lon_min')) / 60)
    return lat, lon

In [None]:
# time_range = (pd.date_range(start=date,
#                             freq='T',
#                             end=date+timedelta(hours=23, minutes=59, seconds=59))
#               .to_series()
#               .to_frame(name='time'))

In [None]:
# inputdir = mypaths.wpk_dir / '2_Leg' / 'TRUEWIND'

In [None]:
# fname = inputdir / f'Wpk_st04@{date:%Y_%m_%d}.txt'
# print(fname.exists())
# # fname = inputdir / f'data_3_{date:%Y%m%d_%H}.log'

In [None]:
def interp_dataframe_time(df, date, freq='1min', end='auto'):
    """
    Interpolate a time-indexed DataFrame onto a regular time grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Data indexed by time stamps.
    date : datetime-like
        Start of the target grid.
    freq : str, optional
        Grid spacing passed to `pandas.date_range`. The default '1min' is the
        same one-minute step as the deprecated alias '1T'.
    end : datetime-like or 'auto', optional
        End of the target grid; 'auto' means `date` + 23:59:59.

    Returns
    -------
    pandas.DataFrame
        Rows of the interpolated data that fall on the grid, with duplicate
        time stamps removed (first kept) and the index named 'time'.
    """
    if isinstance(end, str) and end == 'auto':
        end = date + timedelta(hours=23, minutes=59, seconds=59)
    labels = pd.date_range(start=date, end=end, freq=freq)

    # Placeholder rows (all-missing for the data columns) at every grid time.
    grid_rows = labels.to_series().to_frame(name='time')

    # The helper 'time' column only exists to carry the grid index into the
    # concat, so it is dropped *before* interpolating: it is never part of the
    # result, and interpolating a datetime column with method='values' is not
    # supported by newer pandas versions.
    merged = (pd.concat([df, grid_rows])
              .sort_index()
              .drop('time', axis=1)
              # index-value based interpolation; at most one consecutive gap row is filled
              .interpolate(method='values', limit=1))

    on_grid = merged.loc[merged.index.intersection(labels)]
    on_grid = on_grid[~on_grid.index.duplicated(keep='first')]
    # Name the index last so the label cannot be lost by the selection above.
    return on_grid.rename_axis('time')

In [None]:
def read_wpk_daily(topdir, date, wpk_id):
    """
    Read one daily WeatherPack file and resample it to the regular time grid.

    Parameters
    ----------
    topdir : pathlib.Path
        Directory containing the daily files.
    date : datetime-like
        Day to read; used to build the file name.
    wpk_id : str or int
        WeatherPack number, 2 or 4. No. 2 files are named
        ``Wpk_st02@YYYY_MM_DD.txt``, No. 4 files ``ARyymmdd.004``.

    Returns
    -------
    pandas.DataFrame
        Empty if the file does not exist (or holds no rows); otherwise the
        data with wind components added and interpolated in time.
    """
    wpk_id = str(wpk_id)
    assert wpk_id in ['2', '4'], 'Works only for WeatherPacks  No. 2 or 4'

    def _parse_stamp(x):
        # both file flavours store "yy/mm/dd HH:MM:SS"
        return datetime.strptime(x, '%y/%m/%d %H:%M:%S')

    df = pd.DataFrame()
    if wpk_id == '2':
        fname = topdir / f'Wpk_st0{wpk_id}@{date:%Y_%m_%d}.txt'
        if fname.exists():
            # columns 1 and 2 are merged into a single datetime index
            df = pd.read_csv(fname, parse_dates=[[1, 2]], index_col=0,
                             date_parser=_parse_stamp)
            # split the textual ship position into two numeric columns
            position = df['Ship position'].map(wpk_latlon_parser, na_action='ignore')
            position = position.apply(pd.Series)
            position = position.rename(mapper={0: 'latitude', 1: 'longitude'}, axis=1)
            df[['latitude', 'longitude']] = position
            df = df.drop(labels=['Unit ID', 'Ship position'], axis=1)
    else:
        # the assert above guarantees wpk_id == '4' here
        fname = topdir / f'AR{date:%y%m%d}.00{wpk_id}'
        if fname.exists():
            df = pd.read_csv(fname, skiprows=1, sep='\t',
                             parse_dates=[['date', 'time']], index_col='date_time',
                             date_parser=_parse_stamp)

    if len(df) == 0:
        # nothing was read: hand back the empty frame untouched
        return df

    # Convert wind speed and direction to u and v components,
    # then interpolate to minute time intervals
    return interp_dataframe_time(add_wind_components(df), date)

In [None]:
def add_wind_components(df):
    """
    Add wind components 'u' and 'v' computed from wind speed and direction.

    The speed and direction columns are located through the alias lists
    ``vrbl_aliases['ws']`` and ``vrbl_aliases['wd']``; when several aliases are
    present in `df`, the last one in the list is used. Speed is passed to MetPy
    in m/s and direction in degrees.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing a wind-speed and a wind-direction column.

    Returns
    -------
    pandas.DataFrame
        The same object, modified in place, with columns 'u' and 'v' added.

    Raises
    ------
    KeyError
        If none of the aliases for wind speed or wind direction is a column.
    """
    def _aliased_column(kind):
        # Return the column for the last alias of `kind` that exists in df.
        found = None
        for alias in vrbl_aliases[kind]:
            try:
                found = df[alias]
            except KeyError:
                continue
        if found is None:
            # fail with a clear message instead of an unbound local name below
            raise KeyError(f'none of the {kind!r} aliases {vrbl_aliases[kind]} '
                           f'is a column of the DataFrame')
        return found

    wspd = _aliased_column('ws')
    wdir = _aliased_column('wd')

    # NOTE(review): newer MetPy releases expose this function as
    # `wind_components`; older ones only have `get_wind_components`.
    # Use whichever is available - confirm against the installed version.
    wind_to_uv = getattr(mcalc, 'wind_components', None) or mcalc.get_wind_components

    df['u'], df['v'] = wind_to_uv(wspd.values * metunits.units('m/s'),
                                  wdir.values * metunits.units('degrees'))
    return df

In [None]:
def read_wpk_hourly(topdir, date, wpk_id):
    """
    Read the hourly log files of one day and resample them to the time grid.

    Files are looked up as
    ``topdir/<id>/<YYYY>/<MM>/<DD>/data_<id>_<YYYYMMDD>_<HH>.log`` for
    HH = 00..23; missing hours are skipped.

    Parameters
    ----------
    topdir : pathlib.Path
        Root directory of the hourly archive.
    date : datetime-like
        Day to read.
    wpk_id : str or int
        WeatherPack number, 3 or 4.

    Returns
    -------
    pandas.DataFrame
        Empty if no file exists (or no rows were read); otherwise the
        concatenated data with wind components added and interpolated in time.
    """
    def date_parser(s):
        # the last 4 characters are cut off before parsing
        # (presumably the time-zone suffix - verify against a raw file)
        return datetime.strptime(s[:-4], '%Y-%m-%d %H:%M:%S')

    wpk_id = str(wpk_id)
    assert wpk_id in ['3', '4'], 'Works only for WeatherPacks  No. 3 or 4'

    # Loop-invariant pieces, computed once
    time_col_name = ' zeno_date zeno_time zeno_timezone'
    day_dir = topdir / wpk_id / f'{date:%Y}' / f'{date:%m}' / f'{date:%d}'

    # Collect the hourly frames and concatenate once, instead of growing a
    # DataFrame inside the loop.
    hourly_frames = []
    for h in range(24):
        fname = day_dir / f'data_{wpk_id}_{date:%Y%m%d}_{h:02d}.log'
        if fname.exists():
            # malformed lines are skipped without a warning
            # NOTE(review): error_bad_lines/warn_bad_lines were replaced by
            # on_bad_lines in newer pandas - adjust when upgrading.
            hourly_frames.append(pd.read_csv(fname,
                                             error_bad_lines=False, warn_bad_lines=False,
                                             index_col=time_col_name,
                                             parse_dates=[time_col_name],
                                             date_parser=date_parser))

    if not hourly_frames:
        return pd.DataFrame()

    df = pd.concat(hourly_frames)
    if len(df) > 0:
        # Convert wind speed and direction to u and v components
        df = add_wind_components(df)
        # Interpolate to minute time intervals
        df = interp_dataframe_time(df, date)
    return df

In [None]:
# date = pd.datetime(2018, 2, 6)

In [None]:
# Override the usage table: every cell is replaced by the full variable list,
# whatever it contained before.
wpk_usage = wpk_usage.applymap(
    lambda _cell: 't,rh,ws,wd,p,sr,u,v'
)

In [None]:
def _pick_aliased_columns(df, vrbls, suffix):
    """
    Select the requested variables from `df` by their column aliases.

    Returns a dict mapping ``vrbl + suffix`` to the matching column. When
    several aliases of a variable are present, the last one in the alias list
    wins; variables without a matching column are left out. An empty `df`
    yields an empty dict.
    """
    picked = {}
    if len(df) == 0:
        return picked
    for vrbl in vrbls:
        for alias in vrbl_aliases[vrbl]:
            try:
                picked[vrbl + suffix] = df[alias]
            except KeyError:
                # this alias is not a column of this particular file
                pass
    return picked


# One frame per day; they are concatenated once after the loop rather than
# growing df_full on every iteration.
daily_frames = []

for date in tqdm(wpk_usage.index):
    wpk_vars = wpk_usage.loc[date]
    data = dict()

    # An empty usage string means "do not use this WeatherPack on that day".
    if wpk_vars.wpk2:
        df = read_wpk_daily(mypaths.wpk_dir / 'WP02', date, '2')
        data.update(_pick_aliased_columns(df, wpk_vars.wpk2.split(','), '_wpk2'))

    if wpk_vars.wpk3:
        df = read_wpk_hourly(mypaths.wpk_dir, date, '3')
        data.update(_pick_aliased_columns(df, wpk_vars.wpk3.split(','), '_wpk3'))

    if wpk_vars.wpk4:
        # WeatherPack 4 is read from hourly logs before 2018-02-27 and from
        # daily files in '2_Leg/FORESTAR' from that date on.
        if date < datetime(2018, 2, 27):
            df = read_wpk_hourly(mypaths.wpk_dir, date, '4')
        else:
            df = read_wpk_daily(mypaths.wpk_dir / '2_Leg' / 'FORESTAR', date, '4')
        data.update(_pick_aliased_columns(df, wpk_vars.wpk4.split(','), '_wpk4'))

    if data:
        daily_frames.append(pd.DataFrame(data))

# pd.concat raises on an empty list, so fall back to an empty frame
df_full = pd.concat(daily_frames, sort=True) if daily_frames else pd.DataFrame()
# df_full.interpolate(method='time', inplace=True)

In [None]:
# %matplotlib ipympl

In [None]:
# plt.close('all')

In [None]:
# fig, ax = plt.subplots()
# @interact(v=df_full.columns, day=(0, len(wpk_usage)))
# def fun(v, day=0):
#     ax.cla()
#     df_full[v].plot(ax=ax, linewidth=2, marker='.')
#     ax.set_xlim(wpk_usage.index[0]+timedelta(days=day), wpk_usage.index[0]+timedelta(days=day+1))
#     fig.draw()

In [None]:
import ipywidgets as widgets

In [None]:
%matplotlib ipympl

In [None]:
# df_flag = df_full.rename(columns={k: k+'_flag' for k in df_full.columns.values}).copy().applymap(lambda x: 0)
# df_flag = pd.read_csv(sorted(Path('.').glob('weatherpack_data_flag_*.csv'))[-1], index_col='time', parse_dates=['time'])
# Load the saved flag table; its 'time' column is parsed as dates and used as the index.
df_flag = pd.read_csv(
    'weatherpack_data_flag.csv',
    parse_dates=['time'],
    index_col='time',
)
df_flag.tail()

In [None]:
# Interactive flagging tool: choose a variable, a day offset and a window in
# minutes within that day; the button sets the flag to 1 for the variable
# inside the displayed window.
plt.close('all')
fig, ax = plt.subplots(figsize=(12, 6))
# Current window [start, end]; a mutable list so the callbacks update it in place.
tt = [wpk_usage.index[0], wpk_usage.index[-1]]

dd = widgets.Dropdown(
    options=df_full.columns,
    description='Variable:',
    disabled=False,
)

# Window inside the selected day, in minutes since midnight (0 .. 24 * 60).
w = widgets.IntRangeSlider(
    value=[0, 24 * 60],
    min=0,
    max=24 * 60,
    step=1,
    description='Minutes',  # the values are passed to timedelta(minutes=...)
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
# Day offset from the first date of the usage table. The largest useful offset
# is the span between the first and last date, not the number of rows.
slider = widgets.IntSlider(
    description='Days',
    value=0,
    step=1,
    min=0,
    max=max((wpk_usage.index[-1] - wpk_usage.index[0]).days, 0)
)

button = widgets.Button(description="Click Me!")

caption = widgets.Label(value='Blah')

# Output produced inside the decorated callbacks is captured in this widget.
debug_view = widgets.Output(layout={'border': '1px solid black'})


def _window_offsets(day, minute_range):
    """Return the (start, end) offsets from the first date for a day and a minute range."""
    return (timedelta(days=day, minutes=minute_range[0]),
            timedelta(days=day, minutes=minute_range[1]))


@debug_view.capture(clear_output=True)
def plotter(vrbl, tdelta0, tdelta1):
    """Redraw `vrbl` between the two offsets from the first date and mark flagged points."""
    ax.cla()
    ax.set_title(vrbl, loc='left')
    tt[0] = wpk_usage.index[0] + tdelta0
    tt[1] = wpk_usage.index[0] + tdelta1
    # slice once and reuse for the caption and the plot
    window = df_full[vrbl].loc[tt[0]:tt[1]]
    caption.value = f'{window.min():2.1f}\n{window.max():2.1f}'
    window.plot(ax=ax, linewidth=0, marker='.')
    # overlay the already flagged points of the plotted variable
    # (use the `vrbl` argument rather than the dropdown state)
    df_full[vrbl][df_flag[vrbl+'_flag']==1].loc[tt[0]:tt[1]].plot(ax=ax, linestyle='', marker='x', color='r')


@debug_view.capture(clear_output=True)
def handle_dropdown_change(change):
    """Replot when another variable is selected."""
    td0, td1 = _window_offsets(slider.value, w.value)
    plotter(change.new, td0, td1)

def handle_slider_change(change):
    """Replot when the day offset changes."""
    td0, td1 = _window_offsets(change.new, w.value)
    plotter(dd.value, td0, td1)

def handle_range_change(change):
    """Replot when the minute window changes."""
    td0, td1 = _window_offsets(slider.value, change.new)
    plotter(dd.value, td0, td1)

@debug_view.capture(clear_output=True)
def on_button_clicked(b):
    """Flag the selected variable over the current window and mark it on the plot."""
    # a single .loc call writes into df_flag itself; the chained form
    # df_flag[col].loc[...] = 1 may assign to a temporary copy
    df_flag.loc[tt[0]:tt[1], dd.value+'_flag'] = 1
    df_full[dd.value].loc[tt[0]:tt[1]].plot(ax=ax, linestyle='', marker='x', color='r')

slider.observe(handle_slider_change, names='value')
w.observe(handle_range_change, names='value')
button.on_click(on_button_clicked)
dd.observe(handle_dropdown_change, names='value')

display(widgets.HBox([dd, w, slider, button, caption]))

In [None]:
# Inspect the current [start, end] window set by the plotting callbacks
tt

In [None]:
# Values of the selected variable whose flag equals 1 inside the current window
df_full[dd.value][df_flag[dd.value+'_flag']==1].loc[tt[0]:tt[1]]

In [None]:
# Close all open matplotlib figures
plt.close('all')

In [None]:
# (rows, columns) of the combined WeatherPack table
df_full.shape

In [None]:
# df_flag.to_csv(f'./weatherpack_data_flag_final.csv')

In [None]:
# Display the Output widget that captures the callbacks' output
debug_view

In [None]:
# df_flag.to_csv(f'./weatherpack_data_flag_{datetime.now():%Y%m%d%H%M%S}.csv')