In [1]:
%matplotlib ipympl

In [13]:
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from pathlib import Path
import re

import metpy.calc as mpcalc
from metpy.units import units
# local module
import mypaths

import json

from ipywidgets import interact
from tqdm import tqdm_notebook as tqdm

In [3]:
def wpk_latlon_parser(s):
    """
    Parse a ship-position string into signed decimal degrees.

    The accepted layout is ``<N|S> DD MM.mmm <E|W> DDD MM.mmm``: a hemisphere
    letter, whole degrees (2 digits for latitude, 3 for longitude) and decimal
    minutes with exactly three decimals. Whitespace between fields is optional.

    Parameters
    ----------
    s : str
        Position string, e.g. ``'N 60 30.000 E 010 15.000'``.

    Returns
    -------
    tuple of float
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative. ``(nan, nan)`` is returned when `s`
        is not a string or does not match the expected layout.
    """
    latlon_re = re.compile(r'''
        # Latitude part
        (?P<lat_hem>[NS])\s*
        (?P<lat_deg>[0-9]{2})\s*
        (?P<lat_min>[0-9]{1,2}\.[0-9]{3})\s*

        # Longitude part
        (?P<lon_hem>[EW])\s*
        (?P<lon_deg>[0-9]{3})\s*
        (?P<lon_min>[0-9]{1,2}\.[0-9]{3})''', re.X)

    # Non-string input (e.g. a missing value) cannot be matched at all.
    if not isinstance(s, str):
        return np.nan, np.nan

    m = latlon_re.match(s)
    if m is None:
        # The NaN fallback applies ONLY when parsing fails; previously it was
        # assigned unconditionally and clobbered every successful parse.
        return np.nan, np.nan

    lat_factor = -1 if m.group('lat_hem') == 'S' else 1
    lon_factor = -1 if m.group('lon_hem') == 'W' else 1

    # Degrees + minutes / 60, signed by hemisphere
    lat = lat_factor * (float(m.group('lat_deg')) + float(m.group('lat_min')) / 60)
    lon = lon_factor * (float(m.group('lon_deg')) + float(m.group('lon_min')) / 60)

    return lat, lon

In [4]:
# time_range = (pd.date_range(start=date,
#                             freq='T',
#                             end=date+timedelta(hours=23, minutes=59, seconds=59))
#               .to_series()
#               .to_frame(name='time'))

In [5]:
# inputdir = mypaths.wpk_dir / '2_Leg' / 'TRUEWIND'

In [6]:
# fname = inputdir / f'Wpk_st04@{date:%Y_%m_%d}.txt'
# print(fname.exists())
# # fname = inputdir / f'data_3_{date:%Y%m%d_%H}.log'

In [7]:
def interp_dataframe_time(df, date):
    """
    Put a time-indexed DataFrame onto a regular one-minute grid for one day.

    The grid runs from `date` to `date` + 23:59:59 at one-minute steps. Grid
    times are merged into the data, gaps are interpolated against the index
    values, and only the rows lying on the grid are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Data indexed by timestamps (assumed to be a DatetimeIndex, since it is
        merged and sorted together with a `pd.date_range`).
    date : datetime-like
        Start of the day; must support ``date + timedelta``.

    Returns
    -------
    pandas.DataFrame
        Rows at the grid timestamps only, with a unique index named ``'time'``.
        Because ``limit=1`` is used, at most one consecutive missing row after
        a valid observation is filled; other grid rows stay NaN.
    """
    # 'min' is the non-deprecated spelling of the one-minute frequency ('T').
    minute_grid = pd.date_range(start=date,
                                end=date + timedelta(hours=23, minutes=59, seconds=59),
                                freq='min')

    # A column-less frame contributes all-NaN rows at the grid times without
    # adding a helper datetime column that would itself go through interpolate().
    grid_rows = pd.DataFrame(index=minute_grid)

    combined = (pd.concat([df, grid_rows])
                # stable sort: at equal timestamps the observed row stays ahead
                # of the empty grid row, so keep='first' below retains real data
                .sort_index(kind='mergesort')
                .interpolate(method='values', limit=1))
    combined.index = combined.index.rename('time')

    on_grid = combined[combined.index.isin(minute_grid)]
    return on_grid[~on_grid.index.duplicated(keep='first')]

In [8]:
def read_wpk_daily(topdir, date, wpk_id):
    """
    Read one daily WeatherPack file and interpolate it to one-minute steps.

    Parameters
    ----------
    topdir : pathlib.Path
        Directory containing the daily file.
    date : datetime-like
        Day to read; used to build the file name.
    wpk_id : str or int
        WeatherPack number; only 2 and 4 are accepted.

    Returns
    -------
    pandas.DataFrame
        Data passed through `interp_dataframe_time`, or an empty DataFrame
        when the file does not exist (its path is printed) or holds no rows.

    Notes
    -----
    * No. 2 files are named ``Wpk_st02@YYYY_MM_DD.txt``; their
      'Ship position' column is converted into 'latitude' / 'longitude'
      and the 'Unit ID' and 'Ship position' columns are dropped.
    * No. 4 files are named ``ARyymmdd.004``, are tab-separated and have
      their first line skipped.
    """
    wpk_id = str(wpk_id)
    assert wpk_id in ['2', '4'], 'Works only for WeatherPacks  No. 2 or 4'

    def _parse_stamp(x):
        # Combined "date time" string with a two-digit year
        return datetime.strptime(x, '%y/%m/%d %H:%M:%S')

    is_wpk2 = wpk_id == '2'
    if is_wpk2:
        fname = topdir / f'Wpk_st0{wpk_id}@{date:%Y_%m_%d}.txt'
    else:
        fname = topdir / f'AR{date:%y%m%d}.00{wpk_id}'

    # Missing file: report the path that was looked for and give up early
    if not fname.exists():
        print(fname)
        return pd.DataFrame()

    if is_wpk2:
        df = pd.read_csv(fname, parse_dates=[[1, 2]], index_col=0,
                         date_parser=_parse_stamp)
        # Expand the position string into two numeric columns
        positions = (df['Ship position']
                     .map(wpk_latlon_parser, na_action='ignore')
                     .apply(pd.Series)
                     .rename(mapper={0: 'latitude', 1: 'longitude'}, axis=1))
        df[['latitude', 'longitude']] = positions
        df = df.drop(labels=['Unit ID', 'Ship position'], axis=1)
    else:
        df = pd.read_csv(fname, skiprows=1, sep='\t',
                         parse_dates=[['date', 'time']], index_col='date_time',
                         date_parser=_parse_stamp)

    if len(df) > 0:
        # Interpolate to minute time intervals
        df = interp_dataframe_time(df, date)
    return df

In [9]:
def read_wpk_hourly(topdir, date, wpk_id):
    """
    Read the hourly WeatherPack log files of one day into a single DataFrame.

    Files are looked up as
    ``topdir/<wpk_id>/<YYYY>/<MM>/<DD>/data_<wpk_id>_<YYYYMMDD>_<HH>.log``
    for HH = 00..23; hours without a file are skipped silently.

    Parameters
    ----------
    topdir : pathlib.Path
        Root directory of the hourly logs.
    date : datetime-like
        Day to read.
    wpk_id : str or int
        WeatherPack number; only 3 and 4 are accepted.

    Returns
    -------
    pandas.DataFrame
        All available hours concatenated and passed through
        `interp_dataframe_time`; an empty DataFrame when nothing was read.
    """
    def date_parser(s):
        # The last 4 characters are cut off before parsing
        # (presumably the time-zone field - TODO confirm against a log file).
        return datetime.strptime(s[:-4], '%Y-%m-%d %H:%M:%S')

    wpk_id = str(wpk_id)
    assert wpk_id in ['3', '4'], 'Works only for WeatherPacks  No. 3 or 4'

    # Loop-invariant pieces: timestamp column header and the day's directory
    time_col_name = ' zeno_date zeno_time zeno_timezone'
    day_dir = topdir / wpk_id / f'{date:%Y}' / f'{date:%m}' / f'{date:%d}'

    # Collect the hourly frames and concatenate once, instead of re-copying
    # a growing DataFrame on every iteration.
    hourly_frames = []
    for h in range(24):
        fname = day_dir / f'data_{wpk_id}_{date:%Y%m%d}_{h:02d}.log'
        if fname.exists():
            # NOTE(review): `error_bad_lines` / `warn_bad_lines` are the legacy
            # spelling of `on_bad_lines='skip'`; kept for the pandas version
            # this notebook was written against - update when upgrading pandas.
            hourly_frames.append(pd.read_csv(fname,
                                             error_bad_lines=False, warn_bad_lines=False,
                                             index_col=time_col_name,
                                             parse_dates=[time_col_name],
                                             date_parser=date_parser))

    if not hourly_frames:
        return pd.DataFrame()

    df = pd.concat(hourly_frames)
    if len(df) > 0:
        # Interpolate to minute time intervals
        df = interp_dataframe_time(df, date)
    return df

In [10]:
# Short variable code -> list of column names under which that variable
# is looked up in the WeatherPack data frames.
vrbl_aliases = json.loads(Path('weatherpack_variable_aliases.json').read_text())
vrbl_aliases

{'t': ['Air T (C)', 'Air temperature', 'AT_avg_C'],
 'p': ['Barometric pressure (mbar)', 'Barometric pressure', 'BP_avg_mb'],
 'rh': ['RH (%)', 'RH', 'RH_avg'],
 'ws': ['True wind sp (m/s)', 'Wind speed', 'Spd_avg_m/s'],
 'wd': ['True W Dir (Referred to N)', 'Wind direction', 'Dir_avg'],
 'sr': ['Solar (W/m^2)', 'Solar']}

In [11]:
# Per-date table with one column per WeatherPack (wpk2, wpk3, wpk4); each cell
# is a comma-separated list of variable codes, or '' when nothing is listed.
wpk_usage = pd.read_csv(
    'weatherpack_usage.txt',
    sep=r'\s+',  # raw string: '\s' is an invalid escape in a normal literal
    na_values='NA',
    parse_dates=['date'],
    index_col='date',
).fillna('')  # empty string is falsy, which the per-pack `if` checks rely on
wpk_usage.head()

Unnamed: 0_level_0,wpk2,wpk3,wpk4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-02-06,,"t,p,rh","t,p,rh"
2018-02-07,,"t,p,rh","t,p,rh"
2018-02-08,,"t,p,rh","t,p,rh"
2018-02-09,,"t,p,rh","t,p,rh"
2018-02-10,,"t,p,rh","t,p,rh"


In [12]:
# date = pd.datetime(2018, 2, 6)

In [14]:
# From this date on WeatherPack 4 is read from daily files instead of hourly logs.
WPK4_DAILY_FROM = datetime(2018, 2, 27)


def _select_aliased_columns(df, vrbls, suffix):
    """
    Pick the requested variables out of `df`, trying every known alias.

    Returns a dict mapping ``vrbl + suffix`` to the matching column. When more
    than one alias of a variable is present, the last one listed in
    `vrbl_aliases` wins. An empty `df` yields an empty dict.
    """
    selected = dict()
    if len(df) == 0:
        return selected
    for vrbl in vrbls:
        for alias in vrbl_aliases[vrbl]:
            if alias in df.columns:
                selected[vrbl + suffix] = df[alias]
    return selected


# One frame per day, concatenated once at the end (avoids re-copying a
# growing DataFrame on every iteration).
daily_frames = []

for date in tqdm(wpk_usage.index):
    wpk_vars = wpk_usage.loc[date]
    data = dict()

    if wpk_vars.wpk2:
        df = read_wpk_daily(mypaths.wpk_dir / 'WP02', date, '2')
        data.update(_select_aliased_columns(df, wpk_vars.wpk2.split(','), '_wpk2'))

    if wpk_vars.wpk3:
        df = read_wpk_hourly(mypaths.wpk_dir, date, '3')
        data.update(_select_aliased_columns(df, wpk_vars.wpk3.split(','), '_wpk3'))

    if wpk_vars.wpk4:
        if date < WPK4_DAILY_FROM:
            df = read_wpk_hourly(mypaths.wpk_dir, date, '4')
        else:
            df = read_wpk_daily(mypaths.wpk_dir / '2_Leg' / 'FORESTAR', date, '4')
        data.update(_select_aliased_columns(df, wpk_vars.wpk4.split(','), '_wpk4'))

    daily_frames.append(pd.DataFrame(data))

# pd.concat raises on an empty list, so fall back to an empty frame
df_full = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

HBox(children=(IntProgress(value=0, max=46), HTML(value='')))




In [15]:
@interact(v=df_full.columns)
def fun(v):
    """Plot column `v` of `df_full` in a new figure, with the x-axis spanning the dates in `wpk_usage`."""
    _, axes = plt.subplots()
    first_day, last_day = wpk_usage.index[0], wpk_usage.index[-1]
    df_full[v].plot(ax=axes)
    axes.set_xlim(first_day, last_day)

interactive(children=(Dropdown(description='v', options=('p_wpk3', 'p_wpk4', 'rh_wpk2', 'rh_wpk3', 'rh_wpk4', …

Timestamp('2018-02-06 00:00:00')