In [1]:
%matplotlib inline

import logging
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
import sys
sys.path.append('/Users/urebbapr/research/opticalcomm/MLOC/src')

from functools          import partial
from matplotlib.patches import Patch
from matplotlib.ticker  import FormatStrFormatter
from types              import SimpleNamespace as SN

# Depending on how your environment is set up, you may need to tweak the utils import
from utils import (
    load_pwv,
    compile_datasets
)

# Disable warnings
# NOTE(review): this is a blanket filter -- every warning category is ignored
# for the rest of the session, including deprecation warnings from libraries.
warnings.filterwarnings('ignore')

# Disable logger except for errors/exceptions
# getLogger() with no name returns the root logger, so loggers that do not set
# their own level inherit this ERROR threshold.
logger = logging.getLogger()
logger.setLevel(logging.ERROR)

# Set context of seaborn
# 'talk' is one of seaborn's preset plotting contexts.
sns.set_context('talk')


In [None]:
def compile_pwv(h5='', resample='30 min', smooth=[]):
    """
    Ingests all of the raw data into dataframes then compiles them into a merged
    dataframe.

    Parameters
    ----------
    h5 : str
        Optional path to an h5 to write to. When non-empty, every raw frame is
        stored under its own key and the final frame under the key 'merged'.
    resample : str
        The rate to resample the merged dataframe
    smooth : list of str
        List of columns to apply smoothing on. Each smoothed column is added
        next to the original as '<col>_10T'. The default list is only read,
        never modified.

    Returns
    -------
    pandas.DataFrame
        The merged frame, resampled to `resample` using the median of each bin.

    Notes
    -----
    NOTE(review): the loaders (load_r0, load_bls, load_weather) and the input
    paths (r0_day, r0_night, bls, weather) are read from the notebook's global
    namespace; confirm they are defined before this function is called.
    """
    # Rolling window shared by all smoothing, plus the two observation thresholds
    window     = '10 min'
    sparse_obs = 2    # used for Cn2 and the night r0
    dense_obs  = 120  # 20% of a 10 minute window (assuming seconds): 10*60*20%=120

    logger.debug('Loading datasets from their raw files')
    # Load in the data
    data = {
        'r0/day'  : load_r0(r0_day,   kind='day',   round=True, resample=False, datenum=False),
        'r0/night': load_r0(r0_night, kind='night', round=True, resample=False, datenum=False),
        'bls'     : load_bls(bls, round=True, resample=False, datenum=False),
        'weather' : load_weather(weather)
    }
    # Postprocess
    data['r0/day']['r0'] *= 100 # Convert to centimeters
    data['r0/night'].drop(columns='polaris_count', inplace=True)

    # Save individual frames
    if h5:
        for key, frame in data.items():
            # `key` must be passed by keyword: positional use is deprecated in
            # pandas 2.1 and removed in pandas 3.0
            frame.to_hdf(h5, key=key)

    logger.debug('Merging dataframes together')
    # Merge the frames together; the overlapping r0 columns of the day and night
    # frames come out as r0_day / r0_night
    df = pd.merge(data['r0/day'], data['r0/night'], how='outer', suffixes=['_day', '_night'], on=['datetime', 'solar_zenith_angle'])
    df = pd.merge(df, data['bls'], how='outer', on=['datetime', 'solar_zenith_angle'])
    df = pd.merge(data['weather'], df, how='outer', on='datetime')

    # Sort the datetime index (time-based rolling below needs a monotonic index)
    df.sort_index(inplace=True)

    # Create the r0 column by merging day and night; day values take precedence
    df['r0'] = df.r0_day.combine_first(df.r0_night)

    # Apply smoothing
    for col in smooth:
        if col == 'r0':
            continue # Skip r0 to do separately
        minobs = sparse_obs if col in ['Cn2', 'r0_night'] else dense_obs
        logger.debug(f'Smoothing {col} with {minobs} minimum observations')
        df[f'{col}_10T'] = df[col].rolling(window, min_periods=minobs).median()

    # Smoothed r0 needs to be the merge of the smoothed day and night rather than a smooth on r0 merged
    #  due to observation requirements causing night to become fully NaN
    if 'r0' in smooth:
        logger.debug('Creating smoothed r0 merged separately')
        # If already smoothed, use columns otherwise do smoothing
        if 'r0_day_10T' in df and 'r0_night_10T' in df:
            df['r0_10T'] = df.r0_day_10T.combine_first(df.r0_night_10T)
        else:
            df['r0_10T'] = df['r0_day'].rolling(window, min_periods=dense_obs).median().combine_first(
                df['r0_night'].rolling(window, min_periods=sparse_obs).median()
            )

    logger.debug(f'Resampling to {resample}')
    # Resample to the median of each bin. No minimum number of observations per
    # bin is enforced here.
    df = df.resample(resample).median()

    # Save merged
    if h5:
        df.to_hdf(h5, key='merged')

    return df