In [None]:
import os
import sys
import numpy as np
import pandas as pd
# Notebook-wide pandas display settings: up to 200 rows, every column, untruncated
# cell contents, and a 240-character-wide output area.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
# None means "no limit"; the legacy value -1 has been deprecated since pandas 1.0 in favour of None.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 240)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# Default size for every figure created in this notebook: 16 x 9 (width x height)
plt.rc('figure', figsize=(16.0, 9.0))

In [None]:
# Progress bar helper to indicate that slow tasks have not stalled
from tqdm.auto import tqdm

In [None]:
PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble.p.txt"
#PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble_small.p.txt"

# Every column of the picks file, in the order used for the dtype table below.
_PICK_COLUMNS = (
    '#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm',
    'net', 'sta', 'cha', 'pickTimestamp', 'phase',
    'stationLon', 'stationLat', 'az', 'baz', 'distance', 'ttResidual', 'snr',
    'qualityMeasureCWT', 'domFreq', 'qualityMeasureSlope',
    'bandIndex', 'nSigma',
)

# Column name -> dtype mapping handed to the CSV reader: float64 unless overridden below.
dtype = dict.fromkeys(_PICK_COLUMNS, np.float64)
# Identifier / code columns are kept as generic Python objects (strings).
dtype.update(dict.fromkeys(('#eventID', 'net', 'sta', 'cha', 'phase'), object))
# The two counter-like columns are read as 64-bit integers.
dtype.update(dict.fromkeys(('bandIndex', 'nSigma'), np.int64))

In [None]:
# Load the space-delimited picks table; the first line of the file supplies the column names.
# The delimiter is given by keyword: newer pandas releases accept only the path positionally.
df_picks = pd.read_csv(PICKS_PATH, sep=' ', header=0, dtype=dtype)

In [None]:
# Keep only the columns listed here, for readability; all other columns are dropped
df_picks = df_picks[
    ['#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm']
    + ['net', 'sta', 'cha', 'pickTimestamp', 'phase']
    + ['stationLon', 'stationLat', 'az', 'baz', 'distance']
    + ['ttResidual', 'snr', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']
]

In [None]:
# Network and station code of the reference station
REF_NET, REF_STN = 'IR', 'WRAB'
# Column name -> accepted values, in the form expected by DataFrame.isin
REF = dict(net=[REF_NET], sta=[REF_STN])
#REF = {'net': ['AU'], 'sta': ['WR0']}

In [None]:
# STN_LIST = ['MA01', 'MA42', 'MA43']
# TARGET_NET = '7X'
TARGET_NET = 'AU'
STN_LIST = ['WR1', 'WR2']
# Column name -> accepted values; the network code is repeated once per station
TARGET_STNS = dict(net=[TARGET_NET for _ in STN_LIST], sta=list(STN_LIST))

## Filter to the desired reference and target stations

In [None]:
# True for rows whose network and station codes both match the reference station
mask_ref = df_picks.loc[:, list(REF)].isin(REF).all(axis='columns')
# Confirm that at least one reference pick exists
mask_ref.any()

In [None]:
# True for rows whose network code and station code both appear in the target lists
mask_targ = df_picks.loc[:, list(TARGET_STNS)].isin(TARGET_STNS).all(axis='columns')
# Confirm that at least one target pick exists
mask_targ.any()

In [None]:
# A pick is kept when it belongs to either the target stations or the reference station
mask = mask_targ | mask_ref
mask.any()

In [None]:
# Picks belonging to the reference or target stations only
df_filt = df_picks[mask]

In [None]:
# Number of picks remaining after the station filter
df_filt.shape[0]

## Filter to teleseismic events

In [None]:
# Name of the column holding the event-to-station angular distance, in degrees
ANG_DIST = "distance"

In [None]:
# Picks whose angular distance lies between 30 and 90 degrees, both bounds included
mask = df_filt[ANG_DIST].between(30.0, 90.0)

In [None]:
# Subset of the station-filtered picks that passed the distance mask
df_ts = df_filt[mask]

In [None]:
# Number of picks remaining after the distance filter
df_ts.shape[0]

## Filter to constrained quality metrics

In [None]:
# Accept picks whose CWT quality is within [0, 10], slope quality within [0, 2],
# and nSigma at least 6
mask_cwt = df_ts['qualityMeasureCWT'].between(0, 10)
mask_slope = df_ts['qualityMeasureSlope'].between(0, 2)
mask_sigma = df_ts['nSigma'].ge(6)
# The reference station is exempt from the quality criteria, since it may have zero quality values
mask_ref = df_ts.loc[:, list(REF)].isin(REF).all(axis='columns')
quality_mask = mask_ref | (mask_cwt & mask_slope & mask_sigma)

In [None]:
# Stop early if the quality criteria leave 100 or fewer picks
assert quality_mask.sum() > 100, 'Not enough points left after quality filtering'
df_ts = df_ts.loc[quality_mask]
df_ts.shape[0]

In [None]:
def display_styled_table(df):
    """Return a styled view of ``df`` with blocks of consecutive equal event IDs highlighted.

    The row background alternates between two colours each time the value in the
    '#eventID' column differs from the value in the row above, so rows of one event
    appear as a block only if they are adjacent in ``df``.

    The input frame is not modified. The block bookkeeping is held in local arrays
    instead of being written into ``df`` as a 'lastEventID' helper column; that write
    altered the caller's data, relied on chained assignment, and failed on an empty frame.

    :param df: DataFrame containing a '#eventID' column.
    :return: pandas Styler for ``df`` with one background colour per row.
    """
    cols = ['#ffffff', '#e0e0ff']
    event_ids = df['#eventID']
    # True wherever a row's event ID differs from the previous row's (np.array makes a writable copy).
    starts_block = np.array(event_ids != event_ids.shift(1), dtype=bool)
    if starts_block.size:
        # The first row has no predecessor; it belongs to the initial block (first colour).
        starts_block[0] = False
    # Running count of block starts, wrapped onto the two-colour palette.
    colour_idx = np.cumsum(starts_block) % len(cols)
    row_css = ['background-color: ' + cols[i] for i in colour_idx]

    def block_highlighter(data):
        # Table-wise styling function: every cell in a row receives that row's CSS string.
        return pd.DataFrame([[css] * data.shape[1] for css in row_css],
                            index=data.index, columns=data.columns)

    return df.style.apply(block_highlighter, axis=None)

In [None]:
# display_styled_table(df_ts[0:200])

## For each event, create column for reference traveltime residual

In [None]:
# Create the reference-residual column for the entire table first, filled with NaN.
# assign() returns a new frame instead of writing into df_ts in place: df_ts was produced by
# successive boolean selections, and in-place column writes on such a frame raise
# SettingWithCopyWarning.
df_ts = df_ts.assign(ttResidualRef=np.nan)

In [None]:
# For every event, take the travel-time residual picked at the reference station and write it
# into 'ttResidualRef' on all rows of that event. Events with no pick at the reference
# station are skipped and keep their existing 'ttResidualRef' value.
ref_duped = []
# Priority order of channels. Entries are compared with the channel code by exact equality,
# so an entry such as 'B?Z' only matches a channel that is literally 'B?Z'.
# NOTE(review): if these entries were meant as wildcards, pattern matching is needed — confirm.
channel_pref = ['BHZ_00', 'BHZ', 'BHZ_10', 'B?Z', 'S?Z', 'SHZ', '???', '?']
# The context manager closes the progress bar even when the assertion below fails.
with tqdm(total=len(df_ts), ascii=True) as pbar:
    for eventid, grp in df_ts.groupby('#eventID'):
        pbar.update(len(grp))
        ref_mask = (grp['net'] == REF['net'][0]) & (grp['sta'] == REF['sta'][0])
        grp_ref = grp[ref_mask]
        if grp_ref.empty:
            continue
        # Choose most favourable channel: first entry of channel_pref present for this event
        available_cha = grp_ref['cha'].values
        cha = next((c for c in channel_pref if c in available_cha), None)
        # We must find a channel
        assert cha is not None, \
            'No preferred channel among {} for reference station, event {}'.format(list(available_cha), eventid)
        grp_cha = grp_ref[grp_ref['cha'] == cha]
        tt_ref_series = grp_cha['ttResidual'].unique()
        if len(tt_ref_series) > 1:
            # Several distinct reference residuals on the chosen channel: record the event's
            # reference picks for later inspection and use the residual of smallest magnitude.
            # Computed on the array directly, so no helper column is written into a slice.
            ref_duped.append(grp_ref)
            ref_time = tt_ref_series[np.nanargmin(np.abs(tt_ref_series))]
        else:
            ref_time = tt_ref_series[0]
        df_ts.loc[grp.index, 'ttResidualRef'] = ref_time
# Dump the reference picks of all ambiguous events to a space-delimited text file
if ref_duped:
    ref_duped_all = pd.concat(ref_duped)
    ref_duped_all.to_csv("REF_ARRIVAL_DUPES.txt", sep=' ', index=False)

In [None]:
# Sanity check: within any one event, all rows must carry the same reference tt residual
assert all(grp['ttResidualRef'].unique().size == 1 for _, grp in df_ts.groupby('#eventID'))

In [None]:
# Residual of each pick relative to the reference residual of its event
df_ts['relTtResidual'] = df_ts['ttResidual'].sub(df_ts['ttResidualRef'])

In [None]:
# Re-order columns so the three residual columns sit together at the end;
# columns not listed here are dropped
df_ts = df_ts[
    ['#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm']
    + ['net', 'sta', 'cha', 'pickTimestamp', 'phase']
    + ['stationLon', 'stationLat', 'az', 'baz', 'distance', 'snr']
    + ['ttResidual', 'ttResidualRef', 'relTtResidual']
]

In [None]:
# display_styled_table(df_ts.iloc[0:200])

In [None]:
# Order rows by ascending event origin time
df_ts = df_ts.sort_values(by='originTimestamp', ascending=True)
#display_styled_table(df_ts.iloc[0:500])

In [None]:
# Station examined in the following cells: the first entry of STN_LIST
test_stn = STN_LIST[0]

In [None]:
# Rows for the target network and test station, restricted to the columns used below
df_sample = df_ts.loc[
    df_ts['net'].eq(TARGET_NET) & df_ts['sta'].eq(test_stn),
    ['#eventID', 'originTimestamp', 'net', 'sta', 'relTtResidual', 'snr']
]
# df_sample
# display_styled_table(df_sample)

In [None]:
# Drop rows without a relative residual, then renumber the rows from zero
# (reset_index keeps the previous index as a column)
rel_tt = df_sample['relTtResidual']
mask = rel_tt.notna().values
df_nonnan = df_sample[mask].reset_index()
# df_nonnan

In [None]:
# Number of samples with a defined relative residual
df_nonnan.shape[0]

In [None]:
# Scatter plot of the relative residuals against row sequence number, coloured by SNR
# times = df_nonnan['originTimestamp'].values
vals = df_nonnan['relTtResidual'].values
qual = df_nonnan['snr']
times = range(len(df_nonnan))
if len(vals) > 0:
    sc = plt.scatter(times, vals, s=100, c=qual, cmap='jet', alpha=0.5)
    cb = plt.colorbar(sc)
    cb.set_label('Signal to noise ratio')
    plt.grid(linestyle=':', color='#80808080')
    plt.xlabel('Sequence order')
    # Label stations as NET.STA
    plt.ylabel('Relative TT residual ({} relative to {})'.format(TARGET_NET + "." + test_stn,
                                                                 REF_NET + "." + REF_STN))
#     plt.legend()
# Trailing None keeps the cell from echoing the last matplotlib return value
None

In [None]:
# Disabled: histogram of the plotted values, 50 bins over the range -10 to 10
#plt.hist(vals, bins=50, range=(-10,10))
None

In [None]:
# ID of a single event to inspect in detail
event_id = "smi:local/42941r28"

In [None]:
# All station-filtered picks (before distance and quality filtering) for the chosen event
df_event = df_filt.loc[df_filt['#eventID'].eq(event_id)]

In [None]:
# display and HTML come from the public IPython.display module; importing display from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
# Widen the notebook container to 90% of the page so the wide table fits
display(HTML("<style>.container { width:90% !important; }</style>"))
display_styled_table(df_event)

In [None]:
# Format floats with one decimal place; the option stays set after this cell
pd.set_option('display.float_format', '{:.1f}'.format)
print(df_event[
    ['#eventID', 'originTimestamp', 'mag', 'originLon', 'originLat', 'originDepthKm']
    + ['net', 'sta', 'cha', 'pickTimestamp', 'phase']
    + ['stationLon', 'stationLat', 'az', 'baz', 'distance', 'ttResidual', 'snr']
])