In [None]:
# Load station locations from the station metadata database and sanity check them against seismic events.
%matplotlib inline
%load_ext line_profiler

In [None]:
import os
import sys
sys.path.append(os.path.realpath('..'))
from collections import defaultdict

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use a large, square default figure size for every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (10,10)

In [None]:
import obspy
# This is a bit dodgy since it assumes a spherical earth...
from obspy.geodetics.base import locations2degrees

In [None]:
from tqdm.auto import tqdm

In [None]:
import random
# Seed Python's global RNG so that the random.sample() event selection inside
# processCatalog is repeatable from one run of the notebook to the next.
random.seed(20190118)

In [None]:
from dataio.catalogcsv import CatalogCSV

In [None]:
# Input/output locations. These are machine-specific absolute Windows paths;
# adjust them before running the notebook on another machine.
# Folder handed to CatalogCSV; the cat.pkl and distances.pkl caches are also stored here.
event_src_folder = r"C:\temp\catalog"
# CSV file holding the station inventory, read with pandas.read_csv below.
inventory_src_file = r"C:\software\passive-seismic\inventory\INVENTORY_20190116T144005.csv"
# Folder that receives one PNG per station from plotStationDistances.
graphic_save_path = r"C:\temp\catalog"

In [None]:
# Event catalog: build it from the source folder the first time, then reuse the
# pickled copy (cat.pkl in the same folder) on later runs.
# NOTE: unpickling executes code embedded in the file; only load caches you created yourself.
import pickle as pkl
import dill  # not referenced directly here; presumably imported for its pickling side effects - TODO confirm
pkl_file = os.path.join(event_src_folder, "cat.pkl")
if not os.path.exists(pkl_file):
    # No cache yet: parse the catalog and persist it for next time.
    cat = CatalogCSV(event_src_folder)
    with open(pkl_file, 'wb') as f:
        pkl.dump(cat, f, pkl.HIGHEST_PROTOCOL)
else:
    # Cache present: skip the parse and restore the pickled catalog.
    with open(pkl_file, 'rb') as f:
        cat = pkl.load(f)

In [None]:
# Station inventory: read the CSV with the four start/end columns parsed as dates.
# keep_default_na=False stops pandas from turning its default NA marker strings into NaN.
date_columns = ['StationStart', 'StationEnd', 'ChannelStart', 'ChannelEnd']
inv = pd.read_csv(inventory_src_file, sep=',', keep_default_na=False, parse_dates=date_columns)
# Only the BHZ channel records are used by the rest of the notebook.
is_bhz = inv['ChannelCode'] == 'BHZ'
inv = inv.loc[is_bhz]

In [None]:
# One hour as a numpy timedelta. processCatalog adds it to the event time to get
# the end of the window that a station/channel record must cover.
hour = np.timedelta64(1, 'h')

In [None]:
def processCatalog(cat, inv, num_stations=-1, distfunc=locations2degrees):
    """Pair catalogued station-to-event distances with distances recomputed from the inventory.

    For every station code in ``cat.station_dict`` a random sample of at most 100
    of its 'P' entries is drawn (using the global ``random`` module state). For
    each sampled event, the inventory records with that station code whose
    station and channel date ranges cover the window from the event time to the
    event time plus the module-level ``hour`` are selected, and ``distfunc`` is
    evaluated between each record's (Latitude, Longitude) and the event epicenter.

    :param cat: Catalog object exposing ``station_dict`` (station code ->
        ``{'P': sequence of (event_id, distance) pairs}``) and ``event_dict``
        (event_id -> event whose ``preferred_origin`` provides ``epicenter()``
        and ``utctime``).
    :param inv: pandas.DataFrame of station inventory with columns NetworkCode,
        StationCode, StationStart, StationEnd, ChannelStart, ChannelEnd,
        Latitude and Longitude. The caller's frame is not modified.
    :param num_stations: If positive, only the first ``num_stations`` station
        codes are processed; otherwise all of them are.
    :param distfunc: Callable invoked as ``distfunc(lat, lon, epicenter[0], epicenter[1])``.
    :return: Plain nested dict
        ``{station code: {"NET.STA": {'Distance': [...], 'ComputedDistance': [...]}}}``
        where the two lists are index-aligned. Stations with no matching
        inventory record are absent.
    """
    MAX_EVENT_SAMPLES = 100
    stations = list(cat.station_dict.keys())
    if num_stations > 0:
        stations = stations[:num_stations]
    # Plain dicts rather than defaultdicts with lambda factories: the standard
    # pickle module cannot serialise lambdas, and callers cache this result with pickle.
    agg_result = {}
    # Precompute full "NET.STA" names on a copy, so the caller's frame (which may
    # itself be a filtered slice) is neither mutated nor triggers SettingWithCopyWarning.
    inv = inv.assign(FullCode=inv['NetworkCode'].map(str) + "." + inv['StationCode'])
    # Iterating through tqdm closes the progress bar even if an exception escapes.
    for stn in tqdm(stations):
        df_st = inv[inv['StationCode'] == stn]
        if df_st.empty:
            continue
        # Precompute the per-station date arrays and missing-value masks once,
        # instead of once per sampled event.
        stst = df_st['StationStart']
        sten = df_st['StationEnd']
        chst = df_st['ChannelStart']
        chen = df_st['ChannelEnd']
        stst_val, stst_na = stst.values, stst.isna().values
        sten_val, sten_na = sten.values, sten.isna().values
        chst_val, chst_na = chst.values, chst.isna().values
        chen_val, chen_na = chen.values, chen.isna().values
        station_events = cat.station_dict[stn]['P']
        # Don't need to plot all events to detect distance error, a sampling of events will do.
        num_samples = min(MAX_EVENT_SAMPLES, len(station_events))
        for event_id, event_distance_deg in random.sample(station_events, num_samples):
            event = cat.event_dict[event_id]
            epicenter = event.preferred_origin.epicenter()
            event_date = np.datetime64(event.preferred_origin.utctime)
            event_window_end = event_date + hour
            # A record qualifies when it starts before the event and has no end date
            # or ends after the window. A missing start date accepts the record outright.
            station_mask = stst_na | ((stst_val < event_date) & (sten_na | (sten_val > event_window_end)))
            channel_mask = chst_na | ((chst_val < event_date) & (chen_na | (chen_val > event_window_end)))
            df_event = df_st[station_mask & channel_mask]
            if df_event.empty:
                continue
            # Walk the matching rows directly. This replaces DataFrame.apply plus
            # Series.iteritems(), the latter having been removed in pandas 2.0.
            matching_rows = zip(df_event['FullCode'], df_event['Latitude'], df_event['Longitude'])
            for full_code, lat, lon in matching_rows:
                entry = agg_result.setdefault(stn, {}).setdefault(
                    full_code, {'Distance': [], 'ComputedDistance': []})
                entry['Distance'].append(event_distance_deg)
                entry['ComputedDistance'].append(distfunc(lat, lon, epicenter[0], epicenter[1]))
    return agg_result

In [None]:
# Either line-profile processCatalog or produce the full distance comparison.
# Set do_profile to True to profile; the %lprun magic comes from the
# line_profiler extension loaded in the first cell.
do_profile = False
if do_profile:
    # Profile line by line on the first 100 stations only (num_stations=100).
    %lprun -f processCatalog agg_result=processCatalog(cat, inv, 100)
else:
    import pickle as pkl
    # Results for all stations are cached in distances.pkl next to the catalog;
    # delete that file to force recomputation.
    pkl_file = os.path.join(event_src_folder, "distances.pkl")
    if os.path.exists(pkl_file):
        # NOTE: unpickling executes code embedded in the file; only load caches you created yourself.
        with open(pkl_file, 'rb') as f:
            agg_result = pkl.load(f)
    else:
        agg_result = processCatalog(cat, inv)
        with open(pkl_file, 'wb') as f:
            pkl.dump(agg_result, f, pkl.HIGHEST_PROTOCOL)

In [None]:
def plotStationDistances(stn, stn_data, savepath=None):
    """Scatter-plot catalogued distance against recomputed distance for one station.

    One marker series is drawn per key of ``stn_data``, cycling through four
    marker shapes, with the keys used as legend labels.

    :param stn: Station code; used as the plot title and as the PNG file name stem.
    :param stn_data: Mapping of label -> dict holding index-aligned 'Distance'
        and 'ComputedDistance' sequences.
    :param savepath: If truthy, the folder into which ``<stn>.png`` is written
        (dpi=150) and the figure is then closed. Otherwise the figure is shown.
    """
    import itertools
    import os
    markers = itertools.cycle(('x', '1', '+', '2'))
    # Draw on a dedicated figure instead of pyplot's implicit "current" figure, so
    # a figure left open elsewhere can never end up mixed into this station's plot.
    fig, ax = plt.subplots()
    try:
        handles = []
        for series in stn_data.values():
            line, = ax.plot(series['Distance'], series['ComputedDistance'],
                            marker=next(markers), markersize=10, linestyle='')
            handles.append(line)
        ax.axis('equal')
        ax.set_title(stn, fontsize=16)
        # Pass handles and labels explicitly so every series is listed under its key.
        ax.legend(handles, list(stn_data.keys()), fontsize=14)
        ax.grid()
        ax.tick_params(axis='both', labelsize=14)
        ax.set_xlabel('Distance (deg)', fontsize=14)
        ax.set_ylabel('Computed Distance (deg)', fontsize=14)
        if savepath:
            fig.savefig(os.path.join(savepath, stn + ".png"), dpi=150)
    except Exception:
        # Do not leak a half-drawn figure when plotting or saving fails.
        plt.close(fig)
        raise
    if savepath:
        plt.close(fig)
    else:
        plt.show()

In [None]:
# Write one PNG per station into graphic_save_path.
for stn, stn_data in agg_result.items():
    plotStationDistances(stn, stn_data, graphic_save_path)