# Examine an event catalog and visualize representation of events from selected networks

In [None]:
import os
import sys
import math
import numpy as np
import pandas as pd
# Widen pandas' notebook output so large tables can be inspected without truncation.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
# None means "never truncate cell contents". The old spelling (-1) has been
# deprecated since pandas 1.0 and is not accepted by newer releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 240)

In [None]:
import datetime
import matplotlib
import matplotlib.pyplot as plt
# Notebook-wide plotting defaults: large 16:9 figures, and allow up to 100
# open figures before matplotlib warns about too many being open.
matplotlib.rcParams.update({
    'figure.figsize': (16.0, 9.0),
    'figure.max_open_warning': 100,
})

In [None]:
# Progress bar helper to indicate that slow tasks have not stalled
from tqdm.auto import tqdm

In [None]:
import requests

In [None]:
# Stretch the notebook container to 90% of the browser width.
# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
# Location of the pick ensemble file to analyse.
PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble.p.txt"
#PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble_small.p.txt"

# Column name -> dtype mapping handed to pd.read_csv, listed in the same
# order as the original mapping.
dtype = dict([
    ('#eventID', object),
    ('originTimestamp', np.float64),
    ('mag', np.float64),
    ('originLon', np.float64),
    ('originLat', np.float64),
    ('originDepthKm', np.float64),
    ('net', object),
    ('sta', object),
    ('cha', object),
    ('pickTimestamp', np.float64),
    ('phase', object),
    ('stationLon', np.float64),
    ('stationLat', np.float64),
    ('az', np.float64),
    ('baz', np.float64),
    ('distance', np.float64),
    ('ttResidual', np.float64),
    ('snr', np.float64),
    ('qualityMeasureCWT', np.float64),
    ('domFreq', np.float64),
    ('qualityMeasureSlope', np.float64),
    ('bandIndex', np.int64),
    ('nSigma', np.int64),
])

In [None]:
# Load the space-separated pick file, using its first line as the header and
# the explicit column dtypes declared above.
# The separator is passed by keyword: positional arguments after the path were
# deprecated in pandas 1.3 and are rejected by pandas 2.x.
df_raw_picks = pd.read_csv(PICKS_PATH, sep=' ', header=0, dtype=dtype)
len(df_raw_picks)

In [None]:
# Time span of the source dataset, taken from the smallest and largest
# event origin timestamps in the picks table.
import obspy

start_time, end_time = (
    obspy.UTCDateTime(df_raw_picks['originTimestamp'].agg(stat))
    for stat in ('min', 'max')
)
print(tuple(map(str, (start_time, end_time))))

In [None]:
# Restrict the analysis to the AU network. The permanent AU station list used here
# is based on the FDSN web site: http://www.fdsn.org/networks/detail/AU/
AU_NET = 'AU'
AU_NET_FILE = r"C:\data_cache\Picks\20190219\irisws-fedcatalog_2019-03-05T01 27 47Z.txt"
au_net_df = pd.read_csv(AU_NET_FILE, sep='|', header=0)
# Remove surrounding whitespace from the header names so that columns can be
# addressed by their plain names (e.g. 'Station').
au_net_df.columns = au_net_df.columns.str.strip()
AU_PERM_STATIONS = pd.unique(au_net_df['Station'])
len(AU_PERM_STATIONS)

## Determine record count per station and plot

In [None]:
# Count matching by station code even if the network code doesn't match, since some AU stations could appear under II or IR.
# We're mainly interested in stations with low record count.
df = df_raw_picks
# Tally every station code in a single pass over the picks table, instead of
# re-scanning the whole 'sta' column once per AU station.
picks_per_station = df['sta'].value_counts()
# Stations with no picks at all are kept, with an explicit count of 0.
record_count = {sta: int(picks_per_station.get(sta, 0)) for sta in AU_PERM_STATIONS}

In [None]:
# record_count

In [None]:
# np.sum([int(v == 0) for k, v in record_count.items()])

In [None]:
# Tabulate the per-station counts: one row per station, in dictionary order.
df = pd.DataFrame(
    {
        'sta': list(record_count.keys()),
        'count': list(record_count.values()),
    },
    columns=['sta', 'count'],
)

In [None]:
# df

In [None]:
def plotStationIncidence(df_, subplots=3, title=None, savefile=None):
    """Draw per-station record counts as horizontal bars across side-by-side panels.

    The rows of ``df_`` are split, in their existing order, into ``subplots``
    consecutive chunks. Each chunk is reversed and drawn as a horizontal bar
    chart in its own panel. Every panel uses the same x range, from 0 to the
    largest count in ``df_``, so bar lengths are comparable between panels.

    :param df_: DataFrame with a 'sta' column (bar labels) and a 'count'
        column (bar lengths).
    :param subplots: Number of panels to spread the rows across.
    :param title: Optional figure-level title; omitted when None.
    :param savefile: Optional file name passed to ``plt.savefig`` (200 dpi);
        nothing is saved when None.
    """
    plt.figure(figsize=(32, 12))
    # Ceiling division, so that every row is assigned to one of the panels.
    rows_per_panel = -(-len(df_) // subplots)
    largest_count = df_['count'].max()
    for panel in range(subplots):
        ax = plt.subplot(1, subplots, panel + 1)
        first_row = panel * rows_per_panel
        # Reverse the chunk so its first row is plotted last.
        chunk = df_.iloc[first_row:first_row + rows_per_panel, :].iloc[::-1]
        plt.barh(chunk['sta'], chunk['count'])
        plt.xlim((0, largest_count))
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=12)
        plt.xlabel("Record Count", fontsize=16)
        # Only the left-most panel carries the y-axis label.
        if panel == 0:
            plt.ylabel("Station Code", fontsize=16)
        ax.xaxis.grid(color="#80808080", linestyle="--")
    if title is not None:
        plt.suptitle(title, fontsize=20, y=0.92)
    if savefile is not None:
        plt.savefig(savefile, dpi=200)

In [None]:
# Two orderings of the same table: busiest stations first, and by station code.
df_count = df.sort_values(by='count', ascending=False)
df_sta = df.sort_values(by='sta')

In [None]:
# Record counts with the stations ordered from most to fewest records.
plotStationIncidence(
    df_count,
    title="Sorted incidence of AU station records in ensemble.p.txt",
    savefile='AU_station_incidence_pwave_event_ensemble_SORTED',
)

In [None]:
# Record counts with the stations ordered by station code.
plotStationIncidence(
    df_sta,
    title="Alphabetic incidence of AU station records in ensemble.p.txt",
    savefile='AU_station_incidence_pwave_event_ensemble_ALPHA',
)

## Determine date range per station and plot

In [None]:
# Earliest and latest event origin timestamp per AU station. Stations that
# have no picks, or no usable origin timestamps, are left out.
date_record = {}
df = df_raw_picks
# One grouped pass over the picks table yields min/max for every station code,
# instead of building a boolean mask and reducing twice per AU station.
origin_span = df.groupby('sta', sort=False)['originTimestamp'].agg(['min', 'max'])
# reindex puts the rows in AU_PERM_STATIONS order (absent stations become NaN
# rows); dropna then removes every station without a valid min and max.
origin_span = origin_span.reindex(AU_PERM_STATIONS).dropna()
for sta, min_date, max_date in origin_span.itertuples(name=None):
    date_record[sta] = (min_date, max_date)

In [None]:
# Tabulate the per-station (start, end) timestamps: one row per station.
df_date = pd.DataFrame(
    {
        'sta': list(date_record.keys()),
        'start_date': [first for first, _ in date_record.values()],
        'end_date': [last for _, last in date_record.values()],
    },
    columns=['sta', 'start_date', 'end_date'],
)

In [None]:
# Number of stations that have a date range.
df_date.shape[0]

In [None]:
def pandasTimestampToPlottableDatetime(data):
    """Convert a Series of POSIX timestamps into day-resolution datetimes.

    :param data: pandas Series of timestamps, in seconds since the Unix epoch.
    :return: Series with the same index, holding timezone-naive datetimes
        (UTC wall-clock time) truncated to midnight of their calendar day.
    """
    # pd.to_datetime(unit='s') converts the whole column at once and avoids
    # datetime.utcfromtimestamp, which is deprecated as of Python 3.12.
    # Flooring to 'D' replaces astype('datetime64[D]'): pandas 2.x only
    # supports the s/ms/us/ns units and refuses to cast to a day unit.
    return pd.to_datetime(data, unit='s').dt.floor('D')

In [None]:
# Plot, for every station that has valid dates, a horizontal bar spanning the
# period between its first and last recorded event.
# Imported explicitly because DateFormatter and the locators below live in
# this submodule; do not rely on another import having loaded it.
import matplotlib.dates

# Reverse the table before plotting.
df_plot = df_date[::-1]
start = pandasTimestampToPlottableDatetime(df_plot['start_date']).values
end = pandasTimestampToPlottableDatetime(df_plot['end_date']).values
# Bar length in days.
dur = (end - start)/np.timedelta64(1, 'D')

plt.figure(figsize=(24, 32))
plt.barh(df_plot['sta'], dur, left=start)
# Fetch the axes that barh drew on, once. Calling plt.axes() with no arguments
# adds a NEW axes on current matplotlib, so the formatter and locators would be
# attached to blank axes stacked over the bar chart instead of to the chart.
ax = plt.gca()
time_formatter = matplotlib.dates.DateFormatter("%Y-%m-%d")
years = matplotlib.dates.YearLocator()   # every year
months = matplotlib.dates.MonthLocator()  # every month
ax.xaxis.set_major_formatter(time_formatter)
ax.xaxis.set_major_locator(years)
ax.xaxis.set_minor_locator(months)
plt.xlabel("Date range", fontsize=16)
plt.ylabel("Station Code", fontsize=16)
ax.xaxis.grid(color="#80808080", linestyle="--")
ax.yaxis.grid(color="#80808020", linestyle=":")
plt.title("Record date ranges per station in ensemble.p.txt", fontsize=20)
# Overall time span across all plotted stations, annotated in the top-left corner.
time_range = (datetime.datetime.utcfromtimestamp(df_plot['start_date'].min()), datetime.datetime.utcfromtimestamp(df_plot['end_date'].max()))
plt.text(0.01, 0.98, "Start date: {}".format(str(time_range[0])), transform=ax.transAxes, fontsize=12)
plt.text(0.01, 0.97, "  End date: {}".format(str(time_range[1])), transform=ax.transAxes, fontsize=12)
plt.xticks(fontsize=14, rotation=30, horizontalalignment='right')
plt.savefig('AU_station_dates_pwave_event_ensemble.png', dpi=200)