# Use public data to replicate Figure 4.7 of T. Richter's thesis

In [None]:
import os

import numpy as np
import pandas as pd

import rf
import rf.imaging
import matplotlib.pyplot as plt

# import scipy
# import obspy
from obspy import UTCDateTime

In [None]:
import seismic.receiver_fn.rf_util as rf_util

## Load the data file of processed traces using our RF workflow

In [None]:
# TODO: Document provenance of this data file.
# Build the path with os.path.join so the relative location resolves on POSIX as well
# as on Windows (a hard-coded backslash path is only a valid path on Windows).
data = rf_util.read_h5_rf(os.path.join("..", "DATA", "CX.PB01_rf_qual.h5"), root='/waveforms/CX.PB01.')

In [None]:
# Echo the object returned by read_h5_rf as the cell output.
data

## Load the raw record of which events Richter used in station PB01 RF plot

In [None]:
# Read Richter's event usage table; column 0 is parsed as a date while loading.
# os.path.join keeps the relative path valid on POSIX as well as on Windows.
df_event_usage = pd.read_csv(os.path.join("..", "DATA", "dissertation_richter_raw_data.csv"), parse_dates=[0])
# Strip surrounding whitespace from the column headers so they can be addressed by plain name.
df_event_usage.columns = df_event_usage.columns.map(str.strip)
# Convert the event times to UTCDateTime so they can be compared with trace event times.
df_event_usage['date and time'] = df_event_usage['date and time'].map(UTCDateTime)
# NOTE(review): strip() also removes leading blanks from the usage string. If the flags are
# positional and a blank can mean "not used", this would shift the positions - confirm
# against the raw CSV.
df_event_usage['RF usage 123456789ABCDEFGHMPSL'] = df_event_usage['RF usage 123456789ABCDEFGHMPSL'].map(str.strip)
df_event_usage['Flinn-Engdahl region'] = df_event_usage['Flinn-Engdahl region'].map(str.strip)
# Preview the first ten rows.
df_event_usage[0:10]

## Separate the RF streams from the raw streams in the loaded data

In [None]:
# Keep only the traces tagged as type 'rf', ordered by back azimuth.
data_rf = rf.RFStream(
    sorted(
        (trace for trace in data if trace.stats.type == 'rf'),
        key=lambda trace: trace.stats.back_azimuth,
    )
)

In [None]:
# Echo the RF stream as the cell output.
data_rf

In [None]:
# Inspect the metadata attached to the first RF trace.
data_rf[0].stats

## Check available channels and pick the predominant one

In [None]:
# Distinct channel codes present among the RF traces.
{trace.stats.channel for trace in data_rf}

In [None]:
# Report how many RF traces carry each of the four channel codes of interest.
_trace_channels = np.array([tr.stats.channel for tr in data_rf])
for _code in ('BHQ', 'HHQ', 'BHR', 'HHR'):
    print(_code + ": " + str(np.sum(_trace_channels == _code)))

In [None]:
# Channel code used to filter traces in the cells below.
channel = 'HHQ'
# channel = 'HHR'

## Select events matching Richter's event database for PB01 from our loaded `data_rf`

In [None]:
def use_for_pb01(tr, df_richter):
    """Determine if an event trace was used by Richter.

    The trace's event metadata is matched against the rows of Richter's event
    usage table on origin time, latitude, longitude and magnitude.

    :param tr: Trace whose ``stats`` provides ``event_time``, ``event_latitude``,
        ``event_longitude`` and ``event_magnitude``.
    :param df_richter: Table with the columns ``'date and time'``,
        ``'latitude (deg)'``, ``'longitude (deg)'``, ``'magnitude'`` and
        ``'RF usage 123456789ABCDEFGHMPSL'``.
    :return: True if exactly one row matches the trace and the first character
        of its usage string is ``'x'`` or ``'X'``; False if no row matches or
        the usage string is empty.
    :raises AssertionError: If more than one row matches the trace.
    """
    stats = tr.stats
    trace_meta = [stats.event_time, stats.event_latitude, stats.event_longitude, stats.event_magnitude]
    series_timedelta = df_richter['date and time'] - stats.event_time
    # Allow a difference of 10 seconds
    # NOTE(review): assumes the subtraction above yields plain seconds - confirm for the time type used.
    matching_event = (
        (np.abs(series_timedelta) <= 10.0)
        & np.isclose(df_richter['latitude (deg)'], stats.event_latitude, rtol=1e-3, atol=0.2)
        & np.isclose(df_richter['longitude (deg)'], stats.event_longitude, rtol=1e-3, atol=0.2)
        & np.isclose(df_richter['magnitude'], stats.event_magnitude, rtol=0.01, atol=0.1)
    )
    candidate_event = df_richter[matching_event]
    if candidate_event.empty:
        return False
    # Raise explicitly (rather than via `assert`) so the ambiguity check is not
    # stripped when Python runs with -O.
    if len(candidate_event) != 1:
        raise AssertionError(
            "Found {} events for {}:\n{}".format(len(candidate_event), trace_meta, candidate_event))
    event_usage = candidate_event['RF usage 123456789ABCDEFGHMPSL'].iloc[0]
    # Only the first flag is examined (presumably the position for PB01 - confirm).
    # Slice instead of indexing so an empty usage string gives False, not IndexError.
    return event_usage[:1].lower() == 'x'

In [None]:
# Keep the RF traces whose event Richter used and that are on the chosen channel,
# ordered by back azimuth.
data_richter = rf.RFStream(
    sorted(
        (
            trace for trace in data_rf
            if use_for_pb01(trace, df_event_usage) and trace.stats.channel == channel
        ),
        key=lambda trace: trace.stats.back_azimuth,
    )
)
assert len(data_richter) == 182  # This is the number of traces on Fig 4.7

## Replicate Fig 4.7, as close as possible within limits of `rf` library plotting.
Any significant differences in waveforms indicate that our processing pipeline differs from Richter's, since we started from the same raw data for the same events.

In [None]:
# Settings shared by the plot_rf calls in this notebook.
time_window = (-5.0, 22.0)  # passed to plot_rf as `trim`
trace_height = 0.06
stack_height = 0.6
scale = 2.5

In [None]:
# Toggle for applying moveout to the selected traces before plotting.
do_moveout = True
if do_moveout:
    # NOTE(review): the return value is discarded, so this presumably modifies data_richter
    # in place; if so, re-running this cell would apply it again - confirm against the rf docs.
    data_richter.moveout('Ps')

In [None]:
# Plot the Richter-selected RFs; the result is bound to `_` so the cell does not echo it.
_ = data_richter.plot_rf(
    fillcolors=('#000000', '#a0a0a0'),
    trim=time_window,
    scale=scale,
    trace_height=trace_height,
    stack_height=stack_height,
)

## TODO: Try to replicate the quality filtering criteria used by Richter

We should be able to filter down to the same 182 traces without resorting to Richter's usage table.

In [None]:
# Collect each per-trace quality metric over all RF traces, in data_rf order.
snr_all, snr_prior_all, entropy_all = (
    np.array([getattr(trace.stats, metric) for trace in data_rf])
    for metric in ('snr', 'snr_prior', 'entropy')
)

In [None]:
# Histogram of the per-trace SNR values.
plt.hist(snr_all, bins=20)
plt.show()
# Histogram of the per-trace prior SNR values, with the x-axis limited to (0, 10).
plt.hist(snr_prior_all, bins=50)
plt.xlim((0, 10))
plt.show()
# Histogram of the per-trace entropy values.
plt.hist(entropy_all, bins=20)
plt.show()

In [None]:
# Threshold applied to the SNR values; the two prints give the number of RF traces
# at or above the threshold and the number below it.
snr_cutoff = 1.75
# snr_cutoff = 1.7
print(np.sum(snr_all >= snr_cutoff))
print(np.sum(snr_all < snr_cutoff))

In [None]:
# RF traces on the chosen channel whose SNR is at or above the cutoff.
data_good = rf.RFStream([
    trace for trace in data_rf
    if (trace.stats.snr >= snr_cutoff) and (trace.stats.channel == channel)
])
# data_bad = rf.RFStream([tr for tr in data_rf if (tr.stats.snr < snr_cutoff) and (tr.stats.channel == channel)])

In [None]:
# Plot the SNR-filtered RFs with the same settings as the Richter-selected plot.
_ = data_good.plot_rf(
    fillcolors=('#000000', '#a0a0a0'),
    trim=time_window,
    scale=scale,
    trace_height=trace_height,
    stack_height=stack_height,
)

In [None]:
# Threshold applied to the prior SNR values. This rebinds the same `snr_cutoff` name
# that the SNR filter uses, so the value set here is what later cells see.
snr_cutoff = 1.75
# Number of RF traces at or above the threshold, then the number below it.
print(np.sum(snr_prior_all >= snr_cutoff))
print(np.sum(snr_prior_all < snr_cutoff))

In [None]:
# RF traces on the chosen channel whose prior SNR is at or above the cutoff.
data_prior_good = rf.RFStream([
    trace for trace in data_rf
    if (trace.stats.snr_prior >= snr_cutoff) and (trace.stats.channel == channel)
])

In [None]:
# Plot the prior-SNR-filtered RFs with the same settings as the other plots.
_ = data_prior_good.plot_rf(
    fillcolors=('#000000', '#a0a0a0'),
    trim=time_window,
    scale=scale,
    trace_height=trace_height,
    stack_height=stack_height,
)

In [None]:
# Event IDs of the traces kept by the SNR filter and by the prior-SNR filter.
event_ids_ga = [tr.stats.event_id for tr in data_good]
event_ids_prior_ga = [tr.stats.event_id for tr in data_prior_good]

In [None]:
# Event IDs of the traces selected via Richter's usage table.
event_ids_richter = [tr.stats.event_id for tr in data_richter]

In [None]:
# Print the sorted event IDs of both filtered selections for visual comparison.
print(np.array(sorted(event_ids_ga)))
print(np.array(sorted(event_ids_prior_ga)))

In [None]:
# Print the sorted event IDs of the Richter-based selection.
print(np.array(sorted(event_ids_richter)))

In [None]:
# Number of distinct event IDs each filtered selection shares with the Richter-based selection.
print(len(set(event_ids_ga) & set(event_ids_richter)))
print(len(set(event_ids_prior_ga) & set(event_ids_richter)))

In [None]:
# Number of traces in each filtered selection.
print(len(event_ids_ga))
print(len(event_ids_prior_ga))

In [None]:
# NOTE(review): these literals look like overlap counts and selection sizes copied from the
# outputs of the preceding cells, expressed as percentages. They will not update if the data
# or cutoffs change - TODO confirm and compute them from the event-ID lists instead.
print(100*131/180)
print(100*139/177)

In [None]:
# Inspect the `processing` entries recorded on the first SNR-filtered trace.
data_good[0].stats.processing

In [None]:
# Show the entry at index 6 of that trace's `processing` record.
# NOTE(review): the index is hard-coded; it presumably depends on the order of processing steps.
data_good[0].stats.processing[6]

In [None]:
# Check whether the entry at index 6 contains the substring 'ZNE->LQT'.
'ZNE->LQT' in data_good[0].stats.processing[6]

In [None]:
# Check whether the entry at index 6 contains the substring 'NE->RT'.
'NE->RT' in data_good[0].stats.processing[6]

## The above results indicate it doesn't make a lot of difference whether we use SNR or prior SNR for filtering to best quality RFs. Especially once stacked, the stacks are very similar.

In [None]:
# Keep only the traces tagged as type 'raw_resampled', ordered by back azimuth.
data_raw = rf.RFStream(
    sorted(
        (trace for trace in data if trace.stats.type == 'raw_resampled'),
        key=lambda trace: trace.stats.back_azimuth,
    )
)

In [None]:
# Echo the first twenty 'raw_resampled' traces.
data_raw[0:20]


In [None]:
# List the end times of the first twenty 'raw_resampled' traces.
[tr.stats.endtime for tr in data_raw[0:20]]