# Use public data to replicate the results in Figure 3 of S. Chevrot (2000), *Earth and Planetary Science Letters*

## Imports, global setup

In [None]:
import os

import numpy as np
import pandas as pd

import rf
import rf.imaging

import matplotlib.pyplot as plt
import seaborn as sns

from obspy import UTCDateTime

In [None]:
import seismic.receiver_fn.rf_util as rf_util
import seismic.receiver_fn.rf_plot_utils as rf_plot_utils
import seismic.receiver_fn.rf_stacking as rf_stacking

## Load the data file of processed RF traces for analysis

In [None]:
# Select which set of precomputed receiver functions (RFs) to load. The lines left
# commented out are the alternative values for each setting.
rf_type = 'LQT_fd'
# rf_type = 'LQT_td'
# rf_type = 'LQT_td_filtered'
# rf_type = 'LQT_td_hispike'
model = 'iasp91'
# model = 'ak135'

# Build the path with os.path.join so the notebook is not tied to the Windows path
# separator; on Windows the resulting path is the same as the previous literal.
rf_file = os.path.join(
    '..', 'DATA',
    '7B_rfs_19930503T030058-19950810T012516_{}_{}_qual.h5'.format(rf_type, model))
data = rf_util.read_h5_rf(rf_file)
data

In [None]:
# Station whose receiver functions are analysed in the first part of the notebook.
# primary_station = 'SD02'
primary_station = 'SA01'

In [None]:
# stations = set([tr.stats.station for tr in data])
# station_idx = {st: data.select(station=st) for st in stations}
# len(station_idx)

In [None]:
# stations_not_empty = set([st for st in station_idx if len(station_idx[st]) > 0])
# len(stations_not_empty)

In [None]:
# print(sorted(stations_not_empty))

## Process data and present RF stacking

### Filter data down to only RF traces (filter out raw traces)

In [None]:
# Keep only the traces of the primary station; the trailing expression displays how many there are.
data_sta = data.select(station=primary_station)
len(data_sta)

In [None]:
# Collect the station's traces whose stats.type is 'rf' into their own stream.
data_rf = rf.RFStream([tr for tr in data_sta if tr.stats.type == 'rf'])
data_rf

In [None]:
# Preview the first 100 RF traces after sorting by back azimuth.
# NOTE(review): obspy's Stream.sort() operates in place, so this presumably leaves data_rf
# itself reordered by back azimuth -- confirm this is acceptable for later cells.
_ = rf_plot_utils.plot_rf_stack(data_rf.sort(['back_azimuth'])[0:100], time_window=(-5,30))

In [None]:
# Collect the station's traces whose stats.type is 'raw_resampled' into a separate stream.
data_raw = rf.RFStream([tr for tr in data_sta if tr.stats.type == 'raw_resampled'])
data_raw

### Check the main data channel code and set channel accordingly

In [None]:
# Display the distinct channel codes present among the RF traces.
{tr.stats.channel for tr in data_rf}

In [None]:
# Channel code used for the file names and H-k stacking below.
channel = 'BHQ'

### Check distribution of SNR values to help us choose a cutoff

In [None]:
# Per-trace stats.snr and stats.snr_prior values of the primary station's RF traces.
snr_all = np.array([tr.stats.snr for tr in data_rf])
snr_prior_all = np.array([tr.stats.snr_prior for tr in data_rf])

In [None]:
# Histogram of the stats.snr values; the commented-out lines are the equivalent
# plot for the snr_prior values.
plt.hist(snr_all, bins=20)
# plt.xlim((0, 20))
plt.show()
# plt.hist(snr_prior_all, bins=50)
# plt.xlim((0, 6))
# plt.show()

In [None]:
# Pick a cutoff to get about 20 traces
cutoff_snr = 1.9
# Threshold for the snr_prior-based selection, which appears only in commented-out
# alternatives in the visible cells.
cutoff_snr_prior = 2

### Filter RFs to those with good enough SNR

In [None]:
# Keep the RF traces whose stats.snr reaches the cutoff, ordered by stats.distance.
data_good = rf.RFStream(
    sorted(
        (tr for tr in data_rf if tr.stats.snr >= cutoff_snr),
        key=lambda tr: tr.stats.distance,
    )
)
# Alternative selection based on snr_prior:
# data_good = rf.RFStream(sorted([tr for tr in data_rf if tr.stats.snr_prior >= cutoff_snr_prior], key=lambda v: v.stats.distance))
len(data_good)

### Plot the good RFs

In [None]:
# Layout settings shared by the plot_rf calls in this notebook.
time_window = (-1.0, 30.0)  # passed as the `trim` argument
trace_height = 0.15
stack_height = 0.6
scale = 2

In [None]:
# Plot the selected RFs and write the figure to a file named after the station,
# channel, RF type and model.
save_file = 'RF_stack_{}.{}_{}_{}_validation.png'.format(primary_station, channel, rf_type, model)
_ = data_good.plot_rf(
    fillcolors=('#000000', '#a0a0a0'),
    trim=time_window,
    scale=scale,
    trace_height=trace_height,
    stack_height=stack_height,
    fname=save_file,
    show_vlines=True,
    dpi=300,
)

In [None]:
# Event IDs of the selected RF traces.
data_good_events = [tr.stats.event_id for tr in data_good]

In [None]:
# Raw BHZ traces belonging to the same events as the selected RF traces.
data_raw_events = rf.RFStream([tr for tr in data_raw if tr.stats.event_id in data_good_events and tr.stats.channel == 'BHZ'])

In [None]:
# _ = data_raw_events.plot_rf(fillcolors=('#000000', '#a0a0a0'), trim=(-10, 30), scale=scale, trace_height=trace_height, stack_height=stack_height, show_vlines=True)

In [None]:
# Experimental: scale traces by phase-weighting. Since there is no moveout, this phase weighting dilutes the strength of the PpPs and higher multiples,
# so this does not help the stacked result.
# pw = rf_util.phase_weights(data_good)
# data_good_pw = data_good.copy()
# for tr in data_good_pw:
#     tr.data = tr.data*pw
# _ = data_good_pw.plot_rf(fillcolors=('#000000', '#a0a0a0'), trim=time_window, scale=scale, trace_height=trace_height, stack_height=stack_height)

## H-k stacking

In [None]:
# Convert the selected RFs with rf_to_dict; the result is indexed by station code here
# and by channel code in the stacking cell.
# Note: data_sta is rebound here; it previously held the station's full trace stream.
db = rf_util.rf_to_dict(data_good)
data_sta = db[primary_station]

In [None]:
# Weights used when the per-phase stacks are combined into one.
weighting = (0.5, 0.5, 0.0)

# Compute the H-k stack over H values from 20 to 70 in 501 steps.
V_p = 6.4
k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(
    data_sta,
    channel,
    h_range=np.linspace(20.0, 70.0, 501),
    root_order=2,
    V_p=V_p,
)

# Combine the phases using the weights above.
hk_stack_sum = rf_stacking.compute_weighted_stack(hk_stack, weighting)

# Sharpen contrast with a signed square, then scale by the maximum value.
hk_stack_sum = rf_util.signed_nth_power(hk_stack_sum, 2)
hk_stack_sum = hk_stack_sum / np.max(hk_stack_sum)

# Station code and trace count for the plot annotation and output file name.
num = len(data_sta[channel])
sta = data_sta[channel][0].stats.station
save_file = 'Hk_stack_{}.{}_{}_{}_validation.png'.format(sta, channel, rf_type, model)
rf_plot_utils.plot_hk_stack(
    k_grid,
    h_grid,
    hk_stack_sum,
    title='Station ' + sta + '.{}'.format(channel),
    num=num,
    save_file=save_file,
)

## Further analysis to disambiguate which is the "correct" selection of *H-k* maximum

### Generate simple picks on $t_1$, $t_2$ and use analytic solution for $(H, \kappa)$ to compute scattergram based on picks

In [None]:
# Use larger dataset for denser scattergram
# NOTE(review): this threshold (2) is higher than the cutoff_snr value (1.9) set earlier,
# so the selection cannot be larger than the one made with cutoff_snr -- confirm the
# intended value.
adequate_cutoff = 2
data_adequate = rf.RFStream(
    sorted(
        (tr for tr in data_rf if tr.stats.snr >= adequate_cutoff),
        key=lambda tr: tr.stats.distance,
    )
)
len(data_adequate)

In [None]:
# Get t1, t2 from picking maxima within narrow time bands based on RF plot above
def _pick_peak_time(t_rel, amplitudes, t_min, t_max):
    """Return the time at which `amplitudes` is largest within [t_min, t_max].

    `t_rel` and `amplitudes` are arrays of equal length. If the maximum is attained
    at several samples, the mean of their times is returned.

    Raises ValueError if no sample of `t_rel` lies inside the window.
    """
    window = (t_rel >= t_min) & (t_rel <= t_max)
    if not np.any(window):
        raise ValueError('No samples between {} and {} relative to onset'.format(t_min, t_max))
    windowed = amplitudes[window]
    return t_rel[window][windowed == np.max(windowed)].mean()


km_per_deg = 111.1949
snr_ad = np.array([tr.stats.snr for tr in data_adequate])
t1 = []
t2 = []
incl = []
slowness_secperkm = []
for tr in data_adequate:
    # Sample times relative to the onset time of the trace.
    t_rel = tr.times() - (tr.stats.onset - tr.stats.starttime)
    # Pick windows: 3-5 for t1 and 14-16 for t2.
    t1.append(_pick_peak_time(t_rel, tr.data, 3, 5))
    t2.append(_pick_peak_time(t_rel, tr.data, 14, 16))
    # Inclination converted from degrees to radians.
    incl.append(tr.stats.inclination*np.pi/180.0)
    # Presumably stats.slowness is in s/deg, making this s/km -- confirm.
    slowness_secperkm.append(tr.stats.slowness/km_per_deg)
t1 = np.array(t1)
t2 = np.array(t2)
incl = np.array(incl)
slowness_secperkm = np.array(slowness_secperkm)
# NOTE(review): if stats.slowness is already the horizontal slowness (ray parameter),
# the extra sin(incl) factor may not be intended -- confirm.
ray_param = np.sin(incl)*slowness_secperkm

In [None]:
# Difference and sum of the picked times, the quantities the analytic (H, kappa) solution is written in.
alpha = t2 - t1
beta = t1 + t2
print(V_p)

In [None]:
# Per-event analytic solution:
#   H = alpha / (2*sqrt(1/V_p^2 - p^2))
#   k = V_p*sqrt((beta/(2*H))^2 + p^2)
p = ray_param
H = alpha/(2*np.sqrt(1/V_p**2 - p*p))
k = V_p*np.sqrt((beta/(2*H))**2 + p*p)

In [None]:
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms

# Scatter the per-event (k, H) solutions, with marker colour and size both driven by SNR.
plt.figure(figsize=(13,12))
ax = plt.gca()
# Pass the coordinates by keyword: newer seaborn releases do not accept x and y positionally.
sns.scatterplot(x=k, y=H, hue=snr_ad, size=snr_ad, sizes=(50, 250), s=1000, alpha=0.9, ax=ax)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(r'$\kappa = \frac{V_p}{V_s}$ (ratio)', fontsize=14)
plt.ylabel('H = Moho depth (km)', fontsize=14)
plt.grid(color="#80808080", linestyle=":")
plt.xlim(1.3, 2.1)
plt.ylim(20, 70)
plt.legend(title='SNR')
# Raw string so that the LaTeX "\kappa" is not parsed as a string escape sequence.
plt.title(r'Per-event $H-\kappa$ scattergram based on $t_1$, $t_2$ picks', fontsize=20)
# Optional dashed ellipse annotation (disabled); it is the only user of the two imports above.
# ellipse = Ellipse((0, 0), width=0.4, height=5, facecolor=None, edgecolor='#202020', linestyle='--', fill=False)
# transf = transforms.Affine2D().rotate_deg(0).translate(1.85, 39)
# ellipse.set_transform(transf + ax.transData)
# ax.add_patch(ellipse)
plt.savefig('Hk_scattergram_{}.{}_{}_{}_validation.png'.format(primary_station, channel, rf_type, model), dpi=300)
plt.show()

--------------

## Extend validation to cover other stations of Skippy deployment

In [None]:
# Convert the full loaded data set with rf_to_dict and choose the station and channel to test.
db_7b = rf_util.rf_to_dict(data)
test_station = 'SA01'
channel = 'BHQ'

In [None]:
# Wrap the test station's traces for the chosen channel in an RFStream.
db_station = db_7b[test_station][channel]
test_rf = rf.RFStream(db_station)

In [None]:
# Display the keys of the test station's entry whose last character is 'R' or 'Q'.
[c for c in db_7b[test_station] if c[-1] in 'RQ']

In [None]:
# Histogram of the stats.snr values of the test station's RFs; the commented-out lines
# are the equivalent plot for the snr_prior values.
snr_rf = np.array([tr.stats.snr for tr in test_rf])
plt.hist(snr_rf, bins=20)
plt.show()
# snr_rf_prior = np.array([tr.stats.snr_prior for tr in test_rf])
# plt.hist(snr_rf_prior, bins=20)
# plt.show()

In [None]:
# Select the test station's RFs whose stats.snr reaches the cutoff, then sort by distance.
# Note: cutoff_snr and data_good are rebound here, replacing the values set for the
# primary-station analysis.
cutoff_snr = 1.5
data_good = rf.RFStream([tr for tr in test_rf if tr.stats.snr >= cutoff_snr])
data_good = data_good.sort(['distance'])
# Alternative selection based on snr_prior:
# cutoff_snr_prior = 2.0
# data_good = rf.RFStream([tr for tr in test_rf if tr.stats.snr_prior >= cutoff_snr_prior]).sort(['distance'])
len(data_good)

In [None]:
# Plot the selected RFs with the shared layout settings; no fname is passed in this call.
_ = data_good.plot_rf(fillcolors=('#000000', '#a0a0a0'), trim=time_window, scale=scale, trace_height=trace_height, stack_height=stack_height)

In [None]:
# Convert the selected RFs with rf_to_dict and take the test station's entry, which is
# indexed by channel code in the stacking cell.
db_good = rf_util.rf_to_dict(data_good)
data_sta = db_good[test_station]

In [None]:
# Weights used when the per-phase stacks are combined into one.
weighting = (0.5, 0.5, 0.0)

# Compute the H-k stack over H values from 20 to 70 in 501 steps.
k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(
    data_sta,
    channel,
    h_range=np.linspace(20.0, 70.0, 501),
    root_order=2,
    V_p=6.4,
)

# Normalize the stacked amplitudes of each phase before computing weighted sum, to ensure the
# weights are meaningful in an absolute sense. Otherwise the weightings are relative to the mean
# amplitude of the return of a given phase, which is somewhat arbitrary.
# for i in range(3):
#     hk_stack[i, :, :] = hk_stack[i, :, :]/np.max(np.abs(hk_stack[i, :, :]))

# Combine the phases using the weights above.
hk_stack_sum = rf_stacking.compute_weighted_stack(hk_stack, weighting)

# Sharpen contrast with a signed square, then scale by the maximum value.
hk_stack_sum = rf_util.signed_nth_power(hk_stack_sum, 2)
hk_stack_sum = hk_stack_sum / np.max(hk_stack_sum)

# Station code and trace count for the plot annotation and output file name.
num = len(data_sta[channel])
sta = data_sta[channel][0].stats.station
save_file = 'Hk_stack_{}.{}_{}_{}_validation.png'.format(sta, channel, rf_type, model)
rf_plot_utils.plot_hk_stack(
    k_grid,
    h_grid,
    hk_stack_sum,
    title='Station ' + sta + '.{}'.format(channel),
    num=num,
    save_file=save_file,
)

--------------

## APPENDIX: Derivation of solution for $(H, \kappa)$ from $(t_1, t_2)$

There are various similar and equivalent formulations for the theoretical arrival times $t_1, t_2$ of the Ps and PpPs phases respectively. The formulation chosen here is that of Youlin Chen *et al.*, "Crustal structure beneath China from receiver function analysis", *Journal of Geophysical Research*, Vol.115, B03307 (2010), doi:10.1029/2009JB006386.

Starting with:

$t_1 = H \left[\sqrt{\left(\frac{\kappa}{V_p}\right)^2 - p^2} - \sqrt{\left(\frac{1}{V_p}\right)^2 - p^2} \right] \qquad \text{Ps}$

and

$t_2 = H \left[\sqrt{\left(\frac{\kappa}{V_p}\right)^2 - p^2} + \sqrt{\left(\frac{1}{V_p}\right)^2 - p^2} \right] \qquad \text{PpPs}$

where $H$ = depth to Moho, $\kappa = \frac{V_p}{V_s}$, $V_p$ = P-wave velocity, $p$ = ray parameter (per ray/event).

Then let $\alpha = t_2 - t_1$, $\beta = t_1 + t_2$. Algebraic solution yields

$\alpha = 2H\sqrt{\left(\frac{1}{V_p}\right)^2 - p^2}$

$\implies H = \frac{\alpha}{2\sqrt{\left(\frac{1}{V_p}\right)^2 - p^2}}$

in which one can see that $\alpha$ is independent of $\kappa$ and, for a given $V_p$ and $p$, depends only on $H$.

For $\beta$,

$\beta = 2H\sqrt{\left(\frac{\kappa}{V_p}\right)^2 - p^2}$

$\implies \kappa = \frac{V_p}{V_s} = V_p \sqrt{\left(\frac{\beta}{2H}\right)^2 + p^2}$