# Use public data to replicate the results of C. Sippl (2016), *Tectonophysics*, Figure 3

## Imports, global setup

In [None]:
import os

import numpy as np
import pandas as pd

import rf
import rf.imaging

import matplotlib.pyplot as plt
import seaborn as sns

from obspy import UTCDateTime

In [None]:
import seismic.receiver_fn.rf_util as rf_util
import seismic.receiver_fn.rf_plot_utils as rf_plot_utils
import seismic.receiver_fn.rf_stacking as rf_stacking

## Load the data file of processed RF traces for analysis

In [None]:
# Network code of the deployment; also embedded in the output file names below.
network = '7W'
# RF variant selecting which input file is loaded (presumably time-domain 'td'
# vs frequency-domain 'fd' deconvolution -- TODO confirm).
# rf_type = 'ZRT_fd'
rf_type = 'ZRT_td'
# Build the path with os.path.join rather than hard-coded backslashes so the
# notebook also runs on non-Windows hosts.
rf_file = os.path.join('..', 'DATA', '7W_rfs_20080827T000136-20101231T235620_{}_qual.h5'.format(rf_type))
data = rf_util.read_h5_rf(rf_file)
# Display a summary of the loaded stream.
data

In [None]:
# Distinct station codes present in the loaded stream.
stations = {trace.stats.station for trace in data}
# Map each station code to the sub-stream holding only that station's traces.
station_idx = {code: data.select(station=code) for code in stations}
len(station_idx)

In [None]:
# Station codes whose selected sub-stream contains at least one trace.
stations_not_empty = {code for code, stream in station_idx.items() if len(stream) > 0}
len(stations_not_empty)

In [None]:
# Ordered list of station codes; iterated by the multi-station validation loop.
test_stations = list(stations_not_empty)
test_stations.sort()
print(test_stations)

In [None]:
# Station code used to select traces for the single-station analysis below.
# Swap in the commented alternative to analyse a different station.
primary_station = 'BL05'
# primary_station = 'BL20'

## Process data and present RF stacking

### Filter data down to only RF traces (filter out raw traces)

In [None]:
# Restrict the stream to the traces of the primary station (channel selection
# happens in a later cell).
data_rf = data.select(station=primary_station)
# Number of traces found for this station.
len(data_rf)

In [None]:
# Preview first 100 RF plots
# _ = rf_plot_utils.plot_rf_stack(data_rf.sort(['back_azimuth'])[0:100], time_window=(-5,30))

### Check the main data channel code and set channel accordingly

In [None]:
# Show the distinct channel codes present for the primary station.
{trace.stats.channel for trace in data_rf}

In [None]:
# Channel code of the RF traces to analyse; should be one of the channel codes
# listed by the preceding cell.
channel = 'BHR'

In [None]:
# Keep only the traces of the chosen channel for the primary station.
data_rf = data_rf.select(channel=channel)
# Number of traces remaining after channel selection.
len(data_rf)

## Replicate as much of Sippl's trace filtering logic as is amenable

We do not replicate Sippl's criterion of an 80% match between R convolved with Z and the original (unconvolved) rotated R component, as this would require splitting apart the `rf` library function that computes RFs (separating the rotation and deconvolution steps).

### Filter RFs with too large amplitude

In [None]:
# Upper bound on the trace amplitude; traces exceeding it are rejected.
max_amp_cutoff = 2.0
# The per-trace statistic is compared in log10 space, so convert the cutoff once.
log10_amp_cutoff = np.log10(max_amp_cutoff)
data_good = rf.RFStream([trace for trace in data_rf if trace.stats.log10_amp_max <= log10_amp_cutoff])
len(data_good)

### Filter by cross-correlation coefficient against other traces

In [None]:
def filter_crosscorr_coeff(rf_stream, time_window=(-2, 25), threshold_cc=0.70, min_fraction=0.15):
    """Keep only the traces that correlate well with enough of the other traces.

    For each trace in the stream, compute its correlation coefficient with every
    trace of the stream, then keep the trace if the fraction of coefficients at or
    above ``threshold_cc`` is at least ``min_fraction`` (criteria based on Sippl).

    :param rf_stream: Stream of RF traces to filter. It is not modified; the
        correlation is computed on a trimmed copy.
    :param time_window: (start, end) times relative to onset to which the copy is
        trimmed before correlating.
    :param threshold_cc: Minimum correlation coefficient for two traces to count as
        similar. Denoted Xi in Sippl, who used value 0.80.
    :param min_fraction: Minimum fraction of traces that must be similar to a trace
        for it to be kept. Denoted tau in Sippl, who used value 0.15.
    :return: New rf.RFStream holding the (untrimmed) traces that pass the filter.
    """
    # Nothing to correlate: np.corrcoef on an empty array cannot produce a usable matrix.
    if len(rf_stream) == 0:
        return rf.RFStream([])
    # Trim a copy of the RFs to the time range so that the cross-correlation relates
    # to the relevant period around and after onset.
    data_cc = rf_stream.copy().trim2(*time_window, reftime='onset')
    # Gather all RFs into a single array for efficient computation of correlation
    # coefficients between all traces.
    # NOTE(review): assumes all trimmed traces have the same number of samples -- confirm.
    data_array = np.array([tr.data for tr in data_cc])
    # Compute cross-correlation coefficients. cc matrix will be symmetric.
    # Each row of cc indicates the degree of correlation with each other trace.
    # np.corrcoef collapses to a 0-d result for a single trace, so force 2-D to
    # keep the row-wise sum below valid.
    cc = np.atleast_2d(np.corrcoef(data_array))
    # Determine mask of which traces meet the similarity filtering criteria.
    # The diagonal (self-correlation) is included in the count, as in the
    # original formulation of this filter.
    fraction_above_threshold = np.sum(cc >= threshold_cc, axis=1)/len(data_cc)
    keep_trace_mask = (fraction_above_threshold >= min_fraction)
    # Return the original (untrimmed) traces selected by the mask.
    kept_data = rf.RFStream([tr for i, tr in enumerate(rf_stream) if keep_trace_mask[i]])
    return kept_data

In [None]:
# Drop traces that are dissimilar to the bulk of the station's traces, using a
# correlation window of -2 to 20 relative to onset.
data_good = filter_crosscorr_coeff(data_good, time_window=(-2, 20))
# Order the surviving traces by back azimuth for plotting.
data_good = data_good.sort(['back_azimuth'])
len(data_good)

In [None]:
# Overlay the filtered RF traces of the chosen channel for a visual check of
# their consistency; the return value is discarded.
_ = rf_plot_utils.plot_station_rf_overlays({channel: data_good}, time_range=(-5, 25))

### Plot the good RFs

In [None]:
# Time window passed to the RF stack plots, here and in the multi-station loop.
time_window = (-5.0, 30.0)

In [None]:
# Output image name encodes network, station, channel and RF type.
save_file = 'RF_stack_{}.{}.{}_{}_validation.png'.format(network, primary_station, channel, rf_type)
# Plot the filtered RFs over the chosen time window and save the figure to save_file.
fig = rf_plot_utils.plot_rf_stack(data_good, save_file=save_file, dpi=300, time_window=time_window)
plt.show()

## H-k stacking

In [None]:
# Convert the filtered stream to a dict; it is indexed by station code here and
# the per-station entry is indexed by channel code further below.
db = rf_util.rf_to_dict(data_good)
# Data of the primary station only.
data_sta = db[primary_station]

In [None]:
# Weights applied to the individual phase stacks when they are summed.
# weighting = (0.5, 0.5, 0.0)
weighting = (0.33, 0.33, 0.33)

# Search grid for the H-k stack: 251 samples along each of H and k.
h_range = np.linspace(25.0, 75.0, 251)
k_range = np.linspace(1.5, 2.0, 251)

# Fixed V_p supplied to the stacking routine.
V_p = 6.4
k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(data_sta, channel, V_p=V_p, h_range=h_range,
                                                        k_range=k_range, root_order=2)
# Inferred V_p:
# k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(data_sta, channel, h_range=np.linspace(25.0, 75.0, 251),
#                                                         k_range=np.linspace(1.5, 2.0, 251), root_order=2)

# Combine the per-phase stacks into a single weighted stack
hk_stack_sum = rf_stacking.compute_weighted_stack(hk_stack, weighting)

# Sharpen contrast by raising the summed stack to a power >1 (sign-preserving),
# then normalize by the maximum value.
hk_stack_sum = rf_util.signed_nth_power(hk_stack_sum, 2)
hk_stack_sum = hk_stack_sum/np.max(hk_stack_sum)

# Locate the maximum of the stack numerically
h_max, k_max = rf_stacking.find_global_hk_maximum(k_grid, h_grid, hk_stack_sum)
print("Numerical solution (H, k) = ({:.3f}, {:.3f})".format(h_max, k_max))

# Station name and trace count are taken from the channel's traces for labelling.
channel_traces = data_sta[channel]
sta = channel_traces[0].stats.station
num = len(channel_traces)
save_file = 'Hk_stack_{}.{}.{}_{}_validation.png'.format(network, sta, channel, rf_type)
_ = rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack_sum, title='Station ' + sta + '.{}'.format(channel),
                                num=num, save_file=save_file)

## Extend validation to cover other stations of Bilby deployment

In [None]:
# Dict form of the full (unfiltered) loaded stream; indexed by station code and
# then by channel code in the multi-station loop below.
db_7W = rf_util.rf_to_dict(data)

In [None]:
# Folder receiving the per-station plots of the multi-station validation run.
output_folder = 'csippl_validation_td_rev1'
# output_folder = 'csippl_validation_fd_rev1'
# Create the folder if needed; exist_ok avoids the check-then-create race and
# makes re-running the cell harmless.
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Repeat the RF filtering, RF stack plot and H-k stacking for every station,
# writing the plots into output_folder. A failure on one station is reported
# and does not stop the remaining stations from being processed.
for test_station in test_stations:
    try:
        db_station = db_7W[test_station]
        # Pick the channel to use as RF source for this station and RF type.
        channel = rf_util.choose_rf_source_channel(rf_type, db_station)

        db_channel = db_station[channel]
        test_rf = rf.RFStream(db_channel)

        # Filter THIS station's traces. (Previously the stale data_good from the
        # preceding analysis was passed here, so test_rf was never used and each
        # station's own data was ignored.)
        # NOTE(review): the amplitude cutoff applied in the single-station
        # analysis is not applied here -- confirm whether that is intended.
        data_good = filter_crosscorr_coeff(test_rf, time_window=(-2, 20)).sort(['back_azimuth'])
        print("Num traces = {}".format(len(data_good)))

        save_file = 'RF_stack_{}.{}.{}_{}_validation.png'.format(network, test_station, channel, rf_type)
        save_file = os.path.join(output_folder, save_file)
        fig = rf_plot_utils.plot_rf_stack(data_good, save_file=save_file, dpi=300, time_window=time_window)

        db_good = rf_util.rf_to_dict(data_good)
        data_sta = db_good[test_station]

        # Weights applied to the individual phase stacks when they are summed.
        # weighting = (0.5, 0.5, 0.0)
        weighting = (0.33, 0.33, 0.33)

        V_p = 6.4
        k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(data_sta, channel, V_p=V_p, h_range=np.linspace(25.0, 75.0, 251),
                                                                k_range=np.linspace(1.5, 2.0, 251), root_order=2)

        # Sum the phases
        hk_stack_sum = rf_stacking.compute_weighted_stack(hk_stack, weighting)

        # Raise the final sum over phases to power >1 to increase contrast
        hk_stack_sum = rf_util.signed_nth_power(hk_stack_sum, 2)
        hk_stack_sum = hk_stack_sum/np.max(hk_stack_sum[:])

        # Numerically find location of maximum
        h_max, k_max = rf_stacking.find_global_hk_maximum(k_grid, h_grid, hk_stack_sum)
        print("{}: Numerical solution (H, k) = ({:.3f}, {:.3f})".format(test_station, h_max, k_max))

        sta = test_station
        num = len(data_sta[channel])
        save_file = 'Hk_stack_{}.{}.{}_{}_validation.png'.format(network, sta, channel, rf_type)
        save_file = os.path.join(output_folder, save_file)
        _ = rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack_sum, title='Station ' + sta + '.{}'.format(channel), num=num, save_file=save_file)
        plt.show()
    except Exception as e:
        # Deliberate best-effort: report the failing station and carry on.
        print("Failed on station {} with error:\n{}".format(test_station, str(e)))
    # end try
# end for

--------------