In [None]:
import os
# from collections import defaultdict
# import time
import pickle as pkl

import numpy as np
import rf
import rf.imaging
import matplotlib.pyplot as plt
import scipy
from scipy import signal
from scipy.signal import hilbert
from scipy.stats import moment
# from scipy.interpolate import interp1d
from sklearn.neural_network import MLPClassifier
import obspy
import seaborn as sns
import pandas as pd
from tqdm.auto import tqdm

In [None]:
# Bring in interactive widgets capability. See https://towardsdatascience.com/interactive-controls-for-jupyter-notebooks-f5c94829aee6
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [None]:
import seismic.receiver_fn.rf_util as rf_util
import seismic.receiver_fn.rf_plot_utils as rf_plot_utils
import seismic.receiver_fn.rf_stacking as rf_stacking

## Read source file

In [None]:
# Receiver-function variant to analyse. The string is substituted into the data and
# classifier file names below, and its first three characters ('ZRT' or 'LQT') decide
# which channel codes are searched for when selecting the test channel.
rf_type = 'ZRT_td'
# rf_type = 'LQT_td'
# rf_type = 'ZRT_fd'
# rf_type = 'LQT_fd'

In [None]:
# Network code; used further down to build the station code that labels and names the H-k stack plots.
network = '7X'

In [None]:
# Alternative data set (OA network, station BT23 only), kept for reference:
# src_file = r"..\DATA\OA_rfs_20170911T000036-20181128T230620_{}_rev5_qual.h5".format(rf_type)
# data_all = rf_util.read_h5_rf(src_file, 'OA', 'BT23')

# Build the path with os.path.join rather than hard-coded backslashes, so the notebook also
# runs on non-Windows systems (where "..\DATA\..." would be read as a single file name).
src_file = os.path.join("..", "DATA", "7X_rfs_20090616T034200-20110401T231849_{}_rev1_qual.h5".format(rf_type))
data_all = rf_util.read_h5_rf(src_file)

In [None]:
# Show the type of the container returned by rf_util.read_h5_rf.
type(data_all)

## Load quality classifier

In [None]:
# Load the pickled classifier object whose file name matches the selected rf_type.
# NOTE(review): unpickling can execute arbitrary code, so only load model files from a trusted source.
model_file = "BT23_classifier_{}.pkl".format(rf_type)
with open(model_file, 'rb') as f:
    qc = pkl.load(f)

## Convert RFStream to dict database for convenient iteration and addressing

In [None]:
# Re-index the loaded traces. As used below, the result is keyed by station code, and each
# station entry is keyed by channel code and holds a sequence of traces.
db = rf_util.rf_to_dict(data_all)

## Select test station and channel

In [None]:
# Station used for the single-station walkthrough that follows; alternatives are kept commented out.
test_station = 'BT23'
# test_station = 'BS27'
# test_station = 'BZ20'
# test_station = 'SD02'
# All channels recorded for the chosen station.
hk_test = db[test_station]

In [None]:
# Candidate channel codes depend on the rotation encoded in the first three characters of rf_type.
if rf_type[0:3] == 'ZRT':
    prospective_channels = ['HHR', 'BHR']
elif rf_type[0:3] == 'LQT':
    prospective_channels = ['HHQ', 'BHQ']
else:
    prospective_channels = []
# end if
# Take the first candidate that is actually present for the test station.
channel = None
for c in prospective_channels:
    if c in hk_test:
        channel = c
        break
# end for
if channel is None:
    # Fail with an explicit message instead of the opaque KeyError that hk_test[None] would raise.
    raise ValueError("None of the candidate channels {} found for station {} (rf_type '{}')".format(
        prospective_channels, test_station, rf_type))
# end if
print("Selected channel: {}".format(channel))
channel_data = hk_test[channel]
len(channel_data)

In [None]:
# Check if there are any traces with NaNs in them. RF quality filtering prior to this SHOULD have removed any such traces.
# The displayed value is the number of traces containing at least one NaN sample, so 0 is the expected result.
np.sum([np.any(np.isnan(tr.data)) for tr in channel_data])

## Add additional statistics for prediction of trace quality

In [None]:
# This needs to be done before running quality classifier over the traces.
# NOTE(review): presumably this attaches the per-trace statistics (snr, entropy, max_coherence, ...)
# that the feature matrix further down reads from tr.stats - confirm against rf_util.compute_extra_rf_stats.
rf_util.compute_extra_rf_stats(hk_test)

## Examine available metadata in each trace

In [None]:
# Show the type of a single trace in the selected channel.
type(channel_data[0])

In [None]:
# Display the metadata attached to the first trace of the selected channel.
channel_data[0].stats

## Apply quality filter to traces

In [None]:
# Human-readable names of the classifier features, in the same order as the columns of X.
stats_metrics = ["SNR", "Entropy", "Coherence", "Max_amp", "Amp_20pc", "Amp_80pc", "RMS_amp", "Mean_amp"]
# Feature matrix: one row per trace, one column per statistic read from tr.stats.
X = np.array([
    [getattr(tr.stats, attr) for attr in
     ("snr", "entropy", "max_coherence", "amax", "amp_20pc", "amp_80pc", "rms_amp", "mean_cplx_amp")]
    for tr in channel_data
])
# Replace NaN entries by zero, in place, before the matrix is handed to the classifier.
np.putmask(X, np.isnan(X), 0)

In [None]:
# Predict quality class
predicted_quality = qc.predict(X)
# Compute confidence in quality class prediction
confidences = qc.predict_proba(X)
# Column of `confidences` to read for each trace: 0 by default, 1 where the prediction is 'b'.
# The builtin int dtype is used because the np.int alias has been removed from NumPy.
# NOTE(review): assumes the classifier's probability columns are ordered ('a', 'b') - confirm via qc.classes_.
confidence_index = np.zeros(predicted_quality.shape, dtype=int)
confidence_index[(predicted_quality == 'b')] = 1
prediction_confidence = confidences[range(confidence_index.size), confidence_index]
assert len(prediction_confidence) == len(predicted_quality)

# Attach the predicted label and its confidence to each trace so later cells can filter on them.
for i, tr in enumerate(channel_data):
    tr.stats.predicted_quality = predicted_quality[i]
    tr.stats.prediction_confidence = prediction_confidence[i]

In [None]:
# Keep only the traces that the classifier labelled 'a'. The commented alternative additionally
# requires a prediction confidence of at least 0.75.
rf_stream_A = rf.RFStream([tr for tr in channel_data if tr.stats.predicted_quality == 'a'])
# rf_stream_A = rf.RFStream([tr for tr in channel_data if tr.stats.predicted_quality == 'a' and tr.stats.prediction_confidence >= 0.75])
len(rf_stream_A)

In [None]:
# rf_stream_A = rf.RFStream([tr for tr in channel_data if tr.stats.snr >= 2])
# len(rf_stream_A)

## Plot RFs for traces filtered by various quality metrics

### Narrow data to events within a given magnitude range

In [None]:
# Upper limit on the number of traces passed to each RF stack plot below.
max_traces = 50

In [None]:
# Inclusive event-magnitude window for the retained traces. Both bounds are named so the
# filter below no longer relies on hard-coded literals (min_mag was previously unused).
min_mag = 5.5
max_mag = 6.5
# Filter by magnitude and order the result by back-azimuth for plotting.
rf_stream_A = rf.RFStream([tr for tr in rf_stream_A if min_mag <= tr.stats.event_magnitude <= max_mag]).sort(['back_azimuth'])
print(len(rf_stream_A))
rf_plot_utils.plot_rf_stack(rf_stream_A[0:max_traces])

### Quality A

In [None]:
# Plot the traces whose ground-truth label (tr.stats.quality) is 'a', ordered by back-azimuth.
# Only the label lookup is guarded: a missing `quality` attribute means the data carry no
# ground-truth labels, whereas an AttributeError raised while sorting or plotting is a genuine
# fault and must not be reported as "no labels".
try:
    rf_data = [tr for tr in channel_data if tr.stats.quality == 'a']
except AttributeError:
    print("Data has no ground truth quality labels")
else:
    rf_data = sorted(rf_data, key=lambda v: v.stats.back_azimuth)
    rf_stream = rf.RFStream(rf_data)
    rf_plot_utils.plot_rf_stack(rf_stream[0:max_traces])

### Quality B

In [None]:
# Plot the traces whose ground-truth label (tr.stats.quality) is 'b', ordered by back-azimuth.
# Only the label lookup is guarded: a missing `quality` attribute means the data carry no
# ground-truth labels, whereas an AttributeError raised while sorting or plotting is a genuine
# fault and must not be reported as "no labels".
try:
    rf_data = [tr for tr in channel_data if tr.stats.quality == 'b']
except AttributeError:
    print("Data has no ground truth quality labels")
else:
    rf_data = sorted(rf_data, key=lambda v: v.stats.back_azimuth)
    rf_stream = rf.RFStream(rf_data)
    rf_plot_utils.plot_rf_stack(rf_stream[0:max_traces])

### Predicted Quality A

In [None]:
# Copy the predicted-'a' traces into a plain list, order them by back-azimuth and plot
# all of them (no max_traces cap) with a reduced trace height.
rf_data = list(rf_stream_A)
rf_data.sort(key=lambda tr: tr.stats.back_azimuth)
rf_stream = rf.RFStream(rf_data)
rf_plot_utils.plot_rf_stack(rf_stream, trace_height=0.2)

### Predicted Quality B

In [None]:
# Collect the traces the classifier labelled 'b', ordered by back-azimuth, and plot at most
# max_traces of them.
rf_data = sorted(
    (tr for tr in channel_data if tr.stats.predicted_quality == 'b'),
    key=lambda tr: tr.stats.back_azimuth,
)
rf_stream = rf.RFStream(rf_data)
rf_plot_utils.plot_rf_stack(rf_stream[:max_traces])

***

## Plot overlay of all quality-filtered traces in test channel (no similarity-to-mean filtering)

In [None]:
# Repackage the retained traces as a {channel: [traces]} dict, the layout passed to the
# overlay plot and the H-k stacking below.
hk_quality = {channel: [tr for tr in rf_stream_A]}

In [None]:
# Overlay every trace in hk_quality on one plot; the trace count is embedded in the title suffix.
# NOTE(review): (-10, 25) looks like the plotted time window in seconds - confirm against
# rf_plot_utils.plot_station_rf_overlays.
num_traces = len(hk_quality[channel])
trace_mean = rf_plot_utils.plot_station_rf_overlays(hk_quality, '(all {} traces)'.format(num_traces), (-10, 25))

## Plot only traces with similarity to the mean

In [None]:
# hk_quality_filt, corrs = rf_util.filter_station_to_mean_signal(hk_quality, min_correlation=0.05)

In [None]:
# plt.hist(corrs, bins=50)
# plt.show()

In [None]:
# num_traces = len(hk_quality_filt[channel])
# test_filt_mean = rf_plot_utils.plot_station_rf_overlays(hk_quality_filt, '({} traces similar to mean)'.format(num_traces), (-10, 25))

## Demonstrate the effect of phase-weighting the traces

In [None]:
# from seismic.receiver_fn.rf_util import phase_weights

In [None]:
# pw = phase_weights(hk_quality_filt[channel])

In [None]:
# s0 = hk_quality_filt[channel][0]
# time_offset = s0.stats.onset - s0.stats.starttime
# plt.figure(figsize=(16,9))
# plt.plot(s0.times() - time_offset, pw)
# plt.title('Phase weightings')
# plt.grid()
# plt.show()

In [None]:
# # Demonstrate effect of phase weighting to suppress areas where phases tend to be random.
# pw_exponent = 2
# plt.figure(figsize=(16,9))
# plt.plot(s0.times() - time_offset, s0.data, linewidth=2)
# plt.plot(s0.times() - time_offset, s0.data*pw**pw_exponent, '--', linewidth=2)
# plt.legend(['Original', 'Phase weighted'])
# plt.title('Phase weighting applied to a single trace')
# plt.grid()
# plt.show()

In [None]:
# # Apply phase weighting to data for H-k stacking
# # NOTE: This will overwrite the original filtered data
# for tr in hk_quality_filt[channel]:
#     tr.data = tr.data*pw**pw_exponent

# num_traces = len(hk_quality_filt[channel])
# test_filt_mean = rf_plot_utils.plot_station_rf_overlays(hk_quality_filt, '({} traces similar to mean, phase weighted)'.format(num_traces))

# Plot HK stacks

In [None]:
# Choose the trace set fed to the H-k stack. The similarity-filtered set is disabled because
# the cells that would create hk_quality_filt are commented out.
# hk_src_data = hk_quality_filt
hk_src_data = hk_quality
len(hk_src_data[channel])

In [None]:
# Relative weights applied to the three phase stacks when they are combined.
weighting = (0.35, 0.35, 0.30)
# weighting = (0.5, 0.5, 0.0)

for cha in (channel,):
    k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(
        hk_src_data, cha, h_range=np.linspace(20.0, 70.0, 501), root_order=2, V_p=6.4)

    # Scale each phase stack by its own peak absolute amplitude first. Without this the
    # weights would act relative to each phase's mean return amplitude, which is somewhat
    # arbitrary; after scaling they are meaningful in an absolute sense.
    for i in range(3):
        hk_stack[i] = hk_stack[i] / np.abs(hk_stack[i]).max()

    # Weighted sum over the phases, raised to power >1 to increase contrast, then rescaled
    # by its maximum.
    hk_stack_sum = rf_util.signed_nth_power(rf_stacking.compute_weighted_stack(hk_stack, weighting), 2)
    hk_stack_sum = hk_stack_sum / np.max(hk_stack_sum)

    # Station code and trace count for the plot annotation; nothing is written to disk here.
    sta = hk_src_data[cha][0].stats.station
    num = len(hk_src_data[cha])
    save_file = None
    rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack_sum, title=sta + '.{}'.format(cha), num=num, save_file=save_file)
#     rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack[0], title=sta + '.{} Ps'.format(cha), num=num, clip_negative=False)
#     rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack[1], title=sta + '.{} PpPs'.format(cha), num=num, clip_negative=False)
#     rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack[2], title=sta + '.{} PpSs + PsPs'.format(cha), num=num, clip_negative=False)

***

# Loop over all stations in the loaded network and plot HK-stacks

In [None]:
# Batch run: compute an H-k stack for every station in `db` and save each plot as a PNG.
show = False
weighting = (0.35, 0.35, 0.30)

output_folder = 'hk_stacks'
# makedirs(exist_ok=True) replaces the separate isdir check and is a no-op if the folder exists.
os.makedirs(output_folder, exist_ok=True)

# The context manager guarantees the progress bar is closed even if a station raises.
with tqdm(total=len(db)) as pbar:
    for sta, db_sta in db.items():
        pbar.set_description(sta)
        pbar.update()

        cha = rf_util.choose_rf_source_channel(rf_type, db_sta)
        if cha is None or cha not in db_sta:
            # No usable source channel for this station: skip it rather than abort the whole run.
            print("Skipping {}: no source channel found for rf_type '{}'".format(sta, rf_type))
            continue

        # Get selected channel data
        channel_data = db_sta[cha]

        # Simple quality filter instead of classifier. To use the pickled classifier here, repeat
        # the steps of the "Add additional statistics" and "Apply quality filter to traces"
        # sections on db_sta / channel_data.
        rf_util.label_rf_quality_simple_amplitude(rf_type, channel_data)
        rf_stream = rf.RFStream([tr for tr in channel_data if tr.stats.predicted_quality == 'a']).sort(['back_azimuth'])
        if not rf_stream:
            # Nothing passed the quality filter for this station.
            continue

        rf_quality = {cha: [tr for tr in rf_stream]}

        # NOTE(review): the single-station cell passes V_p=6.4 explicitly, whereas this call relies
        # on compute_hk_stack's default - confirm the two agree.
        k_grid, h_grid, hk_stack = rf_stacking.compute_hk_stack(rf_quality, cha, h_range=np.linspace(20.0, 70.0, 501), root_order=2)

        # Normalize the stacked amplitudes of each phase before computing weighted sum, to ensure the
        # weights are meaningful in an absolute sense. Otherwise the weightings are relative to the mean
        # amplitude of the return of a given phase, which is somewhat arbitrary.
        for i in range(3):
            hk_stack[i, :, :] = hk_stack[i, :, :]/np.max(np.abs(hk_stack[i, :, :]))

        # Sum the phases
        hk_stack_sum = rf_stacking.compute_weighted_stack(hk_stack, weighting)

        # Raise the final sum over phases to power >1 to increase contrast
        hk_stack_sum = rf_util.signed_nth_power(hk_stack_sum, 2)
        hk_stack_sum = hk_stack_sum/np.max(hk_stack_sum[:])

        # Output name and title combine network, station, rf_type and channel.
        st_code = ".".join([network, sta])
        save_file = os.path.join(output_folder, st_code + "_{}_{}_hk_stack.png".format(rf_type, cha))
        num = len(rf_quality[cha])
        rf_plot_utils.plot_hk_stack(k_grid, h_grid, hk_stack_sum, title=st_code + '_{}.{}'.format(rf_type, cha), save_file=save_file, show=show, num=num)
    # end for