## Notebook to optimize detection

In [37]:
from pnwstore.mseed import WaveformClient
import torch
import numpy as np
from tqdm import tqdm
import time 
import gc
import seisbench.models as sbm
from ELEP.elep.ensemble_statistics import ensemble_statistics
from ELEP.elep.ensemble_coherence import ensemble_semblance 
from ELEP.elep.trigger_func import picks_summary_simple

In [38]:
device = torch.device("cpu")

# 1. Set up the job

* Make a list of stations
* make a list pf days
* set up parallel job using Dask (ask Zoe&Yiyu)

In [39]:
twin = 6000     # length of time window
step = 3000     # step length
l_blnd, r_blnd = 500, 500

## 2. Load data

In [40]:
client = WaveformClient()

In [41]:
# Get waveforms and filter
s_J57A = client.get_waveforms(network="7D", station="J57A", channel="?H?", year=2012, month=7, day=10)
s_J57A.filter(type='bandpass',freqmin=4,freqmax=15)
s_J57A

3 Trace(s) in Stream:
7D.J57A..BH1 | 2012-07-10T00:00:00.010700Z - 2012-07-10T23:59:59.990700Z | 50.0 Hz, 4320000 samples
7D.J57A..BH2 | 2012-07-10T00:00:00.010700Z - 2012-07-10T23:59:59.990700Z | 50.0 Hz, 4320000 samples
7D.J57A..BHZ | 2012-07-10T00:00:00.010700Z - 2012-07-10T23:59:59.990700Z | 50.0 Hz, 4320000 samples

In [42]:
# Define the start time and delta 
delta = s_J57A[0].stats.delta
starttime = s_J57A[0].stats.starttime

In [43]:
# download models
pretrain_list = ["pnw","ethz","instance","scedc","stead","geofon"]
pn_pnw_model = sbm.EQTransformer.from_pretrained('pnw')
pn_ethz_model = sbm.EQTransformer.from_pretrained("ethz")
pn_instance_model = sbm.EQTransformer.from_pretrained("instance")
pn_scedc_model = sbm.EQTransformer.from_pretrained("scedc")
pn_stead_model = sbm.EQTransformer.from_pretrained("stead")
pn_geofon_model = sbm.EQTransformer.from_pretrained("geofon")

In [44]:
fs = s_J57A[0].stats.sampling_rate
dt = 1/fs

## Reshaping data 


In [45]:
sdata = np.array(s_J57A)
npts = sdata.shape[1]
nseg = int(np.ceil((npts - twin) / step)) + 1
windows = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
tap = 0.5 * (1 + np.cos(np.linspace(np.pi, 2 * np.pi, 6)))


windows_std = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
windows_max = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
windows = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
windows_idx = np.zeros(nseg, dtype=np.int32)

for iseg in range(nseg):
    idx = iseg * step
    windows[iseg, :] = sdata[:, idx:idx + twin]
    windows[iseg, :] -= np.mean(windows[iseg, :], axis=-1, keepdims=True)
    # original use std norm
    windows_std[iseg, :] = windows[iseg, :] / np.std(windows[iseg, :]) + 1e-10
    # others use max norm
    windows_max[iseg, :] = windows[iseg, :] / (np.max(np.abs(windows[iseg, :]), axis=-1, keepdims=True))
    windows_idx[iseg] = idx

# taper
windows_std[:, :, :6] *= tap; windows_std[:, :, -6:] *= tap[::-1]; 
windows_max[:, :, :6] *= tap; windows_max[:, :, -6:] *= tap[::-1];
del windows

print(f"Window data shape: {windows_std.shape}")

Window data shape: (1439, 3, 6000)


## Predict on base models

In [46]:
pretrain_list = ['original', 'ethz', 'instance', 'scedc', 'stead']

# dim 0: 0 = P, 1 = S
batch_pred = np.zeros([2, len(pretrain_list), nseg, twin], dtype = np.float32) 

for ipre, pretrain in enumerate(pretrain_list):
    t0 = time.time()
    eqt = sbm.EQTransformer.from_pretrained(pretrain)
    eqt.to(device);
    eqt._annotate_args['overlap'] = ('Overlap between prediction windows in samples \
                                    (only for window prediction models)', step)
    eqt._annotate_args['blinding'] = ('Number of prediction samples to discard on \
                                     each side of each window prediction', (l_blnd, r_blnd))
    eqt.eval();
    if pretrain == 'original':
        # batch prediction through torch model
        windows_std_tt = torch.Tensor(windows_std)
        _torch_pred = eqt(windows_std_tt.to(device))
    else:
        windows_max_tt = torch.Tensor(windows_max)
        _torch_pred = eqt(windows_max_tt.to(device))
    batch_pred[0, ipre, :] = _torch_pred[1].detach().cpu().numpy()
    batch_pred[1, ipre, :] = _torch_pred[2].detach().cpu().numpy()
    
    t1 = time.time()
    print(f"picking using [{pretrain}] model: %.3f second" % (t1 - t0))
    
# clean up memory
del _torch_pred, windows_max_tt, windows_std_tt
del windows_std, windows_max
gc.collect()
torch.cuda.empty_cache()

print(f"All prediction shape: {batch_pred.shape}")

picking using [original] model: 32.422 second
picking using [ethz] model: 18.822 second
picking using [instance] model: 17.925 second
picking using [scedc] model: 18.933 second
picking using [stead] model: 18.676 second
All prediction shape: (2, 5, 1439, 6000)


In [47]:
def stacking(data, npts, l_blnd, r_blnd):
    _data = data.copy()
    stack = np.full(npts, np.nan, dtype = np.float32)
    _data[:, :l_blnd] = np.nan; _data[:, -r_blnd:] = np.nan
    stack[:twin] = _data[0, :]
    for iseg in range(nseg-1):
        idx = step*(iseg+1)
        stack[idx:idx + twin] = \
                np.nanmax([stack[idx:idx + twin], _data[iseg+1, :]], axis = 0)
    return stack

In [48]:
pretrain_pred = np.zeros([2, len(pretrain_list), npts], dtype = np.float32)
for ipre, pretrain in enumerate(pretrain_list):
    # 0 for P-wave
    pretrain_pred[0, ipre, :] = stacking(batch_pred[0, ipre, :], npts, l_blnd, r_blnd)
    
    # 1 for S-wave
    pretrain_pred[1, ipre, :] = stacking(batch_pred[1, ipre, :], npts, l_blnd, r_blnd)

  np.nanmax([stack[idx:idx + twin], _data[iseg+1, :]], axis = 0)


In [49]:
paras_semblance = {'dt':dt, 'semblance_order':2, 'window_flag':True, 
                   'semblance_win':0.5, 'weight_flag':'max'}
p_thrd, s_thrd = 0.05, 0.05

smb_pred = np.zeros([2, nseg, twin], dtype = np.float32)

In [50]:
# calculate the semblance
## the semblance may takes a while bit to calculate
for iseg in tqdm(range(nseg)):
    # 0 for P-wave
    smb_pred[0, iseg, :] = ensemble_semblance(batch_pred[0, :, iseg, :], paras_semblance)
    
    # 1 for P-wave
    smb_pred[1, iseg, :] = ensemble_semblance(batch_pred[1, :, iseg, :], paras_semblance)

## ... and stack
# 0 for P-wave
smb_p = stacking(smb_pred[0, :], npts, l_blnd, r_blnd)

# 1 for P-wave
smb_s = stacking(smb_pred[1, :], npts, l_blnd, r_blnd)

# clean-up RAM
del smb_pred, batch_pred

100%|██████████| 1439/1439 [01:24<00:00, 17.01it/s]
  np.nanmax([stack[idx:idx + twin], _data[iseg+1, :]], axis = 0)


## Create a csv file
- Create a dictionary and the keys for the station name, network,station latitude, station longitude, depth, P, S, pick time, 
- Create a dictionary and the keys for the station name, station_network_code, station_channel_code, station_latitude_deg, station_longitude_deg, source_depth_km, P, S, pick time, 
- The keys used in the CamCat dataset in the seisbench format:  event_id,source_origin_time,source_latitude_deg,source_longitude_deg,source_type,source_depth_km,preferred_source_magnitude,preferred_source_magnitude_type,preferred_source_magnitude_uncertainty,source_depth_uncertainty_km,source_horizontal_uncertainty_km,station_network_code,station_channel_code,station_code,station_location_code,station_latitude_deg,station_longitude_deg,station_elevation_m,trace_name,trace_sampling_rate_hz,trace_start_time,trace_S_arrival_sample,trace_P_arrival_sample,trace_S_arrival_uncertainty_s,trace_P_arrival_uncertainty_s,trace_P_polarity,trace_S_onset,trace_P_onset,trace_snr_db,source_type_pnsn_label,source_local_magnitude,source_local_magnitude_uncertainty,source_duration_magnitude,source_duration_magnitude_uncertainty,source_hand_magnitude,trace_missing_channel,trace_has_offset

In [51]:
p_index = picks_summary_simple(smb_p, p_thrd)
s_index = picks_summary_simple(smb_s, s_thrd)
print(f"{len(p_index)} P picks\n{len(s_index)} S picks")

48 P picks
3 S picks
