# Detection and Phase Picking in Cascadia using ELEP

This notebook deploys in parallel a detection+phase picking job on Cascadia Initiative stations using an ensemble deep learning ELEP (Yuan et al, 2023).
Written by Hiroto Bito.
Modified by CoolTeam (2/14/24)

In [28]:
#####################################
import logging
#####################################
from obspy.clients.fdsn import Client
import numpy as np
import obspy
import matplotlib.pyplot as plt
from obspy.clients.fdsn import Client
import datetime
import pandas as pd
import dask
from dask.diagnostics import ProgressBar

from obspy.clients.fdsn.client import Client
from pnwstore.mseed import WaveformClient
import torch
import numpy as np
from tqdm import tqdm
import time 
import pandas as pd
import gc
import seisbench.models as sbm
from ELEP.elep.ensemble_statistics import ensemble_statistics
from ELEP.elep.ensemble_coherence import ensemble_semblance 
from ELEP.elep.trigger_func import picks_summary_simple

In [29]:
device = torch.device("cpu")

In [30]:
# Define clients
client_inventory = Client('IRIS')
client_waveform = WaveformClient()
client_ncedc = Client('NCEDC')

## Make an array of networks and stations

In [31]:
# Read Morton's catalog
cat_ds03 = pd.read_excel("../data/ds03.xlsx")

In [32]:
# Show the catalog
cat_ds03

Unnamed: 0,CI YEAR,CLUSTER ID,STATION,NETWORK CODE,FILTER USED (Hz)
0,1,OC1,J25A,7D,HP 5
1,1,,M08A,7D,HP 5
2,1,,I02D,TA,HP 5
3,1,OC2,J25A,7D,BP 5-10
4,1,,J33A,7D,BP 5-15
...,...,...,...,...,...
247,4,CI4-45,FC03D,7D,BP 1-15
248,4,,HEBO,UW,BP 2-10
249,4,CI4-47,NLWA,US,HP 3
250,4,CI4-48,OCP,UW,BP 3-10


In [33]:
# Get the names of the networks
cat_ds03_CI2 = cat_ds03[cat_ds03['CI YEAR']==2]
cat_ds03_CI2

Unnamed: 0,CI YEAR,CLUSTER ID,STATION,NETWORK CODE,FILTER USED (Hz)
49,2,SM,I02D,TA,HP 5
50,2,,M09B,7D,BP 5-10
51,2,,J25B,7D,HP 5
52,2,Nclust,J25B,7D,HP5
53,2,,J33B,7D,BP 2-10
...,...,...,...,...,...
111,2,,KEB,NC,BP 3-10
112,2,CI2-27,J09B,7D,HP 5
113,2,,J01E,TA,HP 5
114,2,CI2-28,BABR,UW,HP 5


In [34]:
# Assign the network codes
networks= cat_ds03_CI2['NETWORK CODE'].unique()

In [35]:
# Make a list of networks and stations
networks_stas = []
for i in range(len(networks)):
    network = networks[i]
    sta = cat_ds03_CI2.loc[cat_ds03_CI2['NETWORK CODE']==network, 'STATION']
    
    sta = [i.replace(" ", "") for i in sta]
    sta = set(sta)
    sta =list(sta)

    for j in range(len(sta)):
        networks_stas.append([network,sta[j]])

networks_stas =np.array(networks_stas)
networks_stas

array([['TA', 'I02D'],
       ['TA', 'J01E'],
       ['7D', 'FS16B'],
       ['7D', 'J33B'],
       ['7D', 'M14B'],
       ['7D', 'FS20B'],
       ['7D', 'G26B'],
       ['7D', 'M09B'],
       ['7D', 'FS17B'],
       ['7D', 'G27B'],
       ['7D', 'M12B'],
       ['7D', 'FS13B'],
       ['7D', 'J09B'],
       ['7D', 'G17B'],
       ['7D', 'J25B'],
       ['7D', 'G19B'],
       ['7D', 'FS09B'],
       ['7D', 'G10B'],
       ['7D', 'G25B'],
       ['UW', 'I02D'],
       ['UW', 'BABR'],
       ['NC', 'KBO'],
       ['NC', 'KOM'],
       ['NC', 'KRMB'],
       ['NC', 'KHMB'],
       ['NC', 'KHBB'],
       ['NC', 'KSXB'],
       ['NC', 'KEB'],
       ['NC', 'KMPB'],
       ['BK', 'JCC']], dtype='<U5')

In [36]:
print(len(networks_stas))

30


In [37]:
index =[]
client= client_inventory
for i, ii in enumerate(networks_stas):
    print(networks_stas[i])
    index.append(i)
    inventory = client.get_stations(network=networks_stas[i][0], station=networks_stas[i][1])
    

['TA' 'I02D']
['TA' 'J01E']
['7D' 'FS16B']
['7D' 'J33B']
['7D' 'M14B']
['7D' 'FS20B']
['7D' 'G26B']
['7D' 'M09B']
['7D' 'FS17B']
['7D' 'G27B']
['7D' 'M12B']
['7D' 'FS13B']
['7D' 'J09B']
['7D' 'G17B']
['7D' 'J25B']
['7D' 'G19B']
['7D' 'FS09B']
['7D' 'G10B']
['7D' 'G25B']
['UW' 'I02D']


FDSNNoDataException: No data available for request.
HTTP Status code: 204
Detailed response of server:



In [38]:
index

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [39]:
# Since the ['UW' 'I02D'] pair give an error when receiveing their data through inventory, remove this pair.
networks_stas = np.delete(networks_stas, index[-1],axis=0)
networks_stas

array([['TA', 'I02D'],
       ['TA', 'J01E'],
       ['7D', 'FS16B'],
       ['7D', 'J33B'],
       ['7D', 'M14B'],
       ['7D', 'FS20B'],
       ['7D', 'G26B'],
       ['7D', 'M09B'],
       ['7D', 'FS17B'],
       ['7D', 'G27B'],
       ['7D', 'M12B'],
       ['7D', 'FS13B'],
       ['7D', 'J09B'],
       ['7D', 'G17B'],
       ['7D', 'J25B'],
       ['7D', 'G19B'],
       ['7D', 'FS09B'],
       ['7D', 'G10B'],
       ['7D', 'G25B'],
       ['UW', 'BABR'],
       ['NC', 'KBO'],
       ['NC', 'KOM'],
       ['NC', 'KRMB'],
       ['NC', 'KHMB'],
       ['NC', 'KHBB'],
       ['NC', 'KSXB'],
       ['NC', 'KEB'],
       ['NC', 'KMPB'],
       ['BK', 'JCC']], dtype='<U5')

In [40]:
# Test if the array removed the  ['UW' 'I02D'] works through inventory this time. It works.
index =[]
client= client_inventory
for i, ii in enumerate(networks_stas):
    print(networks_stas[i])
    index.append(i)
    inventory = client.get_stations(network=networks_stas[i][0], station=networks_stas[i][1])

['TA' 'I02D']
['TA' 'J01E']
['7D' 'FS16B']
['7D' 'J33B']
['7D' 'M14B']
['7D' 'FS20B']
['7D' 'G26B']
['7D' 'M09B']
['7D' 'FS17B']
['7D' 'G27B']
['7D' 'M12B']
['7D' 'FS13B']
['7D' 'J09B']
['7D' 'G17B']
['7D' 'J25B']
['7D' 'G19B']
['7D' 'FS09B']
['7D' 'G10B']
['7D' 'G25B']
['UW' 'BABR']
['NC' 'KBO']
['NC' 'KOM']
['NC' 'KRMB']
['NC' 'KHMB']
['NC' 'KHBB']
['NC' 'KSXB']
['NC' 'KEB']
['NC' 'KMPB']
['BK' 'JCC']


In [41]:
len(networks_stas)

29

## Actual Picking

In [42]:
# Now create your list of days to loop over!
t1 = datetime.datetime(year=2012,month=10,day=1,hour=0,minute=0,second=0,microsecond=0)
t2 = datetime.datetime(year=2012,month=10,day=31,hour=0,minute=0,second=0,microsecond=0)
time_bins = pd.to_datetime(np.arange(t1,t2+datetime.timedelta(days=1),pd.Timedelta(1,'days')))

In [19]:
import os
os.makedirs("../data/catalogs_elep_all_stations_20121001_to_20121031",exist_ok=True)

In [43]:
# Specify some parameters - you can change what you specify here vs. within the large function, this is just an example.
# Depending on whether the pertained models take a long time to load every time, you may want to load those outside the function and just feed them to the function rather than loading them every time in parallel.
twin = 6000     # length of time window
step = 3000     # step length
l_blnd, r_blnd = 500, 500

###########################
# This is not a valid file path to save files. 
filepath = "../data/catalogs_elep_all_stations_20121001_to_20121031_temp/"

# If we define:
station = "M14B"
# t1 = datetime(2012,10,1)
tstring = t1.strftime('%Y%m%d')

# You use this arguments below as (it should be filepath, not file_path)
file_name = filepath+station+'_'+tstring+'.csv'

# Then your file name will be:
print(file_name)
#https://cascadia.ess.washington.edu/jhub/user/hbito/notebooks/elep-test/surface_events/src7D_20121001.csv

# 1. There is a missing "/" after src in filepath.
# 2. This string beginning with https:// is a web link. You can replace filepath as:
# filepath = "/home/hbito/elep-test/surface_events/src/"
# 3. it may not be recommended to save data product to a src (source) folder. 
#    People usually make other directories to save those final files. 
#    Just a notice: not our top prioirty though.
###########################

../data/catalogs_elep_all_stations_20121001_to_20121031_temp/M14B_20121001.csv


In [44]:
# download models
pretrain_list = ["pnw","ethz","instance","scedc","stead","geofon"]
pn_pnw_model = sbm.EQTransformer.from_pretrained('pnw')
pn_ethz_model = sbm.EQTransformer.from_pretrained("ethz")
pn_instance_model = sbm.EQTransformer.from_pretrained("instance")
pn_scedc_model = sbm.EQTransformer.from_pretrained("scedc")
pn_stead_model = sbm.EQTransformer.from_pretrained("stead")
pn_geofon_model = sbm.EQTransformer.from_pretrained("geofon")

In [45]:
# Define the function for stacking the segmented time windows after prediction
##################################### add a nseg argument here
# def stacking(data, npts, l_blnd, r_blnd):
def stacking(data, npts, l_blnd, r_blnd, nseg):
#####################################
    _data = data.copy()
    stack = np.full(npts, np.nan, dtype = np.float32)
    _data[:, :l_blnd] = np.nan; _data[:, -r_blnd:] = np.nan
    stack[:twin] = _data[0, :]
    for iseg in range(nseg-1):
        idx = step*(iseg+1)
        stack[idx:idx + twin] = \
                np.nanmax([stack[idx:idx + twin], _data[iseg+1, :]], axis = 0)
    return stack

In [55]:
# Trying to debug the error when 
sdata = client_ncedc.get_waveforms(network='7D', station="FS13B", channel="?H?",starttime=t1, endtime=t2) 

TypeError: get_waveforms() missing 1 required positional argument: 'location'

In [54]:
# client_waveform = WaveformClient()
sdata = client_waveform.get_waveforms(network='7D', station="FS13B", channel="?H?", 
                                          year=t1.strftime('%Y'), month=t1.strftime('%m'), 
                                          day=t1.strftime('%d'))

In [46]:
# Write your function that you want to run in parallel: I recommend you design this to essentially perform your entire workflow on one station for one day, and write a csv file for that station, much the way you already have it.
# This is what will run in parallel!
# So, the only inputs are the station name, the start and end times you want to detect for, the path of the folder you want to write the results to, and the parameters you already specified. Here is where you could also feed in the preloaded models if that becomes important.
def run_detection(network,station,t1,t2,filepath,twin,step,l_blnd,r_blnd):
    # Define tstring
    tstring = t1.strftime('%Y%m%d')

    if os.path.exists(filepath+station+'_'+tstring+'.csv'):
        return 
	# Load data
	# Reshape data
	# Predict on base models
	# Stack
	# Create and write csv file. Define file name using the station code and the input filepath
    
    # Get the inventory for the stations
    ###############################
    #stations = station           # Seems you never use this "stations" again
                                  # so you can just use "station"
    ###############################
    network = network
#     channels = '[HB][HN][BH]?'
#     channels = 'HH?,HN?,BH?' 
    channels = '?H?'
    
    client = client_inventory
    inventory = client.get_stations(network=network, station=station)
    
    # Get waveforms and filter
    ###############################
   #sdata = client_waveform.get_waveforms(network="7D", station=station, channel="BH?", starttime=t1, 
   #                                      year=t1.strftime('%Y'), month=t1.strftime('%m'), 
   #                                      day=t1.strftime('%d'))
   # You've already defined stations, network, channels above, so you can use them here.
   # You don't need "starttime = t1" argument.
    if network == 'NC' or 'BK':
        sdata = client_ncedc.get_waveforms(network=network, station=station, channel=channels, 
                                          year=t1.strftime('%Y'), month=t1.strftime('%m'), 
                                          day=t1.strftime('%d')) 
    else: 
        sdata = client_waveform.get_waveforms(network=network, station=station, channel=channels, 
                                          year=t1.strftime('%Y'), month=t1.strftime('%m'), 
                                          day=t1.strftime('%d'))
    
    
    sdata = sdata.select(channel = "[HB]H?")
    ###############################
    # If no data returned, skipping
    if len(sdata) == 0:
        logging.warning("No stream returned. Skipping.")
        return
    ###############################
    
    sdata.filter(type='bandpass',freqmin=4,freqmax=15)
    
    ###############################
    sdata.merge(fill_value='interpolate') # fill gaps if there are any.
    ###############################

    # Get the necassary information about the station
    delta = sdata[0].stats.delta
    starttime = sdata[0].stats.starttime
    fs = sdata[0].stats.sampling_rate
    dt = 1/fs
    

    # Make all the traces in the stream have the same lengths
    max_starttime = max([tr.stats.starttime for tr in sdata])
    min_endtime = min([tr.stats.endtime for tr in sdata])
    
    for tr in sdata:
        tr.trim(starttime=max_starttime,endtime=min_endtime, nearest_sample=True)    
        
    # Reshaping data
    arr_sdata = np.array(sdata)
    npts = arr_sdata.shape[1]
    ############################### avoiding errors at the end of a stream
   #nseg = int(np.ceil((npts - twin) / step)) + 1
    nseg = int(np.floor((npts - twin) / step)) + 1
    ###############################
    windows = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
    tap = 0.5 * (1 + np.cos(np.linspace(np.pi, 2 * np.pi, 6)))
    
    # Define the parameters for semblance
    paras_semblance = {'dt':dt, 'semblance_order':2, 'window_flag':True, 
                   'semblance_win':0.5, 'weight_flag':'max'}
    p_thrd, s_thrd = 0.05, 0.05

    windows_std = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
    windows_max = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
    windows = np.zeros(shape=(nseg, 3, twin), dtype= np.float32)
    windows_idx = np.zeros(nseg, dtype=np.int32)

    for iseg in range(nseg):
        idx = iseg * step
        windows[iseg, :] = arr_sdata[:, idx:idx + twin]
        windows[iseg, :] -= np.mean(windows[iseg, :], axis=-1, keepdims=True)
        # original use std norm
        windows_std[iseg, :] = windows[iseg, :] / np.std(windows[iseg, :]) + 1e-10
        # others use max norm
        windows_max[iseg, :] = windows[iseg, :] / (np.max(np.abs(windows[iseg, :]), axis=-1, keepdims=True))
        windows_idx[iseg] = idx

    # taper
    windows_std[:, :, :6] *= tap; windows_std[:, :, -6:] *= tap[::-1]; 
    windows_max[:, :, :6] *= tap; windows_max[:, :, -6:] *= tap[::-1];
    del windows

#     print(f"Window data shape: {windows_std.shape}")
    
    # Predict on base models
    
    pretrain_list = ['original', 'ethz', 'instance', 'scedc', 'stead']

    # dim 0: 0 = P, 1 = S
    batch_pred = np.zeros([2, len(pretrain_list), nseg, twin], dtype = np.float32) 
    for ipre, pretrain in enumerate(pretrain_list):
        t0 = time.time()
        eqt = sbm.EQTransformer.from_pretrained(pretrain)
        eqt.to(device);
        eqt._annotate_args['overlap'] = ('Overlap between prediction windows in samples \
                                        (only for window prediction models)', step)
        eqt._annotate_args['blinding'] = ('Number of prediction samples to discard on \
                                         each side of each window prediction', (l_blnd, r_blnd))
        eqt.eval();
        if pretrain == 'original':
            # batch prediction through torch model
            windows_std_tt = torch.Tensor(windows_std)
            _torch_pred = eqt(windows_std_tt.to(device))
        else:
            windows_max_tt = torch.Tensor(windows_max)
            _torch_pred = eqt(windows_max_tt.to(device))
        batch_pred[0, ipre, :] = _torch_pred[1].detach().cpu().numpy()
        batch_pred[1, ipre, :] = _torch_pred[2].detach().cpu().numpy()

    # clean up memory
    del _torch_pred, windows_max_tt, windows_std_tt
    del windows_std, windows_max
    gc.collect()
    torch.cuda.empty_cache()

    print(f"All prediction shape: {batch_pred.shape}")
    
    ####################### You don't need this
#     pretrain_pred = np.zeros([2, len(pretrain_list), npts], dtype = np.float32)
#     for ipre, pretrain in enumerate(pretrain_list):
#        # 0 for P-wave
#         pretrain_pred[0, ipre, :] = stacking(batch_pred[0, ipre, :], npts, l_blnd, r_blnd)
# 
#        # 1 for S-wave
#        pretrain_pred[1, ipre, :] = stacking(batch_pred[1, ipre, :], npts, l_blnd, r_blnd)
    ####################### You don't need this
    
    smb_pred = np.zeros([2, nseg, twin], dtype = np.float32)
    # calculate the semblance
    ## the semblance may takes a while bit to calculate
    
    ############################# remove tqdm (extra progress bar)
#     for iseg in tqdm(range(nseg)):
    for iseg in range(nseg):
    #############################
        # 0 for P-wave
        smb_pred[0, iseg, :] = ensemble_semblance(batch_pred[0, :, iseg, :], paras_semblance)

        # 1 for P-wave
        smb_pred[1, iseg, :] = ensemble_semblance(batch_pred[1, :, iseg, :], paras_semblance)

    ## ... and stack
    # 0 for P-wave
    ####################### add a nseg argument here
    #smb_p = stacking(smb_pred[0, :], npts, l_blnd, r_blnd)
    smb_p = stacking(smb_pred[0, :], npts, l_blnd, r_blnd, nseg)

    # 1 for P-wave
    #smb_s = stacking(smb_pred[1, :], npts, l_blnd, r_blnd)
    smb_s = stacking(smb_pred[1, :], npts, l_blnd, r_blnd, nseg)
    #######################
    # clean-up RAM
    del smb_pred, batch_pred

    p_index = picks_summary_simple(smb_p, p_thrd)
    s_index = picks_summary_simple(smb_s, s_thrd)
    print(f"{len(p_index)} P picks\n{len(s_index)} S picks")
    
    # Create lists and a data frame
    event_id = []
    source_type = []
    station_network_code = []
    station_channel_code = []
    station_code = []
    station_location_code = []
    station_latitude_deg= []
    station_longitude_deg = []
    station_elevation_m = []
    trace_name = []
    trace_sampling_rate_hz = []
    trace_start_time = []
    trace_S_arrival_sample = []
    trace_P_arrival_sample = []
    trace_S_onset = []
    trace_P_onset = []
    trace_snr_db = []
    trace_p_arrival = []
    trace_s_arrival = []

    for i, idx in enumerate(p_index):
        event_id.append(' ')
        source_type.append(' ')
        station_network_code.append(network)   # Change to otehr networks
        station_channel_code.append(' ')
        station_code.append(station)
        station_location_code.append(sdata[0].stats.location)   
        station_latitude_deg.append(inventory[0][0].latitude)
        station_longitude_deg.append(inventory[0][0].longitude)   
        station_elevation_m.append(inventory[0][0].elevation)
        trace_name.append(' ')
        trace_sampling_rate_hz.append(sdata[0].stats.sampling_rate)
        trace_start_time.append(sdata[0].stats.starttime)
        trace_S_arrival_sample.append(' ')
        trace_P_arrival_sample.append(' ')
        trace_S_onset.append(' ')
        trace_P_onset.append(' ')
        trace_snr_db.append(' ')
        trace_s_arrival.append(np.nan)
        trace_p_arrival.append(str(starttime  + idx * delta))

    for i, idx in enumerate(s_index):
        event_id.append(' ')
        source_type.append(' ')
        station_network_code.append(network) # Change to otehr networks
        station_channel_code.append(' ')
        station_code.append(station)
        station_location_code.append(sdata[0].stats.location)   
        station_latitude_deg.append(inventory[0][0].latitude)
        station_longitude_deg.append(inventory[0][0].longitude)   
        station_elevation_m.append(inventory[0][0].elevation)
        trace_name.append(' ')
        trace_sampling_rate_hz.append(sdata[0].stats.sampling_rate)
        trace_start_time.append(sdata[0].stats.starttime)
        trace_S_arrival_sample.append(' ')
        trace_P_arrival_sample.append(' ')
        trace_S_onset.append(' ')
        trace_P_onset.append(' ')
        trace_snr_db.append(' ')
        trace_s_arrival.append(str(starttime  + idx * delta))
        trace_p_arrival.append(np.nan)

    # dictionary of lists
    dict = {'event_id':event_id,'source_type':source_type,'station_network_code':station_network_code,\
            'station_channel_code':station_channel_code,'station_code':station_code,'station_location_code':station_location_code,\
            'station_latitude_deg':station_latitude_deg,'station_longitude_deg':station_longitude_deg, \
            'station_elevation_m':station_elevation_m,'trace_name':trace_name,'trace_sampling_rate_hz':trace_sampling_rate_hz,\
            'trace_start_time':trace_start_time,'trace_S_arrival_sample':trace_S_arrival_sample,\
            'trace_P_arrival_sample':trace_P_arrival_sample, 'trace_S_onset':trace_S_onset,'trace_P_onset':trace_P_onset,\
            'trace_snr_db':trace_snr_db, 'trace_s_arrival':trace_s_arrival, 'trace_p_arrival':trace_p_arrival}

    df = pd.DataFrame(dict)

    # Make the specific day into a string:
    tstring = t1.strftime('%Y%m%d')
    # Build the full file name:
    ##################################################
#     file_name = file_path+station+'_'+tstring+'.csv'
    file_name = filepath+station+'_'+tstring+'.csv'
    ##################################################
    # Write to file using that name
    df.to_csv(file_name)

In [47]:
# Combine that list of days with the list of stations
# We are essentially creating a list of the number of tasks we have to do with the information that is unique to each task; we will do them in parallel
task_list = []
for i in range(len(networks_stas)):
	for t in time_bins:
		task_list.append([networks_stas[i][0], networks_stas[i][1],t])

In [48]:
task_list

[['TA', 'I02D', Timestamp('2012-10-01 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-02 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-03 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-04 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-05 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-06 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-07 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-08 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-09 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-10 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-11 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-12 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-13 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-14 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-15 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-16 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-17 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-18 00:00:00')],
 ['TA', 'I02D', Timestamp('2012-10-19 00:00:00')],
 ['TA', 'I02D', Timestamp('2012

In [49]:
# Now we start setting up a parallel operation using a package called Dask.

# Start by writing a new a function that is specifically designed to be run in parallel through dask. All it essentially does is define the inputs to the larger run_detection function and then runs the function itself, but because we "decorate" it with @dask.delayed to start, the code will recognize that it should be run in parallel.

@dask.delayed
def loop_days(task,filepath,twin,step,l_blnd,r_blnd):

	# Define the parameters that are specific to each task
	t1 = obspy.UTCDateTime(task[2])
	t2 = obspy.UTCDateTime(t1 + pd.Timedelta(1,'days'))
	network = task[0]
	station = task[1]
    
    #print network and station
	print([network,station])
	# Call to the function that will perform the operation and write the results to file
	run_detection(network,station,t1,t2,filepath,twin,step,l_blnd,r_blnd)
	

# Now we set up the parallel operation
# The below builds a framework for the computer to run in parallel. This doesn't actually execute anything.
lazy_results = [loop_days(task,filepath,twin,step,l_blnd,r_blnd) for task in task_list]
    

# The below actually executes the parallel operation!
# It's nice to do it with the ProgressBar so you can see how long things are taking.
# Each operation should also write a file so that is another way to check on progress.
with ProgressBar():
    #################################
    # Add scheduler = 'single-threaded'
	dask.compute(lazy_results, scheduler='single-threaded') 
    

[                                        ] | 0% Completed | 275.74 us['7D', 'M14B']
['NC', 'KBO']
[                                        ] | 0% Completed | 263.82 ms


TypeError: get_waveforms() missing 3 required positional arguments: 'location', 'starttime', and 'endtime'

In [27]:
# Now we start setting up a parallel operation using a package called Dask.

# Start by writing a new a function that is specifically designed to be run in parallel through dask. All it essentially does is define the inputs to the larger run_detection function and then runs the function itself, but because we "decorate" it with @dask.delayed to start, the code will recognize that it should be run in parallel.

@dask.delayed
def loop_days(task,filepath,twin,step,l_blnd,r_blnd):

    # Define the parameters that are specific to each task
    t1 = obspy.UTCDateTime(task[2])
    t2 = obspy.UTCDateTime(t1 + pd.Timedelta(1,'days'))
    network = task[0]
    station = task[1]

    #print network and station
    print([network,station])
    # Call to the function that will perform the operation and write the results to file
    try: 
        run_detection(network,station,t1,t2,filepath,twin,step,l_blnd,r_blnd)
    except:
        return


# Now we set up the parallel operation
# The below builds a framework for the computer to run in parallel. This doesn't actually execute anything.
lazy_results = [loop_days(task,filepath,twin,step,l_blnd,r_blnd) for task in task_list]
    

# The below actually executes the parallel operation!
# It's nice to do it with the ProgressBar so you can see how long things are taking.
# Each operation should also write a file so that is another way to check on progress.
with ProgressBar():
    #################################
    # Add scheduler = 'single-threaded'
	dask.compute(lazy_results, scheduler='single-threaded') 
    

[                                        ] | 0% Completed | 205.16 us['7D', 'M12B']
['7D', 'J33B']
['7D', 'FS20B']
['NC', 'KOM']
['UW', 'BABR']
['NC', 'KBO']
[                                        ] | 0% Completed | 105.94 ms['NC', 'KHMB']
['UW', 'BABR']
['7D', 'G19B']
['7D', 'FS16B']
['7D', 'G26B']
['7D', 'M09B']
['TA', 'I02D']
['7D', 'G27B']
['7D', 'FS13B']
['NC', 'KRMB']
[                                        ] | 1% Completed | 206.53 ms['NC', 'KSXB']
[                                        ] | 1% Completed | 306.86 ms['NC', 'KOM']
['7D', 'G27B']
['7D', 'J33B']
['7D', 'G25B']
['7D', 'FS13B']
['NC', 'KRMB']
[                                        ] | 2% Completed | 407.35 ms['7D', 'J33B']
['7D', 'G26B']
['7D', 'M12B']
['NC', 'KMPB']
[#                                       ] | 2% Completed | 507.66 ms['NC', 'KBO']
[#                                       ] | 3% Completed | 608.02 ms['7D', 'G26B']
['7D', 'FS09B']
['NC', 'KRMB']
['NC', 'KBO']
[#                                   



['7D', 'G19B']
['7D', 'J09B']
['BK', 'JCC']
[##                                      ] | 6% Completed | 2.72 s



['7D', 'G17B']
['NC', 'KOM']
['7D', 'FS17B']
['NC', 'KRMB']
[##                                      ] | 6% Completed | 2.82 s['7D', 'G27B']
['NC', 'KRMB']
['NC', 'KBO']
[##                                      ] | 6% Completed | 2.92 s['7D', 'J09B']
['NC', 'KOM']
[##                                      ] | 7% Completed | 3.02 s['7D', 'FS17B']
['BK', 'JCC']




['7D', 'FS20B']
['7D', 'G27B']
['7D', 'G27B']
['7D', 'FS17B']
['7D', 'J25B']
['NC', 'KOM']
[###                                     ] | 8% Completed | 3.12 s['NC', 'KEB']
['7D', 'G26B']
['TA', 'J01E']
[###                                     ] | 8% Completed | 3.22 s



['7D', 'G10B']
['7D', 'M14B']
['NC', 'KRMB']
[###                                     ] | 8% Completed | 3.32 s['NC', 'KRMB']
['7D', 'G10B']
['7D', 'J25B']
['NC', 'KRMB']
[###                                     ] | 9% Completed | 3.42 s['7D', 'G10B']
['7D', 'G25B']
['7D', 'FS09B']
['7D', 'FS13B']
['7D', 'J09B']
['7D', 'J33B']
['7D', 'J09B']
['NC', 'KHMB']
[####                                    ] | 10% Completed | 3.53 s['NC', 'KEB']
['7D', 'G19B']
['UW', 'BABR']
['7D', 'G17B']
['NC', 'KSXB']
[####                                    ] | 10% Completed | 3.63 s['7D', 'FS20B']
['NC', 'KBO']
['7D', 'M12B']
['NC', 'KMPB']
[####                                    ] | 11% Completed | 3.73 s['NC', 'KSXB']
['TA', 'I02D']
['TA', 'I02D']
['TA', 'J01E']
[####                                    ] | 11% Completed | 3.83 s['7D', 'FS17B']
['7D', 'FS17B']
['7D', 'FS09B']
['7D', 'FS09B']
['NC', 'KOM']
['7D', 'FS16B']
['7D', 'M12B']
[####                                    ] | 12% Completed | 3.93 s['7

['7D', 'G17B']
['7D', 'M09B']
['NC', 'KHMB']
[#################                       ] | 42% Completed | 8.76 s['TA', 'J01E']
['7D', 'G10B']
['7D', 'FS17B']
['TA', 'I02D']
['NC', 'KEB']
['7D', 'M12B']
['TA', 'I02D']
['NC', 'KMPB']
[#################                       ] | 43% Completed | 8.86 s['NC', 'KRMB']
[#################                       ] | 43% Completed | 8.96 s['NC', 'KRMB']
['7D', 'FS20B']
['7D', 'FS13B']
['7D', 'G25B']
['NC', 'KHMB']
[#################                       ] | 44% Completed | 9.06 s['7D', 'G10B']
['7D', 'J09B']
['NC', 'KOM']
[#################                       ] | 44% Completed | 9.16 s['7D', 'FS20B']
['TA', 'J01E']
['7D', 'G10B']
['7D', 'G17B']
['7D', 'J33B']
['7D', 'G27B']
['7D', 'FS13B']
['7D', 'J09B']
['7D', 'G25B']
['TA', 'I02D']
['7D', 'FS17B']
['BK', 'JCC']
[##################                      ] | 45% Completed | 9.26 s['7D', 'FS17B']
['7D', 'G25B']
['7D', 'G10B']
['7D', 'M09B']
['7D', 'FS13B']
['7D', 'G19B']
['NC', 'KSXB']
[#######

[#############################           ] | 73% Completed | 14.59 s['7D', 'J33B']
['7D', 'FS16B']
['7D', 'M14B']
['7D', 'J09B']
['7D', 'G17B']
['7D', 'G26B']
['NC', 'KMPB']
['UW', 'BABR']
['TA', 'I02D']
['TA', 'I02D']
['7D', 'FS16B']
['NC', 'KOM']
[##############################          ] | 75% Completed | 14.70 s['BK', 'JCC']
['7D', 'G10B']
['7D', 'G10B']
['TA', 'I02D']
['NC', 'KSXB']
[##############################          ] | 75% Completed | 14.80 s['7D', 'G19B']
['7D', 'FS09B']
['7D', 'FS13B']
['7D', 'G26B']
['7D', 'J33B']
['NC', 'KSXB']
[##############################          ] | 76% Completed | 14.90 s['7D', 'J33B']
['7D', 'FS09B']
['7D', 'FS20B']
['7D', 'M09B']
['UW', 'BABR']
['7D', 'M12B']
['7D', 'G25B']
['NC', 'KEB']
['7D', 'G19B']
['NC', 'KBO']
[###############################         ] | 77% Completed | 15.00 s['7D', 'G26B']
['7D', 'G26B']
['UW', 'BABR']
['7D', 'M12B']
['7D', 'FS09B']
['BK', 'JCC']
[###############################         ] | 78% Completed | 15.10 s['NC'