#  Sort Through Detections and Only Keep Certain Ones

### Set Up

Import Everything

In [None]:
import pandas as pd
import numpy as np
import obspy
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
import matplotlib.pyplot as plt
from time import time
from glob import glob
from obspy.signal.trigger import classic_sta_lta, plot_trigger, trigger_onset
import csv
import re

from obspy.core.utcdatetime import UTCDateTime

Set Parameters

In [None]:
# --- Window cut around each detection time (seconds) ---
# Templates are 13 seconds long; any longer and it could find the wrong signal.
ta = 13  # seconds kept after the detection time
# STA/LTA needs 10 seconds of data before the signal to calibrate.
tb = 10  # seconds kept before the detection time

# --- Waveform pre-processing ---
fs = 40     # sampling rate
fqmin = 1   # minimum frequency of the bandpass filter
fqmax = 10  # maximum frequency of the bandpass filter

# --- Signal detection (STA/LTA) ---
nsta = 5.       # length of the short window, in seconds
nlta = 10.      # length of the long window, in seconds
thr_on = 1.35   # threshold that turns the signal trigger on
thr_off = 0.75  # threshold that turns the signal trigger off

# --- Screening of detections ---
pr = 98    # percentile used for the SNR
rpwi = 15  # seconds before and after each REDpy catalog datetime in which
           # detections are excluded (total window length = 2*rpwi)

# --- Which data to clean and where files live ---
year = 2019  # year of detections to clean
homedir = '/home/smocz/redpy_expand_new_files/'  # home directory / directory new files are saved to
datadir = '/data/wsd01/HOOD_data/UW/' + str(year) + '/'  # directory the data is read from

Read the REDpy Catalogs and Volcano Metadata

In [None]:
def _read_shifted_catalog(path, offset):
    """Read a REDpy catalog CSV and add `offset` to every value in its 'Clustered' column."""
    shifted = pd.read_csv(path)
    shifted['Clustered'] = shifted['Clustered'] + offset
    return shifted


# Volcanoes described by a single REDpy catalog file
Baker = pd.read_csv('Baker_catalog.csv')
Hood = pd.read_csv('Hood_catalog.csv')

# St Helens has three catalogs (main, borehole, local). The cluster numbers of
# the borehole and local catalogs are shifted so they stay distinguishable
# after the catalogs are combined:
#   Cluster 0 in Helens_Borehole becomes Cluster 2000 in St_Helens
#   Cluster 0 in Helens_Local becomes Cluster 3000 in St_Helens
St_Helens = pd.read_csv('MountStHelens_catalog.csv')
Helens_Borehole = _read_shifted_catalog('MSHborehole_catalog.csv', 2000)
Helens_Local = _read_shifted_catalog('MSHlocal_catalog.csv', 3000)

# From here on, St_Helens gives access to all three St Helens catalogs
St_Helens = pd.concat([St_Helens, Helens_Borehole, Helens_Local])

Newberry = pd.read_csv('Newberry_catalog.csv')
Rainier = pd.read_csv('Rainier_catalog.csv')

# Metadata file, read into a dataframe of labels
volc_md = pd.read_csv('Volcano_Metadata.csv')
# read metadata file to create dataframe of labels

Use Volcano Metadata to Create Lists of Stations for Each Volcano

In [None]:
def _stations_of(volcano_name):
    """Return, as a list, the 'Station' values of the volc_md rows whose 'Volcano_Name' is `volcano_name`."""
    rows = volc_md[volc_md['Volcano_Name'] == volcano_name]
    return rows['Station'].values.tolist()


# One station list per volcano, keyed by the name used in the metadata file
Baker_sta = _stations_of('Mt_Baker')
Hood_sta = _stations_of('Mt_Hood')
St_Helens_sta = _stations_of('Mt_St_Helens')
Newberry_sta = _stations_of('Newberry')
Rainier_sta = _stations_of('Mt_Rainier')

Create Lists of Volcano Information

In [None]:
# One row per volcano: (name, catalog dataframe, stations connected to it).
# The row order fixes the index (0, 1, 2, 3, 4) under which a volcano is found
# in each of the three parallel lists built below.
_volcano_table = [
    ('Baker', Baker, Baker_sta),
    ('Hood', Hood, Hood_sta),
    ('Newberry', Newberry, Newberry_sta),
    ('Rainier', Rainier, Rainier_sta),
    ('St_Helens', St_Helens, St_Helens_sta),
]

volc_list = [entry[1] for entry in _volcano_table]        # dataframes, one per volcano
volc_list_names = [entry[0] for entry in _volcano_table]  # names, one per volcano
volc_sta = [entry[2] for entry in _volcano_table]         # station lists, one per volcano

### Sort Detections - Jul 28, 2022

Updated Jul 28, 2022; not tested yet. Test on the Hood 2019 detections from siletzia.

In [None]:
# Clean the raw detections of one volcano, one station at a time.
#
# For every row of '<volcano>_<station>_<year>_detections.csv' the waveform
# around the detection time is loaded, an STA/LTA trigger is picked on it, and
# the detection is appended to '<volcano>_<station>_<year>_clean_detections.csv'
# only if
#   * a trigger could be picked,
#   * its SNR is at least 7 dB, and
#   * it is not within rpwi seconds of an event that the REDpy catalog already
#     lists for the same cluster.
#
# make separate versions for each volcano
# for vv,v in enumerate(volc_sta):
vv = 0  # position of the volcano in volc_list / volc_list_names / volc_sta
v = volc_sta[vv]  # stations of that volcano (index 0 is Baker_sta)
# REDpy catalog of the volcano being cleaned. It is taken from volc_list so it
# always matches vv, and it is looked up once instead of re-reading a CSV for
# every detection.
catalog = volc_list[vv]

# The original cell stopped after the first detection that reached the save
# step and after the first cluster of each station. That looks like a leftover
# from testing, so it stays the default here but is switchable: set this to
# False to process every detection of every cluster.
stop_after_first = True

# Template names end with the cluster number (e.g. '...rpho16'). Anchoring the
# match at the end of the name keeps digits elsewhere in the name out of the
# cluster id and keeps cluster 6 from matching a name that ends in 16.
trailing_number = re.compile(r'\d+$')

for sta in v:
    file_prefix = homedir+'detections/'+volc_list_names[vv]+'_'+sta+'_'+str(year)
    try:
        read = pd.read_csv(file_prefix+'_detections.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print('No detections for',sta)
        continue

    # Start a fresh output file that holds only the header row
    clean_path = file_prefix+'_clean_detections.csv'
    with open(clean_path, 'w', newline='') as file:
        csv.writer(file).writerow(
            ["Template_Name","Detection_Time", "Trigger_on", "Trigger_off","SNR"])

    # Map every template name to its cluster number, like {'...rpho6': 6, ...}
    cluster_of = {}
    for name in read['Template_Name'].unique():
        found = trailing_number.search(str(name))
        if found is None:
            print('No cluster number in template name',name)
            continue
        cluster_of[name] = int(found.group())

    # Clusters in order from least to greatest, without duplicates
    for cl in sorted(set(cluster_of.values())):
        # All detections made by a template of this cluster
        cl_templates = [name for name, number in cluster_of.items() if number == cl]
        hits = read[read['Template_Name'].isin(cl_templates)]

        # Datetimes the REDpy catalog already holds for this cluster. The
        # cluster number is compared as an int, like the 'Clustered' column.
        # NOTE(review): the borehole/local St Helens clusters were offset by
        # 2000/3000 when the catalogs were combined - confirm that the
        # template names for those catalogs carry the same offset.
        rpdatetimes = [UTCDateTime(r)
                       for r in catalog[catalog['Clustered'] == cl]['datetime']]

        for ii,(template,i) in enumerate(zip(hits['Template_Name'],
                                             hits['Detection_Time'])):
            utct = UTCDateTime(i)
            stt = utct-tb
            et = utct+ta

            # obspy.read() takes one path per call (a second positional
            # argument is the file format) and returns ObsPy's example stream
            # when it is called without a path, so the files are read one by
            # one and detections without any data file are skipped.
            day_files = glob(datadir+str(utct.julday).zfill(3)+'/'+sta+'.*.'+str(year)+'.*')
            if not day_files:
                continue
            st = obspy.Stream()
            for day_file in day_files:
                st += obspy.read(day_file)

            # select() returns a new stream, so its result has to be kept
            st = st.select(component="Z") #Use only the Z Component
            st.filter(type='bandpass',freqmin=fqmin,freqmax=fqmax)
            st.detrend(type='demean')
            st.resample(fs)
            st.trim(starttime=stt,endtime=et)
            st.merge(fill_value = 0)
            if len(st)==0: continue

            # classic STA/LTA. This stays best-effort: any failure is
            # reported and the detection is skipped.
            try:
                cft = classic_sta_lta(st[0].data, int(nsta * fs), int(nlta * fs))
                print('-------------')
                print('detection: '+str(ii),'cluster id: '+str(cl))
                plot_trigger(st[0], cft, thr_on, thr_off)
                on_off = np.array(trigger_onset(cft, thr_on, thr_off))
            except Exception as err:
                print('NOT FOUND',err)
                continue
            if on_off.size == 0:
                print('NOT FOUND') #no signal can be found, skip the rest of the loop
                continue

            # Trigger on and off times in seconds after the detection time,
            # rounded to 4 decimal places. When the trigger fires more than
            # once, the first on/off pair is used.
            trig_on = round(float(on_off[0, 0]) / fs - tb,4)
            trig_off = round(float(on_off[0, 1]) / fs - tb,4)
            print('Trigger on',trig_on,'seconds after detect time')
            print('Trigger off',trig_off,'seconds after detect time')

            signal_window = st[0].copy()
            noise_window = st[0].copy()
            #utct+trig_on-0.5 to include lead up to the signal
            signal_window.trim(starttime=utct+trig_on-0.5,endtime=utct+trig_off)
            noise_window.trim(starttime=utct-10,endtime=utct)
            if len(signal_window.data)==0 or len(noise_window.data)==0:
                continue #nothing to measure the SNR on

            noise_level = np.percentile(np.abs(noise_window.data),pr)
            if noise_level == 0:
                continue #SNR is undefined without noise, e.g. a zero-filled gap
            signal_level = np.percentile(np.abs(signal_window.data),pr)
            snr = 20 * np.log10(signal_level / noise_level)
            if snr<7.: continue #if SNR is too low, skip saving it

            # Skip detections within rpwi seconds of a REDpy catalog datetime
            # of the same cluster
            if any(abs(utct-r) < rpwi for r in rpdatetimes):
                print('Overlap with REDpy detections')
            else:
                # Save the template that actually made this detection
                row = [template,i,trig_on,trig_off,snr]
                print(row)
                # Append row by row so that finished work survives a crash
                with open(clean_path, 'a', newline='') as file:
                    csv.writer(file).writerow(row)

            if stop_after_first:
                break
        if stop_after_first:
            break