


# Surface Event Analysis
###### This notebook analyzes surface event waveforms and calculates location, directivity, and velocity
###### Francesca Skene
###### fskene@uw.edu
###### Created: 7/22/22

Import Modules

In [None]:
import obspy
import matplotlib.pyplot as plt
import numpy as np
from obspy.core import UTCDateTime
import pandas as pd
from obspy.clients.fdsn.client import Client
client2 = Client("IRIS")
from obspy.geodetics import *
import requests
import glob
import sys
sys.path.append("/data/wsd01/pnwstore/")
from obspy.signal.cross_correlation import *
from mpl_toolkits import mplot3d
import scipy


from scipy import optimize
from scipy.optimize import curve_fit
from pnwstore.mseed import WaveformClient
from obspy.core.utcdatetime import UTCDateTime
client = WaveformClient()


Parameters

In [None]:
# --- Time span of the catalogue that is analysed ---
t_beginning = UTCDateTime(2001,1,1,0,0,0)
t_end = UTCDateTime(2021,12,31,23,59)

# --- Waveform window requested around each pick ---
t_before = 120 # seconds of data before the pick time
t_after = 120 # seconds of data after the pick time
fs = 40 # common sampling rate (Hz) that every trace is resampled to

# --- Signal-to-noise screening ---
window = 30 # length (s) of the signal and noise windows
pr = 98 # percentile of the absolute amplitude used as the signal/noise level
thr = 7 # minimum SNR (dB, from 20*log10 of the amplitude ratio) for a trace to be kept

# --- Spectral analysis band (Hz) used for the PSD and characteristic frequency ---
low_cut = 2
high_cut = 8

# --- Geometry and physics ---
station_distance_threshold = 25 # NOTE(review): units not shown here, presumably km - confirm
az_thr = 2 # stations closer than this to the volcano centre are left out of the azimuth fit
           # (compared against the 'Distance_from_volc' column; units presumably km - confirm)
v_s = 1000 # shear wave velocity at the surface (m/s)
pi = np.pi

## Define Functions

This function cross-correlates waveform envelopes against a reference envelope to calculate pick times

In [None]:
def pick_time(ref_env, data_env_dict, starttime=None):
    """Estimate a station's pick time by cross-correlating envelopes.

    Parameters
    ----------
    ref_env : array-like
        Smoothed envelope of the reference station.
    data_env_dict : array-like
        Smoothed envelope of the station being picked. Despite the name this
        is a single envelope array, not a dict.
    starttime : UTCDateTime, optional
        Start time of the waveform window the envelopes were cut from.
        When omitted, the function falls back to ``tr.stats.starttime`` of the
        notebook-level loop variable ``tr`` (the original behaviour). Pass it
        explicitly to avoid depending on whichever trace was iterated last.

    Returns
    -------
    UTCDateTime
        ``starttime + t_before`` moved by the lag of the cross-correlation
        maximum, searched within +/- 5 s.
    """
    if starttime is None:
        # Backward-compatible fallback on hidden notebook state: `tr` only
        # exists after a `for tr in st` loop has run in another cell.
        starttime = tr.stats.starttime

    max_lag = int(5*fs)  # search +/- 5 s around zero lag
    xcor = obspy.signal.cross_correlation.correlate(data_env_dict, ref_env, max_lag)
    index = np.argmax(xcor)

    # Index `max_lag` of the correlation is zero lag.
    # NOTE(review): sign convention kept from the original - confirm that a
    # later arrival at this station gives a later pick.
    shift = 5*fs - index

    # p is the new phase pick for this station
    p = (starttime + t_before) + shift/fs
    return p

This function demeans, tapers, and resamples every trace in a stream to a common sampling rate (40 Hz in this notebook)

In [None]:
def resample(st, fs):
    """Demean, taper and resample every trace of ``st`` in place.

    Each trace is demeaned, tapered with a 5 % taper and resampled to ``fs``,
    in that order. The same (modified) ``st`` object is returned.
    """
    def _condition(trace):
        # Remove the mean and taper the ends before changing the sampling rate.
        trace.detrend(type='demean')
        trace.taper(0.05)
        trace.resample(fs)

    for trace in st:
        _condition(trace)
    return st

Cosine model that is fitted to characteristic frequency as a function of station azimuth

In [None]:
  def test_func(theta, a,theta0, c):
                    return a * np.cos(theta-theta0)+c

##  Import and organize metadata

### 1. Volcano Data (network and station, labeled with volcano name)

In [None]:
# Station metadata: all stations within 50 km of each volcano, with the latitude,
# longitude and elevation of each station.
# Later cells read the columns 'Station', 'Network', 'Volcano_Name', 'Latitude',
# 'Longitude', 'Elevation' and 'Distance_from_volc' from this table.
# The path is relative to the notebook's working directory.
df = pd.read_csv('Volcano_Metadata_50km.csv')

### 2. Volcano Names and Locations

In [None]:
# Volcano name -> [latitude, longitude, third value] (data obtained from www.lat-long.com).
# The slope cell reads the third value as the top of each volcano.
# Key order matters: the slope cell pairs its lists with the first entries by position.
#
# Mt St Helens uses the middle of the dome instead of the highest point
# (the peak is [46.1912, -122.1944, 2549]).
# NOTE: moving from the peak to the middle of the dome changed the look of the
# fitted sine curve, but the flow direction stayed more or less the same.
volc_lat_lon = {
    'Mt_Rainier': [46.8528857, -121.7603744, 4392.5],
    'Mt_Adams': [46.202621, -121.4906384, 3743.2],
    'Mt_Baker': [48.7773426,  -121.8132008, 3287.6],
    'Mt_St_Helens': [46.200472222222224,-122.18883611111112,2549],
    'Glacier_Peak': [48.1112273, -121.1139922, 3213],
    'Crater_Lake': [42.907745, -122.143494, 1883],
    'Mt_Hood': [45.373221, -121.696509, 3428.7],
    'Newberry': [43.7220653, -121.2344654, 2435],
}

### 3. Surface Event Data from PNSN

In [None]:
#"su" is the label for surface event

# PNSN pick catalogue, one row per pick.
df3 = pd.read_csv('../surface_events/PNSN_Pick_Label.csv')

# Labels of every pick, surface events or not.
label = df3['Label'].values.tolist()

# Select the surface-event rows once and pull each column from that subset,
# so the five lists below stay aligned index by index.
su_rows = df3[df3['Label'] == 'su']
surface_label = su_rows['Label'].values.tolist()
net = su_rows['Network'].values.tolist()
sta = su_rows['Station'].values.tolist()
evt_id = su_rows['Event_ID'].values.tolist()
start_time = su_rows['Picktime'].values.tolist()

# Pick time of the event examined in the weighting section below.
print((start_time[7234]))

## Calculating seasonal occurrence of events

In [None]:
# Monthly histogram of surface events for a volcano.
#
# Changes from the first version of this cell:
#   * events are binned by calendar month; the fixed 30-day windows dropped
#     picks on the 31st and ran February into March;
#   * the station -> volcano lookup is built once instead of filtering the
#     metadata table for every pick inside a bare `except`;
#   * the first pick is no longer compared with the last one (evt_id[-1]);
#   * the font size is set before the figure is drawn so it takes effect.

# Station -> volcano name; the first row wins, as `.values[0]` did.
station_to_volcano = {}
for station_code, volcano_name in zip(df['Station'], df['Volcano_Name']):
    station_to_volcano.setdefault(station_code, volcano_name)

plt.rcParams.update({'font.size': 30})

for name in volc_lat_lon:
    # One entry per event: consecutive picks with the same event ID are
    # collapsed, keeping the first pick whose station belongs to this volcano.
    events = []
    starttimes = []
    for i in range(len(start_time)):
        if station_to_volcano.get(sta[i], 'unknown') != name:
            continue
        if i > 0 and evt_id[i] == evt_id[i-1]:
            continue
        events.append(evt_id[i])
        starttimes.append(UTCDateTime(start_time[i]))

    # Every "year_month" bin starts at zero so empty months still get a bar.
    # NOTE(review): range(2001, 2021) stops at 2020 although t_end is in 2021 -
    # confirm whether 2021 should be included.
    num_events = {}
    for year in range(2001, 2021):
        for month in range(1, 13):
            num_events[str(year)+"_"+str(month)] = 0

    for pick in starttimes:
        period = str(pick.year)+"_"+str(pick.month)
        if period in num_events:
            num_events[period] += 1

    periods = list(num_events.keys())
    num_of_events = list(num_events.values())

    # May through September in red, every other month in blue.
    bar_colors = ['r' if 5 <= int(p.split('_')[1]) <= 9 else 'b' for p in periods]

    fig = plt.figure(figsize = (60, 10))
    plt.bar(periods, num_of_events, color = bar_colors, width = 0.4)
    plt.xlabel("year_month")
    plt.xticks(np.arange(0, len(periods), 12)) # one tick per year (every January)
    plt.ylabel("No. of events")
    plt.title("Number of surface events per month at " + str(name))
    plt.show()
    # NOTE(review): only the first volcano in volc_lat_lon is plotted; remove
    # this break to produce one figure per volcano.
    break

In [None]:
# Average slope of each volcano (very tentative): (top - h2) / radius.
# h2 and radius only have three entries, so only the first three volcanoes of
# volc_lat_lon (in insertion order) get a slope.
h1 = [location[2] for location in volc_lat_lon.values()] #top of each volcano

h2 = [1538.935, 1822.704, 1343.863]

radius = [11265.41, 4828.032, 6437.376]

slope = [(top - base) / reach for top, base, reach in zip(h1, h2, radius)]
print(slope)

### Time Series for the XD temporary network


## Calculating directivity and velocity of events

In [None]:
# Directivity and velocity of individual surface events.
#
# For each selected pick this cell
#   1. downloads the waveforms of every station listed for the volcano,
#   2. keeps vertical channels whose SNR exceeds `thr` and plots them with
#      their smoothed envelopes,
#   3. cross-correlates the envelopes to get pick times,
#   4. measures the peak PSD frequency of each station in [low_cut, high_cut],
#   5. fits a cosine to peak frequency versus station azimuth, and
#   6. plots the fitted direction and prints a velocity from the frequency shift.
#
# Fixes relative to the first version of this cell:
#   * `net != 'CN'` compared the whole list with a string (always True);
#   * traces were removed from `st` while iterating over it, which skips the
#     trace after every removal - both loops now iterate over a copy;
#   * bare `except` clauses are narrowed or replaced by explicit checks;
#   * `a` doubled as a list index and a string sentinel; stations without a
#     spectrum are now tracked explicitly so all columns stay aligned;
#   * an event whose reference station failed the SNR screen raised KeyError;
#   * the fitted amplitude may be negative, in which case theta0 points at the
#     frequency minimum; the plotted direction is normalised to the maximum;
#   * the colour table is indexed modulo its length (more than 14 stations);
#   * the legend of the fit panel is drawn after the fitted curve is plotted.
plt.rcParams.update({'font.size': 10})
for n in range(249,260):
    event_ID = str(evt_id[n])
    t = UTCDateTime(start_time[n])

    # Skip picks on the CN network and repeated picks of the same event.
    if net[n] == 'CN' or evt_id[n] == evt_id[n-1]:
        continue
    if not (t_beginning <= t <= t_end):
        continue

    reference = str(net[n]+'.'+sta[n])
    try:
        associated_volcano = df[df['Station']== sta[n]]['Volcano_Name'].values[0]
    except IndexError:
        # The pick's station is not in the volcano metadata table.
        continue

    # get info for the stations listed for the volcano the event occurred at
    volcano_rows = df[df['Volcano_Name'] == associated_volcano]
    stations = volcano_rows['Station'].values.tolist()
    networks = volcano_rows['Network'].values.tolist()
    latitudes = volcano_rows['Latitude'].values.tolist()
    longitudes = volcano_rows['Longitude'].values.tolist()
    elevations = volcano_rows['Elevation'].values.tolist()
    volc_lat = volc_lat_lon[associated_volcano][0]
    volc_lon = volc_lat_lon[associated_volcano][1]

    # get all waveforms for one event
    bulk = [[networks[m], stations[m], '*', '*', t-t_before, t+t_after]
            for m in range(len(networks))]
    st = client.get_waveforms_bulk(bulk)

    # Keep BH/EH/HH channels that cover (almost) the whole 240 s window.
    # Iterate over a copy: removing from `st` while iterating it skips traces.
    # The loop variable must stay named `tr` because pick_time() reads it.
    for tr in list(st):
        if tr.stats.channel[0:2] not in ('BH', 'EH', 'HH'):
            st.remove(tr)
            continue
        if len(tr.data)/tr.stats.sampling_rate < 239.9:
            st.remove(tr)

    # resampling the data to 40Hz for each trace
    st = resample(st,fs)

    # Plotting all traces for one event with channel z, SNR>7, and bandpassed between 2-12Hz
    snr, SNR, stas, data_env_dict = [], [], [],{}

    fig = plt.figure(figsize = (20,50), dpi=80)
    plt.subplots_adjust(hspace = .4)
    fig.suptitle('evtID:UW'+ event_ID+associated_volcano)

    ax1 = plt.subplot(4,1,1)
    iplot = 0
    smooth_length = 2*fs
    for g,x in enumerate(list(st)):
        times = x.times()
        x.detrend(type = 'demean')
        x.filter('bandpass',freqmin=2.0,freqmax=12.0,corners=2,zerophase=True)
        network = x.stats.network
        station = x.stats.station
        cha = x.stats.channel
        starttime = x.stats.starttime

        signal_window = x.copy()
        noise_window = x.copy()
        #TODO: fix signal window to be around the max amplitude
        signal_window.trim(starttime+t_before-1, starttime+t_before-1+window)
        noise_window.trim(starttime-window+t_before-10, starttime+t_before-10)

        # SNR in dB from the `pr`-th percentile of the absolute amplitudes
        signal_level = np.percentile(np.abs(signal_window.data),pr)
        noise_level = np.percentile(np.abs(noise_window.data),pr)
        snr.append(20 * np.log(signal_level / noise_level)/np.log(10))

        if cha[-1] == 'Z' and snr[g]>thr:
            # enveloping the data to calculate picktimes later on
            data_envelope = obspy.signal.filter.envelope(x.data[110*fs:140*fs])
            data_envelope /= np.max(data_envelope)
            data_envelope += iplot*1.5  # vertical offset so traces stack in the plot
            data_envelope = obspy.signal.util.smooth(data_envelope, smooth_length)
            data_env_dict[network+'.'+station]= data_envelope

            ax1.plot(times[100*fs:175*fs],x.data[100*fs:175*fs]/np.max(np.abs(x.data))+iplot*1.5)
            ax1.plot(times[110*fs:140*fs], data_envelope, color = 'k')
            ax1.set_xlabel('time (seconds)')
            ax1.set_xlim([100,175])
            ax1.text(times[175*fs], iplot*1.5, str(snr[g]))
            ax1.text(times[100*fs], iplot*1.5, x.stats.station)
            iplot = iplot+1

            stas.append(x.stats.station)
            SNR.append(snr[g])
        else:
            st.remove(x) #only want to work with z component channels with high SNR

    if len(st) < 4: #want events with enough waveforms to work with
        continue
    if reference not in data_env_dict:
        # The station that carried the catalogue pick failed the Z/SNR screen,
        # so there is no reference envelope to correlate against.
        print('evtID:UW'+event_ID+': reference station '+reference+' has no usable envelope, skipping')
        continue

    dist, pick_times, lats, lons, elevs, r, theta, Sta = ([] for i in range(8))
    for s in range(len(stas)):
        dist.append(df[df['Station'] == stas[s]]['Distance_from_volc'].values[0])

    for key in data_env_dict:
        p = pick_time(data_env_dict[reference], data_env_dict[key]) #calculate picktimes
        pick_times.append(p)

    for i, ii in enumerate(stas):
        station_index = stations.index(ii)
        lats.append(latitudes[station_index])
        lons.append(longitudes[station_index])
        elevs.append(elevations[station_index])
        # distance and azimuth of each station with respect to the middle of the volcano
        u,b,c = gps2dist_azimuth(volc_lat, volc_lon, lats[i], lons[i],
                                 a=6378137.0, f=0.0033528106647474805)
        r.append(u)
        theta.append(b)
        Sta.append(stas[i])

    # Get peak frequency of each event
    ax2 = plt.subplot(4,1,2)
    ax2.set_title('Power Spectral Density')
    # read and preprocess data
    st.filter("bandpass",freqmin=low_cut,freqmax=high_cut)
    st.taper(max_percentage=0.01,max_length=20)
    st.trim(starttime=min(pick_times),endtime=min(pick_times)+20)

    # make plot of spectra
    colors = list(plt.cm.tab10(np.arange(10))) + ["crimson", "indigo", "powderblue", "lime"]
    char_freq, spectra_list, weight= [],[],[]
    used = []  # indices into Sta/theta/dist/SNR that produced a spectrum
    for i in range(len(Sta)):
        selected = st.select(station=Sta[i],component="Z")
        if len(selected) == 0:
            continue
        trace_data = selected[0].data*100
        f,psd=scipy.signal.welch(trace_data,fs=st[0].stats.sampling_rate,nperseg=81,noverlap=1)
        # keep only the frequencies inside the filter band
        in_band = np.logical_and(f>low_cut, f<high_cut)
        f = f[in_band]
        psd = psd[in_band]
        # weighting the data by the spikiness of the PSD vs frequency graphs
        ratio = (np.mean(psd)/np.max(psd))
        weight.append(int(1/(ratio**2)*20))

        ax2.plot(f,psd,label=Sta[i],linewidth=2)
        spectra_list.append(psd)

        # Characteristic frequency = frequency of the PSD maximum.
        # (The PSD-weighted mean and the median were alternatives in an
        # earlier version; only the maximum is used.)
        char_freq_max = f[np.argmax(psd)]
        char_freq.append(char_freq_max)
        ax2.vlines(char_freq_max, ymin=0, ymax=max(psd), color=colors[i % len(colors)])
        used.append(i)

    if not used:
        continue

    ax2.set_xscale('log')
    ax2.set_yscale('log')
    ax2.set_xlabel('Frequency [Hz]')
    ax2.set_ylabel('PSD [$(mm/s)^2$/Hz]')
    ax2.legend()
    ax2.grid(True)

    # manipulating the data: one row per station that produced a spectrum
    table = {'azimuth':[theta[i] for i in used], 'freq':char_freq,
             'station':[Sta[i] for i in used], 'distance':[dist[i] for i in used],
             'weight':weight, 'SNR':[SNR[i] for i in used]}
    DF = pd.DataFrame(table, index = None)
    DF2 = DF.sort_values('azimuth')
    # drop stations closer than az_thr to the centre of the volcano
    DF3 = DF2[~(DF2['distance'] < az_thr)]
    y_data =  DF3["freq"].values.tolist()
    Sta2 = DF3["station"].values.tolist()
    dist2 = DF3["distance"].values.tolist()
    weight2 = DF3["weight"].values.tolist()
    x_data =  np.asarray(DF3["azimuth"].values.tolist())
    x_points = np.linspace(0,360, 100)

    if len(x_data) < 3:
        # test_func has three free parameters; fewer stations cannot constrain it.
        print('evtID:UW'+event_ID+': fewer than 3 stations left for the azimuth fit, skipping')
        continue

    ax3 = plt.subplot(4,1,3)
    ax3.set_title('Fitting Sin curve')
    ax3.set_ylabel('characteristic frequency(Hz)')
    ax3.set_xlabel('azimuth(degrees)')
    for i in range (0,len(Sta2)):
        ax3.scatter(x_data[i], y_data[i], s = (dist2[i]**2*10), label=Sta2[i])
    ax3.plot(x_data,y_data, '--', label='rawdata')

    # weighting the data: each point is repeated `weight` times in the fit
    weighted_x = np.repeat(x_data, weight2)
    weighted_y = np.repeat(y_data, weight2)

    # optimizing parameters to fit weighted data to test_function
    params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(weighted_x), weighted_y, p0=None)
    d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
    ax3.plot(x_points, d, label='Fitted function')
    ax3.legend(loc='best')

    # a*cos(theta-theta0) equals (-a)*cos(theta-theta0-pi): with a negative
    # amplitude theta0 marks the frequency minimum, so flip it by pi.
    # Assumes the flow direction is the azimuth of maximum frequency - TODO confirm.
    flow_azimuth = params[1]
    if params[0] < 0:
        flow_azimuth = flow_azimuth + pi
    flow_azimuth = flow_azimuth % (2*pi)

    len_r = int(max(r))
    rads = np.arange(0, (2 *pi), 0.01)

    ax4= plt.subplot(4,1,4, polar=True)
    ax4.set_theta_offset(pi/2)
    ax4.set_theta_direction(-1)
    for i in range(0,len(r)):
        ax4.plot(np.deg2rad(theta[i]),r[i], 'g.')
        ax4.text(np.deg2rad(theta[i]),r[i],stas[i])
    ax4.plot([flow_azimuth, flow_azimuth],[0, len_r], 'k-')  #plot the estimated direction of the event
    # circle marking the az_thr exclusion radius
    # NOTE(review): r comes from gps2dist_azimuth (metres) while az_thr is compared
    # with 'Distance_from_volc' - check that both use the same unit.
    ax4.plot(rads,[az_thr]*len(rads), 'b.', markersize = 2)

    # calculating velocity from the frequency shift
    fmax = max(d)
    fmin = min(d)
    v = v_s*((fmax-fmin)/(fmax+fmin))
    print(v,'m/s')

    #plt.savefig('evtID:UW'+ event_ID+associated_volcano+'.png')


## Weighting the data

In [None]:
plt.rcParams.update({'font.size': 10})
for n in range(7234,7235):
    event_ID = str(evt_id[n])
    t = UTCDateTime(start_time[n])
    if net != 'CN' and evt_id[n]!=evt_id[n-1]:
        if t_beginning<=t<=t_end:
            reference = str(net[n]+'.'+sta[n])
            print(reference)
            try:
                associated_volcano = df[df['Station']== sta[n]]['Volcano_Name'].values[0]
            except: 
                associated_volcano = 'unknown'

            if associated_volcano == 'unknown':
                pass
            else:
            #get info for stations within 50km of volcano that event ocurred at
                stations = df[df['Volcano_Name'] == associated_volcano]['Station'].values.tolist()
                networks = df[df['Volcano_Name'] == associated_volcano]['Network'].values.tolist()
                latitudes = df[df['Volcano_Name'] == associated_volcano]['Latitude'].values.tolist()
                longitudes = df[df['Volcano_Name'] == associated_volcano]['Longitude'].values.tolist()
                elevations = df[df['Volcano_Name']== associated_volcano]['Elevation'].values.tolist()
                volc_lat = volc_lat_lon[associated_volcano][0]
                volc_lon = volc_lat_lon[associated_volcano][1]
                
                
            # get all waveforms for one event
                bulk = [] 
                for m in range(0, len(networks)):
                    bulk.append([networks[m], stations[m], '*', '*', t-t_before, t+t_after])
                st = client.get_waveforms_bulk(bulk)
                for tr in st:
                    if tr.stats.channel[0:2] != 'BH' and tr.stats.channel[0:2] != 'EH' and tr.stats.channel[0:2] != 'HH':
                            st.remove(tr)
                            continue
                    if len(tr.data)/tr.stats.sampling_rate < 239.9:
                        st.remove(tr)
                
            #resampling the data to 40Hz for each trace
                st = resample(st,fs) 
                
            #Plotting all traces for one event with channel z, SNR>7, and bandpasses between 2-12Hz
                snr, SNR, stas, data_env_dict = [], [], [],{}
                
                fig = plt.figure(figsize = (20,50), dpi=80)
                plt.subplots_adjust(hspace = .4)
                fig.suptitle('evtID:UW'+ event_ID+associated_volcano)

                ax1 = plt.subplot(4,1,1)
                iplot = 0
                for g,x in enumerate(st):
                    t = x.times()
                    x.detrend(type = 'demean')
                    x.filter('bandpass',freqmin=2.0,freqmax=12.0,corners=2,zerophase=True)
                    network = x.stats.network
                    station = x.stats.station
                    cha = x.stats.channel
                    starttime = x.stats.starttime
                    smooth_length = 2*fs

                    signal_window = x.copy()
                    noise_window = x.copy()
                    #TODO: fix signal window to be around the max amplitude
                    signal_window.trim(starttime+t_before-1, starttime+t_before-1+window)
                    noise_window.trim(starttime-window+t_before-10, starttime+t_before-10)

                    snr.append(20 * np.log(np.percentile(np.abs(signal_window.data),pr) 
                                   / np.percentile(np.abs(noise_window.data),pr))/np.log(10))

                    if cha[-1] == 'Z' and snr[g]>thr:
                        #enveloping the data to calculate picktimes later on
                        data_envelope = obspy.signal.filter.envelope(x.data[110*fs:140*fs])
                        data_envelope /= np.max(data_envelope)
                        data_envelope += iplot*1.5
                        data_envelope = obspy.signal.util.smooth(data_envelope, smooth_length)
                        data_env_dict[network+'.'+station]= data_envelope

                        ax1.plot(t[100*fs:175*fs],x.data[100*fs:175*fs]/np.max(np.abs(x.data))+iplot*1.5)
                        ax1.plot(t[110*fs:140*fs], data_envelope, color = 'k')
                        ax1.set_xlabel('time (seconds)')
                        ax1.set_xlim([100,175])
                        plt.text(t[175*fs], iplot*1.5, str(snr[g]))
                        plt.text(t[100*fs], iplot*1.5, x.stats.station)
                        iplot = iplot+1

                        stas.append(x.stats.station)
                        SNR.append(snr[g])
                    else:
                        st.remove(x) #only want to work with z component channels with high SNR
                        
        
                if len(st)>=4: #want events with enough waveforms to work with
                    dist, pick_times, lats, lons, elevs, r, theta, Sta = ([] for i in range(8))
                    for s in range(0, len(stas)):
                        dist.append(df[df['Station'] == stas[s]]['Distance_from_volc'].values[0]) 

                    for key in data_env_dict:
                        p = pick_time(data_env_dict[reference], data_env_dict[key]) #calculate picktimes
                        pick_times.append(p)
                   
                    for i, ii in enumerate(stas):
                        a = stations.index(ii)
                        lats.append(latitudes[a])
                        lons.append(longitudes[a])
                        elevs.append(elevations[a])
                    #calculating azimuth for each station with respect to the middle of the volcano
                        lat2 = lats[i]
                        lon2 = lons[i]
                        lat1 = volc_lat
                        lon1 = volc_lon
                        u,b,c = (gps2dist_azimuth(lat1, lon1, lat2, lon2, a=6378137.0, f=0.0033528106647474805))
                        r.append(u)
                        theta.append(b)
                        Sta.append(stas[i])
                            
                    #Get peak frequency of each event
                    spectra_method = "welch"
                    char_freq_method = "mean"
                    # read and preprocess data
                    st.filter("bandpass",freqmin=low_cut,freqmax=high_cut)
                    st.taper(max_percentage=0.01,max_length=20)
                    st.trim(starttime=min(pick_times),endtime=min(pick_times)+20) 
                    
                    # make plot of spectra
                    colors = list(plt.cm.tab10(np.arange(10))) + ["crimson", "indigo", "powderblue", "lime"]
                    char_freq, spectra_list, weight= [],[],[]
                    for i in range(len(Sta)):
                        try:
                            data = st.select(station=Sta[i],component="Z")[0].data*100
                            a = 'stream is not empty'
                        except:
                            pass
                        if a == 'stream is not empty':
                            f,psd=scipy.signal.welch(data,fs=st[0].stats.sampling_rate,nperseg=81,noverlap=1)
                            #just get the indices of frequencies within the filter band
                            above_low_cut = [f>low_cut]
                            below_high_cut = [f<high_cut]
                            in_band = np.logical_and(above_low_cut,below_high_cut)[0]
                            f = f[in_band]
                            psd = psd[in_band]
                            #weighting the data by the spikiness of the PSD vs frequency graphs
                            ratio = (np.mean(psd)/np.max(psd))
                            weight.append(int(1/(ratio**2)*20))


                        # calculate characteristic frequency and report
                            char_freq_max = f[np.argmax(psd)]
                            char_freq_mean= np.sum(psd*f)/np.sum(psd)
                            char_freq.append(char_freq_max)
                            ymax=max(psd)
                            

                    if a == 'stream is not empty':
                        #manipulating the data
                        data = {'azimuth':theta, 'freq':char_freq, 'station':Sta, 'distance':dist, 'weight':weight, 'SNR':SNR}
                        DF = pd.DataFrame(data, index = None)
                        DF2 = DF.sort_values('azimuth')
                        print(DF2)
                        drops = []
                        for i in range(len(DF2)):
                            value = DF2.loc[i,'distance']
                            if value < az_thr:
                                drops.append(i)
                        DF3 = DF2.drop(drops)
                        print(DF3)
                        
                        y_data =  DF3["freq"].values.tolist()
                        #colors2 = DF2["color"].values.tolist()
                        Sta2 = DF3["station"].values.tolist()
                        dist2 = DF3["distance"].values.tolist()
                        weight2 = DF3["weight"].values.tolist()
                        SNR2 = DF3['SNR'].values.tolist()
                        x_data =  np.asarray(DF3["azimuth"].values.tolist())
                        x_points = np.linspace(0,360, 100)
                        
                    #create figure showing effects of different weights on the data
                        fig = plt.figure(figsize = (15,23), dpi=80)
                        ax1 = plt.subplot(4,2,1)
                        ax1.set_ylabel('characteristic frequency(Hz)')
                        ax1.set_xlabel('azimuth(degrees)')
                        for i in range (0,len(Sta2)):
                            ax1.scatter(x_data[i], y_data[i], label=Sta2[i])
                        ax1.plot(x_data,y_data, '--', label='rawdata')
                        ax1.legend(loc='best')
                        #optimizing parameters to fit data to test_function
                        params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(x_data), y_data, p0=None)
                        d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
                        fmax = max(d)
                        fmin = min(d)
                        v = v_s*((fmax-fmin)/(fmax+fmin))
                        ax1.set_title('Original'+str(v)+'m/s')
                        ax1.plot(x_points, d, label='Fitted function')
                        
                        len_r = int(max(r))
                        line_length = np.linspace(0,len_r,len_r+1)
                        rads = np.arange(0, (2 *pi), 0.01)
                        direction=[]
                        direction = [(params[1]) for i in range(len_r+1)]
                        
                        ax5= plt.subplot(4,2,5, polar=True)
                        ax5.set_title('Original'+str(v)+'m/s')
                        ax5.set_theta_offset(pi/2)
                        ax5.set_theta_direction(-1)
                        for i in range(0,len(r)):
                            ax5.plot(np.deg2rad(theta[i]),r[i], 'g.')
                            ax5.text(np.deg2rad(theta[i]),r[i],stas[i]) 
                        ax5.plot(direction,line_length, 'k-')  #plot the estimated direction of the event
                        for rad in rads:
                            ax5.plot(rad,az_thr, 'b.', markersize = 2)

                        
                        ax2 = plt.subplot(4,2,2)
                        ax2.set_ylabel('characteristic frequency(Hz)')
                        ax2.set_xlabel('azimuth(degrees)')
                        for i in range (0,len(Sta2)):
                            ax2.scatter(x_data[i], y_data[i], s = (weight2[i]**2)/35, label=Sta2[i])
                        ax2.plot(x_data,y_data, '--', label='rawdata')
                        ax2.legend(loc='best')
                        #weighting the data
                        tempx, tempy = [],[]
                        for i,ii in enumerate(x_data):
                            tempx.append([])
                            tempx[i].append([ii for l in range(0,weight2[i])])
                            tempy.append([])
                            tempy[i].append([y_data[i] for l in range(0,weight2[i])])   
                        weighted_x = sum(sum(tempx, []),[])
                        weighted_y = sum(sum(tempy, []),[])
                        #optimizing parameters to fit weighted data to test_function
                        params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(weighted_x), weighted_y, p0=None)
                        d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
                        fmax = max(d)
                        fmin = min(d)
                        v = v_s*((fmax-fmin)/(fmax+fmin))
                        ax2.set_title('Spikiness'+str(v)+'m/s')
                        ax2.plot(x_points, d, label='Fitted function')
                        
                        direction=[]
                        direction = [(params[1]) for i in range(len_r+1)]
                        
                        ax6= plt.subplot(4,2,6, polar=True)
                        ax6.set_title('Spikiness'+str(v)+'m/s')
                        ax6.set_theta_offset(pi/2)
                        ax6.set_theta_direction(-1)
                        for i in range(0,len(r)):
                            ax6.plot(np.deg2rad(theta[i]),r[i], 'g.')
                            ax6.text(np.deg2rad(theta[i]),r[i],stas[i]) 
                        ax6.plot(direction,line_length, 'k-')  #plot the estimated direction of the event
                        for rad in rads:
                            ax6.plot(rad,az_thr, 'b.', markersize = 2)
                        
                        ax3 = plt.subplot(4,2,3)
                        ax3.set_ylabel('characteristic frequency(Hz)')
                        ax3.set_xlabel('azimuth(degrees)')
                        ax3.plot(x_data,y_data, '--', label='rawdata')
                        ax3.legend(loc='best')
                        #weighting the data
                        tempx, tempy, weight = [],[],[]
                        for i,ii in enumerate(x_data):
                            weight.append(int(1/(dist2[i]**2)*1000))
                            tempx.append([])
                            tempx[i].append([ii for l in range(0,weight[i])])
                            tempy.append([])
                            tempy[i].append([y_data[i] for l in range(0,weight[i])])   
                        weighted_x = sum(sum(tempx, []),[])
                        weighted_y = sum(sum(tempy, []),[])
                        for i in range (0,len(Sta2)):
                            ax3.scatter(x_data[i], y_data[i], s = (weight[i]**2/10),label=Sta2[i])
                        #optimizing parameters to fit weighted data to test_function
                        params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(weighted_x), weighted_y, p0=None)
                        d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
                        fmax = max(d)
                        fmin = min(d)
                        v = v_s*((fmax-fmin)/(fmax+fmin))
                        ax3.set_title('Distance from volcano'+str(v)+'m/s')
                        ax3.plot(x_points, d, label='Fitted function')
                        
                        direction=[]
                        direction = [(params[1]) for i in range(len_r+1)]
                        
                        ax7= plt.subplot(4,2,7, polar=True)
                        ax7.set_title('Distance from volcano'+str(v)+'m/s')
                        ax7.set_theta_offset(pi/2)
                        ax7.set_theta_direction(-1)
                        for i in range(0,len(r)):
                            ax7.plot(np.deg2rad(theta[i]),r[i], 'g.')
                            ax7.text(np.deg2rad(theta[i]),r[i],stas[i]) 
                        ax7.plot(direction,line_length, 'k-')  #plot the estimated direction of the event
                        for rad in rads:
                            ax7.plot(rad,az_thr, 'b.', markersize = 2)

                        ax4 = plt.subplot(4,2,4)
                        ax4.set_ylabel('characteristic frequency(Hz)')
                        ax4.set_xlabel('azimuth(degrees)')
                        ax4.plot(x_data,y_data, '--', label='rawdata')
                        ax4.legend(loc='best')
                        #weighting the data
                        tempx, tempy, weight = [],[],[]
                        for i,ii in enumerate(x_data):
                            weight.append(int(SNR[i]))
                            tempx.append([])
                            tempx[i].append([ii for l in range(0,weight[i])])
                            tempy.append([])
                            tempy[i].append([y_data[i] for l in range(0,weight[i])])   
                        weighted_x = sum(sum(tempx, []),[])
                        weighted_y = sum(sum(tempy, []),[])
                        for i in range (0,len(Sta2)):
                            ax4.scatter(x_data[i], y_data[i], s = (weight[i]**2),label=Sta2[i])
                        #optimizing parameters to fit weighted data to test_function
                        params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(weighted_x), weighted_y, p0=None)
                        d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
                        fmax = max(d)
                        fmin = min(d)
                        v = v_s*((fmax-fmin)/(fmax+fmin))
                        ax4.set_title('SNR'+str(v)+'m/s')
                        ax4.plot(x_points, d, label='Fitted function')
                        
                        direction=[]
                        direction = [(params[1]) for i in range(len_r+1)]
                        
                        ax8= plt.subplot(4,2,8, polar=True)
                        ax8.set_title('SNR'+str(v)+'m/s')
                        ax8.set_theta_offset(pi/2)
                        ax8.set_theta_direction(-1)
                        for i in range(0,len(r)):
                            ax8.plot(np.deg2rad(theta[i]),r[i], 'g.')
                            ax8.text(np.deg2rad(theta[i]),r[i],stas[i]) 
                        ax8.plot(direction,line_length, 'k-')  #plot the estimated direction of the event
                        for rad in rads:
                            ax8.plot(rad,az_thr, 'b.', markersize = 2)

                        
                        plt.savefig('evtID:UW'+ event_ID+associated_volcano+'weights.png')

## Grid Searching

In [None]:
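# Grid-search notes (planning only; nothing is implemented in this cell yet)
#
# Hyperparameters / inputs:
#   lats  (the ones used for travel_time_table?)
#   lons
#   d = pick_times
#   t = origin time
#
# Wanted:
#   X = (lat, lon, 0), the location of the event
#
# Relation between them:
#   d = t + T(X) + e
#   (presumably T(X) is the travel time from X and e is the error term -- TODO confirm)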

## 2014 events during temporary XD stations

# Mount St Helens Event 5/14/14

In [None]:
#Read in Stream Data
# Downloads t_before/t_after seconds of waveforms around the event time for every
# station listed in df for this volcano, keeps only BH/EH/HH channels with a
# (nearly) complete record, resamples them to fs and discards station ASR.
associated_volcano = 'Mt_St_Helens'
# NOTE(review): the section heading above says 5/14/14 but this time is 2021-02-03 -- confirm which is intended.
t = UTCDateTime(2021,2,3,5,43)
event_ID = '??'
reference = 'CC.SEP' #NET.STA key of the envelope every other envelope is cross-correlated against

#station metadata for the chosen volcano (filter df once instead of once per column)
volcano_rows = df[df['Volcano_Name'] == associated_volcano]
networks = volcano_rows['Network'].values.tolist()
stations = volcano_rows['Station'].values.tolist()
latitudes = volcano_rows['Latitude'].values.tolist()
longitudes = volcano_rows['Longitude'].values.tolist()
elevations = volcano_rows['Elevation'].values.tolist()
volc_lat = 46.200472222222224 #volc_lat_lon[associated_volcano][0]
volc_lon =-122.18883611111112 #volc_lat_lon[associated_volcano][1]

#one bulk request row per station: any location code, any channel
bulk = [[net, sta, '*', '*', t-t_before, t+t_after]
        for net, sta in zip(networks, stations)]
st = client2.get_waveforms_bulk(bulk)
print(st.__str__(extended=True))

#shortest record (in seconds) that still counts as covering the whole window
min_duration = (t_before + t_after) - 0.1

# Iterate over a snapshot of the stream: removing traces from `st` while looping
# over `st` itself makes the loop skip the trace that follows each removed one.
# NOTE: the loop variable must stay named `tr` -- pick_time() reads the global
# `tr` to get the window start time.
for tr in list(st):
    if tr.stats.channel[0:2] not in ('BH', 'EH', 'HH'):
        st.remove(tr)
        continue
    if len(tr.data)/tr.stats.sampling_rate < min_duration:
        st.remove(tr)
st = resample(st,fs)

#drop every trace of station ASR (collect first, then remove, so none is skipped)
# NOTE(review): the reason ASR is excluded is not recorded here -- TODO document.
for asr_trace in [trace for trace in st if trace.stats.station == 'ASR']:
    st.remove(asr_trace)
        
#Plotting all traces for one event with channel z, SNR>thr, and bandpassed between 2-12Hz
# For every trace: demean, bandpass, compute the SNR (dB) from the pr-th percentile
# amplitude in a signal window vs. a noise window. Vertical traces above the SNR
# threshold are plotted with their smoothed envelope and kept; all others are
# removed from `st`.
snr, SNR, stas, data_env_dict = [], [], [],{}

fig = plt.figure(figsize = (20,50), dpi=80)
plt.subplots_adjust(hspace = .4)
fig.suptitle('evtID:UW'+ event_ID+associated_volcano)

ax1 = plt.subplot(4,1,1)
ax1.set_xlabel('time (seconds)')
ax1.set_xlim([100,175])
iplot = 0 #vertical slot of the next plotted trace
smooth_length = 2*fs #envelope smoothing length in samples

# Loop over a snapshot of the stream: calling st.remove() while iterating `st`
# itself skips the trace after each removed one, leaving unprocessed traces behind.
for trace in list(st):
    times = trace.times() #kept separate from `t`, which holds the event time
    trace.detrend(type = 'demean')
    trace.filter('bandpass',freqmin=2.0,freqmax=12.0,corners=2,zerophase=True)
    network = trace.stats.network
    station = trace.stats.station
    cha = trace.stats.channel
    starttime = trace.stats.starttime

    signal_window = trace.copy()
    noise_window = trace.copy()
    #TODO: fix signal window to be around the max amplitude
    signal_window.trim(starttime+t_before-1, starttime+t_before-1+window)
    noise_window.trim(starttime-window+t_before-10, starttime+t_before-10)

    #SNR in dB: 20*log10 of the ratio of the pr-th percentile absolute amplitudes
    signal_level = np.percentile(np.abs(signal_window.data),pr)
    noise_level = np.percentile(np.abs(noise_window.data),pr)
    trace_snr = 20 * np.log10(signal_level / noise_level)
    snr.append(trace_snr)

    if cha[-1] == 'Z' and trace_snr>thr:
        #enveloping the data to calculate picktimes later on
        data_envelope = obspy.signal.filter.envelope(trace.data[110*fs:140*fs])
        data_envelope /= np.max(data_envelope)
        data_envelope += iplot*1.5 #same vertical offset as the plotted trace
        data_envelope = obspy.signal.util.smooth(data_envelope, smooth_length)
        # NOTE(review): keyed by NET.STA only, so a second Z channel of the same
        # station overwrites the first while `stas` gets a second entry -- confirm.
        data_env_dict[network+'.'+station]= data_envelope

        ax1.plot(times[100*fs:175*fs],trace.data[100*fs:175*fs]/np.max(np.abs(trace.data))+iplot*1.5)
        ax1.plot(times[110*fs:140*fs], data_envelope, color = 'k')
        ax1.text(times[175*fs], iplot*1.5, str(trace_snr))
        ax1.text(times[100*fs], iplot*1.5, station)
        iplot = iplot+1

        stas.append(station)
        SNR.append(trace_snr)
    else:
        st.remove(trace) #only want to work with z component channels with high SNR


def _plot_weighted_fit(fig, panel, title, x_deg, y_freq, labels, x_fit_deg,
                       station_az_deg, station_r, station_names,
                       weights=None, size_divisor=1.0):
    """Fit test_func to frequency-vs-azimuth data and draw one panel pair.

    Draws the data and the fitted curve in subplot `panel` of a 4x2 grid and a
    polar station map with the fitted direction in subplot `panel + 4`.

    Parameters
    ----------
    fig : matplotlib figure that receives the two subplots.
    panel : int, 1-4, position of the Cartesian subplot in the 4x2 grid.
    title : str, title used for both subplots.
    x_deg, y_freq : azimuths (degrees) and characteristic frequencies to fit.
    labels : station name for each data point (legend entries).
    x_fit_deg : azimuths (degrees) at which the fitted curve is evaluated.
    station_az_deg, station_r, station_names : azimuth (degrees), radius and
        name of every station drawn on the polar map.
    weights : optional non-negative integers, one per data point. Each point is
        repeated `weight` times before fitting and drawn with marker size
        weight**2 / size_divisor. None means an unweighted fit.
    size_divisor : divisor applied to weight**2 for the marker size.

    Returns
    -------
    (params, v) : the fitted test_func parameters and
        v_s * (fmax - fmin) / (fmax + fmin) computed from the fitted curve.
    """
    ax = fig.add_subplot(4, 2, panel)
    ax.set_ylabel('characteristic frequency(Hz)')
    ax.set_xlabel('azimuth(degrees)')
    ax.plot(x_deg, y_freq, '--', label='rawdata')
    for k, label in enumerate(labels):
        if weights is None:
            ax.scatter(x_deg[k], y_freq[k], label=label)
        else:
            ax.scatter(x_deg[k], y_freq[k], s=weights[k]**2/size_divisor, label=label)

    #weighting the data: repeat every point `weight` times before fitting
    if weights is None:
        fit_x, fit_y = x_deg, y_freq
    else:
        repeats = np.maximum(np.asarray(weights, dtype=int), 0)
        fit_x = np.repeat(x_deg, repeats)
        fit_y = np.repeat(y_freq, repeats)

    #optimizing parameters to fit the (weighted) data to test_func
    params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(fit_x), fit_y, p0=None)
    fitted = test_func(np.deg2rad(x_fit_deg), params[0], params[1], params[2])
    fmax = max(fitted)
    fmin = min(fitted)
    v = v_s*((fmax-fmin)/(fmax+fmin))
    ax.set_title(title)#+str(v)+'m/s')
    ax.plot(x_fit_deg, fitted, label='Fitted function')
    #legend is built last so that every labelled artist appears in it
    ax.legend(loc='best')

    polar_ax = fig.add_subplot(4, 2, panel + 4, polar=True)
    polar_ax.set_title(title)#+str(v)+'m/s')
    polar_ax.set_theta_offset(pi/2)   #zero angle at the top of the plot
    polar_ax.set_theta_direction(-1)  #angles increase clockwise
    polar_ax.plot(np.deg2rad(station_az_deg), station_r, 'g.')
    for az, radius, name in zip(station_az_deg, station_r, station_names):
        polar_ax.text(np.deg2rad(az), radius, name)
    #plot the estimated direction of the event: radial line at angle params[1]
    r_max = int(max(station_r))
    polar_ax.plot([params[1], params[1]], [0, r_max], 'k-')
    #dotted circle at radius az_thr
    ring = np.arange(0, (2*pi), 0.01)
    polar_ax.plot(ring, np.full(ring.shape, az_thr), 'b.', markersize = 2)
    return params, v


if len(st)>=4: #want events with enough waveforms to work with
    if reference not in data_env_dict:
        raise KeyError('reference station ' + reference + ' has no envelope '
                       '(not a Z channel above the SNR threshold); cannot calculate picktimes')

    dist = [df[df['Station'] == sta]['Distance_from_volc'].values[0] for sta in stas]

    #calculate picktimes by cross-correlating every envelope with the reference envelope
    pick_times = [pick_time(data_env_dict[reference], data_env_dict[key])
                  for key in data_env_dict]

    #station coordinates, and distance/azimuth of each station with respect to
    #the middle of the volcano
    lats, lons, elevs, r, theta = [], [], [], [], []
    for sta in stas:
        sta_idx = stations.index(sta)
        lats.append(latitudes[sta_idx])
        lons.append(longitudes[sta_idx])
        elevs.append(elevations[sta_idx])
        #gps2dist_azimuth returns (distance in m, azimuth volcano->station, back azimuth) per the ObsPy docs
        dist_m, az_deg, _back_az = gps2dist_azimuth(volc_lat, volc_lon, lats[-1], lons[-1],
                                                    a=6378137.0, f=0.0033528106647474805)
        r.append(dist_m)
        theta.append(az_deg)
    Sta = list(stas)

    #Get peak frequency of each event
    spectra_method = "welch"
    # NOTE(review): declared as "mean", but the frequency of the PSD maximum is what
    # is actually stored below -- confirm which one is intended.
    char_freq_method = "mean"
    # read and preprocess data
    st.filter("bandpass",freqmin=low_cut,freqmax=high_cut)
    st.taper(max_percentage=0.01,max_length=20)
    st.trim(starttime=min(pick_times),endtime=min(pick_times)+20)

    colors = list(plt.cm.tab10(np.arange(10))) + ["crimson", "indigo", "powderblue", "lime"]
    char_freq, weight = [], []
    kept = [] #indices into Sta of the stations that produced a spectrum
    for sta_idx, sta in enumerate(Sta):
        z_traces = st.select(station=sta,component="Z")
        if len(z_traces) == 0:
            # A station can have no Z trace left here (e.g. nothing survives the
            # trim). Skip it explicitly instead of reusing the previous station's data.
            print('no Z-component data for station ' + sta + ' in the analysis window; skipping')
            continue
        z_trace = z_traces[0]
        data = z_trace.data*100
        f,psd=scipy.signal.welch(data,fs=z_trace.stats.sampling_rate,nperseg=81,noverlap=1)
        #just get the frequencies within the filter band
        in_band = (f>low_cut) & (f<high_cut)
        f = f[in_band]
        psd = psd[in_band]
        #weighting the data by the spikiness of the PSD vs frequency graphs
        ratio = (np.mean(psd)/np.max(psd))
        weight.append(int(1/(ratio**2)*20))
        #characteristic frequency = frequency of the PSD maximum
        char_freq.append(f[np.argmax(psd)])
        kept.append(sta_idx)

    if not kept:
        print('no station has data in the analysis window; skipping the directivity fits')
    else:
        #manipulating the data: one row per station that produced a spectrum
        spectra_table = {'azimuth':[theta[k] for k in kept],
                         'freq':char_freq,
                         'station':[Sta[k] for k in kept],
                         'distance':[dist[k] for k in kept],
                         'weight':weight,
                         'SNR':[SNR[k] for k in kept]}
        DF = pd.DataFrame(spectra_table, index = None)
        DF2 = DF.sort_values('azimuth')
        print(DF2)
        #drop stations closer to the volcano than az_thr
        DF3 = DF2[~(DF2['distance'] < az_thr)]
        print(DF3)

        y_data =  DF3["freq"].values.tolist()
        Sta2 = DF3["station"].values.tolist()
        dist2 = DF3["distance"].values.tolist()
        weight2 = DF3["weight"].values.tolist()
        SNR2 = DF3['SNR'].values.tolist()
        x_data =  np.asarray(DF3["azimuth"].values.tolist())
        x_points = np.linspace(0,360, 100)

        if len(y_data) < 3:
            #test_func is fitted with three parameters, so three points are the minimum
            print('only ' + str(len(y_data)) + ' station(s) left after the az_thr filter; '
                  'need at least 3 to fit, skipping the directivity fits')
        else:
            #create figure showing effects of different weights on the data
            fig = plt.figure(figsize = (15,30), dpi=80)
            #(title, integer weight per data point, divisor for the marker size)
            #SNR weights come from SNR2 so they stay aligned with the sorted/filtered x_data
            panels = [
                ('Original', None, 1.0),
                ('Spikiness', weight2, 35),
                ('Distance from volcano', [int(1/(station_dist**2)*1000) for station_dist in dist2], 10),
                ('SNR', [int(station_snr) for station_snr in SNR2], 1),
            ]
            for panel, (title, panel_weights, size_divisor) in enumerate(panels, start=1):
                params, v = _plot_weighted_fit(fig, panel, title, x_data, y_data, Sta2, x_points,
                                               theta, r, stas,
                                               weights=panel_weights, size_divisor=size_divisor)

            #plt.savefig('evtID:UW'+ event_ID+associated_volcano+'weights.png')