


# Surface Event Analysis
###### This notebook analyzes surface event waveforms and calculates location, directivity, and velocity
###### Francesca Skene
###### fskene@uw.edu
###### Created: 7/22/22

Import Modules

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import Figure
import numpy as np
import pandas as pd
import obspy
from obspy.core import UTCDateTime
from obspy.clients.fdsn.client import Client
from obspy.geodetics import *
from obspy.signal.cross_correlation import *
from obspy.signal.trigger import classic_sta_lta
from obspy.core.utcdatetime import UTCDateTime
import requests
import glob
import sys
from pnwstore.mseed import WaveformClient
from mpl_toolkits import mplot3d
import scipy
from scipy import optimize
from scipy.optimize import curve_fit
from geopy import distance
import datetime
import rasterio as rio
from rasterio.plot import show
from rasterio.merge import merge
from Functions import *
import richdem as rd
from pathlib import Path
from pyproj import Proj,transform,Geod

Parameters

In [None]:
# NOTE(review): `pnwstore` is already imported at the top of the file, so this
# path is appended too late to help that import — confirm whether it is still needed.
sys.path.append('/data/wsd01/pnwstore/')
client = WaveformClient()  # used below for get_waveforms_bulk
client2 = Client('IRIS')  # not referenced in the visible cells

t_before = 120 #number of seconds before pick time
t_after = 120 #number of seconds after pick time
fs = 40 #sampling rate that all waveforms are resampled to
window = 30 #window length of the signal (seconds), used for the signal and noise windows of the SNR
pr = 98 #percentile of the absolute amplitude used in the SNR estimate
thr = 12 #SNR threshold; traces at or below it are discarded
station_distance_threshold = 25 # not referenced in the visible cells; units presumably km — TODO confirm
pi = np.pi
v_s = 1000 #shear wave velocity at the surface
t_beginning = UTCDateTime(2001,1,1,0,0,0) # picks outside (t_beginning, t_end) are dropped
t_end = UTCDateTime(2021,12,31,23,59)
smooth_length = 5 # passed to obspy.signal.util.smooth when smoothing envelopes
low_cut = 2 # lower edge of the band kept from the PSD (same units as the welch frequencies)
high_cut = 8 # upper edge of the band kept from the PSD
az_thr = 1000 #threshold of distance in meters from source location
step = 1000 # spacing of the location grid, in the same units as the grid side length (1000 m = 1 km)
t_step = 1 #step every second
ratio = 5.6915196 # grid side length = volcano elevation * ratio (see start_latlon)
colors = list(plt.cm.tab10(np.arange(10)))*3 # 30 colours: the tab10 palette repeated three times
radius = 6371e3 # presumably Earth's mean radius in metres — TODO confirm; not referenced in the visible cells

## Define Functions

In [None]:
# define a function that calculates picktimes at each station
def pick_time(time, ref_env, data_env_dict, st, t_diff, t_before, fs):
    """Estimate a pick time per station by cross-correlating envelopes.

    Each envelope in ``data_env_dict`` is cross-correlated with ``ref_env``
    over a maximum lag of 50 s; the lag of the correlation maximum is
    subtracted from ``time`` and the station's ``t_diff`` entry is added.

    Parameters
    ----------
    time : reference pick time (must support ``time - seconds``).
    ref_env : envelope of the reference station.
    data_env_dict : dict mapping a station key to its envelope.
    st : indexable of traces; ``st[i]`` is read for the i-th key of
        ``data_env_dict``, so both must be in the same order.
    t_diff : dict mapping the same keys to a time offset in seconds.
    t_before : unused, kept for call compatibility.
    fs : sampling rate in Hz.

    Returns
    -------
    (pick_times, offsets, starttimes) : three lists with one entry per key.
        ``pick_times`` and ``offsets`` hold identical values; both are
        returned because callers unpack three values.
    """
    max_lag = int(50*fs)
    pick_times, offsets, starttimes = [], [], []
    for i, key in enumerate(data_env_dict):
        starttimes.append(st[i].stats.starttime)
        # `correlate` comes from the star-import of obspy.signal.cross_correlation;
        # presumably zero lag sits at index `max_lag` — TODO confirm against obspy docs.
        xcor = correlate(data_env_dict[key], ref_env, max_lag)
        index = np.argmax(xcor)
        lag = 50*fs - index  # samples by which this envelope is shifted from the reference
        pick = time - lag/fs + t_diff[key]
        pick_times.append(pick)
        offsets.append(pick)
    return pick_times, offsets, starttimes
    
def shift(pick_times, offsets, starttimes, t_diff):
    """Express each station's offset relative to the earliest one.

    Parameters
    ----------
    pick_times, starttimes : unused, kept for call compatibility.
    offsets : sequence of arrival times, one per key of ``t_diff`` and in
        the same order.
    t_diff : dict mapping a station key to a time offset in seconds.

    Returns
    -------
    (shifts, vals) : ``shifts[i]`` is ``offsets[i] - min(offsets)``;
        ``vals[i]`` is that shift minus the station's ``t_diff`` value.
    """
    # The minimum is loop-invariant; compute it once instead of per station.
    earliest = min(offsets) if len(offsets) else None
    shifts, vals = [], []
    for i, key in enumerate(t_diff):
        t_shift = offsets[i] - earliest
        vals.append((-1*t_diff[key]) + t_shift)
        shifts.append(t_shift)
    return shifts, vals

# define function that resamples the data
def resample(st, fs):
    """Demean, taper (5 %) and resample every trace of ``st`` in place.

    ``st`` is any iterable of objects exposing ``detrend``, ``taper`` and
    ``resample``; the same ``st`` object is returned.
    """
    for trace in st:
        trace.detrend(type='demean')
        trace.taper(0.05)
        trace.resample(fs)
    return st

# define function to calculate number of surface events per month
def events_per_month(starttimes, events, first_year=2001, last_year=2020):
    """Count events per calendar month.

    Parameters
    ----------
    starttimes : sequence of event times comparable with ``UTCDateTime``.
    events : unused for counting (one count per start time); kept for call
        compatibility.
    first_year, last_year : inclusive range of years to tabulate. The
        defaults reproduce the original hard-coded 2001-2020 range.
        NOTE(review): the notebook's ``t_end`` is in 2021 — confirm whether
        2021 should be included.

    Returns
    -------
    (periods, num_of_events) : ``periods`` is a list of ``"<year>_<month>"``
        labels in chronological order, ``num_of_events`` the matching counts
        (0 for months without events).
    """
    num_events = {}
    for year in range(first_year, last_year + 1):
        for month in range(1, 13):
            t0 = UTCDateTime(year, month, 1)
            # First instant of the next month. The previous t0 + 30 days bound
            # dropped events on the 31st and let February overlap March.
            t1 = UTCDateTime(year + month // 12, month % 12 + 1, 1)
            period = str(year)+"_"+str(month)
            num_events[period] = sum(1 for t in starttimes if t0 <= t < t1)

    periods = list(num_events.keys())
    num_of_events = list(num_events.values())
    return periods, num_of_events

# define function to fit data to
def test_func(theta, a,theta0, c):
    return a * np.cos(theta-theta0)+c

# define a function to make plots of weighted data
def make_weight_plts(title,x_data,y_data,weight,test_func,x_points,v_s,theta,r,stas,az_thr,e,f,g,labels=None):
    """Plot weighted frequency-vs-azimuth data and fit ``test_func`` to it.

    Weighting is done by repeating each ``(x, y)`` sample ``weight[i]``
    times before the least-squares fit, so weights must be integers.

    Parameters
    ----------
    title : prefix of the subplot title.
    x_data, y_data : azimuths in degrees and characteristic frequencies.
    weight : integer weights, aligned index-by-index with ``x_data``.
    test_func : model passed to ``scipy.optimize.curve_fit``.
    x_points : azimuths in degrees at which the fitted model is evaluated.
    v_s : velocity scale multiplied by (fmax-fmin)/(fmax+fmin).
    r : station distances; only ``max(r)`` is used, to size ``direction``.
    theta, stas, az_thr, f : unused, kept for call compatibility.
    e : row index of the subplot in a 2x1 layout.
    g : scale factor applied to ``weight**2`` for the marker size.
    labels : legend label per data point. When omitted, the notebook-level
        global ``Sta2`` is used, as the original implementation did.

    Returns
    -------
    (v, direction) : the velocity estimate and a list holding the fitted
        phase ``theta0`` (radians) repeated ``int(max(r)) + 1`` times.
    """
    if labels is None:
        labels = Sta2  # legacy fallback on the notebook global
    ax = plt.subplot(2,1,e)
    for i in range(len(labels)):
        ax.scatter(x_data[i], y_data[i], s=(weight[i]**2*g), label=labels[i])
    ax.set_ylabel('characteristic frequency(Hz)')
    ax.set_xlabel('azimuth(degrees)')
    ax.plot(x_data,y_data, '--', label='rawdata')
    ax.legend(loc='best')

    # weighting the data: repeat every sample weight[i] times (linear time,
    # unlike the previous sum(list_of_lists, []) flattening)
    weighted_x, weighted_y = [], []
    for i, x_val in enumerate(x_data):
        repeats = weight[i]
        weighted_x.extend([x_val]*repeats)
        weighted_y.extend([y_data[i]]*repeats)

    # optimizing parameters to fit weighted data to test_function
    params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(weighted_x), weighted_y, p0=None)
    d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
    fmax = max(d)
    fmin = min(d)
    v = v_s*((fmax-fmin)/(fmax+fmin))
    ax.set_title(title+str(v)+'m/s')
    ax.plot(x_points, d, label='Fitted function')

    # one entry per metre out to the farthest station, so callers can draw
    # the direction as a radial line
    len_r = int(max(r))
    direction = [params[1] for _ in range(len_r+1)]

    plt.show()
    return v, direction

# define function to predict synthetic arrival times
def travel_time(t0, x, y, vs, sta_x, sta_y):
    """Arrival time at a station for a source at (x, y) with origin time t0.

    The arrival is ``t0`` plus the straight-line horizontal distance between
    source and station divided by the velocity ``vs``.
    """
    dx = sta_x - x
    dy = sta_y - y
    return t0 + np.sqrt(dx**2 + dy**2)/vs

# define function to compute the weighted root-mean-square travel-time residual
def error(synth_arrivals,arrivals, weight):
    """Root-mean-square of the weighted residuals ``(arrivals - synth_arrivals) * weight``."""
    weighted_residuals = (arrivals - synth_arrivals)*weight
    return np.sqrt(np.mean(weighted_residuals**2))

# define function to iterate through grid and calculate travel time residuals
def gridsearch(t0,x_vect,y_vect,sta_x,sta_y,vs,arrivals, weight):
    """Weighted RMS travel-time misfit for every (origin time, x, y) candidate.

    Uses the same formulas as ``travel_time`` and ``error``, evaluated for
    the whole grid at once. Source-station distances do not depend on the
    origin time, so they are computed once, not once per ``t0`` value.

    Parameters
    ----------
    t0 : sequence of candidate origin times.
    x_vect, y_vect : candidate source coordinates along each axis.
    sta_x, sta_y : station coordinates, one entry per station.
    vs : propagation velocity (distance units of the grid per time unit).
    arrivals : observed arrival time per station.
    weight : weight per station, multiplied into each residual.

    Returns
    -------
    numpy.ndarray of shape ``(len(t0), len(x_vect), len(y_vect))``.
    """
    sta_x = np.asarray(sta_x, dtype=float)
    sta_y = np.asarray(sta_y, dtype=float)
    arrivals = np.asarray(arrivals, dtype=float)
    weight = np.asarray(weight)

    grid_x, grid_y = np.meshgrid(np.asarray(x_vect, dtype=float),
                                 np.asarray(y_vect, dtype=float), indexing='ij')
    # shape (nx, ny, nsta): travel time from every grid node to every station
    dist = np.sqrt((sta_x - grid_x[..., None])**2 + (sta_y - grid_y[..., None])**2)
    travel = dist/vs

    rss_mat = np.full((len(t0), len(x_vect), len(y_vect)), np.nan)
    for i in range(len(t0)):
        res = (arrivals - (t0[i] + travel))*weight
        rss_mat[i] = np.sqrt(np.mean(res**2, axis=-1))
    return rss_mat

# define function to find lower-left corner of grid and grid size based on height of volcano
def start_latlon(elevation, ratio, center_lat, center_lon):
    """Return the lower-left corner and side length of a square search grid.

    The grid is centred on (center_lat, center_lon) and its side length is
    ``elevation * ratio``. The corner is reached by travelling half the
    grid diagonal from the centre along a bearing of 225 degrees.

    Returns ``(start_lat, start_lon, side_length)``.
    """
    side_length = elevation * ratio
    half_diagonal = (side_length/2)*np.sqrt(2)
    corner = distance.geodesic(meters = half_diagonal).destination(
        point=[center_lat,center_lon], bearing=225)
    return corner[0], corner[1], side_length

# define function to convert the location index into latitude and longitude
def location(x_dist, y_dist, start_lat, start_lon):
    """Convert grid offsets from the lower-left corner into latitude/longitude.

    Parameters
    ----------
    x_dist, y_dist : eastward and northward offsets in metres from the
        grid origin.
    start_lat, start_lon : latitude and longitude of the grid origin.

    Returns
    -------
    (loc_lat, loc_lon, d) : the destination coordinates and the geopy
        distance object built from the straight-line offset.
    """
    # arctan2 instead of arctan(y/x): x_dist is 0 on the first grid column,
    # where y/x is a division by zero (NaN bearing at the origin itself).
    bearing = 90-np.rad2deg(np.arctan2(y_dist, x_dist))
    dist = np.sqrt((x_dist)**2 + (y_dist)**2)
    d = distance.geodesic(meters = dist)
    destination = d.destination(point=[start_lat,start_lon], bearing=bearing)
    return destination[0], destination[1], d

# define function to find diameter in meters of the error on the location
def error_diameter(new_array, step_m=1000):
    """Extent of the low-misfit region along the x axis, in metres.

    Parameters
    ----------
    new_array : 2-D integer array of grid indices (e.g. from ``np.argwhere``);
        column 1 is read as the x index.
    step_m : grid spacing in metres. Defaults to 1000, the value that was
        previously hard-coded.

    Returns
    -------
    ``(max x index - min x index) * step_m``.

    NOTE(review): only the x extent is measured; the y extent (column 2) is
    ignored — confirm that this is intended.
    """
    x_indices = new_array[:,1]
    difference = np.max(x_indices) - np.min(x_indices)
    return difference*step_m

##  Import and organize metadata

### 1. Volcano Data (network and station, labeled with volcano name)

In [None]:
#this data includes all stations within 50km of each volcano and the lat, lon, elev of each station
# Columns read from `df` in later cells: Station, Network, Volcano_Name,
# Latitude, Longitude, Elevation, Start, End.
df = pd.read_csv('Volcano_Metadata_50km.csv')
# Same kind of table for the XD network; later cells read Volcano_Name, Station, Start, End.
df_xd = pd.read_csv('XD_Metadata_50km.csv')

In [None]:
# [latitude, longitude, elevation] per volcano; the elevation is multiplied by
# `ratio` to size each volcano's search grid.
volc_lat_lon = {
    'Mt_Rainier': [46.8528857, -121.7603744, 4392.5],
    'Mt_Adams': [46.202621, -121.4906384, 3743.2],
    'Mt_Baker': [48.7773426,  -121.8132008, 3287.6],
    # Mt St Helens uses the middle of the dome rather than the highest
    # point, which was [46.1912, -122.1944, 2549]
    'Mt_St_Helens': [46.200472222222224, -122.18883611111112, 2549],
    'Glacier_Peak': [48.1112273, -121.1139922, 3213],
    'Crater_Lake': [42.907745, -122.143494, 1883],
    'Mt_Hood': [45.373221, -121.696509, 3428.7],
    'Newberry': [43.7220653, -121.2344654, 2435],
}

In [None]:
# Lower-left corner [lat, lon] and side length of each volcano's search grid,
# derived from the volcano's elevation
volc_grid = {}
for volc in volc_lat_lon:
    center_lat, center_lon, elevation = volc_lat_lon[volc]
    start_lat, start_lon, side_length = start_latlon(elevation, ratio, center_lat, center_lon)
    volc_grid[volc] = [start_lat, start_lon, side_length]

In [None]:
#DEM data
# For each volcano: the elevation raster as float64 with nodata set to NaN,
# its CRS, and the raster bounds.
dem_data_dict = {}
for name in volc_lat_lon:
    # Volcanoes north of 46 degrees use the GeoTIFF layout (Washington);
    # the others use the .adf grid layout (Oregon).
    if volc_lat_lon[name][0]>46:
        dem_path = 'DEM_data/'+str(name)+'/'+str(name)+'.tif' #washington volcanoes
        nodata = -32767
    else:
        dem_path = 'DEM_data/'+str(name)+'/_w001001.adf' #oregon volcanoes
        nodata = -3.40282347e+38

    # Context manager so the dataset handle is closed once the data is read.
    with rio.open(dem_path) as dem:
        dem_array = dem.read(1).astype('float64')
        crs = dem.crs
        bounds = dem.bounds

    # If the raster is stored as float32, its nodata value widened to float64
    # is not equal to the float64 literal above, so also match the
    # float32-rounded sentinel. For -32767 both values coincide.
    nodata_as_float32 = float(np.float32(nodata))
    dem_array[(dem_array == nodata) | (dem_array == nodata_as_float32)] = np.nan

#     volc = rd.rdarray(dem_array, no_data=-9999)
#     slope = rd.TerrainAttribute(volc,attrib = 'slope_riserun')
#     aspect = rd.TerrainAttribute(volc, attrib = 'aspect')
#     dem_data_dict[name] = {'data':dem_array, 'elevation':volc, 'slope':slope, 'aspect':aspect}
    dem_data_dict[name]={'data':dem_array, 'crs':crs, 'left':bounds[0], 'right':bounds[2], 'bottom':bounds[1], 'top':bounds[3]}

In [None]:
# Longitude/latitude tick positions per volcano for map axes; volcanoes with
# empty lists have no ticks defined yet.
lat_lon_dict = {
    'Mt_Rainier': {
        'tick_lons': [-121.65, -121.7, -121.75, -121.8, -121.85],
        'tick_lats': [46.75, 46.8, 46.85, 46.9, 46.95],
    },
    'Mt_St_Helens': {
        'tick_lons': [-122.10, -122.15, -122.2, -122.25],
        'tick_lats': [46.16, 46.18, 46.20, 46.22],
    },
    'Mt_Adams': {'tick_lons': [], 'tick_lats': []},
    'Mt_Baker': {'tick_lons': [], 'tick_lats': []},
    'Mt_Hood': {'tick_lons': [], 'tick_lats': []},
    'Glacier_Peak': {'tick_lons': [], 'tick_lats': []},
    'Newberry': {'tick_lons': [], 'tick_lats': []},
    'Crater_Lake': {'tick_lons': [], 'tick_lats': []},
}

### 3. Surface Event Data from PNSN

In [None]:
# "su" is the label for surface event

df3 = pd.read_csv('../surface_events/PNSN_Pick_Label.csv')

label = df3['Label'].values.tolist()

# select the surface-event rows once and pull the needed columns from them
su_rows = df3[df3['Label'] == 'su']
surface_label = su_rows['Label'].values.tolist()
net_temp = su_rows['Network'].values.tolist()
sta_temp = su_rows['Station'].values.tolist()
evt_id_temp = su_rows['Event_ID'].values.tolist()
start_time_temp = su_rows['Picktime'].values.tolist()

# keep only the picks strictly inside the (t_beginning, t_end) study window
net, sta, evt_id, start_time = [], [], [], []
for i, ii in enumerate(start_time_temp):
    if not (t_beginning < UTCDateTime(ii) < t_end):
        continue
    net.append(net_temp[i])
    sta.append(sta_temp[i])
    evt_id.append(evt_id_temp[i])
    start_time.append(ii)


## Calculating seasonal occurrence of events

In [None]:
plt.rcParams.update({'font.size': 30})

# Station -> volcano lookup built once (first listed volcano per station),
# replacing a DataFrame filter per pick.
station_to_volcano = {}
for station_name, volcano_name in zip(df['Station'].values, df['Volcano_Name'].values):
    station_to_volcano.setdefault(station_name, volcano_name)

for name in volc_lat_lon:
    events = []
    stations = []
    starttimes = []
    networks = []
    for i in range(0, len(start_time)):
        associated_volcano = station_to_volcano.get(sta[i], 'unknown')

        # Keep the first pick of each event only. The i == 0 guard stops
        # evt_id[i-1] from wrapping round to the last element.
        is_new_event = (i == 0 or evt_id[i] != evt_id[i-1])
        if associated_volcano == name and is_new_event:
            events.append(evt_id[i])
            starttimes.append(start_time[i])
            stations.append(sta[i])
            networks.append(net[i])

    periods, num_of_events = events_per_month(starttimes, events)

    fig = plt.figure(figsize = (60, 10))

    for x in range(0,len(periods)):
        # Period labels are "<year>_<month>". May through October are drawn in
        # red; the previous single-character test could never match month 10.
        month = int(periods[x].split('_')[1])
        bar_color = 'r' if 5 <= month <= 10 else 'b'
        plt.bar(periods[x], num_of_events[x], color = bar_color, width = 0.4)
    plt.xlabel('year_month')
    plt.xticks(np.arange(0, len(periods)+1, 12)) #make every year
    plt.ylabel('No. of events')
    plt.title('Number of surface events per month at ' + str(name))

    plt.show()
    # NOTE(review): this break stops after the first volcano in volc_lat_lon —
    # remove it to plot every volcano.
    break

Station counts as a function of time for each volcano: permanent stations only, and permanent plus temporary (XD network) stations.

In [None]:
plt.rcParams.update({'font.size': 30})


def _is_active(start, end, when):
    """True when `when` lies strictly between a station's start and end dates.

    If the comparison raises (for instance because the end date cannot be
    parsed), the station counts as active once it has started.
    """
    try:
        return UTCDateTime(start) < when < UTCDateTime(end)
    except Exception:
        return UTCDateTime(start) < when


def _stations_per_month(starts, ends):
    """Map "<year>_<month>" (2001-2020) to the number of active stations.

    Activity is evaluated 30 days after the first day of each month. Months
    with no active station are stored as 0.
    """
    counts = {}
    for year in range(2001, 2021):
        for month in range(1, 13):
            when = UTCDateTime(year, month, 1) + 3600*24*30
            counts[str(year)+"_"+str(month)] = sum(
                1 for k in range(len(starts)) if _is_active(starts[k], ends[k], when))
    return counts


def _plot_station_counts(periods, counts, color, title, ylim=None):
    """Draw one bar per period on a new figure, with a tick every 12 periods."""
    fig = plt.figure(figsize = (60, 10))
    for x in range(0, len(periods)):
        plt.bar(periods[x], counts[x], color = color, width = 0.4)
    plt.xlabel('year_month')
    plt.xticks(np.arange(0, len(periods)+1, 12)) #make every year
    plt.ylabel('No. of stations')
    if ylim is not None:
        plt.ylim(ylim)
    plt.title(title)
    return fig


for name in volc_lat_lon:
    # NOTE(review): the figures below are rebuilt for every pick recorded at a
    # Mt_St_Helens station, although they do not depend on the pick. This
    # structure is kept unchanged; confirm whether one figure per volcano was intended.
    for i in range(0, len(start_time)):
        try:
            associated_volcano = df[df['Station']== sta[i]]['Volcano_Name'].values[0]
        except IndexError:
            associated_volcano = 'unknown'
        if associated_volcano == 'Mt_St_Helens':
            perm_rows = df[df['Volcano_Name'] == associated_volcano]
            temp_rows = df_xd[df_xd['Volcano_Name'] == associated_volcano]
            stations = perm_rows['Station'].values.tolist()
            stations_w_temp = stations + temp_rows['Station'].values.tolist()
            starts_perm = perm_rows['Start'].values.tolist()
            ends_perm = perm_rows['End'].values.tolist()
            starts_w_temp = starts_perm + temp_rows['Start'].values.tolist()
            ends_w_temp = ends_perm + temp_rows['End'].values.tolist()

            # The original wrote zero counts to misspelled names
            # (num__tempstas_50km / num__permstas_50km), raising NameError.
            num_tempstas_50km = _stations_per_month(starts_w_temp, ends_w_temp)
            num_permstas_50km = _stations_per_month(starts_perm, ends_perm)

            periods_temps = list(num_tempstas_50km.keys())
            num_of_tempstas = list(num_tempstas_50km.values())
            periods_perms = list(num_permstas_50km.keys())
            num_of_permstas = list(num_permstas_50km.values())

            fig = _plot_station_counts(
                periods_temps, num_of_tempstas, 'b',
                'Number of permanent+temporary stations per month at ' + str(associated_volcano),
                ylim=[0,100])
            fig = _plot_station_counts(
                periods_perms, num_of_permstas, 'r',
                'Number of permanent stations per month at ' + str(associated_volcano))

    else:
        # NOTE(review): this `else` belongs to the `for` loop above, not to the
        # `if`, so it runs once after the loop, using the volcano of the last pick.
        stations = df[df['Volcano_Name'] == associated_volcano]['Station'].values.tolist()
        starts_perm = df[df['Volcano_Name'] == associated_volcano]['Start'].values.tolist()
        ends_perm = df[df['Volcano_Name'] == associated_volcano]['End'].values.tolist()

        num_permstas_50km = _stations_per_month(starts_perm, ends_perm)

        periods_perms = list(num_permstas_50km.keys())
        num_of_permstas = list(num_permstas_50km.values())

        fig = _plot_station_counts(
            periods_perms, num_of_permstas, 'r',
            'Number of permanent stations per month at ' + str(associated_volcano))
    # NOTE(review): only the first iteration over volc_lat_lon runs because of this break.
    break

In [None]:
# Display the pick time stored at index 1600 of the filtered surface-event picks.
start_time[1600]

## Calculating directivity and velocity of events

In [None]:
# One row of results per processed event. The commas after
# 'direction_sharpness(degrees)' and 'number_of_stations' were missing, which
# silently concatenated each of them with the following column name.
evt_data = pd.DataFrame(columns = ['event_ID', 'location','location_uncertainty(m)','origin_time','direction(degrees)',
                'direction_sharpness(degrees)', 'params_std_deviation', 'velocity(m/s)', 'number_of_stations',
                'direction_snr(degrees)'])

plt.rcParams.update({'font.size': 10})
# Process the first 200 picks (fewer if the catalogue is shorter). For each
# new Mt_Hood event: fetch and clean waveforms, locate the source by grid
# search on envelope cross-correlation delays, then fit the azimuthal
# variation of the characteristic frequency to estimate direction and velocity.
for n in range(0, min(200, len(evt_id))):
    event_ID = str(evt_id[n])
    time = UTCDateTime(start_time[n])
    # Skip 'CN' network picks and repeated picks of the same event. The
    # original compared the whole `net` list with 'CN' (always unequal) and
    # let evt_id[n-1] wrap to the last element for n == 0.
    if net[n] != 'CN' and (n == 0 or evt_id[n]!=evt_id[n-1]):
        reference = str(net[n]+'.'+sta[n])
        try:
            associated_volcano = df[df['Station']== sta[n]]['Volcano_Name'].values[0]
        except IndexError:
            # Station missing from the metadata: skip it rather than reuse
            # the volcano of a previous event.
            continue
        if associated_volcano == 'Mt_Hood':
            #get info for stations within 50km of volcano that event ocurred at
            volcano_rows = df[df['Volcano_Name'] == associated_volcano]
            stations = volcano_rows['Station'].values.tolist()
            networks = volcano_rows['Network'].values.tolist()
            latitudes = volcano_rows['Latitude'].values.tolist()
            longitudes = volcano_rows['Longitude'].values.tolist()
            elevations = volcano_rows['Elevation'].values.tolist()

            # when both LON and LO2 are listed, keep LON only
            if stations.count("LON")>0 and stations.count("LO2")>0:
                index = stations.index("LO2")
                del stations[index]
                del networks[index]
                del latitudes[index]
                del longitudes[index]
                del elevations[index]

            #Get all waveforms for that event based on stations and times
            bulk = []
            for m in range(0, len(networks)):
                bulk.append([networks[m], stations[m], '*', '*', time-t_before, time+t_after])
            st = client.get_waveforms_bulk(bulk)
            #remove unwanted data: keep BH/EH/HH channels with a full-length record.
            # Iterate over a copy, because removing from `st` while iterating
            # it skips the trace after each removed one.
            for tr in list(st):
                wrong_band = tr.stats.channel[0:2] not in ('BH', 'EH', 'HH')
                too_short = len(tr.data)/tr.stats.sampling_rate < 239.9
                if wrong_band or too_short:
                    st.remove(tr)

            #resampling the data to 40Hz for each trace
            st = resample(st,fs)

            #Plotting all traces for one event with channel z, SNR above `thr`, bandpassed between 2-12Hz
            SNR, stas, nets, discards, data_env_dict, t_diff = [], [], [],[],{},{}
            fig = plt.figure(figsize = (20,50), dpi=80)
            fig.suptitle('evtID:UW'+ event_ID+associated_volcano)
            ax1 = plt.subplot(4,1,1)
            iplot = 0
            # Iterate over a copy: rejected traces are removed from `st` inside the loop.
            for i,ii in enumerate(list(st)):
                ii.detrend(type = 'demean')
                ii.filter('bandpass',freqmin=2.0,freqmax=12.0,corners=2,zerophase=True)
                cha = ii.stats.channel
                starttime = ii.stats.starttime
                signal_window = ii.copy()
                noise_window = ii.copy()
                signal_window.trim(starttime + t_before - 20, starttime + t_before - 20 + window)
                noise_window.trim(starttime + t_before - window -10, starttime + t_before - 10)
                # SNR in dB from the `pr`-th percentile of the absolute amplitudes
                snr = (20 * np.log(np.percentile(np.abs(signal_window.data),pr)
                               / np.percentile(np.abs(noise_window.data),pr))/np.log(10))

                if cha[-1] == 'Z' and snr>thr:
                    t = ii.times()
                    network = ii.stats.network
                    station = ii.stats.station
                    max_amp = int(ii.data.argmax())
                    max_amp_time = t[max_amp]
                    t_diff[network+'.'+station] = starttime-time
                    #enveloping the data to calculate picktimes
                    data_envelope = obspy.signal.filter.envelope(ii.data[115*fs:135*fs])
                    data_envelope /= np.max(data_envelope)
                    data_envelope += iplot*1.5
                    data_envelope = obspy.signal.util.smooth(data_envelope, smooth_length)
                    data_env_dict[network+'.'+station]= data_envelope
                    b,e = 115,150
                    ax1.plot(t[b*fs:e*fs],ii.data[b*fs:e*fs]/np.max(np.abs(ii.data))+iplot*1.5)
                    ax1.plot(t[115*fs:135*fs], data_envelope, color = 'k')
                    ax1.set_xlabel('time (seconds)')
                    ax1.set_xlim([b,e])
                    #ax1.vlines(max_env_amp_time, ymin = iplot*1.5-2, ymax = iplot*1.5+2)
                    #ax1.vlines(amp_third_time, ymin = iplot*1.5-1, ymax = iplot*1.5+1)
                    plt.text(t[e*fs], iplot*1.5, str(snr))
                    plt.text(t[b*fs], iplot*1.5, station)
                    iplot = iplot+1

                    stas.append(ii.stats.station)
                    nets.append(ii.stats.network)
                    SNR.append(snr)
                else:
                    discards.append(snr)
                    st.remove(ii)
            # at least four usable stations are required for the location
            if len(st)<4:
                continue

            #Get peak frequency of each event
    #             ax2 = plt.subplot(4,1,2)
    #             ax2.set_title('Power Spectral Density')
    #             ax2.set_xscale('log')
    #             ax2.set_yscale('log')
    #             ax2.set_xlabel('Frequency [Hz]')
    #             ax2.set_ylabel('PSD [$(mm/s)^2$/Hz]')
    #             ax2.grid(True)

            # read and preprocess data
            st.taper(max_percentage=0.01,max_length=20)
            st.trim(starttime=time-20,endtime=time+30)

            # make plot of spectra
            char_freq, sharp_weight= [],[]
            for i in range(len(stas)):
                data = st.select(station=stas[i],component="Z")[0].data*100
                f,psd=scipy.signal.welch(data,fs=st[0].stats.sampling_rate,nperseg=81,noverlap=1)
                # just get the frequencies within the filter band
                in_band = np.logical_and(f>low_cut, f<high_cut)
                f = f[in_band]
                psd = psd[in_band]
    #                 ax2.plot(f,psd,label=stas[i],linewidth=2)

                # calculate characteristic frequency and report, clean this up
                # (only the PSD-weighted mean frequency is used below)
                char_freq_max = f[np.argmax(psd)]
                char_freq_mean= np.sum(psd*f)/np.sum(psd)
                psd_cumsum = np.cumsum(psd)
                psd_sum = np.sum(psd)
                char_freq_median = f[np.argmin(np.abs(psd_cumsum-psd_sum/2))]
                char_freq.append(char_freq_mean)
    #                 plt.vlines(char_freq_max, ymin=0, ymax = max(psd), color = colors[i])

                # weighting the data by the spikiness of the PSD vs frequency graphs.
                # Named psd_ratio so the notebook-level `ratio` (grid sizing) is not overwritten.
                psd_ratio = (np.mean(psd)/np.max(psd))
                sharp_weight.append(int(1/(psd_ratio**2)*20))
    #             ax2.legend()

            lats, lons, elevs, r, theta = ([] for i in range(5))
            ref = str(nets[0]+'.'+stas[0])
            # use the picked station as reference when it survived the
            # quality checks, otherwise the first retained station
            try:
                ref_env = data_env_dict[reference]
            except KeyError:
                ref_env = data_env_dict[ref]

            # calculating the picktimes and shift in arrival times using envelope cross_correlation
            pick_times, offsets, starttimes = pick_time(time, ref_env, data_env_dict,st,t_diff, t_before, fs) #calculate picktimes
            shifts, vals = shift(pick_times, offsets, starttimes, t_diff)

            # gathering locations of stations with clean data
            iplot = 0
            for i,ii in enumerate(stas):
                ax1.vlines(vals[i], ymin = iplot*1.5-.5, ymax = iplot*1.5+.5, color = colors[i])
                a = stations.index(ii)
                lats.append(latitudes[a])
                lons.append(longitudes[a])
                elevs.append(elevations[a])
                iplot = iplot+1

            # create a time table and extracting necessary data
            data = {'station': stas, 'offset_arrivals':shifts, 'sta_lat': lats, 'sta_lon':lons, 'sta_elev':elevs, 'snr':SNR }
            timetable = pd.DataFrame(data, index = None)
            arrivals =  timetable['offset_arrivals'].values.tolist()#unsure if this step is necessary because they are already lists
            sta_lats = timetable['sta_lat'].values.tolist()
            sta_lons= timetable['sta_lon'].values.tolist()

            # define grid origin in lat,lon and grid dimensions in m
            lat_start = volc_grid[associated_volcano][0]
            lon_start = volc_grid[associated_volcano][1]
            side_length = volc_grid[associated_volcano][2]

            # weighting each station based on SNR, these locations will have a larger weight in the error estimate
            SNR_weight = [int(i) for i in SNR]
            # create the grid of locations
            sta_x = []
            sta_y = []
            for i in range(len(sta_lats)):
                x_dist = distance.distance([lat_start,lon_start],[lat_start,sta_lons[i]]).m
                y_dist = distance.distance([lat_start,lon_start],[sta_lats[i],lon_start]).m
                sta_x.append(x_dist)
                sta_y.append(y_dist)
            x_vect = np.arange(0, side_length, step)
            y_vect = np.arange(0, side_length, step)
            t0 = np.arange(0,np.max(arrivals),t_step)

            # carry out the gridsearch
            rss_mat = gridsearch(t0,x_vect,y_vect,sta_x,sta_y,1000,arrivals,SNR_weight)
            loc_idx = np.unravel_index([np.argmin(rss_mat)], rss_mat.shape)
            print(loc_idx[0])
            # plot a spatial map of error for lowest-error origin time
            fig2,ax = plt.subplots()
            ax.set_title('Vs = 1000m/s')
            ax.scatter(x_vect[loc_idx[1]],y_vect[loc_idx[2]],s=100,marker='*',c='r')
            im = ax.imshow(np.log10(rss_mat[loc_idx[0],:,:].T),origin="lower",extent=[0,side_length,0,side_length])
            fig2.colorbar(im)
            contours = ax.contour(x_vect,y_vect,np.log10(rss_mat[int(loc_idx[0]),:,:].T),cmap='plasma_r')
            ax.clabel(contours)
            #ax.arrow(int(x_vect[loc_idx[1]]),int(y_vect[loc_idx[2]]),dy/2,dx/2, color = 'w')
            plt.colorbar(contours)
            plt.show()
            # find the latitude and longitude of the location index
            loc_lat, loc_lon, d = location(x_vect[loc_idx[1]], y_vect[loc_idx[2]], lat_start, lon_start)
            err_thr = np.min(np.log10(rss_mat))+.05
            thr_array = np.argwhere(np.log10(rss_mat)<err_thr)
            diameter = error_diameter(thr_array)
            print(loc_lat, loc_lon)
            print('diameter of error on the location',diameter,'meters')

            # calculating distance and azimuth of each station with respect to the estimated source location
            for i, ii in enumerate(stas):
                u,b,c = (gps2dist_azimuth(loc_lat, loc_lon, lats[i], lons[i], a=6378137.0, f=0.0033528106647474805))
                r.append(u)
                theta.append(b)

            #manipulating the data
            data = {'azimuth_deg':theta, 'freq':char_freq, 'station':stas, 'distance_m':r, 'weight':sharp_weight, 'SNR':SNR}
            DF = pd.DataFrame(data, index = None)
            DF2 = DF.sort_values('azimuth_deg')

            #Taking out stations that are too close to the location when looking at azimuth
            drops = []
            for i in range(len(DF2)):
                value = DF2.loc[i,'distance_m']
                if value < az_thr:
                    drops.append(i)
            DF3 = DF2.drop(drops)
            y_data =  DF3["freq"].values.tolist()
            Sta2 = DF3["station"].values.tolist()
            dist2 = DF3["distance_m"].values.tolist()
            spike_weight = DF3["weight"].values.tolist()
            SNR2 = DF3['SNR'].values.tolist()
            x_data =  np.asarray(DF3["azimuth_deg"].values.tolist())
            x_points = np.linspace(0,360, 100)

            #optimizing parameters to fit data to test_function
            params, params_covariance = optimize.curve_fit(test_func, np.deg2rad(x_data), y_data, p0=None)
            perr = np.sqrt(np.diag(params_covariance))
            std_deviation = str(round(perr[0],9))+','+str(round(perr[1],9))+','+str(round(perr[2],9))
            d = test_func(np.deg2rad(x_points), params[0], params[1], params[2])
            len_r = int(max(r))
            line_length = np.linspace(0,len_r,len_r+1)
            rads = np.arange(0, (2 *pi), 0.01)
            direction = [(params[1]) for i in range(len_r+1)]
            fmax = max(d)
            fmin = min(d)
            v = v_s*((fmax-fmin)/(fmax+fmin))

            dy = len_r*np.sin(direction[0])
            dx = len_r*np.cos(direction[0])

            #create figure showing effects of different weights on the data
            fig = plt.figure(figsize = (20,7), dpi=80)
            ax1 = plt.subplot(1,2,1)
            ax1.set_ylabel('characteristic frequency(Hz)')
            ax1.set_xlabel('azimuth(degrees)')
            for i in range (0,len(Sta2)):
                ax1.scatter(x_data[i], y_data[i], label=Sta2[i])
            ax1.plot(x_data,y_data, '--', label='rawdata')
            ax1.legend(loc='best')
            ax1.set_title('Original'+str(v)+'m/s')
            ax1.plot(x_points, d, label='Fitted function')

    #         ax5= plt.subplot(1,2,2)
    #         ax5.set_title('Original'+str(v)+'m/s')
    #         y = len_r*np.cos(direction)
    #         x = -1*len_r*np.sin(direction)
    #         slope = y[0]/x[0]
    #         x_ticks = np.arange(0,x[0],1)
    #         ax5.plot(x_ticks,slope*x_ticks,'-')

            ax5= plt.subplot(1,2,2, polar=True)
            ax5.set_title('Original'+str(v)+'m/s')
            ax5.set_theta_offset(pi/2)
            ax5.set_theta_direction(-1)
            for i in range(0,len(r)):
                ax5.plot(np.deg2rad(theta[i]),r[i], 'g.')
                ax5.text(np.deg2rad(theta[i]),r[i],stas[i])
            ax5.plot(direction,line_length, 'k-')  #plot the estimated direction of the event
            for rad in rads:
                ax5.plot(rad,az_thr, 'b.', markersize = 2)

            fig = plt.figure(figsize = (20,20), dpi=80)
            # weighted plots. The weights must be in the same order as
            # x_data/y_data (azimuth-sorted, near stations dropped), so the
            # DF3 columns are passed, not the station-ordered lists.
            e,g = 1,.02857142857
            title = 'Sharpness'
            v_sharp,direction_sharp = make_weight_plts(title,x_data,y_data,spike_weight,test_func,x_points,v_s,theta,r,stas,az_thr,e,f,g)

            dy_sharp = len_r*np.sin(direction_sharp[0])
            dx_sharp = len_r*np.cos(direction_sharp[0])


            SNR_weight_sorted = [int(i) for i in SNR2] #larger SNRs have higher weight
            e,g = 2,1
            title = 'SNR'
            v_snr,direction_snr = make_weight_plts(title,x_data,y_data,SNR_weight_sorted,test_func,x_points,v_s,theta,r,stas,az_thr,e,f,g)

            dy_snr = len_r*np.sin(direction_snr[0])
            dx_snr = len_r*np.cos(direction_snr[0])

            # make a dataframe of the data. DataFrame.append was removed in
            # pandas 2.0; pd.concat with a one-row frame adds the same row.
            event_row = {'event_ID':[event_ID],
                        'location': [str(loc_lat)+','+ str(loc_lon)],
                        'location_uncertainty(m)':[diameter],
                        'origin_time': [min(offsets)-int(loc_idx[0])],
                        'direction(degrees)':[np.rad2deg(direction[0])],
                        'direction_sharpness(degrees)':[np.rad2deg(direction_sharp[0])],
                        'direction_snr(degrees)':[np.rad2deg(direction_snr[0])],
                        'params_std_deviation':[std_deviation],
                        'velocity(m/s)':[v],
                        'number_of_stations':[len(stas)]}
            evt_data = pd.concat([evt_data, pd.DataFrame([event_row])], ignore_index = True)

               #plt.savefig('evtID:UW'+ event_ID+associated_volcano+'.png')


Plots the error contours

In [None]:
# Overlay the grid-search misfit contours and the stations on the DEM of the
# volcano processed last.
dem_info = dem_data_dict[associated_volcano]
crs = dem_info['crs']
data = dem_info['data']

# projections: p1 is geographic lon/lat, p2 is the CRS of the DEM
p2 = Proj(crs,preserve_units=False)
p1 = Proj(proj='latlong',preserve_units=False)

# lower-left grid point of the grid search, in DEM coordinates
grid_lat, grid_lon, grid_side = volc_grid[associated_volcano]
left_x,bottom_y = transform(p1,p2,grid_lon,grid_lat)
# left, right, bottom, top of the grid
grid_bounds = [left_x, left_x+grid_side, bottom_y, bottom_y+grid_side]

# extent of the DEM raster
left, right = dem_info['left'],dem_info['right']
bottom, top = dem_info['bottom'],dem_info['top']

fig,ax = plt.subplots(1,1,figsize=(7,7))
ax.imshow(data,extent=[left, right, bottom, top],cmap='jet')
#ax.imshow(np.log10(rss_mat[loc_idx[0],:,:].T),origin='lower',extent=grid_bounds,alpha = .5)

# log10 misfit at the best-fitting origin time, contoured in DEM coordinates
contour_x,contour_y = np.meshgrid(left_x+x_vect,bottom_y+y_vect)
log_misfit = np.log10(rss_mat[int(loc_idx[0]),:,:].T)
contours = ax.contour(contour_x,contour_y,log_misfit,cmap='plasma')
ax.clabel(contours)
plt.colorbar(contours)

# plotting the stations on top of this as labelled black triangles
for i, ii in enumerate(stas):
    sta_x,sta_y = transform(p1,p2,lons[i],lats[i])
    ax.plot(sta_x,sta_y, c='k', marker="^")
    ax.text(sta_x,sta_y,ii, c='k')

# crop the view relative to the DEM extent
ax.set_xlim(left+10000,right)
ax.set_ylim(bottom+15000,top-5000)

# getting lat and lon tick marks on the axis
# tick_lons = lat_lon_dict[associated_volcano]['tick_lons']
# tick_lats = lat_lon_dict[associated_volcano]['tick_lats']

# ticks_x = []
# ticks_y = []
# for i in range(len(tick_lons)):
#     tick_x,tick_y = transform(p1,p2,tick_lons[i],tick_lats[i])
#     ticks_x.append(tick_x)
#     ticks_y.append(tick_y)
#     tick_lons[i]=str(tick_lons[i])
#     tick_lats[i]=str(tick_lats[i])

# ax.set_title('Location Error')
# ax.set_xlabel('longitudes')
# ax.set_ylabel('latitudes')
# ax.set_xticks(ticks_x)
# ax.set_xticklabels(tick_lons)
# ax.set_yticks(ticks_y)
# ax.set_yticklabels(tick_lats)

plt.show()

Plot the estimated direction of flow for each weighting scheme

In [None]:
# Draw the DEM of the associated volcano and, from the estimated source
# location, one arrow per weighting scheme (no weight, sharpness, SNR),
# then mark the stations used.
crs = dem_data_dict[associated_volcano]['crs']
data = dem_data_dict[associated_volcano]['data']


# p1 is geographic lon/lat, p2 is the coordinate system of the DEM
p2 = Proj(crs,preserve_units=False)
p1 = Proj(proj='latlong',preserve_units=False)
# pyproj.transform is deprecated and builds a new transformer on every call;
# build the lon/lat -> DEM-coordinates transformer once and reuse it below.
from pyproj import Transformer
lonlat_to_dem = Transformer.from_proj(p1, p2)
# gives the lower left grid point in the grid search
left_x,bottom_y = lonlat_to_dem.transform(volc_grid[associated_volcano][1],volc_grid[associated_volcano][0])
# gives the left right, bottom, top of the grid
grid_bounds = [left_x, left_x+volc_grid[associated_volcano][2], bottom_y, bottom_y+volc_grid[associated_volcano][2]]
left, right = dem_data_dict[associated_volcano]['left'],dem_data_dict[associated_volcano]['right']
bottom, top = dem_data_dict[associated_volcano]['bottom'],dem_data_dict[associated_volcano]['top']

fig,ax = plt.subplots(1,1,figsize=(7,7))
a = ax.imshow(data,extent=[left, right, bottom, top],cmap='jet')
plt.colorbar(a)
# source location in DEM coordinates
loc_x,loc_y = lonlat_to_dem.transform(loc_lon,loc_lat)
# Draw on `ax` explicitly (same axes that carries the legend) instead of
# relying on pyplot's current-axes state.
# NOTE(review): the sin-based component (named dy*) is passed as the x offset
# and the cos-based one (dx*) as the y offset. That is right if the direction
# angle is measured clockwise from north -- confirm the angle convention.
ax.arrow(loc_x,loc_y,dy/2,dx/2, color='w', width=200, label='no weight')
ax.arrow(loc_x,loc_y,dy_sharp/2,dx_sharp/2, color='k', width=200, label='sharpness')
ax.arrow(loc_x,loc_y,dy_snr/2,dx_snr/2, color='m', width=200, label='snr')
ax.legend()

# plotting the stations on top of this as black triangles with their names
for i, sta in enumerate(stas):
    sta_x,sta_y = lonlat_to_dem.transform(lons[i],lats[i])
    ax.plot(sta_x,sta_y, c='k', marker="^")
    ax.text(sta_x,sta_y,sta, c='k')

# show the full DEM extent
ax.set_xlim(left,right)
ax.set_ylim(bottom,top)

# getting lat and lon tick marks on the axis
# tick_lons = lat_lon_dict[associated_volcano]['tick_lons']
# tick_lats = lat_lon_dict[associated_volcano]['tick_lats']

# ticks_x = []
# ticks_y = []
# for i in range(len(tick_lons)):
#     tick_x,tick_y = transform(p1,p2,tick_lons[i],tick_lats[i])
#     ticks_x.append(tick_x)
#     ticks_y.append(tick_y)
#     tick_lons[i]=str(tick_lons[i])
#     tick_lats[i]=str(tick_lats[i])

# ax.set_title('Direction of flow based on different weights')
# ax.set_xlabel('longitudes')
# ax.set_ylabel('latitudes')
# ax.set_xticks(ticks_x)
# ax.set_xticklabels(tick_lons)
# ax.set_yticks(ticks_y)
# ax.set_yticklabels(tick_lats)

plt.show()

In [None]:
# Display the first entry of `data` (the 'data' field of dem_data_dict for the
# current volcano) as the cell output -- a quick sanity check of the raster.
data[0]

## Events in time window of XD temporary station

In [None]:
associated_volcano = 'Mt_St_Helens'
#events during temp station time period:
# 2014,7,27,19,14,52      eventID = 1780258   reference = CC.SEP
# 2014,7,27,18,39,47      eventID = 1780253   reference = CC.SUG
# 2014,7,27,13,20,43      eventID = 1781583   reference = CC.SEP
# 2014,5,25,16,49,52      eventID = 1778978   reference = CC.SEP
# 2014,7,25,6,49,53        eventID = 1779148   reference = CC.SEP
# 2014,7,24,20,8,10        eventID = 1777563   reference = CC.SEP
# 2014,7,22,16,51,36      eventID = 1792948   reference = CC.SEP