## Match the events found by PyOcto with other catalogs

Get the events from the USGS catalog and from Stone's and Morton's papers, and compare them with the events from our association.


### Comparing with the USGS catalog

In [594]:
from datetime import datetime

import numpy as np
import pandas as pd
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from pnwstore.mseed import WaveformClient

### Working on my catalog

In [595]:
# Read every pick-to-event assignment and add a 'datetime' column holding the
# 'time' strings parsed as timezone-aware (UTC) timestamps
mycatalog = pd.read_csv('all_pick_assignments.csv').assign(
    datetime=lambda picks: pd.to_datetime(picks['time'], utc=True)
)
# Show the first five rows of the catalog
mycatalog.head()

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime
0,0,0,2012-10-14 01:24:03.676530+00:00,-99.823478,8.014299,24.609375,4,41.815873,-125.951392,24.609375,0,283,-0.006772,G19B,P,1350178000.0,2012-10-14 01:24:03.676530+00:00
1,1,0,2012-10-14 01:24:03.676530+00:00,-99.823478,8.014299,24.609375,4,41.815873,-125.951392,24.609375,0,843,0.125995,G27B,P,1350178000.0,2012-10-14 01:24:03.676530+00:00
2,2,0,2012-10-14 01:24:03.676530+00:00,-99.823478,8.014299,24.609375,4,41.815873,-125.951392,24.609375,0,1053,-0.261009,M12B,P,1350178000.0,2012-10-14 01:24:03.676530+00:00
3,3,0,2012-10-14 01:24:03.676530+00:00,-99.823478,8.014299,24.609375,4,41.815873,-125.951392,24.609375,0,1938,0.141786,M12B,S,1350178000.0,2012-10-14 01:24:03.676530+00:00
4,4,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,12,0.377781,FS09B,P,1350182000.0,2012-10-14 02:39:44.924554+00:00


### Working on the USGS catalog

In [597]:
# Load the USGS catalog for 2012-10-14 inside the latitude/longitude box below.
# UTCDateTime comes from the imports cell (`from obspy import UTCDateTime`); without that
# import this cell raises NameError on a fresh kernel.
client = Client("USGS")
# NOTE(review): client_waveforms is not used in the cells shown here - confirm it is needed.
client_waveforms = WaveformClient()

usgs_starttime = UTCDateTime("2012-10-14T00:00:00.000000")
usgs_endtime = UTCDateTime("2012-10-14T23:59:59.999999")

events_USGS = client.get_events(
    starttime=usgs_starttime,
    endtime=usgs_endtime,
    minlatitude=40, maxlatitude=44,
    minlongitude=-125, maxlongitude=-120,
)
events_USGS

2 Event(s) in Catalog:
2012-10-14T03:08:01.290000Z | +40.721, -124.210 | 1.74 md | manual
2012-10-14T02:39:45.510000Z | +40.301, -124.416 | 1.49 md | manual

In [598]:
# Append the origin times of the USGS catalog in USGS_list and the matching events of my
# catalog in USGS_common_list (one row of my catalog per matched event)
USGS_list = []
USGS_common_list = []
# Half-width of the time window inside which two origin times count as the same event
half_window = pd.Timedelta(10, "seconds")

for event in events_USGS:
    # Use the origin the catalog marks as preferred; fall back to the first one listed
    origin = event.preferred_origin() or event.origins[0]
    origin_datetime = pd.to_datetime(origin.time.datetime, utc = True)
    t1 = origin_datetime - half_window
    t2 = origin_datetime + half_window

    # Rows of my catalog whose event time lies strictly inside the window, one row per event
    assignments_USGS_time = mycatalog.loc[(mycatalog['datetime'] > t1) & (mycatalog['datetime'] < t2)]
    assignments_USGS_time = assignments_USGS_time.drop_duplicates('event_idx', keep = 'first')

    USGS_list.append(origin_datetime)
    USGS_common_list.append(assignments_USGS_time)

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
USGS_common_list = pd.concat(USGS_common_list) if USGS_common_list else mycatalog.iloc[0:0]
# Two reference events closer together than the window select the same row of my catalog;
# keep every selected row only once (rows are identified by their index label)
USGS_common_list = USGS_common_list.loc[~USGS_common_list.index.duplicated(keep = 'first')]

In [599]:
# Display the events of my catalog whose time falls within the matching window of a
# USGS event (one row, the first pick row, per matched event)
USGS_common_list

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime
9,9,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1456,0.293201,FS09B,S,1350184000.0,2012-10-14 03:08:04.522444+00:00
4,4,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,12,0.377781,FS09B,P,1350182000.0,2012-10-14 02:39:44.924554+00:00


### Comparing with Stone's catalogs

### Working on Stone's Catalog

In [569]:
# Read the two supplementary tables of Stone's paper: ds01 is used below as the Cascadia
# catalog and ds02 as the Juan de Fuca (JDF) catalog
events_cascadia, events_jdf = (
    pd.read_csv(supplement_file)
    for supplement_file in (
        'jgrb52524-sup-0002-2017jb014966-ds01.csv',
        'jgrb52524-sup-0003-2017jb014966-ds02.csv',
    )
)

### Working on the Cascadia Catalog

In [576]:
# Preview the Cascadia catalog (first five rows)
events_cascadia.head(n=5)

Unnamed: 0,yyyy/mm/dd,hh:MM:ss.ff,Latitude,Longitude,Depth (km),Magnitude,# Nonzero Phase Arrivals,RMS (s),Max Azimuthal Gap,Distance to nearest station (km),horizontal med. st. err.,vertical med. st. err.,depth fixed? (1=yes),deployment year,Land Network ID (CNSN=1),Land Network,Notes
0,'2011/10/21','10:58:59.67',47.869,-124.261333,35.9,1.1,13,0.26,142,12.3,1.2,1.3,0,1,0,'-',
1,'2011/11/16','20:10:51.77',48.7755,-124.967667,21.03,2.44,12,0.57,201,45.3,3.8,23.3,0,1,10830998,'PNSN',
2,'2011/11/28','00:27:12.78',44.4325,-124.501167,22.1,1.65,20,0.28,95,10.6,0.6,0.7,0,1,0,'-',
3,'2011/12/05','14:39:07.32',48.008,-124.31,33.67,2.67,6,0.07,154,10.5,1.6,1.2,0,1,0,'-',
4,'2011/12/20','00:00:21.16',44.672333,-124.292,18.41,1.93,23,0.4,106,20.2,0.5,0.6,0,1,10832893,'PNSN',


In [577]:
# Drop the row dated 1902 since the year is most likely an error in the source file.
# The row is selected by its date string rather than by a hard-coded index label, so
# re-running this cell after the index has been reset cannot remove a valid event.
is_year_1902 = (
    events_cascadia['yyyy/mm/dd']
    .astype('str')
    .str.strip(" '")          # the date strings are wrapped in single quotes in the CSV
    .str.startswith('1902')
)
events_cascadia = events_cascadia.loc[~is_year_1902]

In [578]:
# Renumber the rows 0..n-1 after the removal above, discarding the old index labels
events_cascadia = events_cascadia.reset_index(drop=True)

In [580]:
# Join the date strings and the time-of-day strings (no separator) into a new
# text column called 'datetime'
date_text = events_cascadia['yyyy/mm/dd'].astype('str')
clock_text = events_cascadia['hh:MM:ss.ff'].astype('str')
events_cascadia['datetime'] = date_text + clock_text

In [581]:
# Strip every single-quote character out of the combined time strings
events_cascadia['datetime'] = events_cascadia['datetime'].str.replace("'", "", regex=False)

In [582]:
# Parse the combined strings (date immediately followed by time of day) into UTC timestamps
cascadia_time_format = '%Y/%m/%d%H:%M:%S.%f'
events_cascadia['datetime'] = pd.to_datetime(
    events_cascadia['datetime'],
    format=cascadia_time_format,
    utc=True,
)

In [583]:
# Append the origin times of the Cascadia catalog in cascadia_list and the matching events
# of my catalog in cascadia_common_list (one row of my catalog per matched event)
cascadia_list = []
cascadia_common_list = []
# Half-width of the time window inside which two origin times count as the same event
half_window = pd.Timedelta(10, "seconds")

# Iterate over the values so the loop does not depend on the frame having a 0..n-1 index
for origin_datetime in events_cascadia['datetime']:
    t1 = origin_datetime - half_window
    t2 = origin_datetime + half_window

    # Rows of my catalog whose event time lies strictly inside the window, one row per event
    assignments_cascadia_time = mycatalog.loc[(mycatalog['datetime'] > t1) & (mycatalog['datetime'] < t2)]
    assignments_cascadia_time = assignments_cascadia_time.drop_duplicates('event_idx', keep = 'first')

    cascadia_list.append(origin_datetime)
    cascadia_common_list.append(assignments_cascadia_time)

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
cascadia_common_list = pd.concat(cascadia_common_list) if cascadia_common_list else mycatalog.iloc[0:0]
# Two reference events closer together than the window select the same row of my catalog;
# keep every selected row only once (rows are identified by their index label)
cascadia_common_list = cascadia_common_list.loc[~cascadia_common_list.index.duplicated(keep = 'first')]

In [586]:
# Display the events of my catalog whose time falls within the matching window of an
# event in the Cascadia catalog (one row, the first pick row, per matched event)
cascadia_common_list

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime


### Working on the Juan de Fuca Catalog


In [590]:
# Preview the Juan de Fuca catalog (first five rows)
events_jdf.head(n=5)

Unnamed: 0,Time (epoch),Longitude,Latitude,Depth (km),RMS (s),CI Year
0,1319372776,-128.5916,44.1944,25.0802,0.8778,1
1,1319592376,-126.6822,43.567,0.0,0.5846,1
2,1320368639,-127.3553,43.8455,0.0,0.4044,1
3,1320610871,-127.954,43.8234,0.0,0.441,1
4,1320616859,-127.4139,44.4245,7.4284,0.3948,1


In [591]:
# Convert the epoch times (seconds since 1970-01-01T00:00:00 UTC) to UTC datetimes.
# pandas interprets the numbers directly as UTC. datetime.fromtimestamp() without a tz
# argument returns naive *local* time, and labelling that as UTC afterwards shifts every
# origin time by the UTC offset of the machine running the notebook.
events_jdf['datetime'] = pd.to_datetime(events_jdf['Time (epoch)'], unit='s', utc=True)

In [592]:
# Append the origin times of the JDF catalog in jdf_list and the matching events of my
# catalog in jdf_common_list (one row of my catalog per matched event)
jdf_list = []
jdf_common_list = []
# Half-width of the time window inside which two origin times count as the same event
half_window = pd.Timedelta(10, "seconds")

# Iterate over the values so the loop does not depend on the frame having a 0..n-1 index
for origin_datetime in events_jdf['datetime']:
    t1 = origin_datetime - half_window
    t2 = origin_datetime + half_window

    # Rows of my catalog whose event time lies strictly inside the window, one row per event
    assignments_jdf_time = mycatalog.loc[(mycatalog['datetime'] > t1) & (mycatalog['datetime'] < t2)]
    assignments_jdf_time = assignments_jdf_time.drop_duplicates('event_idx', keep = 'first')

    jdf_list.append(origin_datetime)
    jdf_common_list.append(assignments_jdf_time)

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
jdf_common_list = pd.concat(jdf_common_list) if jdf_common_list else mycatalog.iloc[0:0]
# Two reference events closer together than the window select the same row of my catalog;
# keep every selected row only once (rows are identified by their index label)
jdf_common_list = jdf_common_list.loc[~jdf_common_list.index.duplicated(keep = 'first')]

In [593]:
# Display the events of my catalog whose time falls within the matching window of an
# event in the JDF catalog (one row, the first pick row, per matched event)
jdf_common_list

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime


### Comparing with Morton's catalogs

### Working on Morton's Catalog

In [400]:
# Load the catalog table from Morton's paper
morton_catalog_file = 'ds01.csv'
events_morton = pd.read_csv(morton_catalog_file)

In [401]:
# Preview Morton's catalog (first five rows)
events_morton.head(n=5)

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,max az gap,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?
0,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,7.37,47.3217,-123.2708,...,166.0,27.4,0.19,0.8,1.2,,,,Interface,Catalog
1,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,7.72,44.2888,-124.334,...,332.0,163.8,0.06,13.1,3.2,,,,Upper Plate,
2,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,8.56,44.3017,-124.318,...,316.0,131.1,0.5,35.4,22.2,,,,Upper Plate,
3,1.0,20110730000000.0,2011.0,7.0,26.0,7.0,31.0,2.17,48.2635,-124.9298,...,205.0,44.4,0.77,3.5,6.4,,,,Upper Plate,
4,1.0,20110730000000.0,2011.0,7.0,26.0,9.0,50.0,27.63,48.3032,-124.9157,...,199.0,46.1,0.94,4.0,6.9,,,,Upper Plate,T


In [402]:
# Convert the TSTRING column (digits laid out as YYYYMMDDhhmmss) to UTC datetimes.
# TSTRING is read from the CSV as a float column, so the text form of a value ends in
# ".0" and does not match the format below. Render each value as plain integer digits
# first; missing values are passed through as None and become NaT.
tstring_digits = events_morton['TSTRING'].map(
    lambda value: None if pd.isna(value) else str(int(value))
)
events_morton['datetime'] = pd.to_datetime(tstring_digits, format='%Y%m%d%H%M%S', utc=True)

In [588]:
# Append the origin times of the Morton catalog in morton_list and the matching events of
# my catalog in morton_common_list (one row of my catalog per matched event)
morton_list = []
morton_common_list = []
# Half-width of the time window inside which two origin times count as the same event
half_window = pd.Timedelta(10, "seconds")

# Iterate over the values so the loop does not depend on the frame having a 0..n-1 index
for origin_datetime in events_morton['datetime']:
    t1 = origin_datetime - half_window
    t2 = origin_datetime + half_window

    # Rows of my catalog whose event time lies strictly inside the window, one row per event
    assignments_morton_time = mycatalog.loc[(mycatalog['datetime'] > t1) & (mycatalog['datetime'] < t2)]
    assignments_morton_time = assignments_morton_time.drop_duplicates('event_idx', keep = 'first')

    morton_list.append(origin_datetime)
    morton_common_list.append(assignments_morton_time)

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
morton_common_list = pd.concat(morton_common_list) if morton_common_list else mycatalog.iloc[0:0]
# Morton's catalog lists events only a second or two apart, so several of them can select
# the same row of my catalog; keep every selected row only once (by index label)
morton_common_list = morton_common_list.loc[~morton_common_list.index.duplicated(keep = 'first')]

In [589]:
# Display the events of my catalog whose time falls within the matching window of an
# event in Morton's catalog (one row, the first pick row, per matched event)
morton_common_list

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime
4,4,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,12,0.377781,FS09B,P,1350182000.0,2012-10-14 02:39:44.924554+00:00
9,9,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1456,0.293201,FS09B,S,1350184000.0,2012-10-14 03:08:04.522444+00:00


### Save the matched events from my catalog

In [601]:
# Write the events matched against Morton's catalog to the workflow folder
# (the matches from the USGS and Stone catalogs are not written by this cell)
matched_events_path = '../workflow/matched_events.csv'
morton_common_list.to_csv(matched_events_path)