## Match the events found by Pyocto with other catalogs

Get the events from the USGS, Stone and Morton's papers and compare with the events from our association


### Comparing with the USGS catalog

In [244]:
import pandas as pd
import numpy as np
from obspy.clients.fdsn import Client
from pnwstore.mseed import WaveformClient
from datetime import datetime

In [245]:
# Load all the pick assignments 
all_pick_assignments = pd.read_csv('all_pick_assignments.csv')

In [246]:
# Load the USGS catalog
client = Client("USGS")
client_waveforms = WaveformClient()
events_USGS = client.get_events(starttime = UTCDateTime("2012-10-14T00:00:00.000000"),
                  endtime   = UTCDateTime("2012-10-14T23:59:59.999999"),
                  minlatitude = 40, maxlatitude = 44, 
                  minlongitude = -125, maxlongitude = -120)
events_USGS

2 Event(s) in Catalog:
2012-10-14T03:08:01.290000Z | +40.721, -124.210 | 1.74 md | manual
2012-10-14T02:39:45.510000Z | +40.301, -124.416 | 1.49 md | manual

In [247]:
# Convert the time series in all_pick_assignments to 

all_pick_assignments['time'] = pd.to_datetime(all_pick_assignments['time'])

In [248]:
# Convert the 
USGS_pick_time1 = events_USGS[0].origins[0].time.datetime
USGS_pick_time2 = events_USGS[1].origins[0].time.datetime

In [249]:
USGS_pick_timestamp1 = pd.to_datetime(USGS_pick_time1, utc=True)
USGS_pick_timestamp2 = pd.to_datetime(USGS_pick_time2, utc=True)

In [250]:
assignments_USGS_time1 = all_pick_assignments.loc[(all_pick_assignments['time'] > USGS_pick_timestamp1 - pd.Timedelta(5, "seconds")) & (all_pick_assignments['time'] < USGS_pick_timestamp1 + pd.Timedelta(5, "seconds")) ]
assignments_USGS_time1

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick
9,9,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1456,0.293201,FS09B,S,1350184000.0
10,10,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,192,-1.275212,G10B,P,1350184000.0
11,11,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1599,0.826728,G10B,S,1350184000.0
12,12,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,288,-0.841498,G19B,P,1350184000.0
13,13,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1630,0.238312,G19B,S,1350184000.0
14,14,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,1256,0.128741,M14B,P,1350184000.0
15,15,2,2012-10-14 03:08:04.522444+00:00,12.259024,-107.072404,49.609375,7,40.785806,-124.604763,49.609375,2,2002,0.629728,M14B,S,1350184000.0


In [251]:
assignments_USGS_time2 = all_pick_assignments.loc[(all_pick_assignments['time'] > USGS_pick_timestamp2 - pd.Timedelta(5, "seconds")) & (all_pick_assignments['time'] < USGS_pick_timestamp2 + pd.Timedelta(5, "seconds")) ]
assignments_USGS_time2

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick
4,4,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,12,0.377781,FS09B,P,1350182000.0
5,5,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,1455,-0.171493,FS09B,S,1350182000.0
6,6,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,190,-0.43694,G10B,P,1350182000.0
7,7,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,1597,0.220285,G10B,S,1350182000.0
8,8,1,2012-10-14 02:39:44.924554+00:00,26.269336,-128.412984,40.234375,5,40.593306,-124.439672,40.234375,1,286,0.010367,G19B,P,1350182000.0


In [252]:
type(USGS_pick_timestamp1)

pandas._libs.tslibs.timestamps.Timestamp

In [253]:
type(USGS_pick_time1)

datetime.datetime

### Comparing to Stone's Catalog

In [268]:
# Load Stone's catalog
events_cascadia = pd.read_csv('jgrb52524-sup-0002-2017jb014966-ds01.csv')
events_jdf = pd.read_csv('jgrb52524-sup-0003-2017jb014966-ds02.csv')

In [200]:
events_cascadia[0:5]

Unnamed: 0,yyyy/mm/dd,hh:MM:ss.ff,Latitude,Longitude,Depth (km),Magnitude,# Nonzero Phase Arrivals,RMS (s),Max Azimuthal Gap,Distance to nearest station (km),horizontal med. st. err.,vertical med. st. err.,depth fixed? (1=yes),deployment year,Land Network ID (CNSN=1),Land Network,Notes,time
0,'2011/10/21','10:58:59.67',47.869,-124.261333,35.9,1.1,13,0.26,142,12.3,1.2,1.3,0,1,0,'-',,'2011/10/21''10:58:59.67'
1,'2011/11/16','20:10:51.77',48.7755,-124.967667,21.03,2.44,12,0.57,201,45.3,3.8,23.3,0,1,10830998,'PNSN',,'2011/11/16''20:10:51.77'
2,'2011/11/28','00:27:12.78',44.4325,-124.501167,22.1,1.65,20,0.28,95,10.6,0.6,0.7,0,1,0,'-',,'2011/11/28''00:27:12.78'
3,'2011/12/05','14:39:07.32',48.008,-124.31,33.67,2.67,6,0.07,154,10.5,1.6,1.2,0,1,0,'-',,'2011/12/05''14:39:07.32'
4,'2011/12/20','00:00:21.16',44.672333,-124.292,18.41,1.93,23,0.4,106,20.2,0.5,0.6,0,1,10832893,'PNSN',,'2011/12/20''00:00:21.16'


In [269]:
events_jdf[0:5]

Unnamed: 0,Time (epoch),Longitude,Latitude,Depth (km),RMS (s),CI Year
0,1319372776,-128.5916,44.1944,25.0802,0.8778,1
1,1319592376,-126.6822,43.567,0.0,0.5846,1
2,1320368639,-127.3553,43.8455,0.0,0.4044,1
3,1320610871,-127.954,43.8234,0.0,0.441,1
4,1320616859,-127.4139,44.4245,7.4284,0.3948,1


In [270]:
#
events_jdf['datetime'] = events_jdf.apply(lambda a: datetime.fromtimestamp(a['Time (epoch)']), axis = 1)


In [271]:
events_jdf['datetime'] = pd.to_datetime(events_jdf['datetime'])

In [262]:
# Check the type of the time object
pd.to_datetime(events_jdf['datetime'][0]))

pandas._libs.tslibs.timestamps.Timestamp

In [265]:
# Define the time just for testing the next cell
time = pd.to_datetime('2012-10-14 00:00:00.000000Z', utc=True)

In [272]:
events_jdf_20121014 = events_jdf.loc[events_jdf['datetime'] <  pd.to_datetime("10/14/2012  9:09:37 PM", utc=True)]

TypeError: Invalid comparison between dtype=datetime64[ns] and Timestamp

In [None]:
events_cascadia_20121014 = events_cascadia.loc[(pd.to_datetime(events_cascadia['time']) > pd.to_datetime('2012-10-14 00:00:00', utc=True)) & (pd.to_datetime(events_cascadia['time']) < pd.to_datetime('2012-10-15 00:00:00', utc=True))]

In [223]:
events_cascadia['time']= events_cascadia['yyyy/mm/dd'].str.cat(events_cascadia['hh:MM:ss.ff'])

In [211]:
events_cascadia_20121014 = events_cascadia.loc[(pd.to_datetime(events_cascadia['time']) > pd.to_datetime('2012-10-14 00:00:00', utc=True)) & (pd.to_datetime(events_cascadia['time']) < pd.to_datetime('2012-10-15 00:00:00', utc=True))]

ParserError: hour must be in 0..23: '1902/01/31''46:57:50.28' present at position 131

In [213]:
events_cascadia_20121014 = events_cascadia.loc[(pd.to_datetime(events_cascadia['time']) > pd.to_datetime('2012-10-14 00:00:00', utc=True))]

ParserError: hour must be in 0..23: '1902/01/31''46:57:50.28' present at position 131

In [218]:
pd.to_datetime(events_cascadia['time']) 

ParserError: hour must be in 0..23: '1902/01/31''46:57:50.28' present at position 131

In [224]:
events_cascadia['time'][131]

"'1902/01/31''46:57:50.28'"

In [196]:
events_jdf['Time (epoch)'] = pd.to_datetime(events_jdf['Time (epoch)'])
events_jdf['Time (epoch)']

0     1970-01-01 00:00:01.319372776
1     1970-01-01 00:00:01.319592376
2     1970-01-01 00:00:01.320368639
3     1970-01-01 00:00:01.320610871
4     1970-01-01 00:00:01.320616859
                   ...             
435   1970-01-01 00:00:01.411301721
436   1970-01-01 00:00:01.415354286
437   1970-01-01 00:00:01.415608558
438   1970-01-01 00:00:01.412975672
439   1970-01-01 00:00:01.413879766
Name: Time (epoch), Length: 440, dtype: datetime64[ns]

In [209]:
time = pd.to_datetime(events_cascadia['time'][0])

In [210]:
type(time)

pandas._libs.tslibs.timestamps.Timestamp

### Comparing to Morton's Catalog

In [235]:
# Read Morton's catalog
events_morton = pd.read_csv('ds01.csv')

In [236]:
events_morton[0:5]

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,max az gap,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?
0,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,7.37,47.3217,-123.2708,...,166.0,27.4,0.19,0.8,1.2,,,,Interface,Catalog
1,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,7.72,44.2888,-124.334,...,332.0,163.8,0.06,13.1,3.2,,,,Upper Plate,
2,1.0,20110730000000.0,2011.0,7.0,26.0,1.0,2.0,8.56,44.3017,-124.318,...,316.0,131.1,0.5,35.4,22.2,,,,Upper Plate,
3,1.0,20110730000000.0,2011.0,7.0,26.0,7.0,31.0,2.17,48.2635,-124.9298,...,205.0,44.4,0.77,3.5,6.4,,,,Upper Plate,
4,1.0,20110730000000.0,2011.0,7.0,26.0,9.0,50.0,27.63,48.3032,-124.9157,...,199.0,46.1,0.94,4.0,6.9,,,,Upper Plate,T


In [238]:
events_morton['datetime'] = events_morton.apply(lambda a: datetime.fromtimestamp(a['TSTRING']), axis = 1)
events_morton

ValueError: year 639253 is out of range

In [234]:
events_morton

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?,datetime
0,1.0,2.011073e+13,2011.0,7.0,26.0,1.0,2.0,7.37,47.3217,-123.2708,...,27.4,0.19,0.8,1.2,,,,Interface,Catalog,1970-01-01 05:35:10.726010207
1,1.0,2.011073e+13,2011.0,7.0,26.0,1.0,2.0,7.72,44.2888,-124.3340,...,163.8,0.06,13.1,3.2,,,,Upper Plate,,1970-01-01 05:35:10.726010207
2,1.0,2.011073e+13,2011.0,7.0,26.0,1.0,2.0,8.56,44.3017,-124.3180,...,131.1,0.50,35.4,22.2,,,,Upper Plate,,1970-01-01 05:35:10.726010208
3,1.0,2.011073e+13,2011.0,7.0,26.0,7.0,31.0,2.17,48.2635,-124.9298,...,44.4,0.77,3.5,6.4,,,,Upper Plate,,1970-01-01 05:35:10.726073102
4,1.0,2.011073e+13,2011.0,7.0,26.0,9.0,50.0,27.63,48.3032,-124.9157,...,46.1,0.94,4.0,6.9,,,,Upper Plate,T,1970-01-01 05:35:10.726095027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5290,,,,,,,,,,,...,,,,,,,,,,NaT
5291,,,,,,,,,,,...,,,,,,,,,,NaT
5292,,,,,,,,,,,...,,,,,,,,,,NaT
5293,,,,,,,,,,,...,,,,,,,,,,NaT
