Notebook for matching the events in our catalog (with locations updated after cross-correlation) against the ANSS and Morton (2023) catalogs

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import obspy
from tqdm import tqdm

from obspy.clients.fdsn import Client
from obspy.clients.fdsn import Client as FDSNClient

from mpl_toolkits.basemap import Basemap


from pnwstore.mseed import WaveformClient
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from obspy import UTCDateTime

from tqdm import tqdm
from obspy.geodetics import locations2degrees, degrees2kilometers

# Put the directory one level above the notebook's working directory on the
# import path so the local helper modules imported below can be resolved.
notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# from plot_utils import *
from qc_utils import * 

import datetime


## Morton Catalog

In [2]:
# Load the Morton catalog and parse its compact YYYYMMDDHHMMSS stamps
# (TSTRING) into timezone-aware UTC datetimes.
events_morton = pd.read_csv('/wd1/hbito_data/data/ds01.csv')
events_morton['datetime'] = pd.to_datetime(events_morton['TSTRING'], format='%Y%m%d%H%M%S', utc=True)

# Study period (exclusive bounds). t1/t2 are module-level on purpose: the
# ANSS cell below filters with the same window.
t1 = pd.Timestamp('2010-1-1 00:00:00.000000+0000', tz='UTC')
t2 = pd.Timestamp('2015-12-31 23:59:59.999999+0000', tz='UTC')

# Keep only the Morton events whose origin time falls strictly inside (t1, t2).
morton_times = events_morton['datetime']
events_morton = events_morton.loc[morton_times.gt(t1) & morton_times.lt(t2)]

events_morton.head()

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?,datetime
0,1,20110730000000.0,2011,7,26,1,2,7.37,47.3217,-123.2708,...,27.4,0.19,0.8,1.2,,,,Interface,Catalog,2011-07-26 01:02:07+00:00
1,1,20110730000000.0,2011,7,26,1,2,7.72,44.2888,-124.334,...,163.8,0.06,13.1,3.2,,,,Upper Plate,,2011-07-26 01:02:07+00:00
2,1,20110730000000.0,2011,7,26,1,2,8.56,44.3017,-124.318,...,131.1,0.5,35.4,22.2,,,,Upper Plate,,2011-07-26 01:02:08+00:00
3,1,20110730000000.0,2011,7,26,7,31,2.17,48.2635,-124.9298,...,44.4,0.77,3.5,6.4,,,,Upper Plate,,2011-07-26 07:31:02+00:00
4,1,20110730000000.0,2011,7,26,9,50,27.63,48.3032,-124.9157,...,46.1,0.94,4.0,6.9,,,,Upper Plate,T,2011-07-26 09:50:27+00:00


In [3]:
# Display the time-filtered Morton catalog (rich repr; pandas elides the middle rows).
events_morton

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?,datetime
0,1,2.011073e+13,2011,7,26,1,2,7.37,47.3217,-123.2708,...,27.4,0.19,0.8,1.2,,,,Interface,Catalog,2011-07-26 01:02:07+00:00
1,1,2.011073e+13,2011,7,26,1,2,7.72,44.2888,-124.3340,...,163.8,0.06,13.1,3.2,,,,Upper Plate,,2011-07-26 01:02:07+00:00
2,1,2.011073e+13,2011,7,26,1,2,8.56,44.3017,-124.3180,...,131.1,0.50,35.4,22.2,,,,Upper Plate,,2011-07-26 01:02:08+00:00
3,1,2.011073e+13,2011,7,26,7,31,2.17,48.2635,-124.9298,...,44.4,0.77,3.5,6.4,,,,Upper Plate,,2011-07-26 07:31:02+00:00
4,1,2.011073e+13,2011,7,26,9,50,27.63,48.3032,-124.9157,...,46.1,0.94,4.0,6.9,,,,Upper Plate,T,2011-07-26 09:50:27+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5277,4,2.015101e+13,2015,10,7,8,1,50.83,40.5895,-124.0455,...,5.4,0.14,0.9,0.8,,,,Slab,,2015-10-07 08:01:50+00:00
5278,4,2.015101e+13,2015,10,7,8,7,8.40,40.5380,-123.7217,...,14.9,0.09,3.3,13.5,,,,Upper Plate,,2015-10-07 08:07:08+00:00
5279,4,2.015101e+13,2015,10,7,11,31,26.69,40.5822,-124.0432,...,5.6,0.05,1.7,0.8,,,,Slab,,2015-10-07 11:31:26+00:00
5280,4,2.015101e+13,2015,10,7,18,11,9.52,40.2710,-124.3777,...,7.9,0.19,0.7,0.2,,,,Upper Plate,,2015-10-07 18:11:09+00:00


## ANSS Catalog

In [4]:
# Load the ANSS catalog, parse its ISO-8601 'time' strings into UTC datetimes,
# and keep the events strictly inside the (t1, t2) window defined in the
# Morton cell above.
events_anss = pd.read_csv('/wd1/hbito_data/data/datasets_anss/anss_2010-15.csv')
events_anss['datetime'] = pd.to_datetime(events_anss['time'], format='%Y-%m-%dT%H:%M:%S.%fZ', utc=True)
anss_times = events_anss['datetime']
events_anss = events_anss.loc[anss_times.gt(t1) & anss_times.lt(t2)]
events_anss

Unnamed: 0.1,Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,datetime
0,0,2010-01-01T00:16:49.190Z,48.203167,-121.676833,2.948,2.20,md,16.0,79.0,0.167900,...,"8 km SW of Darrington, Washington",earthquake,1.004,12.60,0.070,9.0,reviewed,uw,uw,2010-01-01 00:16:49.190000+00:00
1,1,2010-01-01T01:09:53.550Z,38.755167,-122.717167,1.604,0.20,md,9.0,67.0,0.015320,...,"7 km S of Cobb, California",earthquake,0.500,1.19,0.175,4.0,reviewed,nc,nc,2010-01-01 01:09:53.550000+00:00
2,2,2010-01-01T01:28:40.410Z,38.824167,-122.814667,1.479,0.29,md,15.0,90.0,0.003604,...,"7 km W of Cobb, California",earthquake,0.250,0.33,0.273,4.0,reviewed,nc,nc,2010-01-01 01:28:40.410000+00:00
3,3,2010-01-01T01:28:44.590Z,38.825500,-122.815333,1.859,0.65,md,7.0,106.0,0.004505,...,"8 km W of Cobb, California",earthquake,1.220,1.78,,1.0,reviewed,nc,nc,2010-01-01 01:28:44.590000+00:00
4,4,2010-01-01T01:45:21.220Z,38.813332,-122.785500,0.739,0.97,md,15.0,56.0,0.009009,...,"5 km W of Cobb, California",earthquake,0.200,0.49,0.120,16.0,automatic,nc,nc,2010-01-01 01:45:21.220000+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151785,151785,2015-12-31T22:15:46.650Z,38.837502,-122.825333,1.450,0.18,md,6.0,180.0,0.008108,...,"9 km W of Cobb, California",earthquake,0.660,1.08,0.110,2.0,automatic,nc,nc,2015-12-31 22:15:46.650000+00:00
151786,151786,2015-12-31T22:18:13.120Z,41.856400,-119.599200,8.700,1.40,ml,6.0,210.1,0.175000,...,"45 km E of Fort Bidwell, California",earthquake,,3.40,0.210,3.0,reviewed,nn,nn,2015-12-31 22:18:13.120000+00:00
151787,151787,2015-12-31T23:19:21.650Z,38.823334,-122.765663,1.680,0.54,md,7.0,99.0,0.008108,...,"3 km W of Cobb, California",earthquake,0.500,1.54,0.030,2.0,automatic,nc,nc,2015-12-31 23:19:21.650000+00:00
151788,151788,2015-12-31T23:22:20.730Z,38.841000,-122.878166,1.730,0.77,md,8.0,95.0,0.007207,...,"12 km ENE of Cloverdale, California",earthquake,0.580,1.02,0.180,3.0,automatic,nc,nc,2015-12-31 23:22:20.730000+00:00


## Our Catalog


In [3]:
# Load our own origin table for the selected region. The first CSV column is
# used as the row index.
region = 'all_regions'
mycatalog = pd.read_csv(f'/wd1/hbito_data/data/datasets_{region}/origin_2010_2015_reloc_cog_ver3_cc_p_4_s_4_rms_2_5.csv',index_col=0)

# 'time' holds epoch seconds; convert each value to a UTC-aware datetime.
origin_times = mycatalog["time"].apply(datetime.datetime.fromtimestamp, tz=datetime.timezone.utc)

mycatalog = (
    mycatalog
    .assign(datetime=pd.to_datetime(origin_times))
    # Use the same coordinate column names as the ANSS catalog.
    .rename(columns={"lat": "latitude", "lon": "longitude"})
    # 'idx' duplicates the origin id; the matching cell keys its sets on it.
    .assign(idx=lambda frame: frame["orid"])
)

NameError: name 'pd' is not defined

In [7]:
# Display our catalog after the datetime / column-name normalisation above.
mycatalog

Unnamed: 0,latitude,longitude,depth,time,orid,nass,p_picks,s_picks,rms,nsphz,gap,algorithm,datetime,idx
7,48.17742,-121.83289,6.1630,1.262305e+09,1,55,25,30,0.985,30.0,197.622556,genie,2010-01-01 00:16:49.343000+00:00,1
62,47.85353,-122.12435,18.2960,1.262330e+09,2,28,10,18,0.784,18.0,179.587425,genie,2010-01-01 07:18:03.837000+00:00,2
90,47.97665,-122.90617,20.9100,1.262336e+09,3,20,10,10,0.465,10.0,153.822127,genie,2010-01-01 08:51:56.196000+00:00,3
110,45.86019,-122.19002,8.4035,1.262362e+09,4,39,20,19,0.657,19.0,238.225383,genie,2010-01-01 16:12:43.926000+00:00,4
149,48.47310,-123.04700,20.6355,1.262370e+09,5,33,16,17,0.429,17.0,62.004725,genie,2010-01-01 18:16:53.111999+00:00,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004153,49.76815,-124.51205,3.5885,1.435086e+09,63877,16,9,7,0.484,7.0,227.532026,genie,2015-06-23 18:55:29.384999+00:00,63877
1004169,40.74956,-124.25301,22.7025,1.435086e+09,63878,37,13,24,0.907,24.0,53.849757,genie,2015-06-23 18:59:51.592999+00:00,63878
1004215,40.57046,-124.29327,18.0040,1.435090e+09,63880,43,18,25,0.756,25.0,77.280783,genie,2015-06-23 19:59:16.246000+00:00,63880
1004270,40.59877,-124.40572,20.4825,1.435097e+09,63882,23,9,14,1.015,14.0,77.202555,genie,2015-06-23 22:11:13.828000+00:00,63882


## Match our catalog against the Morton and ANSS catalogs (run this loop)

In [8]:

# Matching tolerances: a reference event and one of our events are treated as
# the same earthquake when their origin times differ by at most
# `time_threshold` AND their epicentres are at most `dist_threshold` apart.
time_threshold = 120  # in seconds
dist_threshold = 25  # in kilometers


def _match_reference_to_catalog(reference, catalog, lat_col, lon_col,
                                time_threshold, dist_threshold):
    """Match every event of `reference` against `catalog`.

    For each reference event, the catalog rows inside the time window and
    within the distance threshold are collected. If several rows qualify,
    the one closest in origin time is kept.

    Parameters
    ----------
    reference : pandas.DataFrame
        Reference catalog with a 'datetime' column and the coordinate
        columns named by `lat_col` / `lon_col`.
    catalog : pandas.DataFrame
        Catalog to search; needs 'datetime', 'latitude', 'longitude', 'idx'.
    lat_col, lon_col : str
        Names of the latitude / longitude columns in `reference`.
    time_threshold : float
        Half-width of the origin-time window, in seconds.
    dist_threshold : float
        Maximum epicentral separation, in kilometers.

    Returns
    -------
    dict
        'matched_times'            reference origin times that found a match
        'matched_reference_events' the matching reference rows (Series)
        'matched_catalog_events'   the chosen catalog rows (one-row DataFrames)
        'lat', 'lon'               reference coordinates of matched events
        'distances'                reference-to-catalog separation in km
        'matched_indices'          set of catalog 'idx' values that were chosen
        'unmatched_times'          reference origin times without a match
        'unmatched_events'         reference rows without a match
        'n_single', 'n_multiple'   matches with exactly one / several candidates
    """
    window = pd.Timedelta(seconds=time_threshold)
    catalog_times = catalog['datetime']  # hoisted: invariant across iterations

    out = {
        'matched_times': [],
        'matched_reference_events': [],
        'matched_catalog_events': [],
        'lat': [],
        'lon': [],
        'distances': [],
        'matched_indices': set(),
        'unmatched_times': [],
        'unmatched_events': [],
        'n_single': 0,
        'n_multiple': 0,
    }

    for i in tqdm(range(len(reference))):
        ref_event = reference.iloc[i]  # one positional lookup per event
        t11 = ref_event['datetime']
        olat = ref_event[lat_col]
        olon = ref_event[lon_col]

        # Apply the time window first so the great-circle distance is only
        # evaluated for the few rows inside it, not the whole catalog. The
        # selected rows are the same as AND-ing both conditions on the full
        # catalog.
        in_window = catalog.loc[(catalog_times >= t11 - window) & (catalog_times <= t11 + window)]
        if len(in_window) > 0:
            dist_km = degrees2kilometers(
                locations2degrees(olat, olon, in_window['latitude'], in_window['longitude'])
            )
            matched_df = in_window.loc[dist_km <= dist_threshold]
        else:
            matched_df = in_window

        if len(matched_df) == 0:
            out['unmatched_times'].append(t11)
            out['unmatched_events'].append(ref_event)
            continue

        if len(matched_df) == 1:
            out['n_single'] += 1
            best = matched_df
        else:
            # Several candidates: keep the one closest in origin time.
            out['n_multiple'] += 1
            diffs = abs(matched_df['datetime'] - t11)
            best = matched_df.loc[[diffs.idxmin()]]

        out['matched_times'].append(t11)
        out['matched_reference_events'].append(ref_event)
        out['lat'].append(olat)
        out['lon'].append(olon)
        out['matched_catalog_events'].append(best)
        out['matched_indices'].update(best['idx'])
        out['distances'].append(
            degrees2kilometers(
                locations2degrees(olat, olon, best['latitude'].values[0], best['longitude'].values[0])
            )
        )

    return out


# NOTE(review): matching is not one-to-one. Nothing prevents two reference
# events from selecting the same row of mycatalog, so the per-event lists can
# be longer than the corresponding `matched_indices_*` set.

# --- Morton catalog vs. our catalog -----------------------------------------
morton_match = _match_reference_to_catalog(
    events_morton, mycatalog, 'LAT', 'LON', time_threshold, dist_threshold
)
matched_times_morton2mycatalog = morton_match['matched_times']
matched_events_morton2mycatalog = morton_match['matched_reference_events']
matched_events_mycatalog2morton = morton_match['matched_catalog_events']
lat_morton2mycatalog = morton_match['lat']
lon_morton2mycatalog = morton_match['lon']
distances_morton2mycatalog = morton_match['distances']
matched_indices_morton = morton_match['matched_indices']
unmatched_times_morton2mycatalog = morton_match['unmatched_times']
unmatched_events_morton2mycatalog = morton_match['unmatched_events']
count_c = morton_match['n_single']
count_d = morton_match['n_multiple']

# All events in mycatalog not matched with Morton's catalog are unmatched
unmatched_indices_morton = set(mycatalog.idx) - matched_indices_morton

# --- ANSS catalog vs. our catalog -------------------------------------------
anss_match = _match_reference_to_catalog(
    events_anss, mycatalog, 'latitude', 'longitude', time_threshold, dist_threshold
)
matched_times_anss2mycatalog = anss_match['matched_times']
matched_events_anss2mycatalog = anss_match['matched_reference_events']
matched_events_mycatalog2anss = anss_match['matched_catalog_events']
lat_anss2mycatalog = anss_match['lat']
lon_anss2mycatalog = anss_match['lon']
distances_anss2mycatalog = anss_match['distances']
matched_indices_anss = anss_match['matched_indices']
unmatched_times_anss2mycatalog = anss_match['unmatched_times']
unmatched_events_anss2mycatalog = anss_match['unmatched_events']
count_a = anss_match['n_single']
count_b = anss_match['n_multiple']

# All events in mycatalog not matched with ANSS catalog are unmatched
unmatched_indices_anss = set(mycatalog.idx) - matched_indices_anss

# Events of ours that appear in neither reference catalog ("new events").
unmatched_indices_morton_and_anss = unmatched_indices_morton.intersection(unmatched_indices_anss)
print(len(mycatalog), len(unmatched_indices_morton_and_anss))

unmatched_events_mycatalog2morton_and_anss = mycatalog.loc[mycatalog['idx'].isin(list(unmatched_indices_morton_and_anss))]

print(f"length of mycatalog: {len(mycatalog)}")
print(f"length of events_morton: {len(events_morton)}")
print(f"length of events_anss: {len(events_anss)}")
print(f"matched_indices_morton: {len(matched_indices_morton)}")
print(f"matched_indices_anss: {len(matched_indices_anss)}")
print(f"unmatched_indices_morton: {len(unmatched_indices_morton)}")
print(f"unmatched_indices_anss: {len(unmatched_indices_anss)}")
print(f"unmatched_indices_morton_and_anss (new events): {len(unmatched_indices_morton_and_anss)}")

  0%|          | 0/5282 [00:00<?, ?it/s]100%|██████████| 5282/5282 [00:31<00:00, 166.33it/s]
100%|██████████| 151790/151790 [15:03<00:00, 167.95it/s]

31020 20471
length of mycatalog: 31020
length of events_morton: 5282
length of events_anss: 151790
matched_indices_morton: 3085
matched_indices_anss: 8324
unmatched_indices_morton: 27935
unmatched_indices_anss: 22696
unmatched_indices_morton_and_anss (new events): 20471





In [None]:
def _to_table(items, columns):
    """Turn the per-event results collected by the matching loop into one DataFrame.

    Parameters
    ----------
    items : list or pandas.DataFrame
        Either a list of one-row DataFrames, a list of row Series, or a
        DataFrame produced by an earlier run of this cell.
    columns : index-like
        Column labels to use for the empty table when `items` is empty.

    Returns
    -------
    pandas.DataFrame
        Table with a fresh 0..n-1 index. An empty table with `columns` is
        returned when nothing was collected.
    """
    if isinstance(items, pd.DataFrame):
        # Already converted: re-running the cell must not fail, because
        # pd.concat rejects a bare DataFrame.
        return items
    if len(items) == 0:
        # Keep the result a DataFrame so the column lookups and .to_csv
        # calls that follow still work when there were no matches.
        return pd.DataFrame(columns=columns)
    if isinstance(items[0], pd.DataFrame):
        return pd.concat(items).reset_index(drop=True)
    return pd.DataFrame(items).reset_index(drop=True)


# --- Morton ------------------------------------------------------------------
matched_events_mycatalog2morton = _to_table(matched_events_mycatalog2morton, mycatalog.columns)
matched_times_mycatalog2morton = matched_events_mycatalog2morton['datetime']
lat_mycatalog2morton = matched_events_mycatalog2morton['latitude']
lon_mycatalog2morton = matched_events_mycatalog2morton['longitude']

matched_events_morton2mycatalog = _to_table(matched_events_morton2mycatalog, events_morton.columns)
unmatched_events_morton2mycatalog = _to_table(unmatched_events_morton2mycatalog, events_morton.columns)

# --- ANSS --------------------------------------------------------------------
matched_events_mycatalog2anss = _to_table(matched_events_mycatalog2anss, mycatalog.columns)
matched_times_mycatalog2anss = matched_events_mycatalog2anss['datetime']
lat_mycatalog2anss = matched_events_mycatalog2anss['latitude']
lon_mycatalog2anss = matched_events_mycatalog2anss['longitude']

matched_events_anss2mycatalog = _to_table(matched_events_anss2mycatalog, events_anss.columns)
matched_times_anss2mycatalog = matched_events_anss2mycatalog['datetime']
unmatched_events_anss2mycatalog = _to_table(unmatched_events_anss2mycatalog, events_anss.columns)

# unmatched_events_mycatalog2morton_and_anss is already a DataFrame (a slice
# of mycatalog built in the matching cell), so it needs no conversion here.

print(f"length of mycatalog:{len(mycatalog)}")
print(f"length of events_morton:{len(events_morton)}")
print(f"length of events_anss:{len(events_anss)}")
print(f"matched_events_mycatalog2morton:{len(matched_events_mycatalog2morton)}")
print(f"matched_events_morton2mycatalog:{len(matched_events_morton2mycatalog)}")
print(f"unmatched_events_anss2mycatalog:{len(unmatched_events_anss2mycatalog)}")
print(f"unmatched_events_morton2mycatalog:{len(unmatched_events_morton2mycatalog)}")
print(f"unmatched_events_mycatalog2morton_and_anss (new events):{len(unmatched_events_mycatalog2morton_and_anss)}")
print(f"matched_events_mycatalog2anss:{len(matched_events_mycatalog2anss)}")
print(f"matched_events_anss2mycatalog:{len(matched_events_anss2mycatalog)}")

length of mycatalog:31020
length of events_morton:5282
length of events_anss:151790
matched_events_mycatalog2morton:3099
matched_events_morton2mycatalog:3099
unmatched_events_anss2mycatalog:143343
unmatched_events_morton2mycatalog:2183
unmatched_events_mycatalog2morton_and_anss (new events):20471
matched_events_mycatalog2anss:8447
matched_events_anss2mycatalog:8447


In [23]:
# Write the seven match / miss tables to CSV, one file per table.
# NOTE: despite its name, `year` holds the label that selects the
# datasets_<label> output folder.
year = 'all_regions'

# (file-name stem, table) pairs, in the order the files are written.
tables_to_save = [
    ('matched_events_with_morton_mycatalog', matched_events_mycatalog2morton),
    ('matched_events_with_mycatalog_anss', matched_events_anss2mycatalog),
    ('matched_events_with_mycatalog_morton', matched_events_morton2mycatalog),
    ('new_events', unmatched_events_mycatalog2morton_and_anss),
    ('matched_events_with_anss_mycatalog', matched_events_mycatalog2anss),
    ('missing_events_from_mycatalog_morton', unmatched_events_morton2mycatalog),
    ('missing_events_from_mycatalog_anss', unmatched_events_anss2mycatalog),
]

for stem, table in tables_to_save:
    table.to_csv(f'/wd1/hbito_data/data/datasets_{year}/{stem}_reloc_cog_ver3_cc_p_4_s_4_rms_2_5.csv')
