Notebook for matching the relocated events with catalogs by ANSS and Morton (2023)

In [2]:
import os
import sys
import pandas as pd
import numpy as np
import obspy
from tqdm import tqdm

from obspy.clients.fdsn import Client
from obspy.clients.fdsn import Client as FDSNClient

from mpl_toolkits.basemap import Basemap


from pnwstore.mseed import WaveformClient
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from obspy import UTCDateTime

from tqdm import tqdm
from obspy.geodetics import locations2degrees, degrees2kilometers

notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, '../'))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

import datetime


## Morton Catalog

In [3]:
# Read Morton's catalog
events_morton = pd.read_csv('../data/ds01.csv')
# Convert the TSTRING to datetime
events_morton['datetime'] = pd.to_datetime(events_morton['TSTRING'], format='%Y%m%d%H%M%S', utc=True)
# Get the events in the Morton catalog 
t1 = pd.Timestamp('2010-1-1 00:00:00.000000+0000', tz='UTC')
t2 = pd.Timestamp('2015-12-31 23:59:59.999999+0000', tz='UTC')

events_morton= events_morton.loc[(events_morton['datetime'] > t1) & (events_morton['datetime'] < t2) ]

events_morton.head()

FileNotFoundError: [Errno 2] No such file or directory: '../data/ds01.csv'

In [None]:
events_morton

Unnamed: 0,CI YEAR,TSTRING,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,LAT,LON,...,dist to nearest stn,tt RMS,ERH,ERZ,STRIKE,DIP,RAKE,PLATE DESIGNATION,TEMPLATE EVENT?,datetime
0,1,2.011073e+13,2011,7,26,1,2,7.37,47.3217,-123.2708,...,27.4,0.19,0.8,1.2,,,,Interface,Catalog,2011-07-26 01:02:07+00:00
1,1,2.011073e+13,2011,7,26,1,2,7.72,44.2888,-124.3340,...,163.8,0.06,13.1,3.2,,,,Upper Plate,,2011-07-26 01:02:07+00:00
2,1,2.011073e+13,2011,7,26,1,2,8.56,44.3017,-124.3180,...,131.1,0.50,35.4,22.2,,,,Upper Plate,,2011-07-26 01:02:08+00:00
3,1,2.011073e+13,2011,7,26,7,31,2.17,48.2635,-124.9298,...,44.4,0.77,3.5,6.4,,,,Upper Plate,,2011-07-26 07:31:02+00:00
4,1,2.011073e+13,2011,7,26,9,50,27.63,48.3032,-124.9157,...,46.1,0.94,4.0,6.9,,,,Upper Plate,T,2011-07-26 09:50:27+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5277,4,2.015101e+13,2015,10,7,8,1,50.83,40.5895,-124.0455,...,5.4,0.14,0.9,0.8,,,,Slab,,2015-10-07 08:01:50+00:00
5278,4,2.015101e+13,2015,10,7,8,7,8.40,40.5380,-123.7217,...,14.9,0.09,3.3,13.5,,,,Upper Plate,,2015-10-07 08:07:08+00:00
5279,4,2.015101e+13,2015,10,7,11,31,26.69,40.5822,-124.0432,...,5.6,0.05,1.7,0.8,,,,Slab,,2015-10-07 11:31:26+00:00
5280,4,2.015101e+13,2015,10,7,18,11,9.52,40.2710,-124.3777,...,7.9,0.19,0.7,0.2,,,,Upper Plate,,2015-10-07 18:11:09+00:00


## ANSS Catalog

In [4]:
events_anss = pd.read_csv('/wd1/hbito_data/data/datasets_anss/anss_2010-15.csv')
events_anss['datetime'] = pd.to_datetime(events_anss['time'], format='%Y-%m-%dT%H:%M:%S.%fZ', utc=True)
events_anss= events_anss.loc[(events_anss['datetime'] > t1) & (events_anss['datetime'] < t2) ]
events_anss

NameError: name 't1' is not defined

## Our Catalog

In [5]:
region = 'all_regions'
mycatalog = pd.read_csv(f'/wd1/hbito_data/data/datasets_all_regions/origin_2010_2015_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv',index_col=0)
mycatalog["datetime"] = pd.to_datetime(mycatalog["time"].apply(datetime.datetime.fromtimestamp, tz=datetime.timezone.utc))
mycatalog = mycatalog.rename(columns={"lat": "latitude", "lon": "longitude"})
mycatalog['idx'] = mycatalog['orid']

In [6]:
mycatalog

Unnamed: 0,latitude,longitude,depth,time,orid,nass,p_picks,s_picks,rms,nsphz,gap,algorithm,id_Morton,dist,dt,NonDimDist,datetime,idx
27593,40.46402,-124.63921,21.674,1.350339e+09,27593,33,14,19,2.498,19.0,70.689876,genie,676,0.043925,1.489598e+00,0.012537,2012-10-15 22:03:37.489598+00:00,27593
33313,40.46453,-124.64685,19.221,1.363883e+09,33313,23,9,14,2.497,14.0,104.305083,genie,1239,0.064674,1.564381e+03,13.036509,2013-03-21 16:22:28.381014+00:00,33313
44330,49.75784,-123.42677,4.296,1.395439e+09,44330,10,5,5,2.494,5.0,273.037707,genie,1821,1.710027,9.866703e+03,82.222555,2014-03-21 21:53:00.703153+00:00,44330
42581,49.04914,-122.09718,13.249,1.393710e+09,42581,26,16,10,2.491,10.0,254.509741,genie,1805,1.868034,6.251413e+04,520.951049,2014-03-01 21:39:46.874711+00:00,42581
36355,40.39732,-124.58839,16.076,1.371875e+09,36355,22,9,13,2.491,13.0,206.224926,genie,1556,0.607235,5.253552e+04,437.796015,2013-06-22 04:16:06.478228+00:00,36355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1542,46.14740,-122.14032,44.614,1.266069e+09,1542,10,5,5,0.044,5.0,123.524895,genie,0,1.406842,4.557335e+07,379777.923383,2010-02-13 13:46:16.194077+00:00,1542
8390,48.79538,-123.26321,-0.782,1.293723e+09,8390,10,5,5,0.043,5.0,159.143198,genie,0,1.473689,1.791899e+07,149324.956402,2010-12-30 15:32:12.231767+00:00,8390
8590,48.57995,-123.03222,13.812,1.294554e+09,8590,10,5,5,0.042,5.0,230.479217,genie,0,1.268354,1.708779e+07,142398.279265,2011-01-09 06:25:33.488215+00:00,8590
41383,46.32146,-122.37012,13.638,1.390093e+09,41383,10,5,5,0.036,5.0,260.537377,genie,1768,2.804629,1.094893e+04,91.241118,2014-01-19 00:59:04.074082+00:00,41383


## Run this loop

In [40]:

# Initialize lists to store distances
distances_morton2mycatalog = []
distances_anss2mycatalog = []

# Initialize lists to store matched and unmatched events
matched_events_mycatalog2morton = []
matched_times_morton2mycatalog = []
matched_events_morton2mycatalog = []
unmatched_times_morton2mycatalog = []
unmatched_events_morton2mycatalog = []
unmatched_events_mycatalog2morton_and_anss = []
matched_events_anss2mycatalog = []
matched_events_mycatalog2anss = []
unmatched_times_anss2mycatalog = []
unmatched_events_anss2mycatalog = []
matched_times_anss2mycatalog = []

time_threshold = 120  # in seconds
dist_threshold = 25  # in kilometers

lat_morton2mycatalog = []
lon_morton2mycatalog = []

lat_anss2mycatalog = []
lon_anss2mycatalog = []

# A set to keep track of matched indices in mycatalog
matched_indices_morton = set()
matched_indices_anss = set()
count_c = 0
count_d = 0

# Loop over events in Morton's catalog
for i in range(len(events_morton)):
    t11 = events_morton.iloc[i]['datetime']
    olat = events_morton.iloc[i]['LAT']
    olon = events_morton.iloc[i]['LON']

    condition = (mycatalog['datetime'] >= t11 - pd.Timedelta(seconds=time_threshold)) & \
                (mycatalog['datetime'] <= t11 + pd.Timedelta(seconds=time_threshold)) & \
                (degrees2kilometers(locations2degrees(olat, olon, mycatalog['latitude'], mycatalog['longitude'])) <= dist_threshold)
    matched_df = mycatalog.loc[condition]

    if len(matched_df) == 1:
        print('matched idx of Morton: ', i)
        count_c += 1
        matched_times_morton2mycatalog.append(t11)
        matched_events_morton2mycatalog.append(events_morton.iloc[i])
        lat_morton2mycatalog.append(events_morton.iloc[i]['LAT'])
        lon_morton2mycatalog.append(events_morton.iloc[i]['LON'])
        matched_events_mycatalog2morton.append(matched_df)
        matched_indices_morton.update(matched_df['idx'])
        distance = degrees2kilometers(locations2degrees(olat, olon, matched_df['latitude'].values[0], matched_df['longitude'].values[0]))
        distances_morton2mycatalog.append(distance)
    elif len(matched_df) > 1:
        print('matched idx of Morton: ', i)
        count_d += 1
        diffs = abs(matched_df['datetime'] - t11)
        closest_index = diffs.idxmin()
        closest_event = matched_df.loc[[closest_index]]
        matched_times_morton2mycatalog.append(t11)
        matched_events_morton2mycatalog.append(events_morton.iloc[i])
        lat_morton2mycatalog.append(events_morton.iloc[i]['LAT'])
        lon_morton2mycatalog.append(events_morton.iloc[i]['LON'])
        matched_events_mycatalog2morton.append(closest_event)
        matched_indices_morton.update(closest_event['idx'])
        distance = degrees2kilometers(locations2degrees(olat, olon, closest_event['latitude'].values[0], closest_event['longitude'].values[0]))
        distances_morton2mycatalog.append(distance)
    else:
        unmatched_times_morton2mycatalog.append(t11)
        unmatched_events_morton2mycatalog.append(events_morton.iloc[i])

# All events in mycatalog not matched with Morton's catalog are unmatched
unmatched_indices_morton = set(mycatalog.idx) - matched_indices_morton

count_a = 0
count_b = 0

# Loop over events in ANSS catalog
for i in range(len(events_anss)):
    t11 = events_anss.iloc[i]['datetime']
    olat = events_anss.iloc[i]['latitude']
    olon = events_anss.iloc[i]['longitude']

    condition = (mycatalog['datetime'] >= t11 - pd.Timedelta(seconds=time_threshold)) & \
                (mycatalog['datetime'] <= t11 + pd.Timedelta(seconds=time_threshold)) & \
                (degrees2kilometers(locations2degrees(olat, olon, mycatalog['latitude'], mycatalog['longitude'])) <= dist_threshold)
    matched_df = mycatalog.loc[condition]

    if len(matched_df) == 1:
        print('matched idx of ANSS: ', i)
        count_a += 1
        matched_times_anss2mycatalog.append(t11)
        matched_events_anss2mycatalog.append(events_anss.iloc[i])
        lat_anss2mycatalog.append(events_anss.iloc[i]['latitude'])
        lon_anss2mycatalog.append(events_anss.iloc[i]['longitude'])
        matched_events_mycatalog2anss.append(matched_df)
        matched_indices_anss.update(matched_df['idx'])
        distance = degrees2kilometers(locations2degrees(olat, olon, matched_df['latitude'].values[0], matched_df['longitude'].values[0]))
        distances_anss2mycatalog.append(distance)
    elif len(matched_df) > 1:
        print('matched idx of ANSS: ', i)
        count_b += 1
        diffs = abs(matched_df['datetime'] - t11)
        closest_index = diffs.idxmin()
        closest_event = matched_df.loc[[closest_index]]
        matched_times_anss2mycatalog.append(t11)
        matched_events_anss2mycatalog.append(events_anss.iloc[i])
        lat_anss2mycatalog.append(events_anss.iloc[i]['latitude'])
        lon_anss2mycatalog.append(events_anss.iloc[i]['longitude'])
        matched_events_mycatalog2anss.append(closest_event)
        matched_indices_anss.update(closest_event['idx'])
        distance = degrees2kilometers(locations2degrees(olat, olon, closest_event['latitude'].values[0], closest_event['longitude'].values[0]))
        distances_anss2mycatalog.append(distance)
    else:
        unmatched_times_anss2mycatalog.append(t11)
        unmatched_events_anss2mycatalog.append(events_anss.iloc[i])

# All events in mycatalog not matched with ANSS catalog are unmatched
unmatched_indices_anss = set(mycatalog.idx) - matched_indices_anss

unmatched_indices_morton_and_anss = unmatched_indices_morton.intersection(unmatched_indices_anss)
print(len(mycatalog), len(unmatched_indices_morton_and_anss))

unmatched_events_mycatalog2morton_and_anss = mycatalog.loc[mycatalog['idx'].isin(list(unmatched_indices_morton_and_anss))]

print(f"length of mycatalog: {len(mycatalog)}")
print(f"length of events_morton: {len(events_morton)}")
print(f"length of events_anss: {len(events_anss)}")
print(f"matched_indices_morton: {len(matched_indices_morton)}")
print(f"matched_indices_anss: {len(matched_indices_anss)}")
print(f"unmatched_indices_morton: {len(unmatched_indices_morton)}")
print(f"unmatched_indices_anss: {len(unmatched_indices_anss)}")
print(f"unmatched_indices_morton_and_anss (new events): {len(unmatched_indices_morton_and_anss)}")

matched idx of Morton:  0
matched idx of Morton:  4
matched idx of Morton:  5
matched idx of Morton:  13
matched idx of Morton:  14
matched idx of Morton:  15
matched idx of Morton:  16
matched idx of Morton:  17
matched idx of Morton:  18
matched idx of Morton:  19
matched idx of Morton:  20
matched idx of Morton:  21
matched idx of Morton:  22
matched idx of Morton:  23
matched idx of Morton:  24
matched idx of Morton:  26
matched idx of Morton:  28
matched idx of Morton:  31
matched idx of Morton:  32
matched idx of Morton:  33
matched idx of Morton:  35
matched idx of Morton:  37
matched idx of Morton:  38
matched idx of Morton:  39
matched idx of Morton:  41
matched idx of Morton:  43
matched idx of Morton:  44
matched idx of Morton:  45
matched idx of Morton:  48
matched idx of Morton:  50
matched idx of Morton:  54
matched idx of Morton:  55
matched idx of Morton:  56
matched idx of Morton:  57
matched idx of Morton:  59
matched idx of Morton:  61
matched idx of Morton:  62
matc

In [41]:
# new concat code
# Concatenate and clean up dataframes
if len(matched_events_mycatalog2morton)>0:
    matched_events_mycatalog2morton = pd.concat(matched_events_mycatalog2morton).reset_index(drop=True)
    matched_times_mycatalog2morton = matched_events_mycatalog2morton['datetime']
    lat_mycatalog2morton = matched_events_mycatalog2morton['latitude']
    lon_mycatalog2morton = matched_events_mycatalog2morton['longitude']

if len(matched_events_morton2mycatalog)>0:   
    matched_events_morton2mycatalog = pd.DataFrame(matched_events_morton2mycatalog).reset_index(drop=True)
    
if len(unmatched_events_anss2mycatalog)>0:
    unmatched_events_anss2mycatalog = pd.DataFrame(unmatched_events_anss2mycatalog).reset_index(drop=True)
    
if len(unmatched_events_morton2mycatalog)>0:
    unmatched_events_morton2mycatalog = pd.DataFrame(unmatched_events_morton2mycatalog).reset_index(drop=True)

# if len(unmatched_events_mycatalog2morton_and_anss) > 0:
#     unmatched_events_mycatalog2morton_and_anss = unmatched_events_mycatalog2morton_and_anss.reset_index(drop=True)

if len(matched_events_mycatalog2anss)>0:
    matched_events_mycatalog2anss = pd.concat(matched_events_mycatalog2anss).reset_index(drop=True)
    matched_times_mycatalog2anss = matched_events_mycatalog2anss['datetime']
    lat_mycatalog2anss = matched_events_mycatalog2anss['latitude']
    lon_mycatalog2anss = matched_events_mycatalog2anss['longitude']

if len(matched_events_anss2mycatalog)>0:   
    matched_events_anss2mycatalog = pd.DataFrame(matched_events_anss2mycatalog).reset_index(drop=True)
    matched_times_anss2mycatalog = matched_events_anss2mycatalog['datetime']

print(f"length of mycatalog:{len(mycatalog)}")
print(f"length of events_morton:{len(events_morton)}")
print(f"length of events_anss:{len(events_anss)}")
print(f"matched_events_mycatalog2morton:{len(matched_events_mycatalog2morton)}")
print(f"matched_events_morton2mycatalog:{len(matched_events_morton2mycatalog)}")
print(f"unmatched_events_anss2mycatalog:{len(unmatched_events_anss2mycatalog)}")
print(f"unmatched_events_morton2mycatalog:{len(unmatched_events_morton2mycatalog)}")
print(f"unmatched_events_mycatalog2morton_and_anss (new events):{len(unmatched_events_mycatalog2morton_and_anss)}")
print(f"matched_events_mycatalog2anss:{len(matched_events_mycatalog2anss)}")
print(f"matched_events_anss2mycatalog:{len(matched_events_anss2mycatalog)}")

length of mycatalog:31020
length of events_morton:5282
length of events_anss:151790
matched_events_mycatalog2morton:3109
matched_events_morton2mycatalog:3109
unmatched_events_anss2mycatalog:143274
unmatched_events_morton2mycatalog:2173
unmatched_events_mycatalog2morton_and_anss (new events):20396
matched_events_mycatalog2anss:8516
matched_events_anss2mycatalog:8516


In [None]:
# Save these three catalogs to csv files
year = 'all_regions'
matched_events_mycatalog2morton.to_csv(f'/wd1/hbito_data/data/datasets_{year}/matched_events_with_morton_mycatalog_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
matched_events_anss2mycatalog.to_csv(f'/wd1/hbito_data/data/datasets_{year}/matched_events_with_mycatalog_anss_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
matched_events_morton2mycatalog.to_csv(f'/wd1/hbito_data/data/datasets_{year}/matched_events_with_mycatalog_morton_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
unmatched_events_mycatalog2morton_and_anss.to_csv(f'/wd1/hbito_data/data/datasets_{year}/new_events_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
matched_events_mycatalog2anss.to_csv(f'/wd1/hbito_data/data/datasets_{year}/matched_events_with_anss_mycatalog_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
unmatched_events_morton2mycatalog.to_csv(f'/wd1/hbito_data/data/datasets_{year}/missing_events_from_mycatalog_morton_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
unmatched_events_anss2mycatalog.to_csv(f'/wd1/hbito_data/data/datasets_{year}/missing_events_from_mycatalog_anss_reloc_cog_morton_ver3_p_4_s_4_rms_2_5.csv')
