Skip to content

Commit

Permalink
Minor refactoring (#253)
Browse files Browse the repository at this point in the history
* First-cut Event-Correlator Workflow

Initial commit for a draft event-correlator workflow

* Minor cleanups
  • Loading branch information
geojunky committed Jul 5, 2023
1 parent 01f2a8a commit 15be567
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 152 deletions.
153 changes: 1 addition & 152 deletions seismic/xcorqc/correlator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from ordered_set import OrderedSet as set
import numpy as np
from scipy.spatial import cKDTree
import random
import click
import re
Expand All @@ -31,159 +30,10 @@
from obspy.geodetics.base import gps2dist_azimuth

from seismic.xcorqc.xcorqc import IntervalStackXCorr
from seismic.xcorqc.utils import getStationInventory
from seismic.xcorqc.utils import getStationInventory, read_location_preferences, Dataset
from seismic.misc import get_git_revision_hash, rtp2xyz, split_list
from seismic.misc_p import ProgressTracker

class Dataset:
    """A collection of stations read from a FederatedASDFDataSet.

    Station coordinates are converted to cartesian coordinates (km) on a
    spherical earth and indexed in a k-d tree to support nearest-neighbour
    station queries.
    """

    def __init__(self, asdf_file_name, netsta_list='*'):
        """
        :param asdf_file_name: path to a FederatedASDFDataSet source file
        :param netsta_list: space-separated 'NET.STA' names to load, or '*' for all
        """
        self._data_path = asdf_file_name
        self._earth_radius = 6371  # km

        self.fds = FederatedASDFDataSet(asdf_file_name)
        # Gather station metadata
        netsta_list_subset = set(netsta_list.split(' ')) if netsta_list != '*' else netsta_list
        self.netsta_list = []
        self.metadata = defaultdict(list)

        rtps = []
        for netsta in list(self.fds.unique_coordinates.keys()):
            if netsta_list_subset != '*':
                if netsta not in netsta_list_subset:
                    continue
            # end if

            self.netsta_list.append(netsta)
            self.metadata[netsta] = self.fds.unique_coordinates[netsta]

            # spherical coordinates: (radius, colatitude, longitude) in radians
            rtps.append([self._earth_radius,
                         np.radians(90 - self.metadata[netsta][1]),
                         np.radians(self.metadata[netsta][0])])
        # end for

        if len(rtps) == 0:
            # raise explicitly (instead of `assert 0`) so the check survives python -O
            raise AssertionError('No station-pairs found due to missing stations. Aborting..')
        # end if

        rtps = np.array(rtps)
        xyzs = rtp2xyz(rtps[:, 0], rtps[:, 1], rtps[:, 2])

        self._tree = cKDTree(xyzs)
        self._cart_location = defaultdict(list)
        for i, ns in enumerate(self.netsta_list):
            self._cart_location[ns] = xyzs[i, :]
        # end for
    # end func

    def get_closest_stations(self, netsta, other_dataset, nn=1):
        """Return the (up to) nn stations in other_dataset closest to netsta.

        :param netsta: 'NET.STA' name (case-insensitive) present in this dataset
        :param other_dataset: Dataset instance to search within
        :param nn: number of neighbours requested
        :return: list of 'NET.STA' names, nearest first
        """
        assert isinstance(netsta, str), 'station_name must be a string'
        assert isinstance(other_dataset, Dataset), 'other_dataset must be an instance of Dataset'
        netsta = netsta.upper()

        assert netsta in self.netsta_list, '%s not found' % (netsta)

        _, indices = other_dataset._tree.query(self._cart_location[netsta], nn)

        # For nn=1, cKDTree.query returns numpy scalars rather than arrays; the
        # original isinstance(l, int) test misses numpy integer scalars on
        # Python 3, so normalize to a 1D array unconditionally.
        indices = np.atleast_1d(indices)

        # cKDTree pads missing neighbours with index == len(data); drop them
        indices = indices[indices < len(other_dataset.netsta_list)]

        assert len(indices), 'No stations found..'

        return list(np.array(other_dataset.netsta_list)[indices])
    # end func

    def get_unique_station_pairs(self, other_dataset, nn=1):
        """Return unique station pairs between this dataset and other_dataset.

        Self-pairs and order-swapped duplicates are removed; pairs whose global
        recording epochs do not overlap are culled (gaps are not considered).

        :param other_dataset: Dataset to pair against (may be self)
        :param nn: nearest neighbours per station; -1 pairs with all stations
        :return: list of (ns1, ns2) tuples
        """
        pairs = set()
        for ns1 in self.netsta_list:
            if nn != -1:
                if self == other_dataset:
                    # request one extra neighbour: ns1 is trivially its own nearest
                    ns2list = set(self.get_closest_stations(ns1, other_dataset, nn=nn + 1))
                    if ns1 in ns2list:
                        ns2list.remove(ns1)
                    ns2list = list(ns2list)
                else:
                    ns2list = self.get_closest_stations(ns1, other_dataset, nn=nn)
            else:
                ns2list = other_dataset.netsta_list
            # end if

            for ns2 in ns2list:
                pairs.add((ns1, ns2))
            # end for
        # end for

        # drop self-pairs and order-swapped duplicates
        pairs_subset = set()
        for item in pairs:
            if item[0] == item[1]:
                continue

            dup_item = (item[1], item[0])
            if dup_item not in pairs_subset and item not in pairs_subset:
                pairs_subset.add(item)
            # end if
        # end for

        # cull pairs based on temporal overlap (note: gaps are not considered)
        result_pairs = set()
        range_cache = {}

        def _global_range(fds, ns):
            # cache per-station global (start, end) times; metadata queries are slow
            if ns not in range_cache:
                net, sta = ns.split('.')
                range_cache[ns] = fds.get_global_time_range(net, sta)
            return range_cache[ns]
        # end func

        for ns1, ns2 in pairs_subset:
            st1, et1 = _global_range(self.fds, ns1)
            st2, et2 = _global_range(other_dataset.fds, ns2)

            # intervals [st1, et1] and [st2, et2] overlap iff each starts
            # before the other ends
            if (st1 <= et2) and (st2 <= et1):
                result_pairs.add((ns1, ns2))
            # end if
        # end for

        return list(result_pairs)
    # end func
# end class

def read_location_preferences(location_preferences_fn):
    """Read a two-column text file mapping 'NET.STA' names to preferred
    location codes.

    Blank lines are ignored. Each non-blank line must contain exactly two
    whitespace-separated tokens: a 'NET.STA' name and a location code.

    :param location_preferences_fn: file name, or None/'' for an empty mapping
    :return: defaultdict mapping 'NET.STA' -> location code; missing keys
             yield None
    :raises AssertionError: if a line cannot be parsed
    """
    result = defaultdict(lambda: None)

    if location_preferences_fn:
        # context manager guarantees the file handle is closed (the original
        # open(...).readlines() leaked the handle)
        with open(location_preferences_fn, 'r') as fh:
            pref_list = fh.readlines()

        for pref in pref_list:
            pref = pref.strip()
            if len(pref):
                try:
                    netsta, loc = pref.split()
                    # validates the NET.STA format; values are unused otherwise
                    net, sta = netsta.split('.')

                    result[netsta] = loc
                except Exception as e:
                    # raise explicitly (instead of print + `assert 0`) so bad
                    # input cannot be silently ignored under python -O; chain
                    # the original error for context
                    raise AssertionError('Error parsing: {}'.format(pref)) from e
                # end try
            # end if
        # end for
    # end if
    return result
# end func

def process(data_source1, data_source2, output_path,
interval_seconds, window_seconds, window_overlap, window_buffer_length,
read_ahead_window_seconds, resample_rate=None, taper_length=0.05, nearest_neighbours=1,
Expand Down Expand Up @@ -430,7 +280,6 @@ def evaluate_channels(cha1, cha2):


CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'], show_default=True)

@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument('data-source1',
type=click.Path('r'))
Expand Down
157 changes: 157 additions & 0 deletions seismic/xcorqc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,163 @@
from obspy.geodetics.base import gps2dist_azimuth
from tempfile import SpooledTemporaryFile
from scipy.interpolate import interp1d
from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet
from seismic.misc import rtp2xyz
from seismic.misc import get_git_revision_hash, rtp2xyz, split_list

class Dataset:
    """A collection of stations read from a FederatedASDFDataSet.

    Station coordinates are converted to cartesian coordinates (km) on a
    spherical earth and indexed in a k-d tree to support nearest-neighbour
    station queries.
    """

    def __init__(self, asdf_file_name, netsta_list='*'):
        """
        :param asdf_file_name: path to a FederatedASDFDataSet source file
        :param netsta_list: space-separated 'NET.STA' names to load, or '*' for all
        """
        self._data_path = asdf_file_name
        self._earth_radius = 6371  # km

        self.fds = FederatedASDFDataSet(asdf_file_name)
        # Gather station metadata
        netsta_list_subset = set(netsta_list.split(' ')) if netsta_list != '*' else netsta_list
        self.netsta_list = []
        self.metadata = defaultdict(list)

        rtps = []
        for netsta in list(self.fds.unique_coordinates.keys()):
            if netsta_list_subset != '*':
                if netsta not in netsta_list_subset:
                    continue
            # end if

            self.netsta_list.append(netsta)
            self.metadata[netsta] = self.fds.unique_coordinates[netsta]

            # spherical coordinates: (radius, colatitude, longitude) in radians
            rtps.append([self._earth_radius,
                         np.radians(90 - self.metadata[netsta][1]),
                         np.radians(self.metadata[netsta][0])])
        # end for

        if len(rtps) == 0:
            # raise explicitly (instead of `assert 0`) so the check survives python -O
            raise AssertionError('No station-pairs found due to missing stations. Aborting..')
        # end if

        rtps = np.array(rtps)
        xyzs = rtp2xyz(rtps[:, 0], rtps[:, 1], rtps[:, 2])

        self._tree = cKDTree(xyzs)
        self._cart_location = defaultdict(list)
        for i, ns in enumerate(self.netsta_list):
            self._cart_location[ns] = xyzs[i, :]
        # end for
    # end func

    def get_closest_stations(self, netsta, other_dataset, nn=1):
        """Return the (up to) nn stations in other_dataset closest to netsta.

        :param netsta: 'NET.STA' name (case-insensitive) present in this dataset
        :param other_dataset: Dataset instance to search within
        :param nn: number of neighbours requested
        :return: list of 'NET.STA' names, nearest first
        """
        assert isinstance(netsta, str), 'station_name must be a string'
        assert isinstance(other_dataset, Dataset), 'other_dataset must be an instance of Dataset'
        netsta = netsta.upper()

        assert netsta in self.netsta_list, '%s not found' % (netsta)

        _, indices = other_dataset._tree.query(self._cart_location[netsta], nn)

        # For nn=1, cKDTree.query returns numpy scalars rather than arrays; the
        # original isinstance(l, int) test misses numpy integer scalars on
        # Python 3, so normalize to a 1D array unconditionally.
        indices = np.atleast_1d(indices)

        # cKDTree pads missing neighbours with index == len(data); drop them
        indices = indices[indices < len(other_dataset.netsta_list)]

        assert len(indices), 'No stations found..'

        return list(np.array(other_dataset.netsta_list)[indices])
    # end func

    def get_unique_station_pairs(self, other_dataset, nn=1, require_overlap=True):
        """Return unique station pairs between this dataset and other_dataset.

        Self-pairs and order-swapped duplicates are always removed.

        :param other_dataset: Dataset to pair against (may be self)
        :param nn: nearest neighbours per station; -1 pairs with all stations
        :param require_overlap: if True, drop pairs whose global recording
                                epochs do not overlap (gaps are not considered)
        :return: list of (ns1, ns2) tuples
        """
        pairs = set()
        for ns1 in self.netsta_list:
            if nn != -1:
                if self == other_dataset:
                    # request one extra neighbour: ns1 is trivially its own nearest
                    ns2list = set(self.get_closest_stations(ns1, other_dataset, nn=nn + 1))
                    if ns1 in ns2list:
                        ns2list.remove(ns1)
                    ns2list = list(ns2list)
                else:
                    ns2list = self.get_closest_stations(ns1, other_dataset, nn=nn)
            else:
                ns2list = other_dataset.netsta_list
            # end if

            for ns2 in ns2list:
                pairs.add((ns1, ns2))
            # end for
        # end for

        # drop self-pairs and order-swapped duplicates
        pairs_subset = set()
        for item in pairs:
            if item[0] == item[1]:
                continue

            dup_item = (item[1], item[0])
            if dup_item not in pairs_subset and item not in pairs_subset:
                pairs_subset.add(item)
            # end if
        # end for

        if not require_overlap:
            return list(pairs_subset)
        # end if

        # cull pairs based on temporal overlap (note: gaps are not considered)
        result_pairs = set()
        range_cache = {}

        def _global_range(fds, ns):
            # cache per-station global (start, end) times; metadata queries are slow
            if ns not in range_cache:
                net, sta = ns.split('.')
                range_cache[ns] = fds.get_global_time_range(net, sta)
            return range_cache[ns]
        # end func

        for ns1, ns2 in pairs_subset:
            st1, et1 = _global_range(self.fds, ns1)
            st2, et2 = _global_range(other_dataset.fds, ns2)

            # intervals [st1, et1] and [st2, et2] overlap iff each starts
            # before the other ends
            if (st1 <= et2) and (st2 <= et1):
                result_pairs.add((ns1, ns2))
            # end if
        # end for

        return list(result_pairs)
    # end func
# end class

def read_location_preferences(location_preferences_fn):
    """Read a two-column text file mapping 'NET.STA' names to preferred
    location codes.

    Blank lines are ignored. Each non-blank line must contain exactly two
    whitespace-separated tokens: a 'NET.STA' name and a location code.

    :param location_preferences_fn: file name, or None/'' for an empty mapping
    :return: defaultdict mapping 'NET.STA' -> location code; missing keys
             yield None
    :raises AssertionError: if a line cannot be parsed
    """
    result = defaultdict(lambda: None)

    if location_preferences_fn:
        # context manager guarantees the file handle is closed (the original
        # open(...).readlines() leaked the handle)
        with open(location_preferences_fn, 'r') as fh:
            pref_list = fh.readlines()

        for pref in pref_list:
            pref = pref.strip()
            if len(pref):
                try:
                    netsta, loc = pref.split()
                    # validates the NET.STA format; values are unused otherwise
                    net, sta = netsta.split('.')

                    result[netsta] = loc
                except Exception as e:
                    # raise explicitly (instead of print + `assert 0`) so bad
                    # input cannot be silently ignored under python -O; chain
                    # the original error for context
                    raise AssertionError('Error parsing: {}'.format(pref)) from e
                # end try
            # end if
        # end for
    # end if
    return result
# end func

def getStationInventory(master_inventory, inventory_cache, netsta, location_preferences_dict):
netstaInv = None
Expand Down

0 comments on commit 15be567

Please sign in to comment.