Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 184ac7b

Browse files
authoredAug 27, 2019
Merge b9e1275 into 3d4e0c3
2 parents 3d4e0c3 + b9e1275 commit 184ac7b

File tree

6 files changed

+258
-26
lines changed

6 files changed

+258
-26
lines changed
 
+195
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/usr/env python
2+
"""
3+
Description:
4+
Reads waveforms from an ASDF file, optionally applies instrument response correction,
5+
resamples and outputs them to another ASDF file. This preprocessing is crucial for
6+
large-scale studies involving > 10000 Green's Functions, e.g. in ambient noise
7+
tomography. This approach significantly reduces IO bottlenecks and computational
8+
costs associated with having to apply instrument response corrections on data from
9+
a given station in alternative workflows.
10+
References:
11+
12+
CreationDate: 18/07/19
13+
Developer: rakib.hassan@ga.gov.au
14+
15+
Revision History:
16+
LastUpdate: 18/07/19 RH
17+
LastUpdate: dd/mm/yyyy Who Optional description
18+
"""
19+
20+
import click
21+
import os
22+
from mpi4py import MPI
23+
import pyasdf
24+
from tqdm import tqdm
25+
from obspy import UTCDateTime, read_inventory, Inventory, Stream
26+
from collections import defaultdict
27+
from obspy.core.util.misc import get_window_times
28+
import gc
29+
from obspy.core.util.misc import limit_numpy_fft_cache
30+
31+
def split_list(lst, npartitions):
32+
k, m = divmod(len(lst), npartitions)
33+
return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(npartitions)]
34+
# end func
35+
36+
def getStationInventory(master_inventory, inventory_cache, netsta):
37+
netstaInv = None
38+
if (master_inventory):
39+
if (inventory_cache is None): inventory_cache = defaultdict(list)
40+
net, sta = netsta.split('.')
41+
42+
if (isinstance(inventory_cache[netsta], Inventory)):
43+
netstaInv = inventory_cache[netsta]
44+
else:
45+
inv = master_inventory.select(network=net, station=sta)
46+
if(len(inv.networks)):
47+
inventory_cache[netsta] = inv
48+
netstaInv = inv
49+
# end if
50+
# end if
51+
# end if
52+
53+
return netstaInv, inventory_cache
54+
# end func
55+
56+
def create_station_asdf(input_asdf, output_folder, resample_rate,
57+
instrument_response_inventory, instrument_response_output, water_level):
58+
# mpi attributes
59+
comm = MPI.COMM_WORLD
60+
nproc = comm.Get_size()
61+
rank = comm.Get_rank()
62+
63+
# input asdf file
64+
ids = pyasdf.ASDFDataSet(input_asdf, mode='r')
65+
66+
# get stations
67+
stations = ids.get_all_coordinates().keys()
68+
69+
# local work-load
70+
stations = split_list(stations, nproc)[rank]
71+
72+
# read inventory
73+
stationInvCache = None
74+
# read inventory
75+
inv = None
76+
try:
77+
inv = read_inventory(instrument_response_inventory)
78+
except Exception as e:
79+
print (e)
80+
raise RuntimeError('Failed to read inventory: %s' % (instrument_response_inventory))
81+
# end try
82+
83+
for s in stations:
84+
# output asdf file
85+
ofn = os.path.join(output_folder,
86+
os.path.splitext(os.path.basename(input_asdf))[0] + '.%s.h5'%(s))
87+
88+
if (os.path.exists(ofn)): os.remove(ofn)
89+
ods = pyasdf.ASDFDataSet(ofn, mode='w', mpi=False, compression='gzip-3')
90+
91+
sta = ids.waveforms[s]
92+
for tag in tqdm(sta.list(), desc='Rank %d, Station %s:'%(rank, s)):
93+
# get response object
94+
sinv, stationInvCache = getStationInventory(inv, stationInvCache, s)
95+
96+
st = sta[tag]
97+
dayst = Stream()
98+
for tr in st:
99+
start_time = tr.stats.starttime
100+
offset = (UTCDateTime(year=start_time.year, month=start_time.month,
101+
day=start_time.day) - start_time)
102+
for wtr in tr.slide(3600*24, 3600*24, offset=offset, include_partial_windows=True):
103+
wtr = wtr.copy()
104+
dayst += wtr
105+
# end for
106+
# end for
107+
gc.collect()
108+
109+
# remove response
110+
if(sinv):
111+
for tr in dayst:
112+
limit_numpy_fft_cache(max_size_in_mb_per_cache=10)
113+
try:
114+
tr.remove_response(sinv, output=instrument_response_output.upper(),
115+
water_level=water_level)
116+
except Exception as e:
117+
print (e)
118+
# end try
119+
gc.collect()
120+
# end for
121+
# end if
122+
123+
# detrend and taper
124+
taper_length = 20.0 # seconds
125+
for tr in dayst:
126+
if tr.stats.npts < 4 * taper_length * tr.stats.sampling_rate:
127+
dayst.remove(tr)
128+
else:
129+
tr.detrend(type="demean")
130+
tr.detrend(type="linear")
131+
tr.taper(max_percentage=None, max_length=1.0)
132+
# end if
133+
# end for
134+
gc.collect()
135+
136+
# apply low-pass filter and create day traces
137+
for tr in dayst:
138+
tr.filter('lowpass', freq=resample_rate * 0.5, corners=6, zerophase=True)
139+
tr.interpolate(resample_rate, method='weighted_average_slopes')
140+
# end for
141+
gc.collect()
142+
143+
# add traces
144+
for tr in dayst:
145+
try:
146+
ods.add_waveforms(tr, tag='raw_recording')
147+
except Exception as e:
148+
print (e)
149+
print (tr)
150+
# end try
151+
# end for
152+
#break
153+
# end for
154+
gc.collect()
155+
156+
ods.add_stationxml(ids.waveforms[s].StationXML)
157+
158+
print ('Closing asdf file..')
159+
del ods
160+
161+
#break
162+
# end for
163+
del ids
164+
# end func
165+
166+
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
167+
@click.command(context_settings=CONTEXT_SETTINGS)
168+
@click.argument('input-asdf', required=True,
169+
type=click.Path(exists=True))
170+
@click.argument('output-folder', required=True,
171+
type=click.Path(exists=True))
172+
@click.option('--resample-rate', default=10,
173+
help="Resample rate in Hz; default is 10 Hz")
174+
@click.option('--instrument-response-inventory', default=None,
175+
type=click.Path('r'),
176+
help="FDSNxml inventory containing instrument response information. Note that when this parameter is provided "
177+
", instrument response corrections are automatically applied for matching stations with response "
178+
"information.")
179+
@click.option('--instrument-response-output',
180+
type=click.Choice(['vel', 'disp']),
181+
default='vel', help="Output of instrument response correction; must be either 'vel' (default) for velocity"
182+
" or 'disp' for displacement. Note, this parameter has no effect if instrument response"
183+
" correction is not performed.")
184+
@click.option('--water-level', default=50., help="Water-level in dB to limit amplification during instrument response correction"
185+
"to a certain cut-off value. Note, this parameter has no effect if instrument"
186+
"response correction is not performed.")
187+
def process(input_asdf, output_folder, resample_rate, instrument_response_inventory,
188+
instrument_response_output, water_level):
189+
190+
create_station_asdf(input_asdf, output_folder, resample_rate, instrument_response_inventory,
191+
instrument_response_output, water_level)
192+
# end func
193+
194+
if (__name__ == '__main__'):
195+
process()

‎seismic/pick_harvester/pick.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def outputConfigParameters():
389389
# Retrieve estimated workload
390390
# ==================================================
391391
taupyModel = TauPyModel(model='iasp91')
392-
fds = FederatedASDFDataSet(asdf_source, use_json_db=False, logger=None)
392+
fds = FederatedASDFDataSet(asdf_source, logger=None)
393393
workload = getWorkloadEstimate(fds, originTimestamps)
394394

395395
# ==================================================
@@ -445,9 +445,15 @@ def outputConfigParameters():
445445
st = fds.get_waveforms(codes[0], codes[1], codes[2], codes[3],
446446
curr,
447447
curr + step,
448-
automerge=True,
449448
trace_count_threshold=200)
450449

450+
try:
451+
st.merge(method=-1)
452+
except Exception as e:
453+
print (e)
454+
continue
455+
# end try
456+
451457
if (len(st) == 0): continue
452458
dropBogusTraces(st)
453459

@@ -521,14 +527,20 @@ def outputConfigParameters():
521527
stn = fds.get_waveforms(codesn[0], codesn[1], codesn[2], codesn[3],
522528
curr,
523529
curr + step,
524-
automerge=True,
525530
trace_count_threshold=200)
526531
ste = fds.get_waveforms(codese[0], codese[1], codese[2], codese[3],
527532
curr,
528533
curr + step,
529-
automerge=True,
530534
trace_count_threshold=200)
531535

536+
try:
537+
stn.merge(method=-1)
538+
ste.merge(method=-1)
539+
except Exception as e:
540+
print (e)
541+
continue
542+
# end try
543+
532544
dropBogusTraces(stn)
533545
dropBogusTraces(ste)
534546

‎seismic/xcorqc/correlator.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def process(data_source1, data_source2, output_path,
132132
clip_to_2std=False, whitening=False, one_bit_normalize=False, read_buffer_size=10,
133133
ds1_zchan=None, ds1_nchan=None, ds1_echan=None,
134134
ds2_zchan=None, ds2_nchan=None, ds2_echan=None, corr_chan=None,
135-
envelope_normalize=False, ensemble_stack=False, restart=False):
135+
envelope_normalize=False, ensemble_stack=False, restart=False, no_tracking_tag=False):
136136
"""
137137
DATA_SOURCE1: Text file containing paths to ASDF files \n
138138
DATA_SOURCE2: Text file containing paths to ASDF files \n
@@ -157,11 +157,15 @@ def process(data_source1, data_source2, output_path,
157157
if (rank == 0):
158158
# Register time tag with high resolution, since queued jobs can readily
159159
# commence around the same time.
160-
time_tag = UTCDateTime.now().strftime("%y-%m-%d.T%H.%M.%S.%f")
160+
161+
if(no_tracking_tag):
162+
time_tag = None
163+
else:
164+
time_tag = UTCDateTime.now().strftime("%y-%m-%d.T%H.%M.%S.%f")
161165

162166
def outputConfigParameters():
163167
# output config parameters
164-
fn = 'correlator.%s.cfg' % (time_tag)
168+
fn = 'correlator.%s.cfg' % (time_tag) if time_tag else 'correlator.cfg'
165169
fn = os.path.join(output_path, fn)
166170

167171
f = open(fn, 'w+')
@@ -191,6 +195,7 @@ def outputConfigParameters():
191195
f.write('%25s\t\t\t: %s\n' % ('--whitening', whitening))
192196
f.write('%25s\t\t\t: %s\n' % ('--ensemble-stack', ensemble_stack))
193197
f.write('%25s\t\t\t: %s\n' % ('--restart', 'TRUE' if restart else 'FALSE'))
198+
f.write('%25s\t\t\t: %s\n' % ('--no-tracking-tag', 'TRUE' if no_tracking_tag else 'FALSE'))
194199

195200
f.close()
196201
# end func
@@ -354,11 +359,12 @@ def outputConfigParameters():
354359
"single CC function, aimed at producing empirical Greens "
355360
"functions for surface wave tomography.")
356361
@click.option('--restart', default=False, is_flag=True, help='Restart job')
362+
@click.option('--no-tracking-tag', default=False, is_flag=True, help='Do not tag output file names with a time-tag')
357363
def main(data_source1, data_source2, output_path, interval_seconds, window_seconds, resample_rate,
358364
nearest_neighbours, fmin, fmax, station_names1, station_names2, start_time,
359365
end_time, instrument_response_inventory, instrument_response_output, water_level, clip_to_2std,
360366
whitening, one_bit_normalize, read_buffer_size, ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan,
361-
ds2_nchan, ds2_echan, corr_chan, envelope_normalize, ensemble_stack, restart):
367+
ds2_nchan, ds2_echan, corr_chan, envelope_normalize, ensemble_stack, restart, no_tracking_tag):
362368
"""
363369
DATA_SOURCE1: Path to ASDF file \n
364370
DATA_SOURCE2: Path to ASDF file \n
@@ -376,7 +382,7 @@ def main(data_source1, data_source2, output_path, interval_seconds, window_secon
376382
nearest_neighbours, fmin, fmax, station_names1, station_names2, start_time,
377383
end_time, instrument_response_inventory, instrument_response_output, water_level, clip_to_2std,
378384
whitening, one_bit_normalize, read_buffer_size, ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan,
379-
ds2_nchan, ds2_echan, corr_chan, envelope_normalize, ensemble_stack, restart)
385+
ds2_nchan, ds2_echan, corr_chan, envelope_normalize, ensemble_stack, restart, no_tracking_tag)
380386
# end func
381387

382388
if __name__ == '__main__':

‎seismic/xcorqc/xcorqc.py

+36-16
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from obspy.signal.cross_correlation import xcorr
3838
from obspy.signal.detrend import simple, spline
3939
from obspy.signal.filter import bandpass
40+
from obspy.geodetics.base import gps2dist_azimuth
4041
from scipy import signal
4142

4243
from seismic.xcorqc.fft import *
@@ -282,12 +283,6 @@ def xcorr2(tr1, tr2, sta1_inv=None, sta2_inv=None,
282283
tr2_d[clip_indices_tr2] = 2 * std_tr2 * np.sign(tr2_d[clip_indices_tr2])
283284
# end if
284285

285-
# spectral whitening
286-
if(whitening):
287-
tr1_d = whiten(tr1_d, sr1)
288-
tr2_d = whiten(tr2_d, sr2)
289-
# end if
290-
291286
# 1-bit normalization
292287
if(one_bit_normalize):
293288
tr1_d = np.sign(tr1_d)
@@ -305,6 +300,12 @@ def xcorr2(tr1, tr2, sta1_inv=None, sta2_inv=None,
305300
tr2_d /= np.std(tr2_d)
306301
# end if
307302

303+
# spectral whitening
304+
if(whitening):
305+
tr1_d = whiten(tr1_d, sr1)
306+
tr2_d = whiten(tr2_d, sr2)
307+
# end if
308+
308309
if (sr1 < sr2):
309310
fftlen2 = fftlen
310311
fftlen1 = int((fftlen2 * 1.0 * sr1) / sr)
@@ -494,11 +495,6 @@ def IntervalStackXCorr(refds, tempds,
494495
if(resample_rate < 2*fhi):
495496
raise RuntimeError('Resample-rate should be >= 2*fmax')
496497

497-
if(whitening and (ref_sta_inv or temp_sta_inv)):
498-
raise RuntimeError('Mutually exclusive parameterization: specify either spectral whitening or '
499-
'instrument response removal')
500-
# end if
501-
502498
if(clip_to_2std and one_bit_normalize):
503499
raise RuntimeError('Mutually exclusive parameterization: clip_to_2std and one-bit-normalizations'
504500
'together is redundant')
@@ -695,19 +691,19 @@ def IntervalStackXCorr(refds, tempds,
695691
lag = root_grp.createVariable('lag', 'f4', ('lag',))
696692

697693
# Add metadata
698-
sr = root_grp.createVariable('SampleRate', 'f4')
699694
lon1 = root_grp.createVariable('Lon1', 'f4')
700695
lat1 = root_grp.createVariable('Lat1', 'f4')
701696
lon2 = root_grp.createVariable('Lon2', 'f4')
702697
lat2 = root_grp.createVariable('Lat2', 'f4')
703-
xcorrchan = root_grp.createVariable('CorrChannels', 'S1', ('nchar'))
698+
distance = root_grp.createVariable('Distance', 'f4')
704699

705-
sr[:] = resample_rate
706700
lon1[:] = refds.unique_coordinates[ref_net_sta][0] if len(refds.unique_coordinates[ref_net_sta]) else -999
707701
lat1[:] = refds.unique_coordinates[ref_net_sta][1] if len(refds.unique_coordinates[ref_net_sta]) else -999
708702
lon2[:] = tempds.unique_coordinates[temp_net_sta][0] if len(tempds.unique_coordinates[temp_net_sta]) else -999
709703
lat2[:] = tempds.unique_coordinates[temp_net_sta][1] if len(tempds.unique_coordinates[temp_net_sta]) else -999
710-
xcorrchan[:] = stringtochar(np.array(['%s.%s'%(ref_cha, temp_cha)], 'S10'))
704+
if( np.min([v != -999 for v in [lon1[:], lat1[:], lon2[:], lat2[:]]]) ):
705+
distance[:], _, _ = gps2dist_azimuth(lat1[:], lon1[:], lat2[:], lon2[:])
706+
# end if
711707

712708
# Add data
713709
if(ensemble_stack):
@@ -747,9 +743,33 @@ def IntervalStackXCorr(refds, tempds,
747743

748744
lag[:] = x
749745

746+
# Add and populate a new group for parameters used
747+
pg = root_grp.createGroup('Parameters')
748+
749+
params = {'corr_chans': '%s.%s'%(ref_cha, temp_cha),
750+
'instr_corr_applied_1': 1 if ref_sta_inv else 0,
751+
'instr_corr_applied_2': 1 if temp_sta_inv else 0,
752+
'instr_corr_output': instrument_response_output,
753+
'instr_corr_water_level_db': water_level,
754+
'resample_rate': resample_rate if resample_rate else -999,
755+
'buffer_seconds': buffer_seconds,
756+
'interval_seconds': interval_seconds,
757+
'window_seconds': window_seconds,
758+
'bandpass_fmin': flo if flo else -999,
759+
'bandpass_fmax': fhi if fhi else -999,
760+
'clip_to_2std': int(clip_to_2std),
761+
'one_bit_normalize': int(one_bit_normalize),
762+
'zero_mean_1std_normalize': int(clip_to_2std==False and one_bit_normalize == False),
763+
'spectral_whitening': int(whitening),
764+
'envelope_normalize': int(envelope_normalize),
765+
'ensemble_stack': int(ensemble_stack)}
766+
767+
for k, v in params.items():
768+
setattr(pg, k, v)
769+
# end for
770+
750771
root_grp.close()
751772
# end for
752773

753774
return x, xcorrResultsDict, windowCountResultsDict
754775
# end func
755-
Binary file not shown.

‎tests/seismic/xcorqc/test_interval_stack_xcorr.py

-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ def test_interval_stack_xcorr_(tmpdir, cha, inv1, inv2, interval_seconds, window
108108

109109
# skipping inconsistent parameterizations
110110
if (one_bit_normalize and clip_to_2std): return
111-
if (whitening and (inv1 or inv2)): return
112111

113112
output_folder = str(tmpdir.mkdir('output'))
114113

0 commit comments

Comments
 (0)
Please sign in to comment.