1
+ #!/usr/env python
2
+ """
3
+ Description:
4
+ Reads waveforms from an ASDF file, optionally applies instrument response correction,
5
+ resamples and outputs them to another ASDF file. This preprocessing is crucial for
6
+ large-scale studies involving > 10000 Green's Functions, e.g. in ambient noise
7
+ tomography. This approach significantly reduces IO bottlenecks and computational
8
+ costs associated with having to apply instrument response corrections on data from
9
+ a given station in alternative workflows.
10
+ References:
11
+
12
+ CreationDate: 18/07/19
13
+ Developer: rakib.hassan@ga.gov.au
14
+
15
+ Revision History:
16
+ LastUpdate: 18/07/19 RH
17
+ LastUpdate: dd/mm/yyyy Who Optional description
18
+ """
19
+
20
+ import click
21
+ import os
22
+ from mpi4py import MPI
23
+ import pyasdf
24
+ from tqdm import tqdm
25
+ from obspy import UTCDateTime , read_inventory , Inventory , Stream
26
+ from collections import defaultdict
27
+ from obspy .core .util .misc import get_window_times
28
+ import gc
29
+ from obspy .core .util .misc import limit_numpy_fft_cache
30
+
31
+ def split_list (lst , npartitions ):
32
+ k , m = divmod (len (lst ), npartitions )
33
+ return [lst [i * k + min (i , m ):(i + 1 ) * k + min (i + 1 , m )] for i in range (npartitions )]
34
+ # end func
35
+
36
+ def getStationInventory (master_inventory , inventory_cache , netsta ):
37
+ netstaInv = None
38
+ if (master_inventory ):
39
+ if (inventory_cache is None ): inventory_cache = defaultdict (list )
40
+ net , sta = netsta .split ('.' )
41
+
42
+ if (isinstance (inventory_cache [netsta ], Inventory )):
43
+ netstaInv = inventory_cache [netsta ]
44
+ else :
45
+ inv = master_inventory .select (network = net , station = sta )
46
+ if (len (inv .networks )):
47
+ inventory_cache [netsta ] = inv
48
+ netstaInv = inv
49
+ # end if
50
+ # end if
51
+ # end if
52
+
53
+ return netstaInv , inventory_cache
54
+ # end func
55
+
56
+ def create_station_asdf (input_asdf , output_folder , resample_rate ,
57
+ instrument_response_inventory , instrument_response_output , water_level ):
58
+ # mpi attributes
59
+ comm = MPI .COMM_WORLD
60
+ nproc = comm .Get_size ()
61
+ rank = comm .Get_rank ()
62
+
63
+ # input asdf file
64
+ ids = pyasdf .ASDFDataSet (input_asdf , mode = 'r' )
65
+
66
+ # get stations
67
+ stations = ids .get_all_coordinates ().keys ()
68
+
69
+ # local work-load
70
+ stations = split_list (stations , nproc )[rank ]
71
+
72
+ # read inventory
73
+ stationInvCache = None
74
+ # read inventory
75
+ inv = None
76
+ try :
77
+ inv = read_inventory (instrument_response_inventory )
78
+ except Exception as e :
79
+ print (e )
80
+ raise RuntimeError ('Failed to read inventory: %s' % (instrument_response_inventory ))
81
+ # end try
82
+
83
+ for s in stations :
84
+ # output asdf file
85
+ ofn = os .path .join (output_folder ,
86
+ os .path .splitext (os .path .basename (input_asdf ))[0 ] + '.%s.h5' % (s ))
87
+
88
+ if (os .path .exists (ofn )): os .remove (ofn )
89
+ ods = pyasdf .ASDFDataSet (ofn , mode = 'w' , mpi = False , compression = 'gzip-3' )
90
+
91
+ sta = ids .waveforms [s ]
92
+ for tag in tqdm (sta .list (), desc = 'Rank %d, Station %s:' % (rank , s )):
93
+ # get response object
94
+ sinv , stationInvCache = getStationInventory (inv , stationInvCache , s )
95
+
96
+ st = sta [tag ]
97
+ dayst = Stream ()
98
+ for tr in st :
99
+ start_time = tr .stats .starttime
100
+ offset = (UTCDateTime (year = start_time .year , month = start_time .month ,
101
+ day = start_time .day ) - start_time )
102
+ for wtr in tr .slide (3600 * 24 , 3600 * 24 , offset = offset , include_partial_windows = True ):
103
+ wtr = wtr .copy ()
104
+ dayst += wtr
105
+ # end for
106
+ # end for
107
+ gc .collect ()
108
+
109
+ # remove response
110
+ if (sinv ):
111
+ for tr in dayst :
112
+ limit_numpy_fft_cache (max_size_in_mb_per_cache = 10 )
113
+ try :
114
+ tr .remove_response (sinv , output = instrument_response_output .upper (),
115
+ water_level = water_level )
116
+ except Exception as e :
117
+ print (e )
118
+ # end try
119
+ gc .collect ()
120
+ # end for
121
+ # end if
122
+
123
+ # detrend and taper
124
+ taper_length = 20.0 # seconds
125
+ for tr in dayst :
126
+ if tr .stats .npts < 4 * taper_length * tr .stats .sampling_rate :
127
+ dayst .remove (tr )
128
+ else :
129
+ tr .detrend (type = "demean" )
130
+ tr .detrend (type = "linear" )
131
+ tr .taper (max_percentage = None , max_length = 1.0 )
132
+ # end if
133
+ # end for
134
+ gc .collect ()
135
+
136
+ # apply low-pass filter and create day traces
137
+ for tr in dayst :
138
+ tr .filter ('lowpass' , freq = resample_rate * 0.5 , corners = 6 , zerophase = True )
139
+ tr .interpolate (resample_rate , method = 'weighted_average_slopes' )
140
+ # end for
141
+ gc .collect ()
142
+
143
+ # add traces
144
+ for tr in dayst :
145
+ try :
146
+ ods .add_waveforms (tr , tag = 'raw_recording' )
147
+ except Exception as e :
148
+ print (e )
149
+ print (tr )
150
+ # end try
151
+ # end for
152
+ #break
153
+ # end for
154
+ gc .collect ()
155
+
156
+ ods .add_stationxml (ids .waveforms [s ].StationXML )
157
+
158
+ print ('Closing asdf file..' )
159
+ del ods
160
+
161
+ #break
162
+ # end for
163
+ del ids
164
+ # end func
165
+
166
+ CONTEXT_SETTINGS = dict (help_option_names = ['-h' , '--help' ])
167
+ @click .command (context_settings = CONTEXT_SETTINGS )
168
+ @click .argument ('input-asdf' , required = True ,
169
+ type = click .Path (exists = True ))
170
+ @click .argument ('output-folder' , required = True ,
171
+ type = click .Path (exists = True ))
172
+ @click .option ('--resample-rate' , default = 10 ,
173
+ help = "Resample rate in Hz; default is 10 Hz" )
174
+ @click .option ('--instrument-response-inventory' , default = None ,
175
+ type = click .Path ('r' ),
176
+ help = "FDSNxml inventory containing instrument response information. Note that when this parameter is provided "
177
+ ", instrument response corrections are automatically applied for matching stations with response "
178
+ "information." )
179
+ @click .option ('--instrument-response-output' ,
180
+ type = click .Choice (['vel' , 'disp' ]),
181
+ default = 'vel' , help = "Output of instrument response correction; must be either 'vel' (default) for velocity"
182
+ " or 'disp' for displacement. Note, this parameter has no effect if instrument response"
183
+ " correction is not performed." )
184
+ @click .option ('--water-level' , default = 50. , help = "Water-level in dB to limit amplification during instrument response correction"
185
+ "to a certain cut-off value. Note, this parameter has no effect if instrument"
186
+ "response correction is not performed." )
187
+ def process (input_asdf , output_folder , resample_rate , instrument_response_inventory ,
188
+ instrument_response_output , water_level ):
189
+
190
+ create_station_asdf (input_asdf , output_folder , resample_rate , instrument_response_inventory ,
191
+ instrument_response_output , water_level )
192
+ # end func
193
+
194
+ if (__name__ == '__main__' ):
195
+ process ()
0 commit comments