Fixing paths to modules, pep8 compliance, adding simple test mode

GeoscienceAustralia · Feb 14, 2019 · cb59446 · cb59446
1 parent ccec142
commit cb59446
Show file tree

Hide file tree

Showing 4 changed files with 195 additions and 163 deletions.
diff --git a/.gitignore b/.gitignore
@@ -73,3 +73,4 @@ seismic/inventory/callgrind.*
 seismic/inventory/fdsn_stn_inv_dump.txt
 seismic/inventory/vitalstats*.txt
 **~
+seismic/xcorqc/*refdata.h5
diff --git a/seismic/xcorqc/7G_example.py b/seismic/xcorqc/7G_example.py
@@ -1,7 +1,7 @@
 from obspy import UTCDateTime
 import pyasdf
 
-from scripts.ASDFdatabase import SeisDB
+from seismic.ASDFdatabase.seisdb import SeisDB
 from xcorqc import IntervalStackXCorr
 
 # =========================== User Input Required =========================== #

diff --git a/seismic/xcorqc/correlator.py b/seismic/xcorqc/correlator.py
@@ -12,23 +12,29 @@
     LastUpdate:     dd/mm/yyyy  Who     Optional description
 """
 
-from mpi4py import MPI
-import glob, os
+import os
+import glob
 from collections import defaultdict
-
 from math import sqrt
+
 import numpy as np
 from scipy.spatial import cKDTree
 
 from obspy import UTCDateTime
 import pyasdf
 
+# For execution under Windows, see 
+from mpi4py import MPI
+
 import click
 
 from seismic.ASDFdatabase.seisds import SeisDB
 from seismic.xcorqc.xcorqc import IntervalStackXCorr
 
 
+TEST_MODE = False
+
+
 # define utility functions
 def rtp2xyz(r, theta, phi):
     xout = np.zeros((r.shape[0], 3))
@@ -50,6 +56,7 @@ def xyz2rtp(x, y, z):
     return rout
 # end func
 
+
 class Dataset:
     def __init__(self, asdf_file_name, station_names='*', ignore_json_db=False, zchan=None, nchan=None, echan=None):
 
@@ -75,7 +82,7 @@ def __init__(self, asdf_file_name, station_names='*', ignore_json_db=False, zcha
                         self.has_jason_db = True
                     except:
                         raise RuntimeError('Failed to load json file:%s' % (f))
-                    #end try
+                    # end try
                     break
                 # end if
             # end for
@@ -94,7 +101,8 @@ def __init__(self, asdf_file_name, station_names='*', ignore_json_db=False, zcha
             sn = station._station_name.split('.')[1]
 
             if(station_subset != '*'):
-                if sn not in station_subset: continue
+                if sn not in station_subset:
+                    continue
 
             self.stations.append(sn)
             self.stations_metadata[sn] = station
@@ -106,9 +114,12 @@ def __init__(self, asdf_file_name, station_names='*', ignore_json_db=False, zcha
             for nw in station.StationXML:
                 for st in nw:
                     for ch in st:
-                        if 'Z' in ch.code or 'z' in ch.code: zchannels.add(ch.code)
-                        if 'N' in ch.code or 'n' in ch.code: nchannels.add(ch.code)
-                        if 'E' in ch.code or 'e' in ch.code: echannels.add(ch.code)
+                        if 'Z' in ch.code or 'z' in ch.code:
+                            zchannels.add(ch.code)
+                        if 'N' in ch.code or 'n' in ch.code:
+                            nchannels.add(ch.code)
+                        if 'E' in ch.code or 'e' in ch.code:
+                            echannels.add(ch.code)
                     # end for
                 # end for
             # end for
@@ -124,39 +135,50 @@ def __init__(self, asdf_file_name, station_names='*', ignore_json_db=False, zcha
         # end for
 
         if len(zchannels):
-            if(zchan is None): assert len(zchannels) == 1, 'Multiple z-channels found %s' % (zchannels)
-            else: zchannels = set([zchan]) if zchan in zchannels else None
-            if(zchannels is None): assert 0, 'Invalid z-channel name'
+            if(zchan is None):
+                assert len(zchannels) == 1, 'Multiple z-channels found %s' % (zchannels)
+            else:
+                zchannels = set([zchan]) if zchan in zchannels else None
+            if(zchannels is None):
+                assert 0, 'Invalid z-channel name'
 
         if len(nchannels):
-            if(nchan is None): assert len(nchannels) == 1, 'Multiple n-channels found %s' % (nchannels)
-            else: nchannels = set([nchan]) if nchan in nchannels else None
-            if(nchannels is None): assert 0, 'Invalid n-channel name'
+            if(nchan is None):
+                assert len(nchannels) == 1, 'Multiple n-channels found %s' % (nchannels)
+            else:
+                nchannels = set([nchan]) if nchan in nchannels else None
+            if(nchannels is None):
+                assert 0, 'Invalid n-channel name'
 
         if len(echannels):
-            if(echan is None): assert len(echannels) == 1, 'Multiple e-channels found %s' % (echannels)
-            else: echannels = set([echan]) if echan in echannels else None
-            if(echannels is None): assert 0, 'Invalid e-channel name'
+            if(echan is None):
+                assert len(echannels) == 1, 'Multiple e-channels found %s' % (echannels)
+            else:
+                echannels = set([echan]) if echan in echannels else None
+            if(echannels is None):
+                assert 0, 'Invalid e-channel name'
 
         self.zchannel = zchannels.pop() if len(zchannels) else None
         self.nchannel = nchannels.pop() if len(nchannels) else None
         self.echannel = echannels.pop() if len(echannels) else None
     # end func
 
     def get_closest_stations(self, station_name, other_dataset, nn=1):
-        assert isinstance(station_name, str) or isinstance(station_name, unicode), 'station_name must be a string'
+        assert isinstance(station_name, basestring), 'station_name must be a string'
         assert isinstance(other_dataset, Dataset), 'other_dataset must be an instance of Dataset'
         station_name = station_name.upper()
 
         assert station_name in self.stations, 'station %s not found'%(station_name)
 
         d, l = other_dataset._tree.query(self._cart_location[station_name], nn)
 
-        if(type(l) == int): l = [l]
+        if isinstance(l, int):
+            l = [l]
 
         l = l[l<len(other_dataset.stations)]
 
-        if(type(l) == int): l = [l]
+        if isinstance(l, int):
+            l = [l]
 
         assert len(l), 'No stations found..'
 
@@ -177,14 +199,13 @@ def get_closest_stations(self, station_name, other_dataset, nn=1):
                 type=int)
 @click.argument('window-seconds', required=True,
                 type=int)
-
 @click.option('--ds1-dec-factor', default=1, help="Decimation factor for data-source-1")
 @click.option('--ds2-dec-factor', default=1, help="Decimation factor for data-source-2")
 @click.option('--nearest-neighbours', default=-1, help="Number of nearest neighbouring stations in data-source-2"
-                                                      " to correlate against a given station in data-source-1. If"
-                                                      " set to -1, correlations for a cross-product of all stations"
-                                                      " in both data-sets are produced -- note, this is computationally"
-                                                      " expensive.")
+                                                       " to correlate against a given station in data-source-1. If"
+                                                       " set to -1, correlations for a cross-product of all stations"
+                                                       " in both data-sets are produced -- note, this is computationally"
+                                                       " expensive.")
 @click.option('--fmin', default=0.3, help="Lowest frequency for bandpass filter")
 @click.option('--fmax', default=1., help="Highest frequency for bandpass filter")
 @click.option('--station-names1', default='*', type=str,
@@ -233,7 +254,6 @@ def get_closest_stations(self, station_name, other_dataset, nn=1):
 def process(data_source1, data_source2, output_path, interval_seconds, window_seconds, ds1_dec_factor,
             ds2_dec_factor, nearest_neighbours, fmin, fmax, station_names1, station_names2, start_time,
             end_time, clip_to_2std, one_bit_normalize, read_buffer_size,
-
             ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan, ds2_nchan, ds2_echan,
             ignore_ds1_json_db, ignore_ds2_json_db):
     """
@@ -308,9 +328,10 @@ def outputConfigParameters():
     for st1 in proc_stations[rank]:
         st2list = None
         if(nearest_neighbours != -1):
-            if (data_source1==data_source2):
-                st2list = set(ds1.get_closest_stations(st1, ds2, nn=nearest_neighbours+1))
-                if (st1 in  st2list): st2list.remove(st1)
+            if data_source1 == data_source2:
+                st2list = set(ds1.get_closest_stations(st1, ds2, nn=nearest_neighbours + 1))
+                if st1 in st2list:
+                    st2list.remove(st1)
                 st2list = list(st2list)
             else:
                 st2list = ds1.get_closest_stations(st1, ds2, nn=nearest_neighbours)
@@ -330,7 +351,16 @@ def outputConfigParameters():
     # end for
 # end func
 
-if (__name__ == '__main__'):
-    process()
-# end if
-
+if __name__ == '__main__':
+    if TEST_MODE:
+        process("7G.refdata.h5", "7G.refdata.h5", "7G_test", 3600 * 24, 3600,
+                nearest_neighbours=5,
+                start_time="2013-01-01T00:00:00", end_time="2016-01-01T00:00:00",
+                read_buffer_size=1,
+                ds1_dec_factor=1, ds2_dec_factor=1,
+                fmin=0.01, fmax=10.0,
+                clip_to_2std=True,
+                one_bit_normalize=True
+                )
+    else:
+        process()