Merge pull request #211 from HERA-Team/pspec_uvh5_input
Allow pspec pipeline to read other file types
philbull committed Jul 2, 2019
2 parents 0d97f64 + d5307e5 commit 4bb8d6e
Showing 6 changed files with 103 additions and 28 deletions.
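For orientation, here is a minimal sketch of how the new `file_type` keyword added to `pspec_run` might be used. The `.uvh5` file names are hypothetical; any format understood by `pyuvdata.UVData.read` should work.

```python
from hera_pspec import pspecdata

# Hypothetical UVH5 inputs -- before this change, string inputs here were
# assumed to be Miriad files.
dsets = ["zen.even.xx.LST.1.28828.HH.uvh5",
         "zen.odd.xx.LST.1.28828.HH.uvh5"]

# file_type tells the loader which pyuvdata reader to use (default 'miriad').
pspecdata.pspec_run(dsets, "out.hdf5",
                    file_type='uvh5',
                    Jy2mK=False,
                    overwrite=True,
                    verbose=True)
```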
19 changes: 13 additions & 6 deletions hera_pspec/pspecdata.py
@@ -2742,15 +2742,16 @@ def pspec_run(dsets, filename, dsets_std=None, groupname=None,
beam=None, cosmo=None, rephase_to_dset=None,
trim_dset_lsts=False, broadcast_dset_flags=True,
time_thresh=0.2, Jy2mK=False, overwrite=True,
verbose=True, store_cov=False, history=''):
file_type='miriad', verbose=True, store_cov=False,
history=''):
"""
Create a PSpecData object, run OQE delay spectrum estimation and write
results to a PSpecContainer object.
Parameters
----------
dsets : list
Contains UVData objects or string filepaths to miriad files
Contains UVData objects or string filepaths to UVData-compatible files
filename : str
Output filepath for HDF5 PSpecContainer object
@@ -2880,6 +2881,10 @@ def pspec_run(dsets, filename, dsets_std=None, groupname=None,
overwrite : boolean
If True, overwrite outputs if they exist on disk.
file_type : str, optional
If dsets is passed as a list of filenames, specify which file format
the files use. Default: 'miriad'.
verbose : boolean
If True, report feedback to standard output.
@@ -2941,7 +2946,7 @@ def pspec_run(dsets, filename, dsets_std=None, groupname=None,
try:
# load data into UVData objects if fed as list of strings
t0 = time.time()
dsets = _load_dsets(dsets, bls=bls, pols=pols, verbose=verbose)
dsets = _load_dsets(dsets, bls=bls, pols=pols, file_type=file_type, verbose=verbose)
utils.log("Loaded data in %1.1f sec." % (time.time() - t0),
lvl=1, verbose=verbose)
except ValueError:
@@ -3204,9 +3209,10 @@ def raise_warning(warning, verbose=True):
print(warning)


def _load_dsets(fnames, bls=None, pols=None, logf=None, verbose=True):
def _load_dsets(fnames, bls=None, pols=None, logf=None, verbose=True,
file_type='miriad'):
"""
Helper function for loading Miriad datasets in pspec_run.
Helper function for loading UVData-compatible datasets in pspec_run.
"""
dsets = []
Ndsets = len(fnames)
@@ -3216,6 +3222,7 @@ def _load_dsets(fnames, bls=None, pols=None, logf=None, verbose=True):

# read data
uvd = UVData()
uvd.read_miriad(glob.glob(dset), bls=bls, polarizations=pols)
uvd.read(glob.glob(dset), bls=bls, polarizations=pols,
file_type=file_type)
dsets.append(uvd)
return dsets
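The helper above now defers to pyuvdata's generic reader. A rough, self-contained sketch of the equivalent call follows; the glob pattern is illustrative.

```python
import glob
from pyuvdata import UVData

uvd = UVData()
# UVData.read dispatches on file_type ('miriad', 'uvh5', 'uvfits', ...),
# whereas the old read_miriad call was hard-wired to Miriad data.
uvd.read(glob.glob("zen.even.xx.LST.*.HH.uvh5"),
         bls=None, polarizations=None, file_type='uvh5')
```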
10 changes: 6 additions & 4 deletions hera_pspec/tests/test_pspecdata.py
@@ -1445,12 +1445,14 @@ def test_validate_blpairs(self):
pspecdata.validate_blpairs(blpairs, uvd, uvd)

def test_pspec_run():
fnames = [os.path.join(DATA_PATH, d) for d in ['zen.even.xx.LST.1.28828.uvOCRSA',
'zen.odd.xx.LST.1.28828.uvOCRSA']]
fnames = [os.path.join(DATA_PATH, d)
for d in ['zen.even.xx.LST.1.28828.uvOCRSA',
'zen.odd.xx.LST.1.28828.uvOCRSA']]

beamfile = os.path.join(DATA_PATH, "HERA_NF_dipole_power.beamfits")
fnames_std=[os.path.join(DATA_PATH,d) for d in ['zen.even.std.xx.LST.1.28828.uvOCRSA',
'zen.odd.std.xx.LST.1.28828.uvOCRSA']]
fnames_std = [os.path.join(DATA_PATH,d)
for d in ['zen.even.std.xx.LST.1.28828.uvOCRSA',
'zen.odd.std.xx.LST.1.28828.uvOCRSA']]
# test basic execution
if os.path.exists("./out.hdf5"):
os.remove("./out.hdf5")
12 changes: 12 additions & 0 deletions hera_pspec/tests/test_utils.py
@@ -234,6 +234,10 @@ def test_get_reds(self):
nt.assert_almost_equal(l[0], 0)
nt.assert_almost_equal(a[0], 0)
nt.assert_true(len(r), 105)

# Check errors when wrong types input
nt.assert_raises(TypeError, utils.get_reds, [1., 2.])


def test_config_pspec_blpairs(self):
# test basic execution
@@ -251,6 +255,14 @@ def test_config_pspec_blpairs(self):
# test xants
groupings = utils.config_pspec_blpairs(uv_template, [('xx', 'xx')], [('even', 'odd')], xants=[0, 1, 2], verbose=False, exclude_auto_bls=True)
nt.assert_equal(len(list(groupings.values())[0]), 9735)

# test exclude_patterns
groupings = utils.config_pspec_blpairs(uv_template,
[('xx', 'xx'), ('yy', 'yy')],
[('even', 'odd'), ('even', 'odd')],
exclude_patterns=['1.288'],
verbose=False, exclude_auto_bls=True)
nt.assert_equal(len(groupings), 0)

# test exceptions
nt.assert_raises(AssertionError, utils.config_pspec_blpairs, uv_template, [('xx', 'xx'), ('xx', 'xx')], [('even', 'odd')], verbose=False)
3 changes: 3 additions & 0 deletions hera_pspec/tests/test_uvpspec_utils.py
@@ -151,6 +151,9 @@ def test_get_red_blpairs():
nt.assert_equal(len(blps), len(lens)) # Should be one length for each group
nt.assert_equal(len(blps), len(angs)) # Ditto, for angles

# Check output type
nt.assert_equal(isinstance(blps[0][0], (np.int, int)), True)

# Check that number of grouped blps = total no. of blps
num_blps = 0
for grp in blps:
81 changes: 65 additions & 16 deletions hera_pspec/utils.py
@@ -543,14 +543,15 @@ def replace(d):
# 'None' and '' turn into None
if d[k] == 'None': d[k] = None
# list of lists turn into lists of tuples
if isinstance(d[k], list) and np.all([isinstance(i, list) for i in d[k]]):
if isinstance(d[k], list) \
and np.all([isinstance(i, list) for i in d[k]]):
d[k] = [tuple(i) for i in d[k]]
elif isinstance(d[k], (dict, odict)): replace(d[k])

# Open and read config file
with open(config_file, 'r') as cfile:
try:
cfg = yaml.load(cfile)
cfg = yaml.load(cfile, Loader=yaml.FullLoader)
except yaml.YAMLError as exc:
raise(exc)

@@ -569,7 +570,8 @@ def flatten(nested_list):

def config_pspec_blpairs(uv_templates, pol_pairs, group_pairs, exclude_auto_bls=False,
exclude_permutations=True, bl_len_range=(0, 1e10),
bl_deg_range=(0, 180), xants=None, verbose=True):
bl_deg_range=(0, 180), xants=None, exclude_patterns=None,
file_type='miriad', verbose=True):
"""
Given a list of miriad file templates and selections for
polarization and group labels, construct a master list of
@@ -614,11 +616,20 @@ def config_pspec_blpairs(uv_templates, pol_pairs, group_pairs, exclude_auto_bls=
A len-2 integer tuple specifying the range of baseline angles
(degrees in ENU frame) to consider.
xants : list
A list of integer antenna numbers to exclude.
verbose : bool
If True, print feedback to stdout.
xants : list, optional
A list of integer antenna numbers to exclude. Default: None.
exclude_patterns : list, optional
A list of patterns to exclude if found in the final list of input
files (after the templates have been filled-in). This currently
just takes a list of strings, and does not recognize wildcards.
Default: None.
file_type : str, optional
File type of the input files. Default: 'miriad'.
verbose : bool, optional
If True, print feedback to stdout. Default: True.
Returns
-------
@@ -660,11 +671,40 @@ def config_pspec_blpairs(uv_templates, pol_pairs, group_pairs, exclude_auto_bls=
if _unique_file not in unique_files:
unique_files.append(_unique_file)
unique_files = sorted(unique_files)


# Exclude user-specified patterns
if exclude_patterns is not None:
to_exclude = []

# Loop over files and patterns
for f in unique_files:
for pattern in exclude_patterns:

# Add to list of files to be excluded
if pattern in f:
if verbose:
print("File matches pattern '%s' and will be excluded: %s" \
% (pattern, f))
to_exclude.append(f)
continue

# Exclude files that matched a pattern
for f in to_exclude:
try:
unique_files.remove(f)
except:
pass

# Test for empty list and fail if found
if len(unique_files) == 0:
if verbose:
print("config_pspec_blpairs: All files were filtered out!")
return []
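A short usage sketch of the exclusion logic above, mirroring the new test in `test_utils.py`. The template string here is hypothetical and uses the `{group}`/`{pol}` placeholders that `config_pspec_blpairs` fills in.

```python
from hera_pspec import utils

# Every resolved filename contains '1.288', so all files are excluded and an
# empty grouping comes back (as checked by the new test_config_pspec_blpairs).
groupings = utils.config_pspec_blpairs(
    ["zen.{group}.{pol}.LST.1.28828.HH.uvh5"],
    [('xx', 'xx')],        # pol_pairs
    [('even', 'odd')],     # group_pairs
    exclude_patterns=['1.288'],
    exclude_auto_bls=True,
    file_type='uvh5',
    verbose=False)
```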

# use a single file from unique_files and a single pol-group combination to get antenna positions
_file = unique_files[0].format(pol=pol_grps[0][0], group=pol_grps[0][1])
uvd = UVData()
uvd.read_miriad(_file, read_data=False)
uvd.read(_file, read_data=False, file_type=file_type)

# get baseline pairs
(_bls1, _bls2, _, _,
@@ -925,7 +965,8 @@ def get_bl_lens_angs(blvecs, bl_error_tol=1.0):


def get_reds(uvd, bl_error_tol=1.0, pick_data_ants=False, bl_len_range=(0, 1e4),
bl_deg_range=(0, 180), xants=None, add_autos=False):
bl_deg_range=(0, 180), xants=None, add_autos=False,
file_type='miriad'):
"""
Given a UVData object, a Miriad filepath or antenna position dictionary,
calculate redundant baseline groups using hera_cal.redcal and optionally
@@ -934,8 +975,11 @@ def get_reds(uvd, bl_error_tol=1.0, pick_data_ants=False, bl_len_range=(0, 1e4),
Parameters
----------
uvd : UVData object or str or dictionary
UVData object or Miriad filepath string or antenna position dictionary.
UVData object or filepath string or antenna position dictionary.
An antpos dict is formed via dict(zip(ants, ant_vecs)).
N.B. If uvd is a filepath, use the `file_type` kwarg to specify the
file type.
bl_error_tol : float
Redundancy tolerance in meters
@@ -955,6 +999,9 @@ def get_reds(uvd, bl_error_tol=1.0, pick_data_ants=False, bl_len_range=(0, 1e4),
add_autos : bool
If True, add into autocorrelation group to the redundant group list.
file_type : str, optional
File type of the input files. Default: 'miriad'.
Returns (reds, lens, angs)
-------
@@ -972,16 +1019,18 @@ def get_reds(uvd, bl_error_tol=1.0, pick_data_ants=False, bl_len_range=(0, 1e4),
# load filepath
if isinstance(uvd, (str, np.str)):
_uvd = UVData()
_uvd.read_miriad(uvd, read_data=False)
_uvd.read(uvd, read_data=False, file_type=file_type)
uvd = _uvd
# get antenna position dictionary
antpos, ants = uvd.get_ENU_antpos(pick_data_ants=pick_data_ants)
antpos_dict = dict(list(zip(ants, antpos)))

# use antenna position dictionary
elif isinstance(uvd, (dict, odict)):
# use antenna position dictionary
antpos_dict = uvd

else:
raise TypeError("uvd must be a UVData object, filename string, or dict "
"of antenna positions.")

# get redundant baselines
reds = redcal.get_pos_reds(antpos_dict, bl_error_tol=bl_error_tol)
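Likewise, a minimal sketch of `get_reds` reading redundant groups straight from a file path via the new `file_type` argument; the filename is hypothetical.

```python
from hera_pspec import utils

# Returns redundant baseline groups together with their lengths and angles.
reds, lens, angs = utils.get_reds("zen.even.xx.LST.1.28828.HH.uvh5",
                                  bl_error_tol=1.0,
                                  add_autos=True,
                                  file_type='uvh5')
```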

6 changes: 4 additions & 2 deletions hera_pspec/uvpspec_utils.py
@@ -1084,7 +1084,9 @@ def _get_red_blpairs(uvp, bl_len_tol=1., bl_ang_tol=1.):
# This line only keeps blpairs where both bls belong to the same red grp!
matches = np.where(np.logical_and(bl1_grp == i, bl2_grp == i))

# Unpack into tuple of bl integer pairs
red_grps.append( list(zip(bl1[matches], bl2[matches])) )
# Unpack into list of blpair integers
blpair_ints = [int("%d%d" % _blp)
for _blp in zip(bl1[matches], bl2[matches])]
red_grps.append(blpair_ints)

return red_grps, red_lens, red_angs
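To illustrate the packing used above: a baseline pair is encoded as a single integer by concatenating the decimal digits of the two baseline integers. The values below are invented.

```python
# Two baseline integers belonging to the same redundant group (invented values).
bl1, bl2 = 101102, 102103

# Same packing as in _get_red_blpairs: "%d%d" % (bl1, bl2) -> "101102102103".
blpair_int = int("%d%d" % (bl1, bl2))
assert blpair_int == 101102102103
```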
