Add extract autos script #393

Closed · wants to merge 3 commits
7 changes: 7 additions & 0 deletions hera_pspec/tests/test_utils.py
@@ -384,6 +384,13 @@ def test_uvp_noise_error_parser():
    assert args.groups == ["dset0_dset1"]
    assert args.spectra is None

def test_extract_autos_post_lstbin_parser():
    parser = utils.extract_autos_post_lstbin_parser()
    args = parser.parse_args(["sum", "foo.bar", "--flist", "foo", "bar", "baz"])
    assert args.sumdiff == "sum"
    assert args.label == "foo.bar"
    assert args.flist == ["foo", "bar", "baz"]

def test_job_monitor():
    # open empty files
    datafiles = ["./{}".format(i) for i in ['a', 'b', 'c', 'd']]
20 changes: 20 additions & 0 deletions hera_pspec/utils.py
@@ -1513,6 +1513,26 @@ def uvp_noise_error_parser():
                        "to compute, 'P_N' or 'P_SN'")
    return a

def extract_autos_post_lstbin_parser():
    """
    Get the argparser for the extract_autos script.

    Args:
        N/A
    Returns:
        parser (ArgumentParser):
            The desired parser.
    """
    parser = argparse.ArgumentParser(description="Argument parser for "
                                     "extracting autos from the chunked "
                                     "files into a waterfall file.")
    parser.add_argument("sumdiff", type=str, help="A string identifying whether"
                        " the files are sum or diff files.")
    parser.add_argument("label", type=str, help="The file label.")
    parser.add_argument("--flist", type=str, nargs="*",
                        help="The list of chunked files.")
    return parser

def apply_P_SN_correction(uvp, P_SN='P_SN', P_N='P_N'):
    """
    Apply correction factor to P_SN errorbar in stats_array to account
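For reference, the behavior of the parser added above can be sketched standalone with `argparse` alone (a reconstruction for illustration; the real function is `hera_pspec.utils.extract_autos_post_lstbin_parser`):

```python
import argparse

# Standalone reconstruction of the parser above, for illustration only.
parser = argparse.ArgumentParser(description="Extract autos from chunked "
                                             "files into a waterfall file.")
parser.add_argument("sumdiff", type=str,
                    help="Whether the files are 'sum' or 'diff' files.")
parser.add_argument("label", type=str, help="The file label.")
parser.add_argument("--flist", type=str, nargs="*",
                    help="The list of chunked files.")

args = parser.parse_args(["sum", "foo.bar", "--flist", "foo", "bar", "baz"])
print(args.sumdiff, args.label, args.flist)
# → sum foo.bar ['foo', 'bar', 'baz']
```

This mirrors exactly the invocation exercised by `test_extract_autos_post_lstbin_parser` above.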
52 changes: 52 additions & 0 deletions scripts/extract_autos_post_lstbin.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python
"""
Pipeline script to extract autocorrelations from chunked files into waterfall.
"""
from hera_pspec import utils
from pyuvdata import UVData
from hera_cal._cli_tools import parse_args, run_with_profiling
import warnings

def main(args):
    def check_for_sumdiff(file):
        if args.sumdiff not in file:
            raise ValueError(f"Supposedly processing {args.sumdiff} files but "
                             f"{args.sumdiff} not in the filename.")
        return

    # In case there are files without autos
    found_autos = False
    for file_ind, file in enumerate(args.flist):
        check_for_sumdiff(file)
        try:
            main_uvd = UVData()
            main_uvd.read(file, ant_str="auto")
            found_autos = True
            break
        except ValueError:  # There were no autos in that file
            continue
Comment on lines +18 to +27

I found this block of code a little difficult to parse at first; it took me a few reads to wrap my head around it. I would maybe instead add a comment to the effect of "Find the first file that has autos."
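The reviewer's reading of the block can be made concrete with a small sketch. Here `read_autos` is a hypothetical stand-in for `UVData.read(file, ant_str="auto")`, assumed (as in the script) to raise `ValueError` when a file has no autocorrelations:

```python
def find_first_with_autos(flist, read_autos):
    """Find the first file that has autos; return (index, data) or (None, None).

    read_autos(file) stands in for UVData.read(file, ant_str="auto") and
    raises ValueError when the file contains no autocorrelations.
    """
    for file_ind, file in enumerate(flist):
        try:
            return file_ind, read_autos(file)
        except ValueError:
            continue
    return None, None

# Toy stand-in: only files listed in HAS_AUTOS "contain" autos.
HAS_AUTOS = {"b.uvh5", "c.uvh5"}

def fake_read(f):
    if f not in HAS_AUTOS:
        raise ValueError("no autos")
    return f"autos:{f}"

print(find_first_with_autos(["a.uvh5", "b.uvh5", "c.uvh5"], fake_read))
# → (1, 'autos:b.uvh5')
```

Factoring the search into a named helper like this would also make the "find the first file that has autos" intent self-documenting.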


    if found_autos:
        start_ind = file_ind + 1
        if start_ind < len(args.flist):
            for file in args.flist[file_ind + 1:]:
Comment on lines +30 to +32


I'm not sure how important it is to track whether only one file had autocorrelations in it, but if it's not that important to do, then you can replace this block with:
for file in args.flist[file_ind + 1:]:
If file_ind + 1 is greater than or equal to len(args.flist), the slice is empty and the loop just doesn't do any iterations.
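The empty-slice behavior the reviewer relies on can be checked directly:

```python
flist = ["f0", "f1", "f2"]
file_ind = 2  # autos were first found in the last file

# Slicing past the end of a list yields an empty list,
# so a follow-up loop simply performs no iterations.
tail = flist[file_ind + 1:]
print(tail)  # → []
```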

                check_for_sumdiff(file)
                try:
                    new_uvd = UVData()
                    new_uvd.read(file, ant_str="auto")
                    main_uvd.__add__(new_uvd, inplace=True)

I'd suggest adding an axis argument to the parser, which defaults to None, that will let you do fast concatenation here by specifying axis=args.axis. Our typical use case should let us use axis='blt' to speed up the concatenation.
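One way to wire up this suggestion on the parser side (a sketch, not the merged change; the option name and default are assumptions, and the commented branch assumes pyuvdata's `fast_concat(other, axis, inplace=...)` interface):

```python
import argparse

# Hypothetical --axis option; 'blt' mirrors the reviewer's typical use case.
parser = argparse.ArgumentParser()
parser.add_argument("--axis", type=str, default=None,
                    help="If given (e.g. 'blt'), concatenate files along "
                         "this axis for speed; otherwise fall back to the "
                         "slower generic combine.")

print(parser.parse_args([]).axis)                 # → None
print(parser.parse_args(["--axis", "blt"]).axis)  # → blt

# In the script body one would then branch, e.g.:
# if args.axis is not None:
#     main_uvd.fast_concat(new_uvd, args.axis, inplace=True)
# else:
#     main_uvd.__add__(new_uvd, inplace=True)
```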

                except ValueError:
                    continue
Comment on lines +38 to +39

This worries me. Are you certain the only way a ValueError can be raised is by a file not having autos in it? What if the autos in one file differ from the autos in a different file? I'd think that pyuvdata would also have an issue with that. At any rate, I think something like warnings.warn("Some files missing autos") should be emitted before continuing.
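A sketch of the suggested warning, using a hypothetical `read_autos` stand-in for `UVData.read(file, ant_str="auto")` (the broad `except ValueError` is kept from the original; narrowing it would require knowing exactly which exceptions pyuvdata raises here):

```python
import warnings

def try_add_autos(collected, file, read_autos):
    """Read autos from file and append to collected; warn instead of
    silently skipping when the read fails."""
    try:
        collected.append(read_autos(file))
    except ValueError as err:
        warnings.warn(f"Skipping {file}, possibly missing autos: {err}")

collected = []
try_add_autos(collected, "good.uvh5", lambda f: f"autos:{f}")

def bad_read(f):
    raise ValueError("no autos")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    try_add_autos(collected, "bad.uvh5", bad_read)

print(collected)    # → ['autos:good.uvh5']
print(len(caught))  # → 1
```

The warning makes a partially-failed run visible in the logs without aborting the whole extraction.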

        else:
            warnings.warn("Only one file had autocorrelations. Inputs are almost "
                          "certainly incorrect.")

        outfile = f"zen.LST.0.00000.{args.sumdiff}.{args.label}.foreground_filled.xtalk_filtered.chunked.waterfall.autos.uvh5"
        main_uvd.write_uvh5(outfile, clobber=True)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for best practice, clobber should be an argument passed to the script.
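A hedged sketch of exposing clobber on the parser (the flag name is an assumption, not the merged change):

```python
import argparse

# Hypothetical --clobber flag; the script currently hard-codes clobber=True.
parser = argparse.ArgumentParser()
parser.add_argument("--clobber", action="store_true",
                    help="Overwrite the output file if it already exists.")

print(parser.parse_args([]).clobber)             # → False
print(parser.parse_args(["--clobber"]).clobber)  # → True

# The write call would then become:
# main_uvd.write_uvh5(outfile, clobber=args.clobber)
```

With `action="store_true"` the default is not to overwrite, which is the safer behavior for a pipeline script.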

    else:
        raise ValueError("No autocorrelations found in any files. Check inputs.")

parser = utils.extract_autos_post_lstbin_parser()
args = parse_args(parser)
run_with_profiling(main, args, args)
