From fcd9258a2425f7a654c565b9e51ae68f51b6affb Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Thu, 17 May 2018 16:53:37 -0700
Subject: [PATCH 1/6] added combine_uvp func to uvpspec.py

---
 hera_pspec/pspecdata.py |  24 ++-
 hera_pspec/uvpspec.py   | 462 ++++++++++++++++++++++++++++++++++------
 2 files changed, 413 insertions(+), 73 deletions(-)

diff --git a/hera_pspec/pspecdata.py b/hera_pspec/pspecdata.py
index 24fece92..1eccd056 100644
--- a/hera_pspec/pspecdata.py
+++ b/hera_pspec/pspecdata.py
@@ -7,6 +7,7 @@
 from hera_pspec import uvpspec, utils, version
 from hera_pspec import uvpspec_utils as uvputils
 from pyuvdata import utils as uvutils
+import datetime
 
 
 class PSpecData(object):
@@ -1469,10 +1470,21 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
         uvp.weighting = input_data_weight
         uvp.vis_units, uvp.norm_units = self.units(little_h=little_h)
         uvp.telescope_location = dset1.telescope_location
-        uvp.history = dset1.history + dset2.history + history
+        filename1 = getattr(dset1.extra_keywords, 'filename', None)
+        filename2 = getattr(dset2.extra_keywords, 'filename', None)
+        label1 = self.labels[self.dset_idx(dsets[0])]
+        label2 = self.labels[self.dset_idx(dsets[1])]
+        uvp.labels = np.array([label1, label2], np.str)
+        uvp.label_1_array = np.zeros((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int)
+        uvp.label_2_array = np.ones((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int)
+        uvp.history = "UVPSpec written on {} with hera_pspec git hash {}\n{}\n" \
+                      "dataset1: filename: {}, label: {}, history:\n{}\n{}\n" \
+                      "dataset2: filename: {}, label: {}, history:\n{}\n{}\n" \
+                      "".format(datetime.datetime.now(), version.git_hash, '-'*20, 
+                                filename1, label1, dset1.history, '-'*20, 
+                                filename2, label2, dset2.history, '-'*20)
         uvp.taper = taper
         uvp.norm = norm
-        uvp.git_hash = version.git_hash
         
         if self.primary_beam is not None:
             # attach cosmology
@@ -1482,14 +1494,6 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
             uvp.OmegaP, uvp.OmegaPP = self.primary_beam.get_Omegas(uvp.pol_array)
             if hasattr(self.primary_beam, 'filename'):
                 uvp.beamfile = self.primary_beam.filename
-        if hasattr(dset1.extra_keywords, 'filename'):
-            uvp.filename1 = dset1.extra_keywords['filename']
-        if hasattr(dset2.extra_keywords, 'filename'):
-            uvp.filename2 = dset2.extra_keywords['filename']
-        lbl1 = self.labels[self.dset_idx(dsets[0])]
-        lbl2 = self.labels[self.dset_idx(dsets[1])]
-        if lbl1 is not None: uvp.label1 = lbl1
-        if lbl2 is not None: uvp.label2 = lbl2
 
         # fill data arrays
         uvp.data_array = data_array
diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py
index fb2745e1..eedde99c 100644
--- a/hera_pspec/uvpspec.py
+++ b/hera_pspec/uvpspec.py
@@ -6,6 +6,7 @@
 from hera_pspec.parameter import PSpecParam
 from pyuvdata import uvutils as uvutils
 import h5py
+import operator
 
 
 class UVPSpec(object):
@@ -25,60 +26,58 @@ def __init__(self):
         self._Nspwdlys = PSpecParam("Nspwdlys", description="Total number of delay bins across all sub-bands.", expected_type=int)
         self._Nspws = PSpecParam("Nspws", description="Number of spectral windows.", expected_type=int)
         self._Ndlys = PSpecParam("Ndlys", description="Total number of delay bins.", expected_type=int)
-        self._Nfreqs = PSpecParam("Nfreqs", description="Total number of frequency bins in the original data.", expected_type=int)
+        self._Nfreqs = PSpecParam("Nfreqs", description="Total number of frequency bins in the data.", expected_type=int)
         self._Npols = PSpecParam("Npols", description="Number of polarizations in the data.", expected_type=int)
         self._history = PSpecParam("history", description='The file history.', expected_type=str)
 
         # Data attributes
         desc = "Power spectrum data dictionary with spw integer as keys and values as complex ndarrays."
-        self._data_array = PSpecParam("data_array", description=desc, expected_type=dict, form="(Nblpairts, Ndlys, Npols)")
+        self._data_array = PSpecParam("data_array", description=desc, expected_type=np.complex128, form="(Nblpairts, spw_Ndlys, Npols)")
         desc = "Weight dictionary for original two datasets. The second axis holds [dset1_wgts, dset2_wgts] in that order."
-        self._wgt_array = PSpecParam("wgt_array", description=desc, expected_type=dict, form="(Nblpairts, Nfreqs, 2, Npols)")
+        self._wgt_array = PSpecParam("wgt_array", description=desc, expected_type=np.float64, form="(Nblpairts, spw_Nfreqs, 2, Npols)")
         desc = "Integration time dictionary. This holds the average integration time [seconds] of each delay spectrum in the data. " \
                "This is not necessarily equal to the integration time of the visibility data: If data have been coherently averaged " \
                "(i.e. averaged before squaring), than this is the sum of each spectrum's integration time."
-        self._integration_array = PSpecParam("integration_array", description=desc, expected_type=dict, form="(Nblpairts, Npols)")
+        self._integration_array = PSpecParam("integration_array", description=desc, expected_type=np.float64, form="(Nblpairts, Npols)")
         desc = "Nsample dictionary, if the pspectra have been incoherently averaged (i.e. averaged after squaring), this is " \
                "the effective number of samples in that average (float type). This is not the same as the pyuvdata.UVData nsample_array."
-        self._nsample_array = PSpecParam("nsample_array", description=desc, expected_type=dict, form="(Nblpairts, Npols)")
-        self._spw_array = PSpecParam("spw_array", description="Spw integer array.", form="(Nspwdlys,)")
-        self._freq_array = PSpecParam("freq_array", description="Frequency array of the original data in Hz.", form="(Nspwdlys,)")
-        self._dly_array = PSpecParam("dly_array", description="Delay array in seconds.", form="(Nspwdlys,)")
+        self._nsample_array = PSpecParam("nsample_array", description=desc, expected_type=np.float64, form="(Nblpairts, Npols)")
+        self._spw_array = PSpecParam("spw_array", description="Spw integer array.", form="(Nspwdlys,)", expected_type=np.int32)
+        self._freq_array = PSpecParam("freq_array", description="Frequency array of the original data in Hz.", form="(Nfreqs,)", expected_type=np.float64)
+        self._dly_array = PSpecParam("dly_array", description="Delay array in seconds.", form="(Nspwdlys,)", expected_type=np.float64)
         desc = "Polarization integers of power spectra. Stokes 1:4 (I,Q,U,V); circular -1:-4 (RR,LL,RL,LR); linear -5:-8 (XX,YY,XY,YX)"
-        self._pol_array = PSpecParam("pol_array", description=desc, form="(Npols,)")
-        self._lst_1_array = PSpecParam("lst_1_array", description="LST array of the first bl in the bl-pair [radians].", form="(Nblpairts,)")
-        self._lst_2_array = PSpecParam("lst_2_array", description="LST array of the second bl in the bl-pair [radians].", form="(Nblpairts,)")
-        self._lst_avg_array = PSpecParam("lst_avg_array", description="Average of the lst_1_array and lst_2_array [radians].", form="(Nblpairts,)")
-        self._time_1_array = PSpecParam("time_1_array", description="Time array of the first bl in the bl-pair [Julian Date].", form="(Nblpairts,)")
-        self._time_1_array = PSpecParam("time_2_array", description="Time array of the second bl in the bl-pair [Julian Date].", form="(Nblpairts,)")
-        self._time_avg_array = PSpecParam("time_avg_array", description="Average of the time_1_array and time_2_array [Julian Date].", form='(Nblpairts,)')
-        self._blpair_array = PSpecParam("blpair_array", description="Baseline-pair integer for all baseline-pair times.", form="(Nblpairts,)")
+        self._pol_array = PSpecParam("pol_array", description=desc, form="(Npols,)", expected_type=np.ndarray)
+        self._lst_1_array = PSpecParam("lst_1_array", description="LST array of the first bl in the bl-pair [radians].", form="(Nblpairts,)", expected_type=np.float64)
+        self._lst_2_array = PSpecParam("lst_2_array", description="LST array of the second bl in the bl-pair [radians].", form="(Nblpairts,)", expected_type=np.float64)
+        self._lst_avg_array = PSpecParam("lst_avg_array", description="Average of the lst_1_array and lst_2_array [radians].", form="(Nblpairts,)", expected_type=np.float64)
+        self._time_1_array = PSpecParam("time_1_array", description="Time array of the first bl in the bl-pair [Julian Date].", form="(Nblpairts,)", expected_type=np.float64)
+        self._time_1_array = PSpecParam("time_2_array", description="Time array of the second bl in the bl-pair [Julian Date].", form="(Nblpairts,)", expected_type=np.float64)
+        self._time_avg_array = PSpecParam("time_avg_array", description="Average of the time_1_array and time_2_array [Julian Date].", form='(Nblpairts,)', expected_type=np.float64)
+        self._blpair_array = PSpecParam("blpair_array", description="Baseline-pair integer for all baseline-pair times.", form="(Nblpairts,)", expected_type=np.int32)
+        self._scalar_array = PSpecParam("scalar_array", description="Power spectrum normalization scalar from pspecbeam module.", expected_type=np.float64, form="(Nspws, Npols)")
 
         # Baseline attributes
         self._Nbls = PSpecParam("Nbls", description="Number of unique baseline integers.", expected_type=int)
-        self._bl_vecs = PSpecParam("bl_vecs", description="ndarray of baseline separation vectors in the ITRF frame [meters]. To get it in ENU frame see self.get_ENU_bl_vecs().", expected_type=np.ndarray, form="(Nbls,)")
-        self._bl_array = PSpecParam("bl_array", description="All unique baseline (antenna-pair) integers.", expected_type=np.ndarray, form="(Nbls,)")
+        self._bl_vecs = PSpecParam("bl_vecs", description="ndarray of baseline separation vectors in the ITRF frame [meters]. To get it in ENU frame see self.get_ENU_bl_vecs().", expected_type=np.float64, form="(Nbls,)")
+        self._bl_array = PSpecParam("bl_array", description="All unique baseline (antenna-pair) integers.", expected_type=np.int32, form="(Nbls,)")
 
         # Misc Attributes
         self._channel_width = PSpecParam("channel_width", description="width of visibility frequency channels in Hz.", expected_type=float)
-        self._telescope_location = PSpecParam("telescope_location", description="telescope location in ECEF frame [meters]. To get it in Lat/Lon/Alt see pyuvdata.utils.LatLonAlt_from_XYZ().", expected_type=np.ndarray)
+        self._telescope_location = PSpecParam("telescope_location", description="telescope location in ECEF frame [meters]. To get it in Lat/Lon/Alt see pyuvdata.utils.LatLonAlt_from_XYZ().", expected_type=np.float64)
         self._weighting = PSpecParam("weighting", description="Form of data weighting used when forming power spectra.", expected_type=str)
         self._norm = PSpecParam("norm", description="Normalization method adopted in OQE (M matrix).", expected_type=str)
         self._taper = PSpecParam("taper", description='Taper function applied to visibility data before FT."', expected_type=str)
         self._vis_units = PSpecParam("vis_units", description="Units of the original visibility data used to form the power spectra.", expected_type=str)
         self._norm_units = PSpecParam("norm_units", description="Power spectra normalization units, i.e. telescope units [Hz str] or cosmological [(h^-3) Mpc^3].", expected_type=str)
-        self._filename1 = PSpecParam("filename1", description="filename of data from first dataset", expected_type=str)
-        self._filename2 = PSpecParam("filename2", description="filename of data from second dataset", expected_type=str)
-        self._label1 = PSpecParam("label1", description="label of data from first dataset", expected_type=str)
-        self._label2 = PSpecParam("label2", description="label of data from second dataset", expected_type=str)
-        self._git_hash = PSpecParam("git_hash", description="GIT hash of hera_pspec when pspec was generated.", expected_type=str)
+        self._labels = PSpecParam("labels", description="Array of dataset string labels.", expected_type=np.str)
+        self._label_1_array = PSpecParam("label_1_array", description="Integer array w/ shape of data that indexes labels and gives label of dset1.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int)
+        self._label_2_array = PSpecParam("label_2_array", description="Integer array w/ shape of data that indexes labels and gives label of dset2.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int)
         self._folded = PSpecParam("folded", description="if power spectra are folded (i.e. averaged) onto purely positive delay axis. Default is False", expected_type=bool)
-        self._scalar_array = PSpecParam("scalar_array", description="Power spectrum normalization scalar from pspecbeam module.", expected_type=np.ndarray, form="(Nspws, Npols)")
         self._beamfile = PSpecParam("beamfile", description="filename of beam-model used to normalized pspectra.", expected_type=str)
-        self._OmegaP = PSpecParam("OmegaP", description="Integral of unitless beam power over the sky [steradians].", form="(Nbeam_freqs, Npols)")
-        self._OmegaPP = PSpecParam("OmegaP", description="Integral of unitless beam power squared over the sky [steradians].", form="(Nbeam_freqs, Npols)")
-        self._beam_freqs = PSpecParam("beam_freqs", description="Frequency bins of the OmegaP and OmegaPP beam-integral arrays [Hz].", form="(Nbeam_freqs,)")
-        self._cosmo = PSpecParam("cosmo", description="Instance of conversion.Cosmo_Conversions class.")
+        self._OmegaP = PSpecParam("OmegaP", description="Integral of unitless beam power over the sky [steradians].", form="(Nbeam_freqs, Npols)", expected_type=np.float64)
+        self._OmegaPP = PSpecParam("OmegaP", description="Integral of unitless beam power squared over the sky [steradians].", form="(Nbeam_freqs, Npols)", expected_type=np.float64)
+        self._beam_freqs = PSpecParam("beam_freqs", description="Frequency bins of the OmegaP and OmegaPP beam-integral arrays [Hz].", form="(Nbeam_freqs,)", expected_type=np.float64)
+        self._cosmo = PSpecParam("cosmo", description="Instance of conversion.Cosmo_Conversions class.", expected_type=conversions.Cosmo_Conversions)
 
         # collect all parameters: required and non-required
         self._all_params = sorted(map(lambda p: p[1:], fnmatch.filter(self.__dict__.keys(), '_*')))
@@ -89,18 +88,18 @@ def __init__(self):
                             "spw_array", "freq_array", "dly_array", "pol_array", "lst_1_array",
                             "lst_2_array", "time_1_array", "time_2_array", "blpair_array", "Nbls",
                             "bl_vecs", "bl_array", "channel_width", "telescope_location", "weighting",
-                            "vis_units", "norm_units", "taper", "norm", "git_hash", "nsample_array",
+                            "vis_units", "norm_units", "taper", "norm", "nsample_array",
                             'lst_avg_array', 'time_avg_array', 'folded', "scalar_array"]
 
         # all parameters must fall into one and only one of the following groups, which are used in __eq__
         self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", "Nspws", "Ndlys",
                             "Npols", "Nfreqs", "history", "Nbls", "channel_width", "weighting",
-                            "vis_units", "filename1", "filename2", "label1", "label2", "norm",
-                            "norm_units", "taper", "git_hash", "cosmo", "beamfile" ,'folded']
+                            "vis_units", "norm", "norm_units", "taper", "cosmo", "beamfile" ,'folded']
         self._ndarrays = ["spw_array", "freq_array", "dly_array", "pol_array", "lst_1_array",
                           'lst_avg_array', 'time_avg_array', "lst_2_array", "time_1_array",
                           "time_2_array", "blpair_array", "OmegaP", "OmegaPP", "beam_freqs",
-                          "bl_vecs", "bl_array", "telescope_location", "scalar_array"]
+                          "bl_vecs", "bl_array", "telescope_location", "scalar_array", 'labels',
+                          'label_1_array', 'label_2_array']
         self._dicts = ["data_array", "wgt_array", "integration_array", "nsample_array"]
 
         # define which attributes are considred meta data. Large attrs should be constructed as datasets
@@ -114,7 +113,6 @@ def __init__(self):
 
         # Default parameter values
         self.folded = False
-        self.git_hash = version.git_hash
 
     def get_data(self, key, *args):
         """
@@ -335,6 +333,27 @@ def get_kparas(self, spw, little_h=True):
 
         return k_para
 
+    def get_spw_ranges(self):
+        """
+        Get the frequency range and Nfreqs of each spectral window in the object.
+
+        Returns
+        -------
+        spw_ranges : list of len-3 tuples (freq_start, freq_end, Nfreqs) in Hz
+            Contains start, stop and bin-width of frequencies [Hz] of each spectral window.
+            To turn this into the frequency array of the spectral window use 
+                spw_freqs = np.linspace(freq_start, freq_end, Nfreqs, endpoint=False)
+        """
+        spw_ranges = []
+        # iterate over spectral windows
+        for spw in np.unique(self.spw_array):
+            spw_freqs = self.freq_array[self.spw_to_indices(spw)]
+            Nfreqs = len(spw_freqs)
+            spw_df = np.median(np.diff(spw_freqs))
+            spw_ranges.append( (spw_freqs.min(), spw_freqs.min() + Nfreqs * spw_df, Nfreqs) )
+
+        return spw_ranges
+
     def convert_to_deltasq(self, little_h=True, inplace=True):
         """
         Convert from P(k) to Delta^2(k) by multiplying by k^3 / (2pi^2).
@@ -476,7 +495,7 @@ def spw_to_indices(self, spw):
             Spectral window index or list of indices.
         """
         # convert to list if int
-        if isinstance(spw, (np.int, int)):
+        if isinstance(spw, (int, np.integer)):
             spw = [spw]
 
         # assert exists in data
@@ -492,7 +511,7 @@ def spw_to_indices(self, spw):
 
     def pol_to_indices(self, pol):
         """
-        Map a polarization integer or str to its index in pol_array
+        Map a polarization integer or str to its index in pol_array.
 
         Parameters
         ----------
@@ -752,7 +771,7 @@ def read_from_group(self, grp, just_meta=False, spws=None, bls=None,
         """
         # Clear all data in the current object
         self._clear()
-        
+
         # Load-in meta data
         for k in grp.attrs:
             if k in self._meta_attrs:
@@ -760,7 +779,7 @@ def read_from_group(self, grp, just_meta=False, spws=None, bls=None,
         for k in grp:
             if k in self._meta_dsets:
                 setattr(self, k, grp[k][:])
-        
+
         # Use _select() to pick out only the requested baselines/spws
         if just_meta:
             uvputils._select(self, spws=spws, bls=bls,
@@ -771,14 +790,14 @@ def read_from_group(self, grp, just_meta=False, spws=None, bls=None,
                              only_pairs_in_bls=only_pairs_in_bls, 
                              blpairs=blpairs, times=times, pols=pols, 
                              h5file=grp)
-        
+
         # handle cosmo
         if hasattr(self, 'cosmo'):
             self.cosmo = conversions.Cosmo_Conversions(**ast.literal_eval(self.cosmo))
 
         self.check(just_meta=just_meta)
     
-    
+
     def read_hdf5(self, filepath, just_meta=False, spws=None, bls=None,
                   blpairs=None, times=None, pols=None, 
                   only_pairs_in_bls=False):
@@ -995,26 +1014,39 @@ def check(self, just_meta=False):
         just_meta : bool, optional
             If True, only check metadata. Default: False.
         """
-        # check required parameters exist
-        if just_meta:
-            req_metas = sorted(set(self._req_params).intersection(set(self._meta)))
-            for p in req_metas:
-                assert hasattr(self, p), "required parameter {} hasn't been defined".format(p)
-        else:
-            for p in self._req_params:
-                assert hasattr(self, p), "required parameter {} hasn't been defined".format(p)
-            # check data
-            assert isinstance(self.data_array, (dict, odict)), "self.data_array must be a dictionary type"
-            assert np.min(map(lambda k: self.data_array[k].dtype in (np.complex, complex, np.complex128), self.data_array.keys())), "self.data_array values must be complex type"
-            # check wgts
-            assert isinstance(self.wgt_array, (dict, odict)), "self.wgt_array must be a dictionary type"
-            assert np.min(map(lambda k: self.wgt_array[k].dtype in (np.float, float), self.wgt_array.keys())), "self.wgt_array values must be float type"
-            # check integration
-            assert isinstance(self.integration_array, (dict, odict)), "self.integration_array must be a dictionary type"
-            assert np.min(map(lambda k: self.integration_array[k].dtype in (np.float, float, np.float64), self.integration_array.keys())), "self.integration_array values must be float type"
-            # check nsample
-            assert isinstance(self.nsample_array, (dict, odict)), "self.nsample_array must be a dictionary type"
-            assert np.min(map(lambda k: self.nsample_array[k].dtype in (np.float, float, np.float64), self.nsample_array.keys())), "self.nsample_array values must be float type"
+        # iterate over all possible parameters
+        for p in self._all_params:
+            # only enforce existance if not just_meta
+            if not just_meta:
+                if p in self._req_params:
+                    assert hasattr(self, p), "required parameter {} doesn't exist".format(p)
+
+            # if attribute exists, check its type
+            if hasattr(self, p):
+                a = getattr(self, '_'+p)
+                if hasattr(a, 'expected_type'):
+                    err_msg = "attribute {} does not have expected data type {}".format(p, a.expected_type)
+                    # ndarrays
+                    if p in self._ndarrays:
+                        assert isinstance(getattr(self, p), np.ndarray), "attribute {} needs to be an ndarray".format(p)
+                        if issubclass(getattr(self, p).dtype.type, a.expected_type):
+                            pass
+                        else:
+                            # try to cast into its dtype
+                            try:
+                                setattr(self, p, getattr(self, p).astype(a.expected_type))
+                            except:
+                                raise ValueError(err_msg)
+                    # dicts
+                    elif p in self._dicts:
+                        assert isinstance(getattr(self, p), (dict, odict)), "attribute {} needs to be a dictionary".format(p)
+                        # iterate over keys
+                        for k in getattr(self, p).keys():
+                            assert isinstance(getattr(self, p)[k], np.ndarray), "values of attribute {} need to be ndarrays".format(p)
+                            assert issubclass(getattr(self, p)[k].dtype.type, a.expected_type), err_msg
+                    # immutables
+                    elif p in self._immutables:
+                        assert isinstance(getattr(self, p), a.expected_type), err_msg
 
     def _clear(self):
         """
@@ -1045,11 +1077,13 @@ def __str__(self):
             if hasattr(self, k):
                 s += "%18s: shape %s\n" % (k, getattr(self, k).shape)
         return s
-    
-    def __eq__(self, other):
+
+    def __eq__(self, other, params=None, verbose=False):
         """ Check equivalence between attributes of two UVPSpec objects """
+        if params is None:
+            params = self._all_params
         try:
-            for p in self._all_params:
+            for p in params:
                 if p not in self._req_params \
                   and (not hasattr(self, p) and not hasattr(other, p)):
                     continue
@@ -1062,10 +1096,17 @@ def __eq__(self, other):
                         assert np.isclose(getattr(self, p)[i], \
                                getattr(other, p)[i]).all()
         except AssertionError:
+            if verbose: 
+                print "UVPSpec parameter '{}' not equivalent between {} and {}" \
+                      "".format(p, self.__repr__(), other.__repr__())
             return False
 
         return True
 
+    def __add__(self, other, verbose=False):
+        """ Concatenate the data of two UVPSpec objects together along a single axis """
+        self = combine_uvp([self, other], verbose=verbose)
+
     @property
     def units(self):
         """Return power spectrum units. See self.vis_units and self.norm_units."""
@@ -1374,3 +1415,298 @@ def compute_scalar(self, spw, pol, num_steps=1000, little_h=True,
         return scalar
 
 
+def combine_uvp(uvps, verbose=True):
+    """
+    Combine multiple UVPSpec objects into a single object, concatenating along one of
+    either spectral window [spw], baseline-pair-times [blpairts], or polarization [pol].
+    Certain meta-data of all of the UVPSpec objs must match exactly. 
+    In addition, one can only concatenate data along a single data axis, with the condition 
+    that all other axes match exactly.
+
+    Parameters
+    ----------
+    uvps : list 
+        A list of UVPSpec objects to combine.
+
+    Returns
+    -------
+    u : UVPSpec object
+        A UVPSpec object with the data of all the inputs combined.
+    """
+    # perform type checks and get concatenation axis
+    (uvps, concat_ax, new_spws, new_blpts, new_pols,
+     static_meta) = get_uvp_overlap(uvps, verbose=verbose)
+    Nuvps = len(uvps)
+
+    # create a new uvp
+    u = UVPSpec()
+
+    # sort new data axes
+    new_spws = sorted(new_spws)
+    new_blpts = sorted(new_blpts)
+    new_pols = sorted(new_pols)
+    Nspws = len(new_spws)
+    Nblpairts = len(new_blpts)
+    Npols = len(new_pols)
+
+    # create new empty data arrays and fill spw arrays
+    u.data_array = odict()
+    u.integration_array = odict()
+    u.wgt_array = odict()
+    u.nsample_array = odict()
+    u.scalar_array = np.empty((Nspws, Npols), np.float)
+    u.freq_array, u.spw_array, u.dly_array = [], [], []
+    for i, spw in enumerate(new_spws):
+        u.data_array[i] = np.empty((Nblpairts, spw[2], Npols), np.complex128)
+        u.integration_array[i] = np.empty((Nblpairts, Npols), np.float64)
+        u.wgt_array[i] = np.empty((Nblpairts, spw[2], 2, Npols), np.float64)
+        u.nsample_array[i] = np.empty((Nblpairts, Npols), np.float64)
+        spw_Nfreqs = spw[-1]
+        spw_freqs = np.linspace(*spw, endpoint=False)
+        spw_dlys = np.fft.fftshift(np.fft.fftfreq(spw_Nfreqs, np.median(np.diff(spw_freqs))))
+        u.spw_array.extend(np.ones(spw_Nfreqs, np.int) * i)
+        u.freq_array.extend(spw_freqs)
+        u.dly_array.extend(spw_dlys)
+    u.spw_array = np.array(u.spw_array)       
+    u.freq_array = np.array(u.freq_array)       
+    u.dly_array = np.array(u.dly_array)  
+    u.pol_array = np.array(new_pols)     
+    u.Nspws = Nspws
+    u.Nblpairts = Nblpairts
+    u.Npols = Npols
+    u.Nfreqs = len(u.freq_array)
+    u.Nspwdlys = len(u.spw_array)
+    u.Ndlys = len(np.unique(u.dly_array))
+    u.time_1_array, u.time_2_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
+    u.time_avg_array, u.lst_avg_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
+    u.lst_1_array, u.lst_2_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
+    u.blpair_array = np.empty(Nblpairts, np.int)
+    u.labels = sorted(set(np.concatenate([uvp.labels for uvp in uvps])))
+    u.label_1_array = np.empty((Nspws, Nblpairts, Npols), np.int)
+    u.label_2_array = np.empty((Nspws, Nblpairts, Npols), np.int)
+
+    # get each uvp's data axes
+    uvp_spws = [_uvp.get_spw_ranges() for _uvp in uvps]
+    uvp_blpts = [zip(_uvp.blpair_array, _uvp.time_avg_array) for _uvp in uvps]
+    uvp_pols = [_uvp.pol_array.tolist() for _uvp in uvps]
+
+    # fill in data arrays depending on concat ax
+    if concat_ax == 'spw':
+        for i, spw in enumerate(new_spws):
+            l = [spw in _u for _u in uvp_spws].index(True)
+            m = [spw == _spw for _spw in uvp_spws[l]].index(True)
+            for k, p in enumerate(new_pols):
+                q = uvp_pols[l].index(p)
+                u.scalar_array[i, k] = uvps[l].scalar_array[m, q]
+                for j, blpt in enumerate(new_blpts):
+                    n = uvp_blpts[l].index(blpt)
+                    u.data_array[i][j, :, k] = uvps[l].data_array[m][n, :, q]
+                    u.wgt_array[i][j, :, :, k] = uvps[l].wgt_array[m][n, :, :, q]
+                    u.integration_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.nsample_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.label_1_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+                    u.label_2_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+
+        for j, blpt in enumerate(new_blpts):
+            n = uvp_blpts[0].index(blpt)
+            u.time_1_array[j] = uvps[0].time_1_array[n]
+            u.time_2_array[j] = uvps[0].time_2_array[n]
+            u.time_avg_array[j] = uvps[0].time_avg_array[n]
+            u.lst_1_array[j] = uvps[0].lst_1_array[n]
+            u.lst_2_array[j] = uvps[0].lst_2_array[n]
+            u.lst_avg_array[j] = uvps[0].lst_avg_array[n]
+            u.blpair_array[j] = uvps[0].blpair_array[n]
+
+    elif concat_ax == 'blpairts':
+        for j, blpt in enumerate(new_blpts):
+            l = [blpt in _blpts for _blpts in uvp_blpts].index(True)
+            n = uvp_blpts[l].index(blpt)
+            for i, spw in enumerate(new_spws):
+                m = [spw == _spw for _spw in uvp_spws[l]].index(True)
+                for k, p in enumerate(new_pols):
+                    q = uvp_pols[l].index(p)
+                    u.data_array[i][j, :, k] = uvps[l].data_array[m][n, :, q]
+                    u.wgt_array[i][j, :, :, k] = uvps[l].wgt_array[m][n, :, :, q]
+                    u.integration_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.nsample_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.label_1_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+                    u.label_2_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+
+            u.time_1_array[j] = uvps[l].time_1_array[n]
+            u.time_2_array[j] = uvps[l].time_2_array[n]
+            u.time_avg_array[j] = uvps[l].time_avg_array[n]
+            u.lst_1_array[j] = uvps[l].lst_1_array[n]
+            u.lst_2_array[j] = uvps[l].lst_2_array[n]
+            u.lst_avg_array[j] = uvps[l].lst_avg_array[n]
+            u.blpair_array[j] = uvps[l].blpair_array[n]
+
+    elif concat_ax == 'pol':
+        for k, p in enumerate(new_pols):
+            l = [p in _pols for _pols in uvp_pols].index(True)
+            q = uvp_pols[l].index(p)
+            for i, spw in enumerate(new_spws):
+                m = [spw == _spw for _spw in uvp_spws[l]].index(True)
+                u.scalar_array[i, k] = uvps[l].scalar_array[m, q]
+                for j, blpt in enumerate(new_blpts):
+                    n = uvp_blpts[l].index(blpt)
+                    u.data_array[i][j, :, k] = uvps[l].data_array[m][n, :, q]
+                    u.wgt_array[i][j, :, :, k] = uvps[l].wgt_array[m][n, :, :, q]
+                    u.integration_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.nsample_array[i][j, k] = uvps[l].integration_array[m][n, q]
+                    u.label_1_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+                    u.label_2_array[i, j, k] = u.labels.index(uvps[l].labels[uvps[l].label_1_array[m, n, q]])
+
+        for j, blpt in enumerate(new_blpts):
+            n = uvp_blpts[0].index(blpt)
+            u.time_1_array[j] = uvps[0].time_1_array[n]
+            u.time_2_array[j] = uvps[0].time_2_array[n]
+            u.time_avg_array[j] = uvps[0].time_avg_array[n]
+            u.lst_1_array[j] = uvps[0].lst_1_array[n]
+            u.lst_2_array[j] = uvps[0].lst_2_array[n]
+            u.lst_avg_array[j] = uvps[0].lst_avg_array[n]
+            u.blpair_array[j] = uvps[0].blpair_array[n]
+    
+    # set baselines
+    u.Nblpairs = len(np.unique(u.blpair_array))
+    uvp_bls = [uvp.bl_array for uvp in uvps]
+    new_bls = sorted(reduce(operator.or_, [set(bls) for bls in uvp_bls]))
+    u.bl_array = np.array(new_bls)
+    u.Nbls = len(u.bl_array)
+    u.bl_vecs = []
+    for b, bl in enumerate(new_bls):
+        l = [bl in _bls for _bls in uvp_bls].index(True)
+        h = [bl == _bl for _bl in uvp_bls[l]].index(True)
+        u.bl_vecs.append(uvps[l].bl_vecs[h])
+    u.bl_vecs = np.array(u.bl_vecs)
+    u.Ntimes = len(np.unique(u.time_avg_array))
+    u.history = reduce(operator.add, [uvp.history for uvp in uvps])
+    u.labels = np.array(u.labels, np.str)
+
+    for k in static_meta.keys():
+        setattr(u, k, static_meta[k])
+
+    # run check
+    u.check()
+
+    return u
+
+
+def get_uvp_overlap(uvps, verbose=True):
+    """
+    Given a list of UVPSpec objects or a list of paths to UVPSpec objects,
+    find a single data axis ['spw', 'blpairts', 'pol'] where all objects contain 
+    non-overlapping data.
+
+    Parameters
+    ----------
+    uvps : list of UVPSpec objects or list of string paths to UVPSpec objects
+
+    verbose : bool, optional
+        print feedback to standard output
+
+    Returns
+    -------
+    uvps : list
+        list of input UVPSpec objects 
+
+    concat_ax : str
+        data axis ['spw', 'blpairts', 'pols'] across data can be concatenated
+
+    unique_spws : list
+        list of unique spectral window tuples (spw_freq_start, spw_freq_end, spw_Nfreqs)
+        across all input uvps
+
+    unique_blpts : list
+        list of unique baseline-pair-time tuples (blpair_integer, time_avg_array JD float)
+        across all input uvps
+
+    unique_pols : list
+        list of unique polarization integers across all input uvps
+    """
+    # type check
+    assert isinstance(uvps, (list, tuple, np.ndarray)), "uvps must be fed as a list"
+    assert isinstance(uvps[0], (UVPSpec, str, np.str)), "uvps must be fed as a list of UVPSpec objects or strings"
+    Nuvps = len(uvps)
+
+    # load uvps if fed as strings
+    if isinstance(uvps[0], (str, np.str)):
+        _uvps = []
+        for u in uvps:
+            uvp = UVPSpec()
+            uvp.read_hdf5(u, just_meta=True)
+            _uvps.append(uvp)
+        uvps = _uvps
+
+    # ensure static metadata agree between all objects
+    static_meta = ['channel_width', 'telescope_location', 'weighting', 'OmegaP', 'beam_freqs', 'OmegaPP', 
+                   'beamfile', 'norm', 'taper', 'vis_units', 'norm_units', 'folded', 'cosmo']
+    for m in static_meta:
+        for u in uvps[1:]:
+            if hasattr(uvps[0], m) and hasattr(u, m):
+                assert uvps[0].__eq__(u, params=[m]), "Cannot concatenate UVPSpec objs: not all agree on '{}' attribute".format(m)
+            else:
+                assert not hasattr(uvps[0], m) and not hasattr(u, m), "Cannot concatenate UVPSpec objs: not all agree on '{}' attribute".format(m)
+
+    static_meta = odict([(k, getattr(uvps[0], k, None)) for k in static_meta if getattr(uvps[0], k, None) is not None])
+
+    # create unique data axis lists
+    unique_spws = []
+    unique_blpts = []
+    unique_pols = []
+    data_overlaps = odict()
+
+    # iterate over uvps
+    for i, uvp1 in enumerate(uvps):
+        # get uvp1 sets and append to unique lists
+        uvp1_spws = uvp1.get_spw_ranges()
+        uvp1_blpts = zip(uvp1.blpair_array, uvp1.time_avg_array)
+        uvp1_pols = uvp1.pol_array
+        for s in uvp1_spws: 
+            if s not in unique_spws: unique_spws.append(s)
+        for b in uvp1_blpts: 
+            if b not in unique_blpts: unique_blpts.append(b)
+        for p in uvp1_pols: 
+            if p not in unique_pols: unique_pols.append(p)
+
+        # iterate over uvps
+        for j, uvp2 in enumerate(uvps):
+            if j <= i: continue
+            # get uvp1 sets and determine overlap w/ uvp1
+            uvp2_spws = uvp2.get_spw_ranges()
+            uvp2_blpts = zip(uvp2.blpair_array, uvp2.time_avg_array)
+            uvp2_pols = uvp2.pol_array
+
+            spw_overlap = len(set(uvp1_spws) ^ set(uvp2_spws)) == 0
+            blpts_overlap = len(set(uvp1_blpts) ^ set(uvp2_blpts)) == 0
+            pol_overlap = len(set(uvp1_pols) ^ set(uvp2_pols)) == 0
+
+            # assert all except 1 axis overlaps
+            overlaps = [spw_overlap, blpts_overlap, pol_overlap]
+            assert sum(overlaps) != 3, "uvp {} and {} have overlapping data, cannot combine".format(i, j)
+            assert sum(overlaps) > 1, "uvp {} and {} are non-overlapping across multiple data axes, cannot combine".format(i, j)
+            concat_ax = ['spw', 'blpairts', 'pol'][overlaps.index(False)]
+            data_overlaps[(i, j)] = concat_ax
+            if verbose:
+                print "uvp {} and {} have non-overlap across {} axis".format(i, j, concat_ax)
+
+    # assert all uvp pairs have the same (single) non-overlap (concat) axis
+    err_msg = "Non-overlapping data in uvps span multiple data axes:\n{}" \
+              "".format("\n".join(map(lambda i: "{} & {}: {}".format(i[0][0], i[0][1], i[1]), data_overlaps.items())))
+    assert len(set(data_overlaps.values())) == 1, err_msg
+
+    # perform additional checks given concat ax
+    if concat_ax == 'spw':
+        # check for spws overlapping in freq
+        for i, spw1 in enumerate(unique_spws):
+            for j, spw2 in enumerate(unique_spws[i+1:]):
+                assert not (spw1[1] > spw2[0] and spw1[0] < spw2[1]), "cannot combine overlapping spws: {} & {}".format(spw1, spw2)
+
+    if concat_ax == 'blpairts':
+        # check scalar_array
+        assert np.all([np.isclose(uvps[0].scalar_array, u.scalar_array) for u in uvps[1:]]), "" \
+               "scalar_array must be the same for all uvps given concatenation along blpairts."
+
+    return uvps, concat_ax, unique_spws, unique_blpts, unique_pols, static_meta
+
+
+

From 7539d246db172401d2ac7df1d86cd08342ce575d Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Fri, 18 May 2018 09:41:06 -0700
Subject: [PATCH 2/6] mv uvpspec.combine_uvp -> uvpspec.concat_uvp, created
 testing.py added tests for concat_uvp

---
 hera_pspec/__init__.py            |   3 +-
 hera_pspec/pspecdata.py           |  17 +--
 hera_pspec/testing.py             | 160 ++++++++++++++++++++++++++++
 hera_pspec/tests/test_grouping.py |   5 +-
 hera_pspec/tests/test_uvpspec.py  | 168 ++++++++++++++----------------
 hera_pspec/utils.py               |   5 +
 hera_pspec/uvpspec.py             | 135 ++++++++++++++----------
 7 files changed, 337 insertions(+), 156 deletions(-)
 create mode 100644 hera_pspec/testing.py

diff --git a/hera_pspec/__init__.py b/hera_pspec/__init__.py
index 94203031..aef41fa7 100644
--- a/hera_pspec/__init__.py
+++ b/hera_pspec/__init__.py
@@ -1,8 +1,7 @@
 """
 __init__.py file for hera_pspec
 """
-
-from hera_pspec import version, conversions, grouping, pspecbeam, plot, pstokes
+from hera_pspec import version, conversions, grouping, pspecbeam, plot, pstokes, testing
 from hera_pspec import uvpspec_utils as uvputils
 
 from hera_pspec.uvpspec import UVPSpec
diff --git a/hera_pspec/pspecdata.py b/hera_pspec/pspecdata.py
index 1eccd056..593c43dc 100644
--- a/hera_pspec/pspecdata.py
+++ b/hera_pspec/pspecdata.py
@@ -165,8 +165,11 @@ def validate_datasets(self, verbose=True):
 
         # Check if dsets are all the same shape along freq axis
         Nfreqs = [d.Nfreqs for d in self.dsets]
+        channel_widths = [d.channel_width for d in self.dsets]
         if np.unique(Nfreqs).size > 1:
             raise ValueError("all dsets must have the same Nfreqs")
+        if np.unique(channel_widths).size > 1:
+            raise ValueError("all dsets must have the same channel_widths")
 
         # Check shape along time axis
         Ntimes = [d.Ntimes for d in self.dsets]
@@ -1216,7 +1219,7 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
         dset2 = self.dsets[self.dset_idx(dsets[1])]
 
         # assert form of bls1 and bls2
-        assert len(bls1) == len(bls2), "length of bls1 must equal length of bls2"
+        assert len(bls1) == len(bls2) and len(bls1) > 0, "length of bls1 must equal length of bls2 and be > 0"
         for i in range(len(bls1)):
             if isinstance(bls1[i], tuple):
                 assert isinstance(bls2[i], tuple), "bls1[{}] type must match bls2[{}] type".format(i, i)
@@ -1358,7 +1361,6 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
                         pass
                     else:
                         if verbose: print("  Building G...")
-                        print key1, "---", key2
                         Gv = self.get_G(key1, key2)
                         Hv = self.get_H(key1, key2, taper=taper)
                         built_GH = True
@@ -1466,7 +1468,7 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
         uvp.pol_array = np.array(spw_pol, np.int)
         uvp.Npols = len(spw_pol)
         uvp.scalar_array = np.array(sclr_arr)
-        uvp.channel_width = dset1.channel_width
+        uvp.channel_width = dset1.channel_width  # all dsets are validated to agree
         uvp.weighting = input_data_weight
         uvp.vis_units, uvp.norm_units = self.units(little_h=little_h)
         uvp.telescope_location = dset1.telescope_location
@@ -1474,9 +1476,12 @@ def pspec(self, bls1, bls2, dsets, pols, input_data_weight='identity', norm='I',
         filename2 = getattr(dset2.extra_keywords, 'filename', None)
         label1 = self.labels[self.dset_idx(dsets[0])]
         label2 = self.labels[self.dset_idx(dsets[1])]
-        uvp.labels = np.array([label1, label2], np.str)
-        uvp.label_1_array = np.zeros((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int)
-        uvp.label_2_array = np.ones((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int)
+        uvp.labels = sorted(set([label1, label2]))
+        uvp.label_1_array = np.ones((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int) \
+                            * uvp.labels.index(label1)
+        uvp.label_2_array = np.ones((uvp.Nspws, uvp.Nblpairts, uvp.Npols), np.int) \
+                            * uvp.labels.index(label2)
+        uvp.labels = np.array(uvp.labels, np.str)
         uvp.history = "UVPSpec written on {} with hera_pspec git hash {}\n{}\n" \
                       "dataset1: filename: {}, label: {}, history:\n{}\n{}\n" \
                       "dataset2: filename: {}, label: {}, history:\n{}\n{}\n" \
diff --git a/hera_pspec/testing.py b/hera_pspec/testing.py
new file mode 100644
index 00000000..13b4aacc
--- /dev/null
+++ b/hera_pspec/testing.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python2
+import numpy as np
+import copy, operator, itertools
+from collections import OrderedDict as odict
+from hera_pspec import uvpspec, pspecdata, conversions, pspecbeam
+from pyuvdata import UVData
+
+
+def build_vanilla_uvpspec(beam=None):
+    """
+    Build an example vanilla UVPSpec object from scratch, with all necessary metadata.
+
+    Parameters
+    ----------
+    beam : PSpecBeamUV object
+
+    Returns
+    -------
+    uvp : UVPSpec object
+    """
+    uvp = uvpspec.UVPSpec()
+
+    Ntimes = 10
+    Nfreqs = 50
+    Ndlys = Nfreqs
+    Nspws = 1
+    Nspwdlys = Nspws * Nfreqs
+
+    # [((1, 2), (1, 2)), ((2, 3), (2, 3)), ((1, 3), (1, 3))]
+    blpairs = [1002001002, 2003002003, 1003001003]
+    bls = [1002, 2003, 1003]
+    Nbls = len(bls)
+    Nblpairs = len(blpairs)
+    Nblpairts = Nblpairs * Ntimes
+
+    blpair_array = np.tile(blpairs, Ntimes)
+    bl_array = np.array(bls)
+    bl_vecs = np.array([[  5.33391548e+00,  -1.35907816e+01,  -7.91624188e-09],
+                        [ -8.67982998e+00,   4.43554478e+00,  -1.08695203e+01],
+                        [ -3.34591450e+00,  -9.15523687e+00,  -1.08695203e+01]])
+    time_array = np.repeat(np.linspace(2458042.1, 2458042.2, Ntimes), Nblpairs)
+    time_1_array = time_array
+    time_2_array = time_array
+    lst_array = np.repeat(np.ones(Ntimes, dtype=np.float), Nblpairs)
+    lst_1_array = lst_array
+    lst_2_array = lst_array
+    time_avg_array = time_array
+    lst_avg_array = lst_array
+    spws = np.arange(Nspws)
+    spw_array = np.tile(spws, Ndlys)
+    freq_array = np.repeat(np.linspace(100e6, 105e6, Nfreqs, endpoint=False), Nspws)
+    dly_array = np.fft.fftshift(np.repeat(np.fft.fftfreq(Nfreqs, np.median(np.diff(freq_array))), Nspws))
+    pol_array = np.array([-5])
+    Npols = len(pol_array)
+    vis_units = 'unknown'
+    norm_units = 'Hz str'
+    weighting = 'identity'
+    channel_width = np.median(np.diff(freq_array))
+    history = 'example'
+    taper = "none"
+    norm = "I"
+    git_hash = "random"
+    scalar_array = np.ones((Nspws, Npols), np.float)
+    label1 = 'red'
+    label2 = 'blue'
+    labels = np.array([label1, label2])
+    label_1_array = np.ones((Nspws, Nblpairts, Npols), np.int) * 0
+    label_2_array = np.ones((Nspws, Nblpairts, Npols), np.int) * 1
+    if beam is not None:
+        OmegaP, OmegaPP = beam.get_Omegas(beam.primary_beam.polarization_array[0])
+        beam_freqs = beam.beam_freqs
+
+    telescope_location = np.array([5109325.85521063, 
+                                   2005235.09142983, 
+                                  -3239928.42475397])
+
+    cosmo = conversions.Cosmo_Conversions()
+
+    data_array, wgt_array, integration_array, nsample_array = {}, {}, {}, {}
+    for s in spws:
+        data_array[s] = np.ones((Nblpairts, Ndlys, Npols), dtype=np.complex) \
+                      * blpair_array[:, None, None] / 1e9
+        wgt_array[s] = np.ones((Nblpairts, Ndlys, 2, Npols), dtype=np.float)
+        integration_array[s] = np.ones((Nblpairts, Npols), dtype=np.float)
+        nsample_array[s] = np.ones((Nblpairts, Npols), dtype=np.float)
+
+    params = ['Ntimes', 'Nfreqs', 'Nspws', 'Nspwdlys', 'Nblpairs', 'Nblpairts', 
+              'Npols', 'Ndlys', 'Nbls', 'blpair_array', 'time_1_array', 
+              'time_2_array', 'lst_1_array', 'lst_2_array', 'spw_array', 
+              'dly_array', 'freq_array', 'pol_array', 'data_array', 'wgt_array',
+              'integration_array', 'bl_array', 'bl_vecs', 'telescope_location', 
+              'vis_units', 'channel_width', 'weighting', 'history', 'taper', 'norm', 
+              'git_hash', 'nsample_array', 'time_avg_array', 'lst_avg_array', 
+              'cosmo', 'scalar_array', 'labels', 'norm_units', 'labels', 'label_1_array',
+              'label_2_array']
+
+    if beam is not None:
+        params += ['OmegaP', 'OmegaPP', 'beam_freqs']
+    
+    # Set all parameters
+    for p in params:
+        setattr(uvp, p, locals()[p])
+    
+    uvp.check()
+
+    return uvp, cosmo
+
+def build_uvpspec_from_data(data, bls, spw_ranges=None, beam=None, taper='none', cosmo=None, verbose=False):
+    """
+    Build an example UVPSpec object from a visibility file and PSpecData.
+
+    Parameters
+    ----------
+    data : UVData object or path to miriad file
+
+    bls : list of at least 2 baseline tuples
+        Ex: [(24, 25), (37, 38), ...]
+
+    spw_ranges : list of spw tuples
+        See PSpecData.pspec docstring
+
+    beam : PSpecBeamUV object or path to beamfits healpix beam
+
+    taper : string
+        Optional tapering applied to the data before OQE.
+
+    cosmo : Cosmo_Conversions object
+
+    verbose : bool
+        if True, report feedback to standard output
+
+    Returns
+    -------
+    uvp : UVPSpec object
+    """
+    # load data
+    if isinstance(data, str):
+        uvd = UVData()
+        uvd.read_miriad(dfile)
+    elif isinstance(data, UVData):
+        uvd = data
+
+    # load beam
+    if isinstance(beam, str):
+        beam = pspecbeam.PSpecBeamUV(beam, cosmo=cosmo)
+    if beam is not None and cosmo is not None:
+        beam.cosmo = cosmo
+
+    # instantiate pspecdata
+    ds = pspecdata.PSpecData(dsets=[uvd, uvd], wgts=[None, None], labels=['d1', 'd2'], beam=beam)
+
+    # get red bls
+    bls1, bls2, _ = pspecdata.construct_blpairs(bls, exclude_auto_bls=True)
+
+    # run pspec
+    uvp = ds.pspec(bls1, bls2, (0, 1), input_data_weight='identity', spw_ranges=spw_ranges, 
+                   taper=taper, verbose=verbose)
+
+    return uvp
+
diff --git a/hera_pspec/tests/test_grouping.py b/hera_pspec/tests/test_grouping.py
index fba57e52..609f6f15 100644
--- a/hera_pspec/tests/test_grouping.py
+++ b/hera_pspec/tests/test_grouping.py
@@ -3,8 +3,7 @@
 import numpy as np
 import os
 from hera_pspec.data import DATA_PATH
-from test_uvpspec import build_example_uvpspec
-from hera_pspec import uvpspec, conversions, parameter, pspecbeam, pspecdata
+from hera_pspec import uvpspec, conversions, parameter, pspecbeam, pspecdata, testing
 from hera_pspec import uvpspec_utils as uvputils
 from hera_pspec import grouping
 
@@ -13,7 +12,7 @@ class Test_grouping(unittest.TestCase):
     def setUp(self):
         beamfile = os.path.join(DATA_PATH, 'NF_HERA_Beams.beamfits')
         self.beam = pspecbeam.PSpecBeamUV(beamfile)
-        uvp, cosmo = build_example_uvpspec(beam=self.beam)
+        uvp, cosmo = testing.build_vanilla_uvpspec(beam=self.beam)
         uvp.check()
         self.uvp = uvp
 
diff --git a/hera_pspec/tests/test_uvpspec.py b/hera_pspec/tests/test_uvpspec.py
index 935da667..38ec067b 100644
--- a/hera_pspec/tests/test_uvpspec.py
+++ b/hera_pspec/tests/test_uvpspec.py
@@ -4,95 +4,12 @@
 import os
 import sys
 from hera_pspec.data import DATA_PATH
-from hera_pspec import uvpspec, conversions, parameter, pspecbeam, pspecdata
+from hera_pspec import uvpspec, conversions, parameter, pspecbeam, pspecdata, testing
 from hera_pspec import uvpspec_utils as uvputils
 import copy
 import h5py
 from collections import OrderedDict as odict
-
-def build_example_uvpspec(beam=None):
-    """
-    Build an example UVPSpec object, with all necessary metadata.
-    """
-    uvp = uvpspec.UVPSpec()
-
-    Ntimes = 10
-    Nfreqs = 50
-    Ndlys = Nfreqs
-    Nspws = 1
-    Nspwdlys = Nspws * Nfreqs
-
-    # [((1, 2), (1, 2)), ((2, 3), (2, 3)), ((1, 3), (1, 3))]
-    blpairs = [1002001002, 2003002003, 1003001003]
-    bls = [1002, 2003, 1003]
-    Nbls = len(bls)
-    Nblpairs = len(blpairs)
-    Nblpairts = Nblpairs * Ntimes
-
-    blpair_array = np.tile(blpairs, Ntimes)
-    bl_array = np.array(bls)
-    bl_vecs = np.array([[  5.33391548e+00,  -1.35907816e+01,  -7.91624188e-09],
-                        [ -8.67982998e+00,   4.43554478e+00,  -1.08695203e+01],
-                        [ -3.34591450e+00,  -9.15523687e+00,  -1.08695203e+01]])
-    time_array = np.repeat(np.linspace(2458042.1, 2458042.2, Ntimes), Nblpairs)
-    time_1_array = time_array
-    time_2_array = time_array
-    lst_array = np.repeat(np.ones(Ntimes, dtype=np.float), Nblpairs)
-    lst_1_array = lst_array
-    lst_2_array = lst_array
-    time_avg_array = time_array
-    lst_avg_array = lst_array
-    spws = np.arange(Nspws)
-    spw_array = np.tile(spws, Ndlys)
-    freq_array = np.repeat(np.linspace(100e6, 105e6, Nfreqs, endpoint=False), Nspws)
-    dly_array = np.fft.fftshift(np.repeat(np.fft.fftfreq(Nfreqs, np.median(np.diff(freq_array))), Nspws))
-    pol_array = np.array([-5])
-    Npols = len(pol_array)
-    vis_units = 'unknown'
-    norm_units = 'Hz str'
-    weighting = 'identity'
-    channel_width = np.median(np.diff(freq_array))
-    history = 'example'
-    taper = "none"
-    norm = "I"
-    git_hash = "random"
-    scalar_array = np.ones((Nspws, Npols), np.float)
-    label1 = 'red'
-    #label2 = 'blue' # Leave commented out to make sure non-named UVPSpecs work!
-    if beam is not None:
-        OmegaP, OmegaPP = beam.get_Omegas(beam.primary_beam.polarization_array[0])
-        beam_freqs = beam.beam_freqs
-
-    telescope_location = np.array([5109325.85521063, 
-                                   2005235.09142983, 
-                                  -3239928.42475397])
-
-    cosmo = conversions.Cosmo_Conversions()
-
-    data_array, wgt_array, integration_array, nsample_array = {}, {}, {}, {}
-    for s in spws:
-        data_array[s] = np.ones((Nblpairts, Ndlys, Npols), dtype=np.complex) \
-                      * blpair_array[:, None, None] / 1e9
-        wgt_array[s] = np.ones((Nblpairts, Ndlys, 2, Npols), dtype=np.float)
-        integration_array[s] = np.ones((Nblpairts, Npols), dtype=np.float)
-        nsample_array[s] = np.ones((Nblpairts, Npols), dtype=np.float)
-
-    params = ['Ntimes', 'Nfreqs', 'Nspws', 'Nspwdlys', 'Nblpairs', 'Nblpairts', 
-              'Npols', 'Ndlys', 'Nbls', 'blpair_array', 'time_1_array', 
-              'time_2_array', 'lst_1_array', 'lst_2_array', 'spw_array', 
-              'dly_array', 'freq_array', 'pol_array', 'data_array', 'wgt_array',
-              'integration_array', 'bl_array', 'bl_vecs', 'telescope_location', 
-              'vis_units', 'channel_width', 'weighting', 'history', 'taper', 'norm', 
-              'git_hash', 'nsample_array', 'time_avg_array', 'lst_avg_array', 
-              'cosmo', 'scalar_array', 'label1', 'norm_units']
-    if beam is not None:
-        params += ['OmegaP', 'OmegaPP', 'beam_freqs']
-    
-    # Set all parameters
-    for p in params:
-        setattr(uvp, p, locals()[p])
-    
-    return uvp, cosmo
+from pyuvdata import UVData
 
 
 class Test_UVPSpec(unittest.TestCase):
@@ -100,13 +17,9 @@ class Test_UVPSpec(unittest.TestCase):
     def setUp(self):
         beamfile = os.path.join(DATA_PATH, 'NF_HERA_Beams.beamfits')
         self.beam = pspecbeam.PSpecBeamUV(beamfile)
-        uvp, cosmo = build_example_uvpspec(beam=self.beam)
-        uvp.check()
+        uvp, cosmo = testing.build_vanilla_uvpspec(beam=self.beam)
         self.uvp = uvp
 
-        # test equivalence
-        nt.assert_equal(uvp, uvp)
-
     def tearDown(self):
         pass
 
@@ -116,6 +29,10 @@ def runTest(self):
     def test_param(self):
         a = parameter.PSpecParam("example", description="example", expected_type=int)
 
+    def test_eq(self):
+        # test equivalence
+        nt.assert_equal(self.uvp, self.uvp)
+
     def test_get_funcs(self):
         # get_data
         d = self.uvp.get_data((0, ((1, 2), (1, 2)), 'xx'))
@@ -400,6 +317,77 @@ def test_set_cosmology(self):
         nt.assert_equal(uvp.cosmo, new_cosmo2)
         nt.assert_true(hasattr(uvp, 'OmegaP'))
 
+    def test_concate_uvps(self):
+        # setup uvp build
+        uvd = UVData()
+        uvd.read_miriad(os.path.join(DATA_PATH, 'zen.even.xx.LST.1.28828.uvOCRSA'))
+        beam = pspecbeam.PSpecBeamUV(os.path.join(DATA_PATH, "NF_HERA_Beams.beamfits"))
+        bls = [(37, 38), (38, 39), (52, 53)]
+        uvp1 = testing.build_uvpspec_from_data(uvd, bls, spw_ranges=[(20, 30), (60, 90)], beam=beam)
+
+        # test failure due to overlapping data
+        uvp2 = copy.deepcopy(uvp1)
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+        # test success across pol
+        uvp2.pol_array[0] = -6
+        out = uvpspec.concate_uvp([uvp1, uvp2], verbose=False)
+        nt.assert_equal(out.Npols, 2)
+        nt.assert_true(len(set(out.pol_array) ^ set([-5, -6])) == 0)
+
+        # test multiple non-overlapping data axes
+        uvp2.freq_array[0] = 0.0
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+
+        # test partial data overlap failure
+        uvp2 = testing.build_uvpspec_from_data(uvd, [(37, 38), (38, 39), (53, 54)], spw_ranges=[(20, 30), (60, 90)], beam=beam)
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+        uvp2 = testing.build_uvpspec_from_data(uvd, bls, spw_ranges=[(20, 30), (60, 105)], beam=beam)
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+        uvp2 = copy.deepcopy(uvp1)
+        uvp2.pol_array[0] = -6
+        uvp2 = uvpspec.concate_uvp([uvp1, uvp2], verbose=False)
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+
+        # test concat across spw
+        uvp2 = testing.build_uvpspec_from_data(uvd, bls, spw_ranges=[(85, 101)], beam=beam)
+        out = uvpspec.concate_uvp([uvp1, uvp2], verbose=False)
+        nt.assert_equal(out.Nspws, 3)
+        nt.assert_equal(out.Nfreqs, 51)
+        nt.assert_equal(out.Nspwdlys, 56)
+
+        # test concat across blpairts
+        uvp2 = testing.build_uvpspec_from_data(uvd, [(53, 54), (67, 68)], spw_ranges=[(20, 30), (60, 90)], beam=beam)
+        out = uvpspec.concate_uvp([uvp1, uvp2], verbose=False)
+        nt.assert_equal(out.Nblpairs, 8)
+        nt.assert_equal(out.Nbls, 5)
+
+        # test failure due to variable static metadata
+        uvp2.weighting = 'foo'
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+        uvp2.weighting = 'identity'
+        del uvp2.OmegaP
+        del uvp2.OmegaPP
+        nt.assert_raises(AssertionError, uvpspec.concate_uvp, [uvp1, uvp2])
+
+        # test feed as strings
+        uvp1 = testing.build_uvpspec_from_data(uvd, bls, spw_ranges=[(20, 30)], beam=beam)
+        uvp2 = copy.deepcopy(uvp1)
+        uvp2.pol_array[0] = -6
+        uvp1.write_hdf5('uvp1.hdf5', overwrite=True)
+        uvp2.write_hdf5('uvp2.hdf5', overwrite=True)
+        out = uvpspec.concate_uvp(['uvp1.hdf5', 'uvp2.hdf5'], verbose=False)
+        nt.assert_true(out.Npols, 2)
+        for ff in ['uvp1.hdf5', 'uvp2.hdf5']:
+            if os.path.exists(ff):
+                os.remove(ff)
+
+        # test UVPSpec __add__
+        uvp3 = copy.deepcopy(uvp1)
+        uvp3.pol_array[0] = -7
+        out = uvp1 + uvp2 + uvp3
+        nt.assert_equal(out.Npols, 3)
+
+
 def test_conj_blpair_int():
     conj_blpair = uvputils._conj_blpair_int(1002003004)
     nt.assert_equal(conj_blpair, 3004001002)
diff --git a/hera_pspec/utils.py b/hera_pspec/utils.py
index c9f3ab61..b48bbe5a 100644
--- a/hera_pspec/utils.py
+++ b/hera_pspec/utils.py
@@ -232,3 +232,8 @@ def load_config(config_file):
             raise(exc)
     return cfg
 
+def flatten(nested_list):
+    """
+    Flatten a list of nested lists
+    """
+    return [item for sublist in nested_list for item in sublist]
diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py
index eedde99c..c7220c2a 100644
--- a/hera_pspec/uvpspec.py
+++ b/hera_pspec/uvpspec.py
@@ -1,7 +1,7 @@
 import numpy as np
 from collections import OrderedDict as odict
 import os, copy, shutil, operator, ast, fnmatch
-from hera_pspec import conversions, noise, version, pspecbeam, grouping
+from hera_pspec import conversions, noise, version, pspecbeam, grouping, utils
 from hera_pspec import uvpspec_utils as uvputils
 from hera_pspec.parameter import PSpecParam
 from pyuvdata import uvutils as uvutils
@@ -26,7 +26,7 @@ def __init__(self):
         self._Nspwdlys = PSpecParam("Nspwdlys", description="Total number of delay bins across all sub-bands.", expected_type=int)
         self._Nspws = PSpecParam("Nspws", description="Number of spectral windows.", expected_type=int)
         self._Ndlys = PSpecParam("Ndlys", description="Total number of delay bins.", expected_type=int)
-        self._Nfreqs = PSpecParam("Nfreqs", description="Total number of frequency bins in the data.", expected_type=int)
+        self._Nfreqs = PSpecParam("Nfreqs", description="Total number of unique frequency bins in the data.", expected_type=int)
         self._Npols = PSpecParam("Npols", description="Number of polarizations in the data.", expected_type=int)
         self._history = PSpecParam("history", description='The file history.', expected_type=str)
 
@@ -42,18 +42,18 @@ def __init__(self):
         desc = "Nsample dictionary, if the pspectra have been incoherently averaged (i.e. averaged after squaring), this is " \
                "the effective number of samples in that average (float type). This is not the same as the pyuvdata.UVData nsample_array."
         self._nsample_array = PSpecParam("nsample_array", description=desc, expected_type=np.float64, form="(Nblpairts, Npols)")
-        self._spw_array = PSpecParam("spw_array", description="Spw integer array.", form="(Nspwdlys,)", expected_type=np.int32)
+        self._spw_array = PSpecParam("spw_array", description="Spw integer array.", form="(Nspwdlys,)", expected_type=np.uint16)
         self._freq_array = PSpecParam("freq_array", description="Frequency array of the original data in Hz.", form="(Nfreqs,)", expected_type=np.float64)
         self._dly_array = PSpecParam("dly_array", description="Delay array in seconds.", form="(Nspwdlys,)", expected_type=np.float64)
         desc = "Polarization integers of power spectra. Stokes 1:4 (I,Q,U,V); circular -1:-4 (RR,LL,RL,LR); linear -5:-8 (XX,YY,XY,YX)"
-        self._pol_array = PSpecParam("pol_array", description=desc, form="(Npols,)", expected_type=np.ndarray)
+        self._pol_array = PSpecParam("pol_array", description=desc, form="(Npols,)", expected_type=np.int32)
         self._lst_1_array = PSpecParam("lst_1_array", description="LST array of the first bl in the bl-pair [radians].", form="(Nblpairts,)", expected_type=np.float64)
         self._lst_2_array = PSpecParam("lst_2_array", description="LST array of the second bl in the bl-pair [radians].", form="(Nblpairts,)", expected_type=np.float64)
         self._lst_avg_array = PSpecParam("lst_avg_array", description="Average of the lst_1_array and lst_2_array [radians].", form="(Nblpairts,)", expected_type=np.float64)
         self._time_1_array = PSpecParam("time_1_array", description="Time array of the first bl in the bl-pair [Julian Date].", form="(Nblpairts,)", expected_type=np.float64)
         self._time_1_array = PSpecParam("time_2_array", description="Time array of the second bl in the bl-pair [Julian Date].", form="(Nblpairts,)", expected_type=np.float64)
         self._time_avg_array = PSpecParam("time_avg_array", description="Average of the time_1_array and time_2_array [Julian Date].", form='(Nblpairts,)', expected_type=np.float64)
-        self._blpair_array = PSpecParam("blpair_array", description="Baseline-pair integer for all baseline-pair times.", form="(Nblpairts,)", expected_type=np.int32)
+        self._blpair_array = PSpecParam("blpair_array", description="Baseline-pair integer for all baseline-pair times.", form="(Nblpairts,)", expected_type=np.int64)
         self._scalar_array = PSpecParam("scalar_array", description="Power spectrum normalization scalar from pspecbeam module.", expected_type=np.float64, form="(Nspws, Npols)")
 
         # Baseline attributes
@@ -70,8 +70,8 @@ def __init__(self):
         self._vis_units = PSpecParam("vis_units", description="Units of the original visibility data used to form the power spectra.", expected_type=str)
         self._norm_units = PSpecParam("norm_units", description="Power spectra normalization units, i.e. telescope units [Hz str] or cosmological [(h^-3) Mpc^3].", expected_type=str)
         self._labels = PSpecParam("labels", description="Array of dataset string labels.", expected_type=np.str)
-        self._label_1_array = PSpecParam("label_1_array", description="Integer array w/ shape of data that indexes labels and gives label of dset1.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int)
-        self._label_2_array = PSpecParam("label_2_array", description="Integer array w/ shape of data that indexes labels and gives label of dset2.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int)
+        self._label_1_array = PSpecParam("label_1_array", description="Integer array w/ shape of data that indexes labels and gives label of dset1.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int32)
+        self._label_2_array = PSpecParam("label_2_array", description="Integer array w/ shape of data that indexes labels and gives label of dset2.", form="(Nspws, Nblpairts, Npols)", expected_type=np.int32)
         self._folded = PSpecParam("folded", description="if power spectra are folded (i.e. averaged) onto purely positive delay axis. Default is False", expected_type=bool)
         self._beamfile = PSpecParam("beamfile", description="filename of beam-model used to normalized pspectra.", expected_type=str)
         self._OmegaP = PSpecParam("OmegaP", description="Integral of unitless beam power over the sky [steradians].", form="(Nbeam_freqs, Npols)", expected_type=np.float64)
@@ -88,8 +88,9 @@ def __init__(self):
                             "spw_array", "freq_array", "dly_array", "pol_array", "lst_1_array",
                             "lst_2_array", "time_1_array", "time_2_array", "blpair_array", "Nbls",
                             "bl_vecs", "bl_array", "channel_width", "telescope_location", "weighting",
-                            "vis_units", "norm_units", "taper", "norm", "nsample_array",
-                            'lst_avg_array', 'time_avg_array', 'folded', "scalar_array"]
+                            "vis_units", "norm_units", "taper", "norm", "nsample_array", 'lst_avg_array', 
+                            'time_avg_array', 'folded', "scalar_array", "labels", "label_1_array",
+                            "label_2_array"]
 
         # all parameters must fall into one and only one of the following groups, which are used in __eq__
         self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", "Nspws", "Ndlys",
@@ -264,7 +265,7 @@ def get_blpair_seps(self):
         # construct blpair_bls
         blpairs = np.unique(self.blpair_array)
         bl1 = np.floor(blpairs / 1e6)
-        blpair_bls = np.vstack([bl1, blpairs - bl1*1e6]).astype(np.int).T
+        blpair_bls = np.vstack([bl1, blpairs - bl1*1e6]).astype(np.int32).T
 
         # iterate over blpairs
         for blp, bl in zip(blpairs, blpair_bls):
@@ -333,10 +334,15 @@ def get_kparas(self, spw, little_h=True):
 
         return k_para
 
-    def get_spw_ranges(self):
+    def get_spw_ranges(self, spws=None):
         """
         Get the frequency range and Nfreqs of each spectral window in the object.
 
+        Parameters
+        ----------
+        spws : list of spw integers, optional
+            Default is all spws present in data.
+
         Returns
         -------
         spw_ranges : list of len-3 tuples (freq_start, freq_end, Nfreqs) in Hz
@@ -344,9 +350,14 @@ def get_spw_ranges(self):
             To turn this into the frequency array of the spectral window use 
                 spw_freqs = np.linspace(freq_start, freq_end, Nfreqs, endpoint=False)
         """
+        # type check
+        if isinstance(spws, (int, np.integer)):
+            spws = [spws]
+        if spws is None:
+            spws = np.arange(self.Nspws)
         spw_ranges = []
         # iterate over spectral windows
-        for spw in np.unique(self.spw_array):
+        for spw in spws:
             spw_freqs = self.freq_array[self.spw_to_indices(spw)]
             Nfreqs = len(spw_freqs)
             spw_df = np.median(np.diff(spw_freqs))
@@ -471,7 +482,7 @@ def blpair_to_indices(self, blpair):
         # convert blpair to integer if fed as tuple
         if isinstance(blpair, tuple):
             blpair = [self.antnums_to_blpair(blpair)]
-        elif isinstance(blpair, (np.int, int)):
+        elif isinstance(blpair, (np.integer, int)):
             blpair = [blpair]
         elif isinstance(blpair, list):
             if isinstance(blpair[0], tuple):
@@ -527,7 +538,7 @@ def pol_to_indices(self, pol):
         # convert pol to int if str
         if isinstance(pol, (str, np.str)):
             pol = [uvutils.polstr2num(pol)]
-        elif isinstance(pol, (int, np.int)):
+        elif isinstance(pol, (int, np.integer)):
             pol = [pol]
         elif isinstance(pol, (list, tuple)):
             for i in range(len(pol)):
@@ -567,7 +578,7 @@ def time_to_indices(self, time, blpairs=None):
             return np.arange(self.Nblpairts)[time_select]
         else:
             blp_select = np.zeros(self.Nblpairts, np.bool)
-            if isinstance(blpairs, (tuple, int, np.int)):
+            if isinstance(blpairs, (tuple, int, np.integer)):
                 blpairs = [blpairs]
             for blp in blpairs:
                 blp_select[self.blpair_to_indices(blp)] = True
@@ -623,7 +634,7 @@ def key_to_indices(self, key, *args):
                 key = (key['spw'], key['blpair'], key['pol'])
         elif len(args) > 0:
             assert len(args) == 2, "length of key must be 3."
-            assert isinstance(args[0], (tuple, int, np.int)) and isinstance(args[1], (np.str, str, int, np.int)), "key must be ordered as (spw, blpair, pol)"
+            assert isinstance(args[0], (tuple, int, np.integer)) and isinstance(args[1], (np.str, str, int, np.integer)), "key must be ordered as (spw, blpair, pol)"
             key = (key, args[0], args[1])
 
         # assign key elements
@@ -632,9 +643,9 @@ def key_to_indices(self, key, *args):
         pol = key[2]
 
         # assert types
-        assert isinstance(spw, (int, np.int)), "spw must be an integer"
-        assert isinstance(blpair, (int, np.int, tuple)), "blpair must be an integer or nested tuple"
-        assert isinstance(pol, (np.str, str, np.int, int)), "pol must be a string or integer"
+        assert isinstance(spw, (int, np.integer)), "spw must be an integer"
+        assert isinstance(blpair, (int, np.integer, tuple)), "blpair must be an integer or nested tuple"
+        assert isinstance(pol, (np.str, str, np.integer, int)), "pol must be a string or integer"
 
         # convert blpair to int if not int
         if type(blpair) == tuple:
@@ -796,7 +807,7 @@ def read_from_group(self, grp, just_meta=False, spws=None, bls=None,
             self.cosmo = conversions.Cosmo_Conversions(**ast.literal_eval(self.cosmo))
 
         self.check(just_meta=just_meta)
-    
+
 
     def read_hdf5(self, filepath, just_meta=False, spws=None, bls=None,
                   blpairs=None, times=None, pols=None, 
@@ -1046,7 +1057,11 @@ def check(self, just_meta=False):
                             assert issubclass(getattr(self, p)[k].dtype.type, a.expected_type), err_msg
                     # immutables
                     elif p in self._immutables:
-                        assert isinstance(getattr(self, p), a.expected_type), err_msg
+                        if not isinstance(getattr(self, p), a.expected_type):
+                            try:
+                                setattr(self, p, a.expected_type(getattr(self, p)))
+                            except:
+                                raise AssertionError(err_msg)
 
     def _clear(self):
         """
@@ -1090,7 +1105,10 @@ def __eq__(self, other, params=None, verbose=False):
                 if p in self._immutables:
                     assert getattr(self, p) == getattr(other, p)
                 elif p in self._ndarrays:
-                    assert np.isclose(getattr(self, p), getattr(other, p)).all()
+                    if issubclass(getattr(self, p).dtype.type, np.str):
+                        assert np.all(getattr(self, p) == getattr(other, p))
+                    else:
+                        assert np.isclose(getattr(self, p), getattr(other, p)).all()
                 elif p in self._dicts:
                     for i in getattr(self, p):
                         assert np.isclose(getattr(self, p)[i], \
@@ -1105,7 +1123,7 @@ def __eq__(self, other, params=None, verbose=False):
 
     def __add__(self, other, verbose=False):
         """ Concatenate the data of two UVPSpec objects together along a single axis """
-        self = combine_uvp([self, other], verbose=verbose)
+        return concate_uvp([self, other], verbose=verbose)
 
     @property
     def units(self):
@@ -1181,7 +1199,7 @@ def generate_noise_spectra(self, spw, pol, Tsys, blpairs=None, little_h=True,
             (Ntimes, Ndlys).
         """
         # Assert polarization type
-        if isinstance(pol, (np.int, int)):
+        if isinstance(pol, (np.integer, int)):
             pol = uvutils.polnum2str(pol)
 
         # Get polarization index
@@ -1415,9 +1433,9 @@ def compute_scalar(self, spw, pol, num_steps=1000, little_h=True,
         return scalar
 
 
-def combine_uvp(uvps, verbose=True):
+def concate_uvp(uvps, verbose=True):
     """
-    Combine multiple UVPSpec objects into a single object, concatenating along one of
+    Concatenate multiple UVPSpec objects into a single object, combining along one of
     either spectral window [spw], baseline-pair-times [blpairts], or polarization [pol].
     Certain meta-data of all of the UVPSpec objs must match exactly. 
     In addition, one can only concatenate data along a single data axis, with the condition 
@@ -1435,7 +1453,7 @@ def combine_uvp(uvps, verbose=True):
     """
     # perform type checks and get concatenation axis
     (uvps, concat_ax, new_spws, new_blpts, new_pols,
-     static_meta) = get_uvp_overlap(uvps, verbose=verbose)
+     static_meta) = get_uvp_overlap(uvps, just_meta=False, verbose=verbose)
     Nuvps = len(uvps)
 
     # create a new uvp
@@ -1464,7 +1482,7 @@ def combine_uvp(uvps, verbose=True):
         spw_Nfreqs = spw[-1]
         spw_freqs = np.linspace(*spw, endpoint=False)
         spw_dlys = np.fft.fftshift(np.fft.fftfreq(spw_Nfreqs, np.median(np.diff(spw_freqs))))
-        u.spw_array.extend(np.ones(spw_Nfreqs, np.int) * i)
+        u.spw_array.extend(np.ones(spw_Nfreqs, np.int32) * i)
         u.freq_array.extend(spw_freqs)
         u.dly_array.extend(spw_dlys)
     u.spw_array = np.array(u.spw_array)       
@@ -1474,16 +1492,16 @@ def combine_uvp(uvps, verbose=True):
     u.Nspws = Nspws
     u.Nblpairts = Nblpairts
     u.Npols = Npols
-    u.Nfreqs = len(u.freq_array)
+    u.Nfreqs = len(np.unique(u.freq_array))
     u.Nspwdlys = len(u.spw_array)
     u.Ndlys = len(np.unique(u.dly_array))
     u.time_1_array, u.time_2_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
     u.time_avg_array, u.lst_avg_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
     u.lst_1_array, u.lst_2_array = np.empty(Nblpairts, np.float), np.empty(Nblpairts, np.float)
-    u.blpair_array = np.empty(Nblpairts, np.int)
+    u.blpair_array = np.empty(Nblpairts, np.int64)
     u.labels = sorted(set(np.concatenate([uvp.labels for uvp in uvps])))
-    u.label_1_array = np.empty((Nspws, Nblpairts, Npols), np.int)
-    u.label_2_array = np.empty((Nspws, Nblpairts, Npols), np.int)
+    u.label_1_array = np.empty((Nspws, Nblpairts, Npols), np.int32)
+    u.label_2_array = np.empty((Nspws, Nblpairts, Npols), np.int32)
 
     # get each uvp's data axes
     uvp_spws = [_uvp.get_spw_ranges() for _uvp in uvps]
@@ -1591,16 +1609,20 @@ def combine_uvp(uvps, verbose=True):
     return u
 
 
-def get_uvp_overlap(uvps, verbose=True):
+def get_uvp_overlap(uvps, just_meta=True, verbose=True):
     """
     Given a list of UVPSpec objects or a list of paths to UVPSpec objects,
-    find a single data axis ['spw', 'blpairts', 'pol'] where all objects contain 
-    non-overlapping data.
+    find a single data axis ['spw', 'blpairts', 'pol'] where *all* objects contain 
+    completely non-overlapping data. Overlapping data are delay spectra that
+    have identical spw, blpair-time and pol metadata between each other.
 
     Parameters
     ----------
     uvps : list of UVPSpec objects or list of string paths to UVPSpec objects
 
+    just_meta : boolean, optional
+        If uvps contain list of strings, load only the metadata in to get uvp overlap.
+
     verbose : bool, optional
         print feedback to standard output
 
@@ -1633,7 +1655,7 @@ def get_uvp_overlap(uvps, verbose=True):
         _uvps = []
         for u in uvps:
             uvp = UVPSpec()
-            uvp.read_hdf5(u, just_meta=True)
+            uvp.read_hdf5(u, just_meta=just_meta)
             _uvps.append(uvp)
         uvps = _uvps
 
@@ -1653,7 +1675,7 @@ def get_uvp_overlap(uvps, verbose=True):
     unique_spws = []
     unique_blpts = []
     unique_pols = []
-    data_overlaps = odict()
+    data_concat_axes = odict()
 
     # iterate over uvps
     for i, uvp1 in enumerate(uvps):
@@ -1671,36 +1693,39 @@ def get_uvp_overlap(uvps, verbose=True):
         # iterate over uvps
         for j, uvp2 in enumerate(uvps):
             if j <= i: continue
-            # get uvp1 sets and determine overlap w/ uvp1
+            # get uvp2 sets
             uvp2_spws = uvp2.get_spw_ranges()
             uvp2_blpts = zip(uvp2.blpair_array, uvp2.time_avg_array)
             uvp2_pols = uvp2.pol_array
 
-            spw_overlap = len(set(uvp1_spws) ^ set(uvp2_spws)) == 0
-            blpts_overlap = len(set(uvp1_blpts) ^ set(uvp2_blpts)) == 0
-            pol_overlap = len(set(uvp1_pols) ^ set(uvp2_pols)) == 0
+            # determine if uvp1 and uvp2 are an identical match
+            spw_match = len(set(uvp1_spws) ^ set(uvp2_spws)) == 0
+            blpts_match = len(set(uvp1_blpts) ^ set(uvp2_blpts)) == 0
+            pol_match = len(set(uvp1_pols) ^ set(uvp2_pols)) == 0
+
+            # ensure no partial-overlaps
+            if not spw_match:
+                assert len(set(uvp1_spws) & set(uvp2_spws)) == 0, "uvp {} and {} have partial overlap across spw, cannot combine".format(i, j)
+            if not blpts_match:
+                assert len(set(uvp1_blpts) & set(uvp2_blpts)) == 0, "uvp {} and {} have partial overlap across blpairts, cannot combine".format(i, j)
+            if not pol_match:
+                assert len(set(uvp1_pols) & set(uvp2_pols)) == 0, "uvp {} and {} have partial overlap across pol, cannot combine".format(i, j)
 
             # assert all except 1 axis overlaps
-            overlaps = [spw_overlap, blpts_overlap, pol_overlap]
-            assert sum(overlaps) != 3, "uvp {} and {} have overlapping data, cannot combine".format(i, j)
-            assert sum(overlaps) > 1, "uvp {} and {} are non-overlapping across multiple data axes, cannot combine".format(i, j)
-            concat_ax = ['spw', 'blpairts', 'pol'][overlaps.index(False)]
-            data_overlaps[(i, j)] = concat_ax
+            matches = [spw_match, blpts_match, pol_match]
+            assert sum(matches) != 3, "uvp {} and {} have completely overlapping data, cannot combine".format(i, j)
+            assert sum(matches) > 1, "uvp {} and {} are non-overlapping across multiple data axes, cannot combine".format(i, j)
+            concat_ax = ['spw', 'blpairts', 'pol'][matches.index(False)]
+            data_concat_axes[(i, j)] = concat_ax
             if verbose:
-                print "uvp {} and {} have non-overlap across {} axis".format(i, j, concat_ax)
+                print "uvp {} and {} are concatable across {} axis".format(i, j, concat_ax)
 
     # assert all uvp pairs have the same (single) non-overlap (concat) axis
     err_msg = "Non-overlapping data in uvps span multiple data axes:\n{}" \
-              "".format("\n".join(map(lambda i: "{} & {}: {}".format(i[0][0], i[0][1], i[1]), data_overlaps.items())))
-    assert len(set(data_overlaps.values())) == 1, err_msg
+              "".format("\n".join(map(lambda i: "{} & {}: {}".format(i[0][0], i[0][1], i[1]), data_concat_axes.items())))
+    assert len(set(data_concat_axes.values())) == 1, err_msg
 
     # perform additional checks given concat ax
-    if concat_ax == 'spw':
-        # check for spws overlapping in freq
-        for i, spw1 in enumerate(unique_spws):
-            for j, spw2 in enumerate(unique_spws[i+1:]):
-                assert not (spw1[1] > spw2[0] and spw1[0] < spw2[1]), "cannot combine overlapping spws: {} & {}".format(spw1, spw2)
-
     if concat_ax == 'blpairts':
         # check scalar_array
         assert np.all([np.isclose(uvps[0].scalar_array, u.scalar_array) for u in uvps[1:]]), "" \

From 2fc1417ae2e7e4746f04e295e7b499d2432c1745 Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Fri, 18 May 2018 10:27:18 -0700
Subject: [PATCH 3/6] updated test_container w/ testing.py

---
 hera_pspec/tests/test_container.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hera_pspec/tests/test_container.py b/hera_pspec/tests/test_container.py
index b5041644..d159b9bb 100644
--- a/hera_pspec/tests/test_container.py
+++ b/hera_pspec/tests/test_container.py
@@ -3,14 +3,14 @@
 import numpy as np
 import os, sys, copy
 from hera_pspec.data import DATA_PATH
-from hera_pspec import PSpecContainer, UVPSpec
-from test_uvpspec import build_example_uvpspec
+from hera_pspec import PSpecContainer, UVPSpec, testing
+
 
 class Test_PSpecContainer(unittest.TestCase):
     
     def setUp(self):
         self.fname = os.path.join(DATA_PATH, '_test_container.hdf5')
-        self.uvp, self.cosmo = build_example_uvpspec()
+        self.uvp, self.cosmo = testing.build_vanilla_uvpspec()
 
     def tearDown(self):
         # Remove HDF5 file

From 21ab6f02d6a28f1d1e42308d613046cf2840e31e Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Fri, 18 May 2018 10:28:56 -0700
Subject: [PATCH 4/6] updated uvpspec_utils w/ np.integer type

---
 hera_pspec/uvpspec_utils.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hera_pspec/uvpspec_utils.py b/hera_pspec/uvpspec_utils.py
index 01d28636..feb3d8e3 100644
--- a/hera_pspec/uvpspec_utils.py
+++ b/hera_pspec/uvpspec_utils.py
@@ -25,15 +25,15 @@ def _get_blpairs_from_bls(uvp, bls, only_pairs_in_bls=False):
     """
     # get blpair baselines in integer form
     bl1 = np.floor(uvp.blpair_array / 1e6)
-    blpair_bls = np.vstack([bl1, uvp.blpair_array - bl1*1e6]).astype(np.int).T
+    blpair_bls = np.vstack([bl1, uvp.blpair_array - bl1*1e6]).astype(np.int32).T
     # ensure bls is in integer form
     if isinstance(bls, tuple):
-        assert isinstance(bls[0], (int, np.int)), "bls must be fed as a list of baseline tuples Ex: [(1, 2), ...]"
+        assert isinstance(bls[0], (int, np.integer)), "bls must be fed as a list of baseline tuples Ex: [(1, 2), ...]"
         bls = [uvp.antnums_to_bl(bls)]
     elif isinstance(bls, list):
         if isinstance(bls[0], tuple):
             bls = map(lambda b: uvp.antnums_to_bl(b), bls)
-    elif isinstance(bls, (int, np.int)):
+    elif isinstance(bls, (int, np.integer)):
         bls = [bls]
     # get indices
     if only_pairs_in_bls:
@@ -87,14 +87,14 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim
     if bls is not None:
         # get blpair baselines in integer form
         bl1 = np.floor(uvp.blpair_array / 1e6)
-        blpair_bls = np.vstack([bl1, uvp.blpair_array - bl1*1e6]).astype(np.int).T
+        blpair_bls = np.vstack([bl1, uvp.blpair_array - bl1*1e6]).astype(np.int32).T
         blp_select = _get_blpairs_from_bls(uvp, bls, only_pairs_in_bls=only_pairs_in_bls)
 
     if blpairs is not None:
         if bls is None:
             blp_select = np.zeros(uvp.Nblpairts, np.bool)
         # assert form
-        assert isinstance(blpairs[0], (tuple, int, np.int)), "blpairs must be fed as a list of baseline-pair tuples or baseline-pair integers"
+        assert isinstance(blpairs[0], (tuple, int, np.integer)), "blpairs must be fed as a list of baseline-pair tuples or baseline-pair integers"
         # if fed as list of tuples, convert to integers
         if isinstance(blpairs[0], tuple):
             blpairs = map(lambda blp: uvp.antnums_to_blpair(blp), blpairs)
@@ -133,7 +133,7 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim
 
     if pols is not None:
         # assert form
-        assert isinstance(pols[0], (str, np.str, int, np.int)), "pols must be fed as a list of pol strings or pol integers"
+        assert isinstance(pols[0], (str, np.str, int, np.integer)), "pols must be fed as a list of pol strings or pol integers"
 
         # if fed as strings convert to integers
         if isinstance(pols[0], (np.str, str)):
@@ -287,7 +287,7 @@ def _blpair_to_bls(blpair):
     blpair : baseline-pair integer or nested antenna-pair tuples
     """
     # convert to antnums if fed as ints
-    if isinstance(blpair, (int, np.int)):
+    if isinstance(blpair, (int, np.integer)):
         blpair = _blpair_to_antnums(blpair)
 
     # convert first and second baselines to baseline ints

From 919ba58236017d4eae6da9307e1fea507cd5b256 Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Fri, 18 May 2018 10:40:51 -0700
Subject: [PATCH 5/6] fixed test_utils

---
 hera_pspec/tests/test_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hera_pspec/tests/test_utils.py b/hera_pspec/tests/test_utils.py
index 84491860..9cadd8cf 100644
--- a/hera_pspec/tests/test_utils.py
+++ b/hera_pspec/tests/test_utils.py
@@ -3,10 +3,10 @@
 import numpy as np
 import os, sys, copy
 from hera_pspec.data import DATA_PATH
-from hera_pspec import utils
+from hera_pspec import utils, testing
 from collections import OrderedDict as odict
 from pyuvdata import UVData
-from test_uvpspec import build_example_uvpspec
+
 
 def test_cov():
     # load another data file
@@ -60,7 +60,7 @@ def setUp(self):
                                           "zen.2458042.17772.xx.HH.uvXA"))
         
         # Create UVPSpec object
-        self.uvp, cosmo = build_example_uvpspec()
+        self.uvp, cosmo = testing.build_vanilla_uvpspec()
 
     def tearDown(self):
         pass

From 8d71f24bd2ec8dd3509eb18e3db0dad791b5ea9f Mon Sep 17 00:00:00 2001
From: Nicholas Kern <nkern@berkeley.edu>
Date: Tue, 22 May 2018 21:05:45 -0700
Subject: [PATCH 6/6] updated build_uvp in testing.py to reflect polarization
 updates

---
 hera_pspec/testing.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hera_pspec/testing.py b/hera_pspec/testing.py
index 13b4aacc..de445417 100644
--- a/hera_pspec/testing.py
+++ b/hera_pspec/testing.py
@@ -140,6 +140,9 @@ def build_uvpspec_from_data(data, bls, spw_ranges=None, beam=None, taper='none',
     elif isinstance(data, UVData):
         uvd = data
 
+    # get pol
+    pol = uvd.polarization_array[0]
+
     # load beam
     if isinstance(beam, str):
         beam = pspecbeam.PSpecBeamUV(beam, cosmo=cosmo)
@@ -153,7 +156,7 @@ def build_uvpspec_from_data(data, bls, spw_ranges=None, beam=None, taper='none',
     bls1, bls2, _ = pspecdata.construct_blpairs(bls, exclude_auto_bls=True)
 
     # run pspec
-    uvp = ds.pspec(bls1, bls2, (0, 1), input_data_weight='identity', spw_ranges=spw_ranges, 
+    uvp = ds.pspec(bls1, bls2, (0, 1), (pol, pol), input_data_weight='identity', spw_ranges=spw_ranges, 
                    taper=taper, verbose=verbose)
 
     return uvp