From 25cb579940f944a61428258c57319d978aa6b9fe Mon Sep 17 00:00:00 2001 From: EXTERNAL-Ewall-Wice Date: Fri, 6 Jul 2018 13:46:57 -0700 Subject: [PATCH] Added covariance averaging. Passes unittests. Still needs validation --- hera_pspec/grouping.py | 415 +++++++++++++++++++----------------- hera_pspec/uvpspec.py | 219 ++++++++++--------- hera_pspec/uvpspec_utils.py | 65 +++--- 3 files changed, 376 insertions(+), 323 deletions(-) diff --git a/hera_pspec/grouping.py b/hera_pspec/grouping.py index e1a429ee..d1ef6403 100644 --- a/hera_pspec/grouping.py +++ b/hera_pspec/grouping.py @@ -6,38 +6,38 @@ import copy -def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, +def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, seed=None): """ Group baselines together into equal-sized sets. - - These groups can be passed into PSpecData.pspec(), where the corresponding - baselines will be averaged together (grouping reduces the number of + + These groups can be passed into PSpecData.pspec(), where the corresponding + baselines will be averaged together (grouping reduces the number of cross-spectra that need to be computed). - + Parameters ---------- bls : list of tuples Set of unique baselines tuples. - + Ngroups : int - Number of groups to create. The groups will be equal in size, except + Number of groups to create. The groups will be equal in size, except the last group (if there are remainder baselines). - + keep_remainder : bool, optional - Whether to keep remainder baselines that don't fit exactly into the - number of specified groups. If True, the remainder baselines are - appended to the last group in the output list. Otherwise, the remainder + Whether to keep remainder baselines that don't fit exactly into the + number of specified groups. If True, the remainder baselines are + appended to the last group in the output list. Otherwise, the remainder baselines are discarded. Default: False. - + randomize : bool, optional - Whether baselines should be added to groups in the order they appear in + Whether baselines should be added to groups in the order they appear in 'bls', or if they should be assigned at random. Default: False. - + seed : int, optional - Random seed to use if randomize=True. If None, no random seed will be + Random seed to use if randomize=True. If None, no random seed will be set. Default: None. - + Returns ------- grouped_bls : list of lists of tuples @@ -46,19 +46,19 @@ def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, Nbls = len(bls) # Total baselines n = Nbls / Ngroups # Baselines per group rem = Nbls - n*Ngroups - + # Sanity check on number of groups if Nbls < Ngroups: raise ValueError("Can't have more groups than baselines.") - + # Make sure only tuples were provided (can't have groups of groups) for bl in bls: assert isinstance(bl, tuple) - + # Randomize baseline order if requested if randomize: if seed is not None: random.seed(seed) bls = copy.deepcopy(bls) random.shuffle(bls) - + # Assign to groups sequentially grouped_bls = [bls[i*n:(i+1)*n] for i in range(Ngroups)] if keep_remainder and rem > 0: grouped_bls[-1] += bls[-rem:] @@ -67,120 +67,120 @@ def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, def sample_baselines(bls, seed=None): """ - Sample a set of baselines with replacement (to be used to generate a + Sample a set of baselines with replacement (to be used to generate a bootstrap sample). - + Parameters ---------- bls : list of either tuples or lists of tuples - Set of unique baselines to be sampled from. If groups of baselines - (contained in lists) are provided, each group will be treated as a - single object by the sampler; its members will not be sampled + Set of unique baselines to be sampled from. If groups of baselines + (contained in lists) are provided, each group will be treated as a + single object by the sampler; its members will not be sampled separately. - + seed : int, optional - Random seed to use if randomize=True. If None, no random seed will be + Random seed to use if randomize=True. If None, no random seed will be set. Default: None. - + Returns ------- sampled_bls : list of tuples or lists of tuples - Bootstrap-sampled set of baselines (will include multiple instances of + Bootstrap-sampled set of baselines (will include multiple instances of some baselines). """ if seed is not None: random.seed(seed) - + # Sample with replacement; return as many baselines/groups as were input return [random.choice(bls) for i in range(len(bls))] -def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, +def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, inplace=False): """ - Find spectral windows, baseline-pairs, times, and/or polarizations that a - set of UVPSpec objects have in common and return new UVPSpec objects that + Find spectral windows, baseline-pairs, times, and/or polarizations that a + set of UVPSpec objects have in common and return new UVPSpec objects that contain only those data. - + If there is no overlap, an error will be raised. - + Parameters ---------- uvp_list : list of UVPSpec List of input UVPSpec objects. - + spws : bool, optional - Whether to retain only the spectral windows that all UVPSpec objects - have in common. For a spectral window to be retained, the entire set of - delays in that window must match across all UVPSpec objects (this will + Whether to retain only the spectral windows that all UVPSpec objects + have in common. For a spectral window to be retained, the entire set of + delays in that window must match across all UVPSpec objects (this will not allow you to just pull out common delays). - - If set to False, the original spectral windows will remain in each + + If set to False, the original spectral windows will remain in each UVPSpec. Default: True. - + blpairs : bool, optional - Whether to retain only the baseline-pairs that all UVPSpec objects have + Whether to retain only the baseline-pairs that all UVPSpec objects have in common. Default: True. - + times : bool, optional - Whether to retain only the (average) times that all UVPSpec objects - have in common. This does not check to make sure that time_1 and time_2 - (i.e. the LSTs for the left-hand and right-hand visibilities that went - into the power spectrum calculation) are the same. See the + Whether to retain only the (average) times that all UVPSpec objects + have in common. This does not check to make sure that time_1 and time_2 + (i.e. the LSTs for the left-hand and right-hand visibilities that went + into the power spectrum calculation) are the same. See the UVPSpec.time_avg_array attribute for more details. Default: True. - + pols : bool, optional - Whether to retain only the polarizations that all UVPSpec objects have + Whether to retain only the polarizations that all UVPSpec objects have in common. Default: True. - + inplace : bool, optional - Whether the selection should be applied to the UVPSpec objects + Whether the selection should be applied to the UVPSpec objects in-place, or new copies of the objects should be returned. - + Returns ------- uvp_list : list of UVPSpec, optional - List of UVPSpec objects with the overlap selection applied. This will + List of UVPSpec objects with the overlap selection applied. This will only be returned if inplace = False. """ if len(uvp_list) < 2: raise IndexError("uvp_list must contain two or more UVPSpec objects.") - + # Get times that are common to all UVPSpec objects in the list if times: common_times = np.unique(uvp_list[0].time_avg_array) - has_times = [np.isin(common_times, uvp.time_avg_array) + has_times = [np.isin(common_times, uvp.time_avg_array) for uvp in uvp_list] common_times = common_times[np.all(has_times, axis=0)] print "common_times:", common_times - + # Get baseline-pairs that are common to all if blpairs: common_blpairs = np.unique(uvp_list[0].blpair_array) - has_blpairs = [np.isin(common_blpairs, uvp.blpair_array) + has_blpairs = [np.isin(common_blpairs, uvp.blpair_array) for uvp in uvp_list] common_blpairs = common_blpairs[np.all(has_blpairs, axis=0)] print "common_blpairs:", common_blpairs - + # Get polarizations that are common to all if pols: common_pols = np.unique(uvp_list[0].pol_array) has_pols = [np.isin(common_pols, uvp.pol_array) for uvp in uvp_list] common_pols = common_pols[np.all(has_pols, axis=0)] print "common_pols:", common_pols - + # Get common spectral windows (the entire window must match) # Each row of common_spws is a list of that spw's index in each UVPSpec if spws: common_spws = [] for spw in range(uvp_list[0].Nspws): dlys0 = uvp_list[0].get_dlys((spw,)) - + # Check if this window exists in all UVPSpec objects found_spws = [spw, ] missing = False for uvp in uvp_list: # Check if any spws in this UVPSpec match the current spw - matches_spw = np.array([ np.array_equal(dlys0, uvp.get_dlys((i,))) + matches_spw = np.array([ np.array_equal(dlys0, uvp.get_dlys((i,))) for i in range(uvp.Nspws) ]) if not np.any(matches_spw): missing = True @@ -188,92 +188,92 @@ def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, else: # Store which spw of this UVPSpec it was found in found_spws.append( np.where(matches_spw)[0] ) - + # Only add this spw to the list if it was found in all UVPSpecs if missing: continue common_spws.append(found_spws) common_spws = np.array(common_spws).T # Transpose print "common_spws:", common_spws - + # Check that this won't be an empty selection if spws and len(common_spws) == 0: raise ValueError("No spectral windows were found that exist in all " "spectra (the entire spectral window must match).") - + if blpairs and len(common_blpairs) == 0: raise ValueError("No baseline-pairs were found that exist in all spectra.") - + if times and len(common_times) == 0: raise ValueError("No times were found that exist in all spectra.") - + if pols and len(common_pols) == 0: raise ValueError("No polarizations were found that exist in all spectra.") - + # Apply selections out_list = [] for i, uvp in enumerate(uvp_list): _spws, _blpairs, _times, _pols = None, None, None, None - + # Set indices of blpairs, times, and pols to keep if blpairs: _blpairs = common_blpairs if times: _times = common_times if pols: _pols = common_pols if spws: _spws = common_spws[i] - - _uvp = uvp.select(spws=_spws, blpairs=_blpairs, times=_times, + + _uvp = uvp.select(spws=_spws, blpairs=_blpairs, times=_times, pols=_pols, inplace=inplace) if not inplace: out_list.append(_uvp) - + # Return if not inplace if not inplace: return out_list -def average_spectra(uvp_in, blpair_groups=None, time_avg=False, +def average_spectra(uvp_in, blpair_groups=None, time_avg=False, blpair_weights=None, error_field=None, normalize_weights=True, inplace=True): """ - Average power spectra across the baseline-pair-time axis, weighted by + Average power spectra across the baseline-pair-time axis, weighted by each spectrum's integration time. - - This is an "incoherent" average, in the sense that this averages power - spectra, rather than visibility data. The 'nsample_array' and + + This is an "incoherent" average, in the sense that this averages power + spectra, rather than visibility data. The 'nsample_array' and 'integration_array' will be updated to reflect the averaging. - In the case of averaging across baseline pairs, the resultant averaged - spectrum is assigned to the zeroth blpair in the group. In the case of - time averaging, the time and LST arrays are assigned to the mean of the + In the case of averaging across baseline pairs, the resultant averaged + spectrum is assigned to the zeroth blpair in the group. In the case of + time averaging, the time and LST arrays are assigned to the mean of the averaging window. - Note that this is designed to be separate from spherical binning in k: - here we are not connecting k_perp modes to k_para modes. However, if - blpairs holds groups of iso baseline separation, then this is + Note that this is designed to be separate from spherical binning in k: + here we are not connecting k_perp modes to k_para modes. However, if + blpairs holds groups of iso baseline separation, then this is equivalent to cylindrical binning in 3D k-space. - If you want help constructing baseline-pair groups from baseline + If you want help constructing baseline-pair groups from baseline groups, see self.get_blpair_groups_from_bl_groups. Parameters ---------- uvp_in : UVPSpec Input power spectrum (to average over). - + blpair_groups : list of baseline-pair groups - List of list of tuples or integers. All power spectra in a - baseline-pair group are averaged together. If a baseline-pair + List of list of tuples or integers. All power spectra in a + baseline-pair group are averaged together. If a baseline-pair exists in more than one group, a warning is raised. - - Ex: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], - [((4, 6), (4, 6))]] + + Ex: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], + [((4, 6), (4, 6))]] or blpair_groups = [ [1002001002, 2003002003], [4006004006] ] time_avg : bool, optional If True, average power spectra across the time axis. Default: False. - + blpair_weights : list of weights (float or int), optional - Relative weight of each baseline-pair when performing the average. This - is useful for bootstrapping. This should have the same shape as - blpair_groups if specified. The weights are automatically normalized - within each baseline-pair group. Default: None (all baseline pairs have + Relative weight of each baseline-pair when performing the average. This + is useful for bootstrapping. This should have the same shape as + blpair_groups if specified. The weights are automatically normalized + within each baseline-pair group. Default: None (all baseline pairs have unity weights). error_field: string or list, optional @@ -287,31 +287,31 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, Whether to normalize the baseline-pair weights so that: Sum(blpair_weights) = N_blpairs If False, no normalization is applied to the weights. Default: True. - + inplace : bool, optional - If True, edit data in self, else make a copy and return. Default: + If True, edit data in self, else make a copy and return. Default: True. Notes ----- - Currently, every baseline-pair in a blpair group must have the same - Ntimes, unless time_avg=True. Future versions may support - baseline-pair averaging of heterogeneous time arrays. This includes - the scenario of repeated blpairs (e.g. in bootstrapping), which will + Currently, every baseline-pair in a blpair group must have the same + Ntimes, unless time_avg=True. Future versions may support + baseline-pair averaging of heterogeneous time arrays. This includes + the scenario of repeated blpairs (e.g. in bootstrapping), which will return multiple copies of their time_array. """ if inplace: uvp = uvp_in else: uvp = copy.deepcopy(uvp_in) - + # Copy these, so we don't modify the input lists blpair_groups = copy.deepcopy(blpair_groups) blpair_weights = copy.deepcopy(blpair_weights) # If blpair_groups were fed in, enforce type and structure if blpair_groups is not None: - + # Enforce shape of blpair_groups assert isinstance(blpair_groups[0], list), \ "blpair_groups must be fed as a list of baseline-pair lists. " \ @@ -319,7 +319,7 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Convert blpair_groups to list of blpair group integers if isinstance(blpair_groups[0][0], tuple): - new_blpair_grps = [map(lambda blp: uvp.antnums_to_blpair(blp), blpg) + new_blpair_grps = [map(lambda blp: uvp.antnums_to_blpair(blp), blpg) for blpg in blpair_groups] blpair_groups = new_blpair_grps else: @@ -331,10 +331,10 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Print warning if a blpair appears more than once in all of blpair_groups all_blpairs = [item for sublist in blpair_groups for item in sublist] - if len(set(all_blpairs)) < len(all_blpairs): + if len(set(all_blpairs)) < len(all_blpairs): print("Warning: some baseline-pairs are repeated between blpair "\ "averaging groups.") - + # Create baseline-pair weights list if not specified if blpair_weights is None: # Assign unity weights to baseline-pair groups that were specified @@ -361,6 +361,7 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, if stat not in uvp.stats_array.keys(): raise KeyError("error_field \"%s\" not found in stats_array keys." % stat) + # For baseline pairs not in blpair_groups, add them as their own group extra_blpairs = set(uvp.blpair_array) - set(all_blpairs) blpair_groups += map(lambda blp: [blp], extra_blpairs) @@ -371,48 +372,59 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, ints_array, nsmp_array = odict(), odict() stats_array = odict([[stat, odict()] for stat in stat_l]) + # will average covariance array by default + store_cov = hasattr(uvp,"cov_array") + if store_cov: + cov_array = odict() + + # Iterate over spectral windows for spw in range(uvp.Nspws): spw_data, spw_wgts, spw_ints, spw_nsmp = [], [], [], [] spw_stats = odict([[stat, []] for stat in stat_l]) - + if store_cov: + spw_cov = [] # Iterate over polarizations for i, p in enumerate(uvp.pol_array): pol_data, pol_wgts, pol_ints, pol_nsmp = [], [], [], [] pol_stats = odict([[stat, []] for stat in stat_l]) - + if store_cov: + pol_cov = [] # Iterate over baseline-pair groups for j, blpg in enumerate(blpair_groups): bpg_data, bpg_wgts, bpg_ints, bpg_nsmp = [], [], [], [] bpg_stats = odict([[stat, []] for stat in stat_l]) + if store_cov: + bpg_cov = [] w_list = [] - - # Sum over all weights within this baseline group to get - # normalization (if weights specified). The normalization is + + # Sum over all weights within this baseline group to get + # normalization (if weights specified). The normalization is # calculated so that Sum (blpair wgts) = no. baselines. if blpair_weights is not None: blpg_wgts = np.array(blpair_weights[j]) norm = np.sum(blpg_wgts) if normalize_weights else 1. - + if norm <= 0.: raise ValueError("Sum of baseline-pair weights in " "group %d is <= 0." % j) blpg_wgts *= float(blpg_wgts.size) / norm # Apply normalization else: blpg_wgts = np.ones(len(blpg)) - + # Iterate within a baseline-pair group and get integration- # weighted data for k, blp in enumerate(blpg): - + # Get no. samples and construct integration weight nsmp = uvp.get_nsamples(spw, blp, p)[:, None] data = uvp.get_data(spw, blp, p) wgts = uvp.get_wgts(spw, blp, p) ints = uvp.get_integrations(spw, blp, p)[:, None] w = (ints * np.sqrt(nsmp)) - + if store_cov: + cov = uvp.get_cov(spw,blp,p) # Get error bar weights and set invalid ones to zero. errws = {} for stat in stat_l: @@ -421,22 +433,24 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, errs[~no_data] = 1. / errs[~no_data] ** 2. errs[no_data] = 0. errws[stat] = errs - + # Take time average if desired if time_avg: data = (np.sum(data * w, axis=0) \ / np.sum(w, axis=0).clip(1e-10, np.inf))[None] wgts = (np.sum(wgts * w[:, None], axis=0) \ - / np.sum(w, axis=0).clip(1e-10, np.inf)[:, None])[None] + / np.sum(w, axis=0).clip(1e-10, np.inf)[:, None])[None] ints = (np.sum(ints * w, axis=0) \ / np.sum(w, axis=0).clip(1e-10, np.inf))[None] - nsmp = np.sum(nsmp, axis=0)[None] + nsmp = np.sum(nsmp, axis=0)[None] w = np.sum(w, axis=0)[None] + if store_cov: + cov = (np.sum(cov * w[:, None], axis=0) \ + / np.sum(w,axis=0).clip(1e-10, np.inf))[None] for stat in stat_l: errws[stat] = np.sum(errws[stat], axis=0)[None] - - # Add multiple copies of data for each baseline according + # Add multiple copies of data for each baseline according # to the weighting/multiplicity for m in range(int(blpg_wgts[k])): bpg_data.append(data * w) @@ -445,8 +459,11 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, bpg_nsmp.append(nsmp) [bpg_stats[stat].append(errws[stat]) for stat in stat_l] w_list.append(w) + if store_cov: + bpg_cov.append(cov * w[:, None]) + - # Take integration-weighted averages, with clipping to deal + # Take integration-weighted averages, with clipping to deal # with zeros bpg_data = np.sum(bpg_data, axis=0) \ / np.sum(w_list, axis=0).clip(1e-10, np.inf) @@ -455,6 +472,9 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, bpg_nsmp = np.sum(bpg_nsmp, axis=0) bpg_ints = np.sum(bpg_ints, axis=0) \ / np.sum(w_list, axis=0).clip(1e-10, np.inf) + if store_cov: + bpg_cov = np.sum(bpg_cov, axis=0) \ + / np.sum(w_list, axis=0)[:,None].clip(1e-10, np.inf) w_list = np.sum(w_list, axis=0) # For errors that aren't 0, sum weights and take inverse square @@ -465,16 +485,20 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, arr[not_valid] *= np.nan arr[~not_valid] = arr[~not_valid] ** (-0.5) bpg_stats[stat] = arr - + # Append to lists (polarization) pol_data.extend(bpg_data); pol_wgts.extend(bpg_wgts) pol_ints.extend(bpg_ints); pol_nsmp.extend(bpg_nsmp) [pol_stats[stat].extend(bpg_stats[stat]) for stat in stat_l] + if store_cov: + pol_cov.extend(bpg_cov) # Append to lists (spectral window) spw_data.append(pol_data); spw_wgts.append(pol_wgts) spw_ints.append(pol_ints); spw_nsmp.append(pol_nsmp) [spw_stats[stat].append(pol_stats[stat]) for stat in stat_l] + if store_cov: + spw_cov.append(pol_cov) # Append to dictionaries data_array[spw] = np.moveaxis(spw_data, 0, -1) @@ -483,17 +507,19 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, nsmp_array[spw] = np.moveaxis(spw_nsmp, 0, -1)[:, 0, :] for stat in stat_l: stats_array[stat][spw] = np.moveaxis(spw_stats[stat], 0, -1) + if store_cov: + cov_array[spw] = np.moveaxis(np.array(spw_cov), 0, -1) # Iterate over blpair groups one more time to assign metadata time_1, time_2, time_avg_arr = [], [], [] lst_1, lst_2, lst_avg_arr = [], [], [] blpair_arr, bl_arr = [], [] - + for i, blpg in enumerate(blpair_groups): - + # Get blpairts indices for zeroth blpair in this group blpairts = uvp.blpair_to_indices(blpg[0]) - + # Assign meta-data bl_arr.extend(list(uvputils._blpair_to_bls(blpg[0]))) if time_avg: @@ -522,27 +548,29 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, uvp.Nblpairts = len(time_avg_arr) uvp.Nblpairs = len(np.unique(blpair_arr)) uvp.Nbls = len(bl_arr) - + # Baselines uvp.bl_array = bl_arr uvp.bl_vecs = bl_vecs uvp.blpair_array = np.array(blpair_arr) - + # Times uvp.time_1_array = np.array(time_1) uvp.time_2_array = np.array(time_2) uvp.time_avg_array = np.array(time_avg_arr) - + # LSTs uvp.lst_1_array = np.array(lst_1) uvp.lst_2_array = np.array(lst_2) uvp.lst_avg_array = np.array(lst_avg_arr) - + # Data, weights, and no. samples uvp.data_array = data_array uvp.integration_array = ints_array uvp.wgt_array = wgts_array uvp.nsample_array = nsmp_array + if store_cov: + uvp.cov_array = cov_array if hasattr(uvp_in, 'label1'): uvp.label1 = uvp_in.label1 if hasattr(uvp_in, 'label2'): uvp.label2 = uvp_in.label2 @@ -553,23 +581,23 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Validity check uvp.check() - + # Return if inplace == False: return uvp def fold_spectra(uvp): """ - Average bandpowers from matching positive and negative delay bins onto a - purely positive delay axis. Negative delay bins are still populated, but + Average bandpowers from matching positive and negative delay bins onto a + purely positive delay axis. Negative delay bins are still populated, but are filled with zero power. This is an in-place operation. - - Will only work if uvp.folded == False, i.e. data is currently unfolded - across negative and positive delay. Because this averages the data, the - nsample array is multiplied by a factor of 2. - + + Will only work if uvp.folded == False, i.e. data is currently unfolded + across negative and positive delay. Because this averages the data, the + nsample array is multiplied by a factor of 2. + WARNING: This operation cannot be undone. - + Parameters ---------- uvp : UVPSpec @@ -595,7 +623,7 @@ def fold_spectra(uvp): else: # odd number of dlys left = uvp.data_array[spw][:, :Ndlys//2, :][:, ::-1, :] - right = uvp.data_array[spw][:, Ndlys//2+1:, :] + right = uvp.data_array[spw][:, Ndlys//2+1:, :] uvp.data_array[spw][:, Ndlys//2+1:, :] = np.mean([left, right], axis=0) uvp.data_array[spw][:, :Ndlys//2, :] = 0.0 uvp.nsample_array[spw] *= 2.0 @@ -603,58 +631,58 @@ def fold_spectra(uvp): uvp.folded = True -def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, +def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, seed=None): """ - Generate a bootstrap-sampled average over a set of power spectra. The - sampling is done over user-defined groups of baseline pairs (with + Generate a bootstrap-sampled average over a set of power spectra. The + sampling is done over user-defined groups of baseline pairs (with replacement). - - Multiple UVPSpec objects can be passed to this function. The bootstrap - sampling is carried out over all of the available baseline-pairs (in a - user-specified group) across all UVPSpec objects. (This means that each - UVPSpec object can contribute a different number of baseline-pairs to the + + Multiple UVPSpec objects can be passed to this function. The bootstrap + sampling is carried out over all of the available baseline-pairs (in a + user-specified group) across all UVPSpec objects. (This means that each + UVPSpec object can contribute a different number of baseline-pairs to the average each time.) - + Successive calls to the function will produce different random samples. - + Parameters ---------- uvp_list : list of UVPSpec objects List of UVPSpec objects to form a bootstrap sample from. - - The UVPSpec objects can have different numbers of baseline-pairs and - times, but the averages will only be taken over spectral windows and + + The UVPSpec objects can have different numbers of baseline-pairs and + times, but the averages will only be taken over spectral windows and polarizations that match across all UVPSpec objects. - + blpair_groups : list of baseline-pair groups - List of baseline-pair groups, where each group is a list of tuples or - integers. The bootstrap sampling and averaging is done over the + List of baseline-pair groups, where each group is a list of tuples or + integers. The bootstrap sampling and averaging is done over the baseline-pairs within each group. - - There is no requirement for each UVPSpec object to contain all - baseline-pairs within each specified blpair group, as long as they + + There is no requirement for each UVPSpec object to contain all + baseline-pairs within each specified blpair group, as long as they exist in at least one of the UVPSpec objects. - + time_avg : bool, optional Whether to average over the time axis or not. Default: False. - + seed : int, optional Random seed to use when drawing baseline-pairs. - + Returns ------- uvp_avg_list : list of UVPSpec - List of UVPSpec objects containing averaged power spectra. - - Each object is an averaged version of the corresponding input UVPSpec - object. Each average is over the baseline-pairs sampled from that - object only; to return the true bootstrap average, these objects + List of UVPSpec objects containing averaged power spectra. + + Each object is an averaged version of the corresponding input UVPSpec + object. Each average is over the baseline-pairs sampled from that + object only; to return the true bootstrap average, these objects should be averaged together. - + blpair_wgts_list : list of lists of integers - List of weights used for each baseline-pair in each group, from each - UVPSpec object. This describes how the bootstrap sample was generated. + List of weights used for each baseline-pair in each group, from each + UVPSpec object. This describes how the bootstrap sample was generated. Shape: (len(uvp_list), shape(blpair_groups)). """ # Validate input data @@ -662,41 +690,41 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, if isinstance(uvp_list, UVPSpec): uvp_list = [uvp_list,] assert isinstance(uvp_list, list), \ "uvp_list must be a list of UVPSpec objects" - + # Check that uvp_list contains UVPSpec objects with the correct dimensions for uvp in uvp_list: assert isinstance(uvp, UVPSpec), \ "uvp_list must be a list of UVPSpec objects" - + # Check for same length of time axis if no time averaging will be done if not time_avg: if np.unique([uvp.Ntimes for uvp in uvp_list]).size > 1: raise IndexError("Input UVPSpec objects must have the same number " "of time samples if time_avg is False.") - + # Check that blpair_groups is a list of lists of groups assert isinstance(blpair_groups, list), \ "blpair_groups must be a list of lists of baseline-pair tuples/integers" for grp in blpair_groups: assert isinstance(grp, list), \ "blpair_groups must be a list of lists of baseline-pair tuples/integers" - + # Convert blpair tuples into blpair integers if necessary if isinstance(blpair_groups[0][0], tuple): - new_blp_grps = [map(lambda blp: uvputils._antnums_to_blpair(blp), blpg) + new_blp_grps = [map(lambda blp: uvputils._antnums_to_blpair(blp), blpg) for blpg in blpair_groups] blpair_groups = new_blp_grps - - # Homogenise input UVPSpec objects in terms of available polarizations + + # Homogenise input UVPSpec objects in terms of available polarizations # and spectral windows if len(uvp_list) > 1: uvp_list = select_common(uvp_list, spws=True, pols=True, inplace=False) - + # Loop over UVPSpec objects, looking for available blpairs in each avail_blpairs = [np.unique(uvp.blpair_array) for uvp in uvp_list] - all_avail_blpairs = np.unique([blp for blplist in avail_blpairs + all_avail_blpairs = np.unique([blp for blplist in avail_blpairs for blp in blplist]) - + # Check that all requested blpairs exist in at least one UVPSpec object all_req_blpairs = np.unique([blp for grp in blpair_groups for blp in grp]) missing = [] @@ -706,29 +734,29 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, raise KeyError("The following baseline-pairs were specified, but do " "not exist in any of the input UVPSpec objects: %s" \ % str(missing)) - + # Set random seed if specified if seed is not None: np.random.seed(seed) - - # Sample with replacement from the full list of available baseline-pairs - # in each group, and create a blpair_group and blpair_weights entry for + + # Sample with replacement from the full list of available baseline-pairs + # in each group, and create a blpair_group and blpair_weights entry for # each UVPSpec object blpair_grps_list = [[] for uvp in uvp_list] blpair_wgts_list = [[] for uvp in uvp_list] - + # Loop over each requested blpair group for grp in blpair_groups: - + # Find which blpairs in this group are available in each UVPSpec object avail = [np.intersect1d(grp, blp_list) for blp_list in avail_blpairs] avail_flat = [blp for lst in avail for blp in lst] num_avail = len(avail_flat) - - # Draw set of random integers (with replacement) and convert into + + # Draw set of random integers (with replacement) and convert into # list of weights for each blpair in each UVPSpec draw = np.random.randint(low=0, high=num_avail, size=num_avail) wgt = np.array([(draw == i).sum() for i in range(num_avail)]) - + # Extract the blpair weights for each UVPSpec j = 0 for i in range(len(uvp_list)): @@ -737,16 +765,15 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, _wgts = wgt[np.arange(j, j+n_blps)].astype(float) #+ 1e-4 blpair_wgts_list[i].append( list(_wgts) ) j += n_blps - - # Loop over UVPSpec objects and calculate averages in each blpair group, + + # Loop over UVPSpec objects and calculate averages in each blpair group, # using the bootstrap-sampled blpair weights uvp_avg = [] for i, uvp in enumerate(uvp_list): - _uvp = average_spectra(uvp, blpair_groups=blpair_grps_list[i], - blpair_weights=blpair_wgts_list[i], + _uvp = average_spectra(uvp, blpair_groups=blpair_grps_list[i], + blpair_weights=blpair_wgts_list[i], time_avg=time_avg, inplace=False) uvp_avg.append(_uvp) - + # Return list of averaged spectra for now return uvp_avg, blpair_wgts_list - diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py index 0ca2991a..3d8237c3 100644 --- a/hera_pspec/uvpspec.py +++ b/hera_pspec/uvpspec.py @@ -83,51 +83,51 @@ def __init__(self): self._OmegaPP = PSpecParam("OmegaP", description="Integral of unitless beam power squared over the sky [steradians].", form="(Nbeam_freqs, Npols)", expected_type=np.float64) self._beam_freqs = PSpecParam("beam_freqs", description="Frequency bins of the OmegaP and OmegaPP beam-integral arrays [Hz].", form="(Nbeam_freqs,)", expected_type=np.float64) self._cosmo = PSpecParam("cosmo", description="Instance of conversion.Cosmo_Conversions class.", expected_type=conversions.Cosmo_Conversions) - + # Collect all parameters: required and non-required - self._all_params = sorted(map(lambda p: p[1:], + self._all_params = sorted(map(lambda p: p[1:], fnmatch.filter(self.__dict__.keys(), '_*'))) - # Specify required params: these are required for read / write and + # Specify required params: these are required for read / write and # self.check() - self._req_params = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", - "Nspws", "Ndlys", "Npols", "Nfreqs", "history", + self._req_params = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", + "Nspws", "Ndlys", "Npols", "Nfreqs", "history", "data_array", "wgt_array", "integration_array", - "spw_array", "freq_array", "dly_array", "pol_array", - "lst_1_array", "lst_2_array", "time_1_array", + "spw_array", "freq_array", "dly_array", "pol_array", + "lst_1_array", "lst_2_array", "time_1_array", "time_2_array", "blpair_array", "Nbls", - "bl_vecs", "bl_array", "channel_width", - "telescope_location", "weighting", "vis_units", - "norm_units", "taper", "norm", "nsample_array", - "lst_avg_array", "time_avg_array", "folded", + "bl_vecs", "bl_array", "channel_width", + "telescope_location", "weighting", "vis_units", + "norm_units", "taper", "norm", "nsample_array", + "lst_avg_array", "time_avg_array", "folded", "scalar_array", "labels", "label_1_array", "label_2_array","store_cov"] - # All parameters must fall into one and only one of the following + # All parameters must fall into one and only one of the following # groups, which are used in __eq__ - self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", - "Nspws", "Ndlys", "Npols", "Nfreqs", "history", - "Nbls", "channel_width", "weighting", "vis_units", - "norm", "norm_units", "taper", "cosmo", "beamfile", + self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", + "Nspws", "Ndlys", "Npols", "Nfreqs", "history", + "Nbls", "channel_width", "weighting", "vis_units", + "norm", "norm_units", "taper", "cosmo", "beamfile", 'folded', "store_cov"] - self._ndarrays = ["spw_array", "freq_array", "dly_array", "pol_array", - "lst_1_array", "lst_avg_array", "time_avg_array", - "lst_2_array", "time_1_array", "time_2_array", + self._ndarrays = ["spw_array", "freq_array", "dly_array", "pol_array", + "lst_1_array", "lst_avg_array", "time_avg_array", + "lst_2_array", "time_1_array", "time_2_array", "blpair_array", "OmegaP", "OmegaPP", "beam_freqs", - "bl_vecs", "bl_array", "telescope_location", - "scalar_array", "labels", "label_1_array", + "bl_vecs", "bl_array", "telescope_location", + "scalar_array", "labels", "label_1_array", "label_2_array"] - - self._dicts = ["data_array", "wgt_array", "integration_array", + + self._dicts = ["data_array", "wgt_array", "integration_array", "nsample_array", "cov_array"] self._dicts_of_dicts = ["stats_array"] # define which attributes are considred meta data. Large attrs should be constructed as datasets - self._meta_dsets = ["lst_1_array", "lst_2_array", "time_1_array", - "time_2_array", "blpair_array", "bl_vecs", - "bl_array", "lst_avg_array", "time_avg_array", - "OmegaP", "OmegaPP", "label_1_array", + self._meta_dsets = ["lst_1_array", "lst_2_array", "time_1_array", + "time_2_array", "blpair_array", "bl_vecs", + "bl_array", "lst_avg_array", "time_avg_array", + "OmegaP", "OmegaPP", "label_1_array", "label_2_array"] self._meta_attrs = sorted(set(self._all_params) - set(self._dicts) - set(self._meta_dsets) - set(self._dicts_of_dicts)) self._meta = sorted(set(self._meta_dsets).union(set(self._meta_attrs))) @@ -138,6 +138,36 @@ def __init__(self): # Default parameter values self.folded = False + def get_cov(self,key,*args): + """ + Slice into covariance array with a specified data key in the format + (spw, ((ant1, ant2),(ant3, ant4)), pol) + + or + + (spw, blpair-integer, pol) + + where spw is the spectral window integer, ant1 etc. arge integers, + and pol is either a polarization string (ex. 'XX') or integer (ex. -5). + + Parameters + ---------- + key: tuple + Baseline-pair key + + Returns + ------- + data : complex ndarray + Shape (Ntimes, Ndlys, Ndlys) + """ + spw, blpairts, pol = self.key_to_indices(key, *args) + # Need to deal with folded data! + # if data has been folded, return only positive delays + if hasattr(self,'cov_array'): + return self.cov_array[spw][blpairts, :, :, pol] + else: + raise AttributeError("No covariance array has been calculated.") + def get_data(self, key, *args): """ Slice into data_array with a specified data key in the format @@ -1011,7 +1041,7 @@ def write_to_group(self, group, run_check=True): group.create_dataset("cov_spw{}".format(i), data=self.cov_array[i], dtype=np.complex) - + # Store any statistics arrays if hasattr(self, "stats_array"): for s in self.stats_array.keys(): @@ -1020,7 +1050,7 @@ def write_to_group(self, group, run_check=True): group.create_dataset("stats_{}_{}".format(s, i), data=data[i], dtype=data[i].dtype) - + def write_hdf5(self, filepath, overwrite=False, run_check=True): """ Write a UVPSpec object to HDF5 file. @@ -1189,7 +1219,7 @@ def check(self, just_meta=False): assert isinstance(getattr(self, p)[k], (dict, odict)) for j in getattr(self, p)[k].keys(): assert isinstance(getattr(self, p)[k][j], np.ndarray) - + try: getattr(self, p)[k][j] = a.expected_type(getattr(self, p)[k][j]) except: @@ -1400,8 +1430,8 @@ def generate_noise_spectra(self, spw, pol, Tsys, blpairs=None, little_h=True, return P_N - - def average_spectra(self, blpair_groups=None, time_avg=False, + + def average_spectra(self, blpair_groups=None, time_avg=False, blpair_weights=None, error_field=None, inplace=True): """ Average power spectra across the baseline-pair-time axis, weighted by @@ -1427,9 +1457,9 @@ def average_spectra(self, blpair_groups=None, time_avg=False, Parameters ---------- blpair_groups : list - List of list of baseline-pair group tuples or integers. All power - spectra in a baseline-pair group are averaged together. If a - baseline-pair exists in more than one group, a warning is raised. + List of list of baseline-pair group tuples or integers. All power + spectra in a baseline-pair group are averaged together. If a + baseline-pair exists in more than one group, a warning is raised. Examples:: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], @@ -1448,7 +1478,7 @@ def average_spectra(self, blpair_groups=None, time_avg=False, as blpair_groups if specified. The weights are automatically normalized within each baseline-pair group. Default: None (all baseline pairs have unity weights). - + error_field: string or list, optional If errorbars have been entered into stats_array, will do a weighted sum to shrink the error bars down to the size of the averaged @@ -1469,16 +1499,16 @@ def average_spectra(self, blpair_groups=None, time_avg=False, return multiple copies of their time_array. """ if inplace: - grouping.average_spectra(self, blpair_groups=blpair_groups, + grouping.average_spectra(self, blpair_groups=blpair_groups, time_avg=time_avg, - error_field=error_field, - blpair_weights=blpair_weights, + error_field=error_field, + blpair_weights=blpair_weights, inplace=True) else: - return grouping.average_spectra(self, blpair_groups=blpair_groups, + return grouping.average_spectra(self, blpair_groups=blpair_groups, time_avg=time_avg, error_field=error_field, - blpair_weights=blpair_weights, + blpair_weights=blpair_weights, inplace=False) def fold_spectra(self): @@ -1518,7 +1548,7 @@ def get_blpair_groups_from_bl_groups(self, blgroups, only_pairs_in_bls=False): """ blpair_groups = [] for blg in blgroups: - blp_select = uvputils._get_blpairs_from_bls(self, blg, + blp_select = uvputils._get_blpairs_from_bls(self, blg, only_pairs_in_bls=only_pairs_in_bls) blp = sorted(set(self.blpair_array[blp_select])) if len(blp) > 0: @@ -1571,10 +1601,10 @@ def compute_scalar(self, spw, pol, num_steps=1000, little_h=True, # compute scalar OP = self.OmegaP[:, self.pol_to_indices(pol)].squeeze() OPP = self.OmegaPP[:, self.pol_to_indices(pol)].squeeze() - scalar = pspecbeam._compute_pspec_scalar(self.cosmo, self.beam_freqs, + scalar = pspecbeam._compute_pspec_scalar(self.cosmo, self.beam_freqs, OPP / OP**2, spw_freqs, - num_steps=num_steps, - taper=self.taper, + num_steps=num_steps, + taper=self.taper, little_h=little_h, noise_scalar=noise_scalar) return scalar @@ -1617,7 +1647,7 @@ def combine_uvpspec(uvps, verbose=True): # Store covariance only if all uvps have stored covariance. store_cov = np.all([hasattr(uvp,'cov_array') for uvp in uvps]) - + # Create new empty data arrays and fill spw arrays u.data_array = odict() u.integration_array = odict() @@ -1627,30 +1657,30 @@ def combine_uvpspec(uvps, verbose=True): u.cov_array = odict() u.scalar_array = np.empty((Nspws, Npols), np.float) u.freq_array, u.spw_array, u.dly_array = [], [], [] - + # Loop over spectral windows for i, spw in enumerate(new_spws): - + # Initialize new arrays u.data_array[i] = np.empty((Nblpairts, spw[2], Npols), np.complex128) u.integration_array[i] = np.empty((Nblpairts, Npols), np.float64) u.wgt_array[i] = np.empty((Nblpairts, spw[2], 2, Npols), np.float64) u.nsample_array[i] = np.empty((Nblpairts, Npols), np.float64) if store_cov: - u.cov_array[i] = np.empty((Nblpairts, spw[2], spw[2], Npols), + u.cov_array[i] = np.empty((Nblpairts, spw[2], spw[2], Npols), np.complex128) - + # Set frequencies and delays spw_Nfreqs = spw[-1] spw_freqs = np.linspace(*spw, endpoint=False) spw_dlys = np.fft.fftshift( - np.fft.fftfreq(spw_Nfreqs, + np.fft.fftfreq(spw_Nfreqs, np.median(np.diff(spw_freqs)) ) ) u.spw_array.extend(np.ones(spw_Nfreqs, np.int32) * i) u.freq_array.extend(spw_freqs) u.dly_array.extend(spw_dlys) - - # Convert to numpy arrays + + # Convert to numpy arrays u.spw_array = np.array(u.spw_array) u.freq_array = np.array(u.freq_array) u.dly_array = np.array(u.dly_array) @@ -1663,7 +1693,7 @@ def combine_uvpspec(uvps, verbose=True): u.Nfreqs = len(np.unique(u.freq_array)) u.Nspwdlys = len(u.spw_array) u.Ndlys = len(np.unique(u.dly_array)) - + # Prepare time and label arrays u.time_1_array, u.time_2_array = np.empty(Nblpairts, np.float), \ np.empty(Nblpairts, np.float) @@ -1680,38 +1710,38 @@ def combine_uvpspec(uvps, verbose=True): uvp_spws = [_uvp.get_spw_ranges() for _uvp in uvps] uvp_blpts = [zip(_uvp.blpair_array, _uvp.time_avg_array) for _uvp in uvps] uvp_pols = [_uvp.pol_array.tolist() for _uvp in uvps] - + # Construct dict of label indices, to be used for re-ordering later u_lbls = {lbl: ll for ll, lbl in enumerate(u.labels)} - + # fill in data arrays depending on concat ax if concat_ax == 'spw': - + # Concatenate spectral windows for i, spw in enumerate(new_spws): l = [spw in _u for _u in uvp_spws].index(True) m = [spw == _spw for _spw in uvp_spws[l]].index(True) - + # Lookup indices of new_blpts in the uvp_blpts[l] array blpts_idxs = uvputils._fast_lookup_blpairts(uvp_blpts[l], new_blpts) if i == 0: blpts_idxs0 = blpts_idxs.copy() - + # Loop over polarizations for k, p in enumerate(new_pols): q = uvp_pols[l].index(p) u.scalar_array[i,k] = uvps[l].scalar_array[m,q] - + # Loop over blpair-times for j, blpt in enumerate(new_blpts): - + n = blpts_idxs[j] - + # Data/weight/integration arrays u.data_array[i][j,:,k] = uvps[l].data_array[m][n,:,q] u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n, q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] @@ -1731,26 +1761,26 @@ def combine_uvpspec(uvps, verbose=True): u.blpair_array[j] = uvps[0].blpair_array[n] elif concat_ax == 'blpairts': - - # Get mapping of blpair-time indices between old UVPSpec objects and + + # Get mapping of blpair-time indices between old UVPSpec objects and # the new one - blpts_idxs = np.concatenate( + blpts_idxs = np.concatenate( [uvputils._fast_lookup_blpairts(_blpts, new_blpts) for _blpts in uvp_blpts] ) - + is_in = [uvputils._fast_is_in(_blpts, new_blpts) for _blpts in uvp_blpts] # Concatenate blpair-times for j, blpt in enumerate(new_blpts): - + l = [isn[j] for isn in is_in].index(True) n = blpts_idxs[j] - + # Loop over spectral windows for i, spw in enumerate(new_spws): m = [spw == _spw for _spw in uvp_spws[l]].index(True) - + # Loop over polarizations for k, p in enumerate(new_pols): q = uvp_pols[l].index(p) @@ -1761,13 +1791,13 @@ def combine_uvpspec(uvps, verbose=True): u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n,q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] u.label_1_array[i,j,k] = u_lbls[uvps[l].labels[lbl1]] u.label_2_array[i,j,k] = u_lbls[uvps[l].labels[lbl2]] - + # Populate new LST, time, and blpair arrays u.time_1_array[j] = uvps[l].time_1_array[n] u.time_2_array[j] = uvps[l].time_2_array[n] @@ -1778,22 +1808,22 @@ def combine_uvpspec(uvps, verbose=True): u.blpair_array[j] = uvps[l].blpair_array[n] elif concat_ax == 'pol': - + # Concatenate polarizations for k, p in enumerate(new_pols): l = [p in _pols for _pols in uvp_pols].index(True) q = uvp_pols[l].index(p) - - # Get mapping of blpair-time indices between old UVPSpec objects + + # Get mapping of blpair-time indices between old UVPSpec objects # and the new one blpts_idxs = uvputils._fast_lookup_blpairts(uvp_blpts[l], new_blpts) - if k == 0: blpts_idxs0 = blpts_idxs.copy() - + if k == 0: blpts_idxs0 = blpts_idxs.copy() + # Loop over spectral windows for i, spw in enumerate(new_spws): m = [spw == _spw for _spw in uvp_spws[l]].index(True) u.scalar_array[i,k] = uvps[l].scalar_array[m,q] - + # Loop over blpair-times for j, blpt in enumerate(new_blpts): n = blpts_idxs[j] @@ -1803,7 +1833,7 @@ def combine_uvpspec(uvps, verbose=True): u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n,q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] @@ -1819,7 +1849,7 @@ def combine_uvpspec(uvps, verbose=True): u.lst_2_array[j] = uvps[0].lst_2_array[n] u.lst_avg_array[j] = uvps[0].lst_avg_array[n] u.blpair_array[j] = uvps[0].blpair_array[n] - + # Set baselines u.Nblpairs = len(np.unique(u.blpair_array)) uvp_bls = [uvp.bl_array for uvp in uvps] @@ -1835,10 +1865,10 @@ def combine_uvpspec(uvps, verbose=True): u.Ntimes = len(np.unique(u.time_avg_array)) u.history = reduce(operator.add, [uvp.history for uvp in uvps]) u.labels = np.array(u.labels, np.str) - + for k in static_meta.keys(): setattr(u, k, static_meta[k]) - + # Run check to make sure the new UVPSpec object is valid u.check() return u @@ -1856,10 +1886,10 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): an error is raised. - All uvpspec objects must have certain attributes that agree exactly. These + All uvpspec objects must have certain attributes that agree exactly. These include: - 'channel_width', 'telescope_location', 'weighting', 'OmegaP', - 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', + 'channel_width', 'telescope_location', 'weighting', 'OmegaP', + 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', 'norm_units', 'folded', 'cosmo', 'scalar' Parameters @@ -1878,17 +1908,17 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): ------- uvps : list - List of input UVPSpec objects + List of input UVPSpec objects concat_ax : str Data axis ['spw', 'blpairts', 'pols'] across data can be concatenated unique_spws : list - List of unique spectral window tuples (spw_freq_start, spw_freq_end, + List of unique spectral window tuples (spw_freq_start, spw_freq_end, spw_Nfreqs) across all input uvps unique_blpts : list - List of unique baseline-pair-time tuples (blpair_integer, + List of unique baseline-pair-time tuples (blpair_integer, time_avg_array JD float) across all input uvps unique_pols : list @@ -1912,8 +1942,8 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): # ensure static metadata agree between all objects - static_meta = ['channel_width', 'telescope_location', 'weighting', - 'OmegaP', 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', + static_meta = ['channel_width', 'telescope_location', 'weighting', + 'OmegaP', 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', 'norm_units', 'folded', 'cosmo'] for m in static_meta: for u in uvps[1:]: @@ -1924,9 +1954,9 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): assert not hasattr(uvps[0], m) and not hasattr(u, m), \ "Cannot concatenate UVPSpec objs: not all agree on '{}' attribute".format(m) - static_meta = odict([(k, getattr(uvps[0], k, None)) for k in static_meta + static_meta = odict([(k, getattr(uvps[0], k, None)) for k in static_meta if getattr(uvps[0], k, None) is not None]) - + # create unique data axis lists unique_spws = [] unique_blpts = [] @@ -1938,7 +1968,7 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): for uvp1 in uvps: for s in uvp1.get_spw_ranges(): if s not in unique_spws: unique_spws.append(s) - for p in uvp1.pol_array: + for p in uvp1.pol_array: if p not in unique_pols: unique_pols.append(p) uvp1_blpts = zip(uvp1.blpair_array, uvp1.time_avg_array) @@ -2003,6 +2033,3 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): "scalar_array must be the same for all uvps given concatenation along blpairts." return uvps, concat_ax, unique_spws, unique_blpts, unique_pols, static_meta - - - diff --git a/hera_pspec/uvpspec_utils.py b/hera_pspec/uvpspec_utils.py index d9491caf..c9758f36 100644 --- a/hera_pspec/uvpspec_utils.py +++ b/hera_pspec/uvpspec_utils.py @@ -158,7 +158,7 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim nsmp = odict() cov = odict() store_cov = hasattr(uvp, 'cov_array') - + for s in np.unique(uvp.spw_array): if h5file is not None: data[s] = h5file['data_spw{}'.format(s)][blp_select, :, pol_select] @@ -168,19 +168,20 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim if store_cov: cov[s] = h5file['cov_spw{}'.format(s)][blp_select, :, :, pol_select] else: + if store_cov: + cov[s] = uvp.cov_array[s][blp_select, :, :, pol_select] data[s] = uvp.data_array[s][blp_select, :, pol_select] wgts[s] = uvp.wgt_array[s][blp_select, :, :, pol_select] ints[s] = uvp.integration_array[s][blp_select, pol_select] nsmp[s] = uvp.nsample_array[s][blp_select, pol_select] - if store_cov: - cov[s] = uvp.cov_array[s][blp_select, :, :, pol_select] - + + # Prepare to read stats arrays stats = odict() - + # If h5file, read in stats data if h5file is not None: - statnames = [f[f.find("_")+1: f.rfind("_")] for f in h5file.keys() + statnames = [f[f.find("_")+1: f.rfind("_")] for f in h5file.keys() if f.startswith("stats")] for sts in np.unique(np.array(statnames)): stats[sts] = odict() @@ -201,12 +202,12 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim uvp.wgt_array = wgts uvp.integration_array = ints uvp.nsample_array = nsmp - + # Check for covariance array if store_cov: uvp.cov_array = cov - + except AttributeError as e: - # If no h5file fed and hasattr(uvp, data_array) is False then just + # If no h5file fed and hasattr(uvp, data_array) is False then just # load meta-data pass @@ -400,29 +401,29 @@ def _conj_blpair(blpair, which='both'): def _fast_is_in(src_blpts, query_blpts, time_prec=8): """ - Helper function to rapidly check if a given blpair-time couplet is in an + Helper function to rapidly check if a given blpair-time couplet is in an array. - + Parameters ---------- src_blpts : list of tuples or array_like - List of tuples or array of shape (N, 2), containing a list of (blpair, + List of tuples or array of shape (N, 2), containing a list of (blpair, time) couplets. - + query_blpts : list of tuples or array_like - List of tuples or array of shape (M, 2), containing a list of (blpair, + List of tuples or array of shape (M, 2), containing a list of (blpair, time) which will be looked up in src_blpts - + time_prec : int, optional - Number of decimals to round time array to when performing float + Number of decimals to round time array to when performing float comparision. Default: 8. - + Returns ------- is_in_arr: list of bools A list of booleans, which indicate which query_blpts are in src_blpts. """ - # This function converts baseline-pair-times, a tuple (blp, time) + # This function converts baseline-pair-times, a tuple (blp, time) # to a complex number blp + 1j * time, so that "in" function is much # faster. src_blpts = np.asarray(src_blpts) @@ -438,42 +439,40 @@ def _fast_is_in(src_blpts, query_blpts, time_prec=8): def _fast_lookup_blpairts(src_blpts, query_blpts, time_prec=8): """ - Helper function to allow fast lookups of array indices for large arrays of + Helper function to allow fast lookups of array indices for large arrays of blpair-time tuples. - + Parameters ---------- src_blpts : list of tuples or array_like - List of tuples or array of shape (N, 2), containing a list of (blpair, + List of tuples or array of shape (N, 2), containing a list of (blpair, time) couplets. - + query_blpts : list of tuples or array_like - List of tuples or array of shape (M, 2), containing a list of (blpair, + List of tuples or array of shape (M, 2), containing a list of (blpair, time) couplets that you want to find the indices of in source_blpts. - + time_prec : int, optional - Number of decimals to round time array to when performing float + Number of decimals to round time array to when performing float comparision. Default: 8. - + Returns ------- blpts_idxs : array_like - Array of integers of size (M,), which are indices in the source_blpts + Array of integers of size (M,), which are indices in the source_blpts array for each item in query_blpts. """ - # This function works by using a small hack -- the blpair-times are turned - # into complex numbers of the form (blpair + 1.j*time), allowing numpy + # This function works by using a small hack -- the blpair-times are turned + # into complex numbers of the form (blpair + 1.j*time), allowing numpy # array lookup functions to be used src_blpts = np.asarray(src_blpts) query_blpts = np.asarray(query_blpts) src_blpts = src_blpts[:,0] + 1.j*np.around(src_blpts[:,1], time_prec) query_blpts = query_blpts[:,0] + 1.j*np.around(query_blpts[:,1], time_prec) # Applies rounding to time values to ensure reliable float comparisons - + # Do np.where comparison for all new_blpts # (indices stored in second array returned by np.where) blpts_idxs = np.where(src_blpts == query_blpts[:,np.newaxis])[1] - - return blpts_idxs - + return blpts_idxs