diff --git a/hera_pspec/grouping.py b/hera_pspec/grouping.py index cb5202f1..d1ef6403 100644 --- a/hera_pspec/grouping.py +++ b/hera_pspec/grouping.py @@ -6,38 +6,38 @@ import copy -def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, +def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, seed=None): """ Group baselines together into equal-sized sets. - - These groups can be passed into PSpecData.pspec(), where the corresponding - baselines will be averaged together (grouping reduces the number of + + These groups can be passed into PSpecData.pspec(), where the corresponding + baselines will be averaged together (grouping reduces the number of cross-spectra that need to be computed). - + Parameters ---------- bls : list of tuples Set of unique baselines tuples. - + Ngroups : int - Number of groups to create. The groups will be equal in size, except + Number of groups to create. The groups will be equal in size, except the last group (if there are remainder baselines). - + keep_remainder : bool, optional - Whether to keep remainder baselines that don't fit exactly into the - number of specified groups. If True, the remainder baselines are - appended to the last group in the output list. Otherwise, the remainder + Whether to keep remainder baselines that don't fit exactly into the + number of specified groups. If True, the remainder baselines are + appended to the last group in the output list. Otherwise, the remainder baselines are discarded. Default: False. - + randomize : bool, optional - Whether baselines should be added to groups in the order they appear in + Whether baselines should be added to groups in the order they appear in 'bls', or if they should be assigned at random. Default: False. - + seed : int, optional - Random seed to use if randomize=True. If None, no random seed will be + Random seed to use if randomize=True. If None, no random seed will be set. Default: None. - + Returns ------- grouped_bls : list of lists of tuples @@ -46,19 +46,19 @@ def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, Nbls = len(bls) # Total baselines n = Nbls / Ngroups # Baselines per group rem = Nbls - n*Ngroups - + # Sanity check on number of groups if Nbls < Ngroups: raise ValueError("Can't have more groups than baselines.") - + # Make sure only tuples were provided (can't have groups of groups) for bl in bls: assert isinstance(bl, tuple) - + # Randomize baseline order if requested if randomize: if seed is not None: random.seed(seed) bls = copy.deepcopy(bls) random.shuffle(bls) - + # Assign to groups sequentially grouped_bls = [bls[i*n:(i+1)*n] for i in range(Ngroups)] if keep_remainder and rem > 0: grouped_bls[-1] += bls[-rem:] @@ -67,120 +67,120 @@ def group_baselines(bls, Ngroups, keep_remainder=False, randomize=False, def sample_baselines(bls, seed=None): """ - Sample a set of baselines with replacement (to be used to generate a + Sample a set of baselines with replacement (to be used to generate a bootstrap sample). - + Parameters ---------- bls : list of either tuples or lists of tuples - Set of unique baselines to be sampled from. If groups of baselines - (contained in lists) are provided, each group will be treated as a - single object by the sampler; its members will not be sampled + Set of unique baselines to be sampled from. If groups of baselines + (contained in lists) are provided, each group will be treated as a + single object by the sampler; its members will not be sampled separately. - + seed : int, optional - Random seed to use if randomize=True. If None, no random seed will be + Random seed to use if randomize=True. If None, no random seed will be set. Default: None. - + Returns ------- sampled_bls : list of tuples or lists of tuples - Bootstrap-sampled set of baselines (will include multiple instances of + Bootstrap-sampled set of baselines (will include multiple instances of some baselines). """ if seed is not None: random.seed(seed) - + # Sample with replacement; return as many baselines/groups as were input return [random.choice(bls) for i in range(len(bls))] -def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, +def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, inplace=False): """ - Find spectral windows, baseline-pairs, times, and/or polarizations that a - set of UVPSpec objects have in common and return new UVPSpec objects that + Find spectral windows, baseline-pairs, times, and/or polarizations that a + set of UVPSpec objects have in common and return new UVPSpec objects that contain only those data. - + If there is no overlap, an error will be raised. - + Parameters ---------- uvp_list : list of UVPSpec List of input UVPSpec objects. - + spws : bool, optional - Whether to retain only the spectral windows that all UVPSpec objects - have in common. For a spectral window to be retained, the entire set of - delays in that window must match across all UVPSpec objects (this will + Whether to retain only the spectral windows that all UVPSpec objects + have in common. For a spectral window to be retained, the entire set of + delays in that window must match across all UVPSpec objects (this will not allow you to just pull out common delays). - - If set to False, the original spectral windows will remain in each + + If set to False, the original spectral windows will remain in each UVPSpec. Default: True. - + blpairs : bool, optional - Whether to retain only the baseline-pairs that all UVPSpec objects have + Whether to retain only the baseline-pairs that all UVPSpec objects have in common. Default: True. - + times : bool, optional - Whether to retain only the (average) times that all UVPSpec objects - have in common. This does not check to make sure that time_1 and time_2 - (i.e. the LSTs for the left-hand and right-hand visibilities that went - into the power spectrum calculation) are the same. See the + Whether to retain only the (average) times that all UVPSpec objects + have in common. This does not check to make sure that time_1 and time_2 + (i.e. the LSTs for the left-hand and right-hand visibilities that went + into the power spectrum calculation) are the same. See the UVPSpec.time_avg_array attribute for more details. Default: True. - + pols : bool, optional - Whether to retain only the polarizations that all UVPSpec objects have + Whether to retain only the polarizations that all UVPSpec objects have in common. Default: True. - + inplace : bool, optional - Whether the selection should be applied to the UVPSpec objects + Whether the selection should be applied to the UVPSpec objects in-place, or new copies of the objects should be returned. - + Returns ------- uvp_list : list of UVPSpec, optional - List of UVPSpec objects with the overlap selection applied. This will + List of UVPSpec objects with the overlap selection applied. This will only be returned if inplace = False. """ if len(uvp_list) < 2: raise IndexError("uvp_list must contain two or more UVPSpec objects.") - + # Get times that are common to all UVPSpec objects in the list if times: common_times = np.unique(uvp_list[0].time_avg_array) - has_times = [np.isin(common_times, uvp.time_avg_array) + has_times = [np.isin(common_times, uvp.time_avg_array) for uvp in uvp_list] common_times = common_times[np.all(has_times, axis=0)] print "common_times:", common_times - + # Get baseline-pairs that are common to all if blpairs: common_blpairs = np.unique(uvp_list[0].blpair_array) - has_blpairs = [np.isin(common_blpairs, uvp.blpair_array) + has_blpairs = [np.isin(common_blpairs, uvp.blpair_array) for uvp in uvp_list] common_blpairs = common_blpairs[np.all(has_blpairs, axis=0)] print "common_blpairs:", common_blpairs - + # Get polarizations that are common to all if pols: common_pols = np.unique(uvp_list[0].pol_array) has_pols = [np.isin(common_pols, uvp.pol_array) for uvp in uvp_list] common_pols = common_pols[np.all(has_pols, axis=0)] print "common_pols:", common_pols - + # Get common spectral windows (the entire window must match) # Each row of common_spws is a list of that spw's index in each UVPSpec if spws: common_spws = [] for spw in range(uvp_list[0].Nspws): dlys0 = uvp_list[0].get_dlys((spw,)) - + # Check if this window exists in all UVPSpec objects found_spws = [spw, ] missing = False for uvp in uvp_list: # Check if any spws in this UVPSpec match the current spw - matches_spw = np.array([ np.array_equal(dlys0, uvp.get_dlys((i,))) + matches_spw = np.array([ np.array_equal(dlys0, uvp.get_dlys((i,))) for i in range(uvp.Nspws) ]) if not np.any(matches_spw): missing = True @@ -188,122 +188,130 @@ def select_common(uvp_list, spws=True, blpairs=True, times=True, pols=True, else: # Store which spw of this UVPSpec it was found in found_spws.append( np.where(matches_spw)[0] ) - + # Only add this spw to the list if it was found in all UVPSpecs if missing: continue common_spws.append(found_spws) common_spws = np.array(common_spws).T # Transpose print "common_spws:", common_spws - + # Check that this won't be an empty selection if spws and len(common_spws) == 0: raise ValueError("No spectral windows were found that exist in all " "spectra (the entire spectral window must match).") - + if blpairs and len(common_blpairs) == 0: raise ValueError("No baseline-pairs were found that exist in all spectra.") - + if times and len(common_times) == 0: raise ValueError("No times were found that exist in all spectra.") - + if pols and len(common_pols) == 0: raise ValueError("No polarizations were found that exist in all spectra.") - + # Apply selections out_list = [] for i, uvp in enumerate(uvp_list): _spws, _blpairs, _times, _pols = None, None, None, None - + # Set indices of blpairs, times, and pols to keep if blpairs: _blpairs = common_blpairs if times: _times = common_times if pols: _pols = common_pols if spws: _spws = common_spws[i] - - _uvp = uvp.select(spws=_spws, blpairs=_blpairs, times=_times, + + _uvp = uvp.select(spws=_spws, blpairs=_blpairs, times=_times, pols=_pols, inplace=inplace) if not inplace: out_list.append(_uvp) - + # Return if not inplace if not inplace: return out_list -def average_spectra(uvp_in, blpair_groups=None, time_avg=False, - blpair_weights=None, normalize_weights=True, inplace=True): +def average_spectra(uvp_in, blpair_groups=None, time_avg=False, + blpair_weights=None, error_field=None, + normalize_weights=True, inplace=True): """ - Average power spectra across the baseline-pair-time axis, weighted by + Average power spectra across the baseline-pair-time axis, weighted by each spectrum's integration time. - - This is an "incoherent" average, in the sense that this averages power - spectra, rather than visibility data. The 'nsample_array' and + + This is an "incoherent" average, in the sense that this averages power + spectra, rather than visibility data. The 'nsample_array' and 'integration_array' will be updated to reflect the averaging. - In the case of averaging across baseline pairs, the resultant averaged - spectrum is assigned to the zeroth blpair in the group. In the case of - time averaging, the time and LST arrays are assigned to the mean of the + In the case of averaging across baseline pairs, the resultant averaged + spectrum is assigned to the zeroth blpair in the group. In the case of + time averaging, the time and LST arrays are assigned to the mean of the averaging window. - Note that this is designed to be separate from spherical binning in k: - here we are not connecting k_perp modes to k_para modes. However, if - blpairs holds groups of iso baseline separation, then this is + Note that this is designed to be separate from spherical binning in k: + here we are not connecting k_perp modes to k_para modes. However, if + blpairs holds groups of iso baseline separation, then this is equivalent to cylindrical binning in 3D k-space. - If you want help constructing baseline-pair groups from baseline + If you want help constructing baseline-pair groups from baseline groups, see self.get_blpair_groups_from_bl_groups. Parameters ---------- uvp_in : UVPSpec Input power spectrum (to average over). - + blpair_groups : list of baseline-pair groups - List of list of tuples or integers. All power spectra in a - baseline-pair group are averaged together. If a baseline-pair + List of list of tuples or integers. All power spectra in a + baseline-pair group are averaged together. If a baseline-pair exists in more than one group, a warning is raised. - - Ex: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], - [((4, 6), (4, 6))]] + + Ex: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], + [((4, 6), (4, 6))]] or blpair_groups = [ [1002001002, 2003002003], [4006004006] ] time_avg : bool, optional If True, average power spectra across the time axis. Default: False. - + blpair_weights : list of weights (float or int), optional - Relative weight of each baseline-pair when performing the average. This - is useful for bootstrapping. This should have the same shape as - blpair_groups if specified. The weights are automatically normalized - within each baseline-pair group. Default: None (all baseline pairs have + Relative weight of each baseline-pair when performing the average. This + is useful for bootstrapping. This should have the same shape as + blpair_groups if specified. The weights are automatically normalized + within each baseline-pair group. Default: None (all baseline pairs have unity weights). - + + error_field: string or list, optional + If errorbars have been entered into stats_array, will do a weighted + sum to shrink the error bars down to the size of the averaged + data_array. Error_field strings be keys of stats_array. If list, + does this for every specified key. Every stats_array key that is + not specified is thrown out of the new averaged object. + normalize_weights: bool, optional Whether to normalize the baseline-pair weights so that: Sum(blpair_weights) = N_blpairs If False, no normalization is applied to the weights. Default: True. - + inplace : bool, optional - If True, edit data in self, else make a copy and return. Default: + If True, edit data in self, else make a copy and return. Default: True. Notes ----- - Currently, every baseline-pair in a blpair group must have the same - Ntimes, unless time_avg=True. Future versions may support - baseline-pair averaging of heterogeneous time arrays. This includes - the scenario of repeated blpairs (e.g. in bootstrapping), which will + Currently, every baseline-pair in a blpair group must have the same + Ntimes, unless time_avg=True. Future versions may support + baseline-pair averaging of heterogeneous time arrays. This includes + the scenario of repeated blpairs (e.g. in bootstrapping), which will return multiple copies of their time_array. """ if inplace: uvp = uvp_in else: uvp = copy.deepcopy(uvp_in) - + # Copy these, so we don't modify the input lists blpair_groups = copy.deepcopy(blpair_groups) blpair_weights = copy.deepcopy(blpair_weights) # If blpair_groups were fed in, enforce type and structure if blpair_groups is not None: - + # Enforce shape of blpair_groups assert isinstance(blpair_groups[0], list), \ "blpair_groups must be fed as a list of baseline-pair lists. " \ @@ -311,7 +319,7 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Convert blpair_groups to list of blpair group integers if isinstance(blpair_groups[0][0], tuple): - new_blpair_grps = [map(lambda blp: uvp.antnums_to_blpair(blp), blpg) + new_blpair_grps = [map(lambda blp: uvp.antnums_to_blpair(blp), blpg) for blpg in blpair_groups] blpair_groups = new_blpair_grps else: @@ -323,10 +331,10 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Print warning if a blpair appears more than once in all of blpair_groups all_blpairs = [item for sublist in blpair_groups for item in sublist] - if len(set(all_blpairs)) < len(all_blpairs): + if len(set(all_blpairs)) < len(all_blpairs): print("Warning: some baseline-pairs are repeated between blpair "\ "averaging groups.") - + # Create baseline-pair weights list if not specified if blpair_weights is None: # Assign unity weights to baseline-pair groups that were specified @@ -339,7 +347,21 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, except: raise IndexError("blpair_weights must have the same shape as " "blpair_groups") - + + # stat_l is a list of supplied error_fields, to sum over. + if isinstance(error_field, (list, tuple, np.ndarray)): + stat_l = list(error_field) + elif isinstance(error_field, (str, np.str)): + stat_l = [error_field] + else: + stat_l = [] + + for stat in stat_l: + if hasattr(uvp, "stats_array"): + if stat not in uvp.stats_array.keys(): + raise KeyError("error_field \"%s\" not found in stats_array keys." % stat) + + # For baseline pairs not in blpair_groups, add them as their own group extra_blpairs = set(uvp.blpair_array) - set(all_blpairs) blpair_groups += map(lambda blp: [blp], extra_blpairs) @@ -348,66 +370,100 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, # Create new data arrays data_array, wgts_array = odict(), odict() ints_array, nsmp_array = odict(), odict() + stats_array = odict([[stat, odict()] for stat in stat_l]) + + # will average covariance array by default + store_cov = hasattr(uvp,"cov_array") + if store_cov: + cov_array = odict() + # Iterate over spectral windows for spw in range(uvp.Nspws): spw_data, spw_wgts, spw_ints, spw_nsmp = [], [], [], [] - + spw_stats = odict([[stat, []] for stat in stat_l]) + if store_cov: + spw_cov = [] # Iterate over polarizations for i, p in enumerate(uvp.pol_array): pol_data, pol_wgts, pol_ints, pol_nsmp = [], [], [], [] + pol_stats = odict([[stat, []] for stat in stat_l]) + if store_cov: + pol_cov = [] # Iterate over baseline-pair groups for j, blpg in enumerate(blpair_groups): bpg_data, bpg_wgts, bpg_ints, bpg_nsmp = [], [], [], [] + bpg_stats = odict([[stat, []] for stat in stat_l]) + if store_cov: + bpg_cov = [] w_list = [] - - # Sum over all weights within this baseline group to get - # normalization (if weights specified). The normalization is + + # Sum over all weights within this baseline group to get + # normalization (if weights specified). The normalization is # calculated so that Sum (blpair wgts) = no. baselines. if blpair_weights is not None: blpg_wgts = np.array(blpair_weights[j]) norm = np.sum(blpg_wgts) if normalize_weights else 1. - + if norm <= 0.: raise ValueError("Sum of baseline-pair weights in " "group %d is <= 0." % j) blpg_wgts *= float(blpg_wgts.size) / norm # Apply normalization else: blpg_wgts = np.ones(len(blpg)) - + # Iterate within a baseline-pair group and get integration- # weighted data for k, blp in enumerate(blpg): - + # Get no. samples and construct integration weight nsmp = uvp.get_nsamples(spw, blp, p)[:, None] data = uvp.get_data(spw, blp, p) wgts = uvp.get_wgts(spw, blp, p) ints = uvp.get_integrations(spw, blp, p)[:, None] w = (ints * np.sqrt(nsmp)) - + if store_cov: + cov = uvp.get_cov(spw,blp,p) + # Get error bar weights and set invalid ones to zero. + errws = {} + for stat in stat_l: + errs = uvp.get_stats(stat, (spw, blp, p)).copy() + no_data = np.isnan(errs) + errs[~no_data] = 1. / errs[~no_data] ** 2. + errs[no_data] = 0. + errws[stat] = errs + # Take time average if desired if time_avg: data = (np.sum(data * w, axis=0) \ / np.sum(w, axis=0).clip(1e-10, np.inf))[None] wgts = (np.sum(wgts * w[:, None], axis=0) \ - / np.sum(w, axis=0).clip(1e-10, np.inf)[:, None])[None] + / np.sum(w, axis=0).clip(1e-10, np.inf)[:, None])[None] ints = (np.sum(ints * w, axis=0) \ / np.sum(w, axis=0).clip(1e-10, np.inf))[None] nsmp = np.sum(nsmp, axis=0)[None] w = np.sum(w, axis=0)[None] - - # Add multiple copies of data for each baseline according + if store_cov: + cov = (np.sum(cov * w[:, None], axis=0) \ + / np.sum(w,axis=0).clip(1e-10, np.inf))[None] + + for stat in stat_l: + errws[stat] = np.sum(errws[stat], axis=0)[None] + # Add multiple copies of data for each baseline according # to the weighting/multiplicity for m in range(int(blpg_wgts[k])): bpg_data.append(data * w) bpg_wgts.append(wgts * w[:, None]) bpg_ints.append(ints * w) bpg_nsmp.append(nsmp) + [bpg_stats[stat].append(errws[stat]) for stat in stat_l] w_list.append(w) + if store_cov: + bpg_cov.append(cov * w[:, None]) - # Take integration-weighted averages, with clipping to deal + + # Take integration-weighted averages, with clipping to deal # with zeros bpg_data = np.sum(bpg_data, axis=0) \ / np.sum(w_list, axis=0).clip(1e-10, np.inf) @@ -416,32 +472,54 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, bpg_nsmp = np.sum(bpg_nsmp, axis=0) bpg_ints = np.sum(bpg_ints, axis=0) \ / np.sum(w_list, axis=0).clip(1e-10, np.inf) + if store_cov: + bpg_cov = np.sum(bpg_cov, axis=0) \ + / np.sum(w_list, axis=0)[:,None].clip(1e-10, np.inf) w_list = np.sum(w_list, axis=0) - + + # For errors that aren't 0, sum weights and take inverse square + # root. Otherwise, set to invalid value -99. + for stat in stat_l: + arr = np.sum(bpg_stats[stat], axis=0) + not_valid = np.isclose(arr, 0., 1e-10) + arr[not_valid] *= np.nan + arr[~not_valid] = arr[~not_valid] ** (-0.5) + bpg_stats[stat] = arr + # Append to lists (polarization) pol_data.extend(bpg_data); pol_wgts.extend(bpg_wgts) pol_ints.extend(bpg_ints); pol_nsmp.extend(bpg_nsmp) + [pol_stats[stat].extend(bpg_stats[stat]) for stat in stat_l] + if store_cov: + pol_cov.extend(bpg_cov) # Append to lists (spectral window) spw_data.append(pol_data); spw_wgts.append(pol_wgts) spw_ints.append(pol_ints); spw_nsmp.append(pol_nsmp) + [spw_stats[stat].append(pol_stats[stat]) for stat in stat_l] + if store_cov: + spw_cov.append(pol_cov) # Append to dictionaries data_array[spw] = np.moveaxis(spw_data, 0, -1) wgts_array[spw] = np.moveaxis(spw_wgts, 0, -1) ints_array[spw] = np.moveaxis(spw_ints, 0, -1)[:, 0, :] nsmp_array[spw] = np.moveaxis(spw_nsmp, 0, -1)[:, 0, :] + for stat in stat_l: + stats_array[stat][spw] = np.moveaxis(spw_stats[stat], 0, -1) + if store_cov: + cov_array[spw] = np.moveaxis(np.array(spw_cov), 0, -1) # Iterate over blpair groups one more time to assign metadata time_1, time_2, time_avg_arr = [], [], [] lst_1, lst_2, lst_avg_arr = [], [], [] blpair_arr, bl_arr = [], [] - + for i, blpg in enumerate(blpair_groups): - + # Get blpairts indices for zeroth blpair in this group blpairts = uvp.blpair_to_indices(blpg[0]) - + # Assign meta-data bl_arr.extend(list(uvputils._blpair_to_bls(blpg[0]))) if time_avg: @@ -470,50 +548,56 @@ def average_spectra(uvp_in, blpair_groups=None, time_avg=False, uvp.Nblpairts = len(time_avg_arr) uvp.Nblpairs = len(np.unique(blpair_arr)) uvp.Nbls = len(bl_arr) - + # Baselines uvp.bl_array = bl_arr uvp.bl_vecs = bl_vecs uvp.blpair_array = np.array(blpair_arr) - + # Times uvp.time_1_array = np.array(time_1) uvp.time_2_array = np.array(time_2) uvp.time_avg_array = np.array(time_avg_arr) - + # LSTs uvp.lst_1_array = np.array(lst_1) uvp.lst_2_array = np.array(lst_2) uvp.lst_avg_array = np.array(lst_avg_arr) - + # Data, weights, and no. samples uvp.data_array = data_array uvp.integration_array = ints_array uvp.wgt_array = wgts_array uvp.nsample_array = nsmp_array + if store_cov: + uvp.cov_array = cov_array if hasattr(uvp_in, 'label1'): uvp.label1 = uvp_in.label1 if hasattr(uvp_in, 'label2'): uvp.label2 = uvp_in.label2 + if error_field is not None: + uvp.stats_array = stats_array + elif hasattr(uvp, "stats_array"): + delattr(uvp, "stats_array") + # Validity check uvp.check() - + # Return if inplace == False: return uvp - def fold_spectra(uvp): """ - Average bandpowers from matching positive and negative delay bins onto a - purely positive delay axis. Negative delay bins are still populated, but + Average bandpowers from matching positive and negative delay bins onto a + purely positive delay axis. Negative delay bins are still populated, but are filled with zero power. This is an in-place operation. - - Will only work if uvp.folded == False, i.e. data is currently unfolded - across negative and positive delay. Because this averages the data, the - nsample array is multiplied by a factor of 2. - + + Will only work if uvp.folded == False, i.e. data is currently unfolded + across negative and positive delay. Because this averages the data, the + nsample array is multiplied by a factor of 2. + WARNING: This operation cannot be undone. - + Parameters ---------- uvp : UVPSpec @@ -539,7 +623,7 @@ def fold_spectra(uvp): else: # odd number of dlys left = uvp.data_array[spw][:, :Ndlys//2, :][:, ::-1, :] - right = uvp.data_array[spw][:, Ndlys//2+1:, :] + right = uvp.data_array[spw][:, Ndlys//2+1:, :] uvp.data_array[spw][:, Ndlys//2+1:, :] = np.mean([left, right], axis=0) uvp.data_array[spw][:, :Ndlys//2, :] = 0.0 uvp.nsample_array[spw] *= 2.0 @@ -547,58 +631,58 @@ def fold_spectra(uvp): uvp.folded = True -def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, +def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, seed=None): """ - Generate a bootstrap-sampled average over a set of power spectra. The - sampling is done over user-defined groups of baseline pairs (with + Generate a bootstrap-sampled average over a set of power spectra. The + sampling is done over user-defined groups of baseline pairs (with replacement). - - Multiple UVPSpec objects can be passed to this function. The bootstrap - sampling is carried out over all of the available baseline-pairs (in a - user-specified group) across all UVPSpec objects. (This means that each - UVPSpec object can contribute a different number of baseline-pairs to the + + Multiple UVPSpec objects can be passed to this function. The bootstrap + sampling is carried out over all of the available baseline-pairs (in a + user-specified group) across all UVPSpec objects. (This means that each + UVPSpec object can contribute a different number of baseline-pairs to the average each time.) - + Successive calls to the function will produce different random samples. - + Parameters ---------- uvp_list : list of UVPSpec objects List of UVPSpec objects to form a bootstrap sample from. - - The UVPSpec objects can have different numbers of baseline-pairs and - times, but the averages will only be taken over spectral windows and + + The UVPSpec objects can have different numbers of baseline-pairs and + times, but the averages will only be taken over spectral windows and polarizations that match across all UVPSpec objects. - + blpair_groups : list of baseline-pair groups - List of baseline-pair groups, where each group is a list of tuples or - integers. The bootstrap sampling and averaging is done over the + List of baseline-pair groups, where each group is a list of tuples or + integers. The bootstrap sampling and averaging is done over the baseline-pairs within each group. - - There is no requirement for each UVPSpec object to contain all - baseline-pairs within each specified blpair group, as long as they + + There is no requirement for each UVPSpec object to contain all + baseline-pairs within each specified blpair group, as long as they exist in at least one of the UVPSpec objects. - + time_avg : bool, optional Whether to average over the time axis or not. Default: False. - + seed : int, optional Random seed to use when drawing baseline-pairs. - + Returns ------- uvp_avg_list : list of UVPSpec - List of UVPSpec objects containing averaged power spectra. - - Each object is an averaged version of the corresponding input UVPSpec - object. Each average is over the baseline-pairs sampled from that - object only; to return the true bootstrap average, these objects + List of UVPSpec objects containing averaged power spectra. + + Each object is an averaged version of the corresponding input UVPSpec + object. Each average is over the baseline-pairs sampled from that + object only; to return the true bootstrap average, these objects should be averaged together. - + blpair_wgts_list : list of lists of integers - List of weights used for each baseline-pair in each group, from each - UVPSpec object. This describes how the bootstrap sample was generated. + List of weights used for each baseline-pair in each group, from each + UVPSpec object. This describes how the bootstrap sample was generated. Shape: (len(uvp_list), shape(blpair_groups)). """ # Validate input data @@ -606,41 +690,41 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, if isinstance(uvp_list, UVPSpec): uvp_list = [uvp_list,] assert isinstance(uvp_list, list), \ "uvp_list must be a list of UVPSpec objects" - + # Check that uvp_list contains UVPSpec objects with the correct dimensions for uvp in uvp_list: assert isinstance(uvp, UVPSpec), \ "uvp_list must be a list of UVPSpec objects" - + # Check for same length of time axis if no time averaging will be done if not time_avg: if np.unique([uvp.Ntimes for uvp in uvp_list]).size > 1: raise IndexError("Input UVPSpec objects must have the same number " "of time samples if time_avg is False.") - + # Check that blpair_groups is a list of lists of groups assert isinstance(blpair_groups, list), \ "blpair_groups must be a list of lists of baseline-pair tuples/integers" for grp in blpair_groups: assert isinstance(grp, list), \ "blpair_groups must be a list of lists of baseline-pair tuples/integers" - + # Convert blpair tuples into blpair integers if necessary if isinstance(blpair_groups[0][0], tuple): - new_blp_grps = [map(lambda blp: uvputils._antnums_to_blpair(blp), blpg) + new_blp_grps = [map(lambda blp: uvputils._antnums_to_blpair(blp), blpg) for blpg in blpair_groups] blpair_groups = new_blp_grps - - # Homogenise input UVPSpec objects in terms of available polarizations + + # Homogenise input UVPSpec objects in terms of available polarizations # and spectral windows if len(uvp_list) > 1: uvp_list = select_common(uvp_list, spws=True, pols=True, inplace=False) - + # Loop over UVPSpec objects, looking for available blpairs in each avail_blpairs = [np.unique(uvp.blpair_array) for uvp in uvp_list] - all_avail_blpairs = np.unique([blp for blplist in avail_blpairs + all_avail_blpairs = np.unique([blp for blplist in avail_blpairs for blp in blplist]) - + # Check that all requested blpairs exist in at least one UVPSpec object all_req_blpairs = np.unique([blp for grp in blpair_groups for blp in grp]) missing = [] @@ -650,29 +734,29 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, raise KeyError("The following baseline-pairs were specified, but do " "not exist in any of the input UVPSpec objects: %s" \ % str(missing)) - + # Set random seed if specified if seed is not None: np.random.seed(seed) - - # Sample with replacement from the full list of available baseline-pairs - # in each group, and create a blpair_group and blpair_weights entry for + + # Sample with replacement from the full list of available baseline-pairs + # in each group, and create a blpair_group and blpair_weights entry for # each UVPSpec object blpair_grps_list = [[] for uvp in uvp_list] blpair_wgts_list = [[] for uvp in uvp_list] - + # Loop over each requested blpair group for grp in blpair_groups: - + # Find which blpairs in this group are available in each UVPSpec object avail = [np.intersect1d(grp, blp_list) for blp_list in avail_blpairs] avail_flat = [blp for lst in avail for blp in lst] num_avail = len(avail_flat) - - # Draw set of random integers (with replacement) and convert into + + # Draw set of random integers (with replacement) and convert into # list of weights for each blpair in each UVPSpec draw = np.random.randint(low=0, high=num_avail, size=num_avail) wgt = np.array([(draw == i).sum() for i in range(num_avail)]) - + # Extract the blpair weights for each UVPSpec j = 0 for i in range(len(uvp_list)): @@ -681,16 +765,15 @@ def bootstrap_average_blpairs(uvp_list, blpair_groups, time_avg=False, _wgts = wgt[np.arange(j, j+n_blps)].astype(float) #+ 1e-4 blpair_wgts_list[i].append( list(_wgts) ) j += n_blps - - # Loop over UVPSpec objects and calculate averages in each blpair group, + + # Loop over UVPSpec objects and calculate averages in each blpair group, # using the bootstrap-sampled blpair weights uvp_avg = [] for i, uvp in enumerate(uvp_list): - _uvp = average_spectra(uvp, blpair_groups=blpair_grps_list[i], - blpair_weights=blpair_wgts_list[i], + _uvp = average_spectra(uvp, blpair_groups=blpair_grps_list[i], + blpair_weights=blpair_wgts_list[i], time_avg=time_avg, inplace=False) uvp_avg.append(_uvp) - + # Return list of averaged spectra for now return uvp_avg, blpair_wgts_list - diff --git a/hera_pspec/tests/test_uvpspec.py b/hera_pspec/tests/test_uvpspec.py index a5615f97..f3bbb10b 100644 --- a/hera_pspec/tests/test_uvpspec.py +++ b/hera_pspec/tests/test_uvpspec.py @@ -85,6 +85,31 @@ def test_get_funcs(self): nt.assert_equal(keys, [(0, ((1, 2), (1, 2)), 'XX'), (0, ((1, 3), (1, 3)), 'XX'), (0, ((2, 3), (2, 3)), 'XX')]) + def test_stats_array(self): + # test get_data and set_data + keys = self.uvp.get_all_keys() + nt.assert_raises(ValueError, self.uvp.set_stats, "errors", keys[0], np.linspace(0, 1, 2)) + nt.assert_raises(AttributeError, self.uvp.get_stats, "__", keys[0]) + errs = np.ones((self.uvp.Ntimes, self.uvp.Ndlys)) + self.uvp.set_stats("errors", keys[0], errs) + e = self.uvp.get_stats("errors", keys[0]) + nt.assert_true(np.all(self.uvp.get_stats("errors", keys[0]) == errs)) + nt.assert_true(np.all(np.isnan(self.uvp.get_stats("errors", keys[1])))) + + #self.uvp.set_stats("errors", keys[0], -99.) + blpairs = self.uvp.get_blpairs() + u = self.uvp.average_spectra([blpairs], time_avg=False, error_field="errors", inplace=False) + nt.assert_true(np.all(u.get_stats("errors", keys[0])[0] == np.ones(u.Ndlys))) + self.uvp.set_stats("who?", keys[0], errs) + u = self.uvp.average_spectra([blpairs], time_avg=False, error_field=["errors", "who?"], inplace=False) + nt.assert_true(np.all( u.get_stats("errors", keys[0]) == u.get_stats("who?", keys[0]))) + u.select(times=np.unique(u.time_avg_array)[:20]) + if os.path.exists('./ex.hdf5'): + os.remove('./ex.hdf5') + u.write_hdf5('./ex.hdf5') + u.read_hdf5('./ex.hdf5') + os.remove('./ex.hdf5') + def test_convert_deltasq(self): uvp = copy.deepcopy(self.uvp) uvp.convert_to_deltasq(little_h=True) diff --git a/hera_pspec/uvpspec.py b/hera_pspec/uvpspec.py index b50b517f..3d8237c3 100644 --- a/hera_pspec/uvpspec.py +++ b/hera_pspec/uvpspec.py @@ -44,6 +44,9 @@ def __init__(self): desc = "Nsample dictionary, if the pspectra have been incoherently averaged (i.e. averaged after squaring), this is " \ "the effective number of samples in that average (float type). This is not the same as the pyuvdata.UVData nsample_array." self._nsample_array = PSpecParam("nsample_array", description=desc, expected_type=np.float64, form="(Nblpairts, Npols)") + desc = ("Power spectrum stats array with stats type and spw integer as keys and values as complex ndarrays with same shape" + " as data_array") + self._stats_array = PSpecParam("stats_array", description=desc, expected_type=np.complex128, form="(Nblpairts, Ndlys, Npols)") self._spw_array = PSpecParam("spw_array", description="Spw integer array.", form="(Nspwdlys,)", expected_type=np.uint16) self._freq_array = PSpecParam("freq_array", description="Frequency array of the original data in Hz.", form="(Nfreqs,)", expected_type=np.float64) self._dly_array = PSpecParam("dly_array", description="Delay array in seconds.", form="(Nspwdlys,)", expected_type=np.float64) @@ -80,44 +83,91 @@ def __init__(self): self._OmegaPP = PSpecParam("OmegaP", description="Integral of unitless beam power squared over the sky [steradians].", form="(Nbeam_freqs, Npols)", expected_type=np.float64) self._beam_freqs = PSpecParam("beam_freqs", description="Frequency bins of the OmegaP and OmegaPP beam-integral arrays [Hz].", form="(Nbeam_freqs,)", expected_type=np.float64) self._cosmo = PSpecParam("cosmo", description="Instance of conversion.Cosmo_Conversions class.", expected_type=conversions.Cosmo_Conversions) - # collect all parameters: required and non-required - self._all_params = sorted(map(lambda p: p[1:], fnmatch.filter(self.__dict__.keys(), '_*'))) - - # specify required params: these are required for read / write and self.check() - self._req_params = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", "Nspws", "Ndlys", - "Npols", "Nfreqs", "history", "data_array", "wgt_array", "integration_array", - "spw_array", "freq_array", "dly_array", "pol_array", "lst_1_array", - "lst_2_array", "time_1_array", "time_2_array", "blpair_array", "Nbls", - "bl_vecs", "bl_array", "channel_width", "telescope_location", "weighting", - "vis_units", "norm_units", "taper", "norm", "nsample_array", 'lst_avg_array', - 'time_avg_array', 'folded', "scalar_array", "labels", "label_1_array", + + # Collect all parameters: required and non-required + self._all_params = sorted(map(lambda p: p[1:], + fnmatch.filter(self.__dict__.keys(), '_*'))) + + # Specify required params: these are required for read / write and + # self.check() + self._req_params = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", + "Nspws", "Ndlys", "Npols", "Nfreqs", "history", + "data_array", "wgt_array", "integration_array", + "spw_array", "freq_array", "dly_array", "pol_array", + "lst_1_array", "lst_2_array", "time_1_array", + "time_2_array", "blpair_array", "Nbls", + "bl_vecs", "bl_array", "channel_width", + "telescope_location", "weighting", "vis_units", + "norm_units", "taper", "norm", "nsample_array", + "lst_avg_array", "time_avg_array", "folded", + "scalar_array", "labels", "label_1_array", "label_2_array","store_cov"] - # all parameters must fall into one and only one of the following groups, which are used in __eq__ - self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", "Nspws", "Ndlys", - "Npols", "Nfreqs", "history", "Nbls", "channel_width", "weighting", - "vis_units", "norm", "norm_units", "taper", "cosmo", "beamfile" ,'folded',"store_cov"] - self._ndarrays = ["spw_array", "freq_array", "dly_array", "pol_array", "lst_1_array", - 'lst_avg_array', 'time_avg_array', "lst_2_array", "time_1_array", - "time_2_array", "blpair_array", "OmegaP", "OmegaPP", "beam_freqs", - "bl_vecs", "bl_array", "telescope_location", "scalar_array", 'labels', - 'label_1_array', 'label_2_array'] - self._dicts = ["data_array", "wgt_array", "integration_array", "nsample_array", "cov_array"] + # All parameters must fall into one and only one of the following + # groups, which are used in __eq__ + self._immutables = ["Ntimes", "Nblpairts", "Nblpairs", "Nspwdlys", + "Nspws", "Ndlys", "Npols", "Nfreqs", "history", + "Nbls", "channel_width", "weighting", "vis_units", + "norm", "norm_units", "taper", "cosmo", "beamfile", + 'folded', "store_cov"] + self._ndarrays = ["spw_array", "freq_array", "dly_array", "pol_array", + "lst_1_array", "lst_avg_array", "time_avg_array", + "lst_2_array", "time_1_array", "time_2_array", + "blpair_array", "OmegaP", "OmegaPP", "beam_freqs", + "bl_vecs", "bl_array", "telescope_location", + "scalar_array", "labels", "label_1_array", + "label_2_array"] + + self._dicts = ["data_array", "wgt_array", "integration_array", + "nsample_array", "cov_array"] + self._dicts_of_dicts = ["stats_array"] # define which attributes are considred meta data. Large attrs should be constructed as datasets - self._meta_dsets = ["lst_1_array", "lst_2_array", "time_1_array", "time_2_array", "blpair_array", - "bl_vecs", "bl_array", 'lst_avg_array', 'time_avg_array', 'OmegaP', 'OmegaPP', - "label_1_array", "label_2_array"] - self._meta_attrs = sorted(set(self._all_params) - set(self._dicts) - set(self._meta_dsets)) + self._meta_dsets = ["lst_1_array", "lst_2_array", "time_1_array", + "time_2_array", "blpair_array", "bl_vecs", + "bl_array", "lst_avg_array", "time_avg_array", + "OmegaP", "OmegaPP", "label_1_array", + "label_2_array"] + self._meta_attrs = sorted(set(self._all_params) - set(self._dicts) - set(self._meta_dsets) - set(self._dicts_of_dicts)) self._meta = sorted(set(self._meta_dsets).union(set(self._meta_attrs))) # check all params are covered - assert len(set(self._all_params) - set(self._dicts) - set(self._immutables) - set(self._ndarrays)) == 0 + assert len(set(self._all_params) - set(self._dicts) - set(self._immutables) - set(self._ndarrays) - set(self._dicts_of_dicts)) == 0 # Default parameter values self.folded = False + def get_cov(self,key,*args): + """ + Slice into covariance array with a specified data key in the format + (spw, ((ant1, ant2),(ant3, ant4)), pol) + + or + + (spw, blpair-integer, pol) + + where spw is the spectral window integer, ant1 etc. arge integers, + and pol is either a polarization string (ex. 'XX') or integer (ex. -5). + + Parameters + ---------- + key: tuple + Baseline-pair key + + Returns + ------- + data : complex ndarray + Shape (Ntimes, Ndlys, Ndlys) + """ + spw, blpairts, pol = self.key_to_indices(key, *args) + # Need to deal with folded data! + # if data has been folded, return only positive delays + if hasattr(self,'cov_array'): + return self.cov_array[spw][blpairts, :, :, pol] + else: + raise AttributeError("No covariance array has been calculated.") + def get_data(self, key, *args): """ Slice into data_array with a specified data key in the format @@ -394,6 +444,63 @@ def get_spw_ranges(self, spws=None): return spw_ranges + def get_stats(self, stat, key, *args): + """ + Returns a statistic from the stats_array dictionary. + + Parameters + ---------- + stat: string + The statistic to return. + + spw: int + Choice of spectral window. + """ + if not hasattr(self, "stats_array"): + raise AttributeError("No stats have been entered to this UVPSpec object") + + assert stat in self.stats_array.keys(), "Statistic name not found in stat keys." + + spw, blpairts, pol = self.key_to_indices(key, *args) + data = self.stats_array[stat] + return data[spw][blpairts, :, pol] + + def set_stats(self, stat, key, statistic, *args): + """ + Sets a statistic in the stats_array. + + Parameters + ---------- + stat: string + Name of the statistic. + + spw: int + Spectral window of the statistic. + + statistic: ndarray + Array with statistics to set. Must be same shape as the same slice + of data_array. + """ + spw, blpairts, pol = self.key_to_indices(key, *args) + statistic = np.asarray(statistic) + + if self.data_array[spw][blpairts, :, pol].shape != statistic.shape: + errmsg = ("Input array (shape %r) must match the shape of data_array" + "spectra (shape (%i, %i)).") % (statistic.shape, + self.Ntimes, + self.Ndlys) + raise ValueError(errmsg) + + if not hasattr(self, "stats_array"): + self.stats_array = odict() + + dtype = statistic.dtype + if stat not in self.stats_array.keys(): + self.stats_array[stat] = odict([[i, np.nan * np.ones(self.data_array[i].shape, dtype=dtype)] + for i in range(self.Nspws)]) + + self.stats_array[stat][spw][blpairts, :, pol] = statistic + def convert_to_deltasq(self, little_h=True, inplace=True): """ Convert from P(k) to Delta^2(k) by multiplying by k^3 / (2pi^2). @@ -897,6 +1004,7 @@ def write_to_group(self, group, run_check=True): Whether to run a validity check on the UVPSpec object before writing it to the HDF5 group. Default: True. """ + # Run check if run_check: self.check() @@ -934,6 +1042,15 @@ def write_to_group(self, group, run_check=True): data=self.cov_array[i], dtype=np.complex) + # Store any statistics arrays + if hasattr(self, "stats_array"): + for s in self.stats_array.keys(): + data = self.stats_array[s] + for i in np.unique(self.spw_array): + group.create_dataset("stats_{}_{}".format(s, i), + data=data[i], dtype=data[i].dtype) + + def write_hdf5(self, filepath, overwrite=False, run_check=True): """ Write a UVPSpec object to HDF5 file. @@ -1096,6 +1213,17 @@ def check(self, just_meta=False): except: raise AssertionError(err_msg) + elif p in self._dicts_of_dicts: + assert isinstance(getattr(self, p), (dict, odict)) + for k in getattr(self, p).keys(): + assert isinstance(getattr(self, p)[k], (dict, odict)) + for j in getattr(self, p)[k].keys(): + assert isinstance(getattr(self, p)[k][j], np.ndarray) + + try: + getattr(self, p)[k][j] = a.expected_type(getattr(self, p)[k][j]) + except: + raise AssertionError(err_msg) def _clear(self): """ Clear UVPSpec of all parameters. Warning: this cannot be undone. @@ -1302,8 +1430,9 @@ def generate_noise_spectra(self, spw, pol, Tsys, blpairs=None, little_h=True, return P_N + def average_spectra(self, blpair_groups=None, time_avg=False, - blpair_weights=None, inplace=True): + blpair_weights=None, error_field=None, inplace=True): """ Average power spectra across the baseline-pair-time axis, weighted by each spectrum's integration time. @@ -1328,9 +1457,10 @@ def average_spectra(self, blpair_groups=None, time_avg=False, Parameters ---------- blpair_groups : list - List of list of baseline-pair group tuples or integers. All power spectra - in a baseline-pair group are averaged together. If a baseline-pair - exists in more than one group, a warning is raised. Examples:: + List of list of baseline-pair group tuples or integers. All power + spectra in a baseline-pair group are averaged together. If a + baseline-pair exists in more than one group, a warning is raised. + Examples:: blpair_groups = [ [((1, 2), (1, 2)), ((2, 3), (2, 3))], [((4, 6), (4, 6))]] @@ -1349,6 +1479,13 @@ def average_spectra(self, blpair_groups=None, time_avg=False, normalized within each baseline-pair group. Default: None (all baseline pairs have unity weights). + error_field: string or list, optional + If errorbars have been entered into stats_array, will do a weighted + sum to shrink the error bars down to the size of the averaged + data_array. Error_field strings be keys of stats_array. If list, + does this for every specified key. Every stats_array key that is + not specified is thrown out of the new averaged object. + inplace : bool, optional If True, edit data in self, else make a copy and return. Default: True. @@ -1364,11 +1501,13 @@ def average_spectra(self, blpair_groups=None, time_avg=False, if inplace: grouping.average_spectra(self, blpair_groups=blpair_groups, time_avg=time_avg, + error_field=error_field, blpair_weights=blpair_weights, inplace=True) else: return grouping.average_spectra(self, blpair_groups=blpair_groups, time_avg=time_avg, + error_field=error_field, blpair_weights=blpair_weights, inplace=False) @@ -1409,7 +1548,8 @@ def get_blpair_groups_from_bl_groups(self, blgroups, only_pairs_in_bls=False): """ blpair_groups = [] for blg in blgroups: - blp_select = uvputils._get_blpairs_from_bls(self, blg, only_pairs_in_bls=only_pairs_in_bls) + blp_select = uvputils._get_blpairs_from_bls(self, blg, + only_pairs_in_bls=only_pairs_in_bls) blp = sorted(set(self.blpair_array[blp_select])) if len(blp) > 0: blpair_groups.append(blp) @@ -1450,7 +1590,8 @@ def compute_scalar(self, spw, pol, num_steps=1000, little_h=True, Power spectrum normalization scalar. """ # make assertions - assert hasattr(self, 'cosmo'), "self.cosmo object must exist to compute scalar. See self.set_cosmology()" + assert hasattr(self, 'cosmo'), \ + "self.cosmo object must exist to compute scalar. See self.set_cosmology()" assert hasattr(self, 'OmegaP') and hasattr(self, "OmegaPP") and hasattr(self, "beam_freqs"), "self.OmegaP, "\ "self.OmegaPP and self.beam_freqs must exist to compute scalar." @@ -1460,10 +1601,12 @@ def compute_scalar(self, spw, pol, num_steps=1000, little_h=True, # compute scalar OP = self.OmegaP[:, self.pol_to_indices(pol)].squeeze() OPP = self.OmegaPP[:, self.pol_to_indices(pol)].squeeze() - scalar = pspecbeam._compute_pspec_scalar(self.cosmo, self.beam_freqs, OPP / OP**2, spw_freqs, - num_steps=num_steps, taper=self.taper, little_h=little_h, + scalar = pspecbeam._compute_pspec_scalar(self.cosmo, self.beam_freqs, + OPP / OP**2, spw_freqs, + num_steps=num_steps, + taper=self.taper, + little_h=little_h, noise_scalar=noise_scalar) - return scalar def combine_uvpspec(uvps, verbose=True): @@ -1502,11 +1645,10 @@ def combine_uvpspec(uvps, verbose=True): Nblpairts = len(new_blpts) Npols = len(new_pols) - - # store covariance only if all uvps have stored covariance. + # Store covariance only if all uvps have stored covariance. store_cov = np.all([hasattr(uvp,'cov_array') for uvp in uvps]) - # Create new empty data arrays and fill spw arrays + # Create new empty data arrays and fill spw arrays u.data_array = odict() u.integration_array = odict() u.wgt_array = odict() @@ -1515,36 +1657,35 @@ def combine_uvpspec(uvps, verbose=True): u.cov_array = odict() u.scalar_array = np.empty((Nspws, Npols), np.float) u.freq_array, u.spw_array, u.dly_array = [], [], [] - + # Loop over spectral windows for i, spw in enumerate(new_spws): - + # Initialize new arrays u.data_array[i] = np.empty((Nblpairts, spw[2], Npols), np.complex128) u.integration_array[i] = np.empty((Nblpairts, Npols), np.float64) u.wgt_array[i] = np.empty((Nblpairts, spw[2], 2, Npols), np.float64) u.nsample_array[i] = np.empty((Nblpairts, Npols), np.float64) if store_cov: - u.cov_array[i]=np.empty((Nblpairts, spw[2], spw[2], Npols), np.complex128) - + u.cov_array[i] = np.empty((Nblpairts, spw[2], spw[2], Npols), + np.complex128) + # Set frequencies and delays spw_Nfreqs = spw[-1] spw_freqs = np.linspace(*spw, endpoint=False) spw_dlys = np.fft.fftshift( - np.fft.fftfreq(spw_Nfreqs, + np.fft.fftfreq(spw_Nfreqs, np.median(np.diff(spw_freqs)) ) ) u.spw_array.extend(np.ones(spw_Nfreqs, np.int32) * i) u.freq_array.extend(spw_freqs) u.dly_array.extend(spw_dlys) - - # Convert to numpy arrays + # Convert to numpy arrays u.spw_array = np.array(u.spw_array) u.freq_array = np.array(u.freq_array) u.dly_array = np.array(u.dly_array) u.pol_array = np.array(new_pols) - # Number of spectral windows, delays etc. u.Nspws = Nspws u.Nblpairts = Nblpairts @@ -1552,7 +1693,7 @@ def combine_uvpspec(uvps, verbose=True): u.Nfreqs = len(np.unique(u.freq_array)) u.Nspwdlys = len(u.spw_array) u.Ndlys = len(np.unique(u.dly_array)) - + # Prepare time and label arrays u.time_1_array, u.time_2_array = np.empty(Nblpairts, np.float), \ np.empty(Nblpairts, np.float) @@ -1569,38 +1710,38 @@ def combine_uvpspec(uvps, verbose=True): uvp_spws = [_uvp.get_spw_ranges() for _uvp in uvps] uvp_blpts = [zip(_uvp.blpair_array, _uvp.time_avg_array) for _uvp in uvps] uvp_pols = [_uvp.pol_array.tolist() for _uvp in uvps] - + # Construct dict of label indices, to be used for re-ordering later u_lbls = {lbl: ll for ll, lbl in enumerate(u.labels)} - + # fill in data arrays depending on concat ax if concat_ax == 'spw': - + # Concatenate spectral windows for i, spw in enumerate(new_spws): l = [spw in _u for _u in uvp_spws].index(True) m = [spw == _spw for _spw in uvp_spws[l]].index(True) - + # Lookup indices of new_blpts in the uvp_blpts[l] array blpts_idxs = uvputils._fast_lookup_blpairts(uvp_blpts[l], new_blpts) if i == 0: blpts_idxs0 = blpts_idxs.copy() - + # Loop over polarizations for k, p in enumerate(new_pols): q = uvp_pols[l].index(p) u.scalar_array[i,k] = uvps[l].scalar_array[m,q] - + # Loop over blpair-times for j, blpt in enumerate(new_blpts): - + n = blpts_idxs[j] - + # Data/weight/integration arrays u.data_array[i][j,:,k] = uvps[l].data_array[m][n,:,q] u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n, q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] @@ -1620,26 +1761,26 @@ def combine_uvpspec(uvps, verbose=True): u.blpair_array[j] = uvps[0].blpair_array[n] elif concat_ax == 'blpairts': - - # Get mapping of blpair-time indices between old UVPSpec objects and + + # Get mapping of blpair-time indices between old UVPSpec objects and # the new one - blpts_idxs = np.concatenate( + blpts_idxs = np.concatenate( [uvputils._fast_lookup_blpairts(_blpts, new_blpts) for _blpts in uvp_blpts] ) - + is_in = [uvputils._fast_is_in(_blpts, new_blpts) for _blpts in uvp_blpts] # Concatenate blpair-times for j, blpt in enumerate(new_blpts): - + l = [isn[j] for isn in is_in].index(True) n = blpts_idxs[j] - + # Loop over spectral windows for i, spw in enumerate(new_spws): m = [spw == _spw for _spw in uvp_spws[l]].index(True) - + # Loop over polarizations for k, p in enumerate(new_pols): q = uvp_pols[l].index(p) @@ -1650,13 +1791,13 @@ def combine_uvpspec(uvps, verbose=True): u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n,q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] u.label_1_array[i,j,k] = u_lbls[uvps[l].labels[lbl1]] u.label_2_array[i,j,k] = u_lbls[uvps[l].labels[lbl2]] - + # Populate new LST, time, and blpair arrays u.time_1_array[j] = uvps[l].time_1_array[n] u.time_2_array[j] = uvps[l].time_2_array[n] @@ -1667,22 +1808,22 @@ def combine_uvpspec(uvps, verbose=True): u.blpair_array[j] = uvps[l].blpair_array[n] elif concat_ax == 'pol': - + # Concatenate polarizations for k, p in enumerate(new_pols): l = [p in _pols for _pols in uvp_pols].index(True) q = uvp_pols[l].index(p) - - # Get mapping of blpair-time indices between old UVPSpec objects + + # Get mapping of blpair-time indices between old UVPSpec objects # and the new one blpts_idxs = uvputils._fast_lookup_blpairts(uvp_blpts[l], new_blpts) - if k == 0: blpts_idxs0 = blpts_idxs.copy() - + if k == 0: blpts_idxs0 = blpts_idxs.copy() + # Loop over spectral windows for i, spw in enumerate(new_spws): m = [spw == _spw for _spw in uvp_spws[l]].index(True) u.scalar_array[i,k] = uvps[l].scalar_array[m,q] - + # Loop over blpair-times for j, blpt in enumerate(new_blpts): n = blpts_idxs[j] @@ -1692,7 +1833,7 @@ def combine_uvpspec(uvps, verbose=True): u.wgt_array[i][j,:,:,k] = uvps[l].wgt_array[m][n,:,:,q] u.integration_array[i][j,k] = uvps[l].integration_array[m][n,q] u.nsample_array[i][j,k] = uvps[l].integration_array[m][n,q] - + # Labels lbl1 = uvps[l].label_1_array[m,n,q] lbl2 = uvps[l].label_2_array[m,n,q] @@ -1708,7 +1849,7 @@ def combine_uvpspec(uvps, verbose=True): u.lst_2_array[j] = uvps[0].lst_2_array[n] u.lst_avg_array[j] = uvps[0].lst_avg_array[n] u.blpair_array[j] = uvps[0].blpair_array[n] - + # Set baselines u.Nblpairs = len(np.unique(u.blpair_array)) uvp_bls = [uvp.bl_array for uvp in uvps] @@ -1724,10 +1865,10 @@ def combine_uvpspec(uvps, verbose=True): u.Ntimes = len(np.unique(u.time_avg_array)) u.history = reduce(operator.add, [uvp.history for uvp in uvps]) u.labels = np.array(u.labels, np.str) - + for k in static_meta.keys(): setattr(u, k, static_meta[k]) - + # Run check to make sure the new UVPSpec object is valid u.check() return u @@ -1745,10 +1886,10 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): an error is raised. - All uvpspec objects must have certain attributes that agree exactly. These + All uvpspec objects must have certain attributes that agree exactly. These include: - 'channel_width', 'telescope_location', 'weighting', 'OmegaP', - 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', + 'channel_width', 'telescope_location', 'weighting', 'OmegaP', + 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', 'norm_units', 'folded', 'cosmo', 'scalar' Parameters @@ -1767,17 +1908,17 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): ------- uvps : list - List of input UVPSpec objects + List of input UVPSpec objects concat_ax : str Data axis ['spw', 'blpairts', 'pols'] across data can be concatenated unique_spws : list - List of unique spectral window tuples (spw_freq_start, spw_freq_end, + List of unique spectral window tuples (spw_freq_start, spw_freq_end, spw_Nfreqs) across all input uvps unique_blpts : list - List of unique baseline-pair-time tuples (blpair_integer, + List of unique baseline-pair-time tuples (blpair_integer, time_avg_array JD float) across all input uvps unique_pols : list @@ -1801,8 +1942,8 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): # ensure static metadata agree between all objects - static_meta = ['channel_width', 'telescope_location', 'weighting', - 'OmegaP', 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', + static_meta = ['channel_width', 'telescope_location', 'weighting', + 'OmegaP', 'beam_freqs', 'OmegaPP', 'beamfile', 'norm', 'taper', 'vis_units', 'norm_units', 'folded', 'cosmo'] for m in static_meta: for u in uvps[1:]: @@ -1813,9 +1954,9 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): assert not hasattr(uvps[0], m) and not hasattr(u, m), \ "Cannot concatenate UVPSpec objs: not all agree on '{}' attribute".format(m) - static_meta = odict([(k, getattr(uvps[0], k, None)) for k in static_meta + static_meta = odict([(k, getattr(uvps[0], k, None)) for k in static_meta if getattr(uvps[0], k, None) is not None]) - + # create unique data axis lists unique_spws = [] unique_blpts = [] @@ -1827,7 +1968,7 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): for uvp1 in uvps: for s in uvp1.get_spw_ranges(): if s not in unique_spws: unique_spws.append(s) - for p in uvp1.pol_array: + for p in uvp1.pol_array: if p not in unique_pols: unique_pols.append(p) uvp1_blpts = zip(uvp1.blpair_array, uvp1.time_avg_array) @@ -1892,6 +2033,3 @@ def get_uvp_overlap(uvps, just_meta=True, verbose=True): "scalar_array must be the same for all uvps given concatenation along blpairts." return uvps, concat_ax, unique_spws, unique_blpts, unique_pols, static_meta - - - diff --git a/hera_pspec/uvpspec_utils.py b/hera_pspec/uvpspec_utils.py index adcc03db..c9758f36 100644 --- a/hera_pspec/uvpspec_utils.py +++ b/hera_pspec/uvpspec_utils.py @@ -158,6 +158,7 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim nsmp = odict() cov = odict() store_cov = hasattr(uvp, 'cov_array') + for s in np.unique(uvp.spw_array): if h5file is not None: data[s] = h5file['data_spw{}'.format(s)][blp_select, :, pol_select] @@ -167,23 +168,48 @@ def _select(uvp, spws=None, bls=None, only_pairs_in_bls=False, blpairs=None, tim if store_cov: cov[s] = h5file['cov_spw{}'.format(s)][blp_select, :, :, pol_select] else: + if store_cov: + cov[s] = uvp.cov_array[s][blp_select, :, :, pol_select] data[s] = uvp.data_array[s][blp_select, :, pol_select] wgts[s] = uvp.wgt_array[s][blp_select, :, :, pol_select] ints[s] = uvp.integration_array[s][blp_select, pol_select] nsmp[s] = uvp.nsample_array[s][blp_select, pol_select] - if store_cov: - cov[s] = uvp.cov_array[s][blp_select, :, :, pol_select] + + + # Prepare to read stats arrays + stats = odict() + + # If h5file, read in stats data + if h5file is not None: + statnames = [f[f.find("_")+1: f.rfind("_")] for f in h5file.keys() + if f.startswith("stats")] + for sts in np.unique(np.array(statnames)): + stats[sts] = odict() + for s in np.unique(uvp.spw_array): + stats[sts][s] = h5file["stats_{}_{}".format(sts, s)][blp_select, :, pol_select] + if len(statnames) is not 0: + uvp.stats_array = stats + + # Otherwise, keep whatever uvp already has + elif hasattr(uvp, "stats_array"): + for k in uvp.stats_array.keys(): + stats[k] = odict() + for s in np.unique(uvp.spw_array): + stats[k][s] = uvp.stats_array[k][s][blp_select, :, pol_select] + uvp.stats_array = stats uvp.data_array = data uvp.wgt_array = wgts uvp.integration_array = ints uvp.nsample_array = nsmp - if store_cov: - uvp.cov_array = cov - except AttributeError: - # if no h5file fed and hasattr(uvp, data_array) is False then just load meta-data - pass + # Check for covariance array + if store_cov: uvp.cov_array = cov + + except AttributeError as e: + # If no h5file fed and hasattr(uvp, data_array) is False then just + # load meta-data + pass def _blpair_to_antnums(blpair): """ @@ -375,29 +401,29 @@ def _conj_blpair(blpair, which='both'): def _fast_is_in(src_blpts, query_blpts, time_prec=8): """ - Helper function to rapidly check if a given blpair-time couplet is in an + Helper function to rapidly check if a given blpair-time couplet is in an array. - + Parameters ---------- src_blpts : list of tuples or array_like - List of tuples or array of shape (N, 2), containing a list of (blpair, + List of tuples or array of shape (N, 2), containing a list of (blpair, time) couplets. - + query_blpts : list of tuples or array_like - List of tuples or array of shape (M, 2), containing a list of (blpair, + List of tuples or array of shape (M, 2), containing a list of (blpair, time) which will be looked up in src_blpts - + time_prec : int, optional - Number of decimals to round time array to when performing float + Number of decimals to round time array to when performing float comparision. Default: 8. - + Returns ------- is_in_arr: list of bools A list of booleans, which indicate which query_blpts are in src_blpts. """ - # This function converts baseline-pair-times, a tuple (blp, time) + # This function converts baseline-pair-times, a tuple (blp, time) # to a complex number blp + 1j * time, so that "in" function is much # faster. src_blpts = np.asarray(src_blpts) @@ -413,42 +439,40 @@ def _fast_is_in(src_blpts, query_blpts, time_prec=8): def _fast_lookup_blpairts(src_blpts, query_blpts, time_prec=8): """ - Helper function to allow fast lookups of array indices for large arrays of + Helper function to allow fast lookups of array indices for large arrays of blpair-time tuples. - + Parameters ---------- src_blpts : list of tuples or array_like - List of tuples or array of shape (N, 2), containing a list of (blpair, + List of tuples or array of shape (N, 2), containing a list of (blpair, time) couplets. - + query_blpts : list of tuples or array_like - List of tuples or array of shape (M, 2), containing a list of (blpair, + List of tuples or array of shape (M, 2), containing a list of (blpair, time) couplets that you want to find the indices of in source_blpts. - + time_prec : int, optional - Number of decimals to round time array to when performing float + Number of decimals to round time array to when performing float comparision. Default: 8. - + Returns ------- blpts_idxs : array_like - Array of integers of size (M,), which are indices in the source_blpts + Array of integers of size (M,), which are indices in the source_blpts array for each item in query_blpts. """ - # This function works by using a small hack -- the blpair-times are turned - # into complex numbers of the form (blpair + 1.j*time), allowing numpy + # This function works by using a small hack -- the blpair-times are turned + # into complex numbers of the form (blpair + 1.j*time), allowing numpy # array lookup functions to be used src_blpts = np.asarray(src_blpts) query_blpts = np.asarray(query_blpts) src_blpts = src_blpts[:,0] + 1.j*np.around(src_blpts[:,1], time_prec) query_blpts = query_blpts[:,0] + 1.j*np.around(query_blpts[:,1], time_prec) # Applies rounding to time values to ensure reliable float comparisons - + # Do np.where comparison for all new_blpts # (indices stored in second array returned by np.where) blpts_idxs = np.where(src_blpts == query_blpts[:,np.newaxis])[1] - - return blpts_idxs - + return blpts_idxs