# Power Spectrum Summary Waterfalls

**by Josh Dillon and Steven Murray**, last updated March 25, 2025

The purpose of this notebook is to pull together results from power spectra from single, redundantly-averaged baselines (typically cross-power spectra from interleaved sets of times) as produced by the [Single Baseline Filtering and Power Spectrum Estimation
notebook](https://github.com/HERA-Team/hera_notebook_templates/blob/master/notebooks/single_baseline_postprocessing_and_pspec.ipynb). 
In this notebook, we consider and display only the non-time-averaged power spectra
produced by that notebook, useful for making broad decisions about which LSTs to keep in the analysis, etc.

We do not perform the time-averaging within this notebook (and then also examine the time-averaged power spectra) because some of the bias corrections applied to the power spectra are interleave-dependent, whereas this notebook reads interleave-averaged power spectra.

### [• Figure 1: P(k) Averaged Over Baseline vs. LST](#Figure-1:-P(k)-Averaged-Over-Baseline-vs.-LST)


## Imports and Parameters

In [None]:
# Record the wall-clock start time; the last cell uses it to report the total runtime.
import time
tstart = time.time()

In [None]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import pandas as pd
import pickle
from hera_cal import io, utils
import hera_pspec as hp
import matplotlib.pyplot as plt
import matplotlib
from pathlib import Path
from scipy import constants
from scipy.signal import windows
from IPython.core.display import display, HTML
%matplotlib inline

In [None]:
# Data settings
# Power spectrum container file opened in the "Load Power Spectra" section.
PSPEC_FILE: str = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/lstbin-outputs/redavg-smoothcal-inpaint-500ns-lstcal/inpaint/single_baseline_files/baselines_merged.pspec.h5'
# Pickle file of signal losses, indexed by baseline pair and then spectral window.
# When None, it is derived from PSPEC_FILE by replacing ".pspec.h5" with ".frf_losses.pkl".
FRF_SIGNAL_LOSS_FILE: str = None

# Output Files
# NOTE(review): CASENAME is not referenced anywhere in the visible cells -- confirm it is still needed.
CASENAME: str = "all_baselines_interleaved_IDR2.3_500ns_14band"

# Buffer in nanoseconds added to each baseline's length / c delay; delays whose
# magnitude falls below that sum get zero weight in the baseline average.
WEDGE_BUFFER_NS: float = 500

# Analysis Options
# Baselines whose signal loss in a given spectral window exceeds this value are
# skipped in the baseline average.
MAX_FRF_SIGNAL_LOSS: float = 0.1

In [None]:
# Turn the user-supplied path settings into pathlib.Path objects
PSPEC_FILE = Path(PSPEC_FILE)

if FRF_SIGNAL_LOSS_FILE is not None:
    FRF_SIGNAL_LOSS_FILE = Path(FRF_SIGNAL_LOSS_FILE)
else:
    # No explicit loss file given: look next to the power spectrum file for a
    # file with the same name but the ".frf_losses.pkl" ending.
    frf_losses_name = PSPEC_FILE.name.replace(".pspec.h5", ".frf_losses.pkl")
    FRF_SIGNAL_LOSS_FILE = PSPEC_FILE.parent / frf_losses_name


## Load Power Spectra



In [None]:
# Open the container read-only and pull out the interleave-averaged power spectra.
# This takes a bit longer to load.
psc = hp.PSpecContainer(
    PSPEC_FILE,
    mode='r',
    keep_open=False,
)
uvp = psc.get_pspec('stokespol', 'interleave_averaged')

# Signal losses, later looked up as frf_losses[blpair][spw].
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open(FRF_SIGNAL_LOSS_FILE, 'rb') as losses_file:
    frf_losses = pickle.load(losses_file)

In [None]:
# Keep only the pI x pI and pQ x pQ polarization pairs
# (pU and pV are experimental and are dropped here).
kept_polpairs = [(pol, pol) for pol in ('pI', 'pQ')]
uvp.select(polpairs=kept_polpairs)

In [None]:
# Look-up table from each baseline pair to its baseline vector
blp_to_blvec_dict = {
    blpair: blvec
    for blpair, blvec in zip(uvp.get_blpairs(), uvp.get_blpair_blvecs())
}

In [None]:
# Mean redshift of each spectral window: average of (1.420405751e9 / f - 1) over
# the window's frequency channels (assumes uvp.freq_array is in Hz -- TODO confirm).
zs = []
for spw in uvp.spw_array:
    spw_freqs = uvp.freq_array[uvp.spw_freq_array == spw]
    zs.append(np.mean(1.420405751e9 / spw_freqs - 1))
zs = np.array(zs)

## Examine LST structure of the power spectrum

In [None]:
# LST step size, taken as the median spacing of one reference baseline pair's samples.
# NOTE(review): the reference key (spw 0, baseline (0, 1) with itself, pI) is
# hard-coded -- confirm it is present in every input file.
ref_key = (0, ((0, 1), (0, 1)), ('pI', 'pI'))
ref_lsts = uvp.lst_avg_array[uvp.key_to_indices(ref_key)[1]]
dlst = np.median(np.diff(ref_lsts))

# All distinct LSTs in the file; anything later than 17.75 hours is shifted down
# by 2*pi (the factor 12 / pi presumes LSTs are stored in radians).
all_lsts = np.unique(uvp.lst_avg_array)
wraps_around = all_lsts * 12 / np.pi > 17.75
all_lsts[wraps_around] -= 2 * np.pi

# Regular grid starting one step before the earliest LST and running past the latest
grid_start = all_lsts.min() - dlst
grid_stop = all_lsts.max() + dlst
lst_grid = np.arange(grid_start, grid_stop, dlst)

In [None]:
# For every baseline pair, work out which rows of lst_grid its time samples occupy.
blp_to_lst_indices = {}
for key in uvp.get_all_keys():
    blpair = key[1]
    if blpair in blp_to_lst_indices:
        # already handled via another spw / polarization key of the same pair
        continue

    # this pair's LSTs, wrapped the same way as the grid (> 17.75 h shifted by 2*pi)
    lsts = uvp.lst_avg_array[uvp.key_to_indices(key)[1]]
    lsts[lsts * 12 / np.pi > 17.75] -= 2 * np.pi

    # locate the first sample on the grid; the remaining samples are placed on the
    # consecutive grid rows that follow it
    first_row = np.searchsorted(lst_grid, lsts[0])
    blp_to_lst_indices[blpair] = first_row + np.arange(len(lsts), dtype=int)

In [None]:
# Per-band accumulators on the common (LST, delay) grid: the summed weights and the
# weighted sums of the pI and pQ power spectra.
grid_shapes = {spw: (len(lst_grid), len(uvp.get_dlys(spw))) for spw in uvp.spw_array}
wgts_sum = {spw: np.zeros(shape) for spw, shape in grid_shapes.items()}
pI_sum = {spw: np.zeros(shape, dtype=complex) for spw, shape in grid_shapes.items()}
pQ_sum = {spw: np.zeros(shape, dtype=complex) for spw, shape in grid_shapes.items()}

# average power spectra over baselines
for key in uvp.get_all_keys():
    spw, blp, pp = key
    # Only pI keys drive the loop; the matching pQ data is fetched explicitly below
    # so that both polarizations share exactly the same weights.
    if pp != ('pI', 'pI'):
        continue

    # Ignore baselines with too great a signal loss
    if frf_losses[blp][spw] > MAX_FRF_SIGNAL_LOSS:
        continue

    # inverse variance weight baselines
    wgt = np.abs(uvp.get_stats('P_N', key))**-2

    # Zero the weight of every delay whose magnitude is below the baseline length
    # divided by c plus the buffer (assumes baseline vectors are in meters and
    # delays in seconds -- TODO confirm).
    wedge_plus_buffer = np.linalg.norm(blp_to_blvec_dict[blp]) / constants.c + WEDGE_BUFFER_NS * 1e-9
    wgt[:, np.abs(uvp.get_dlys(spw)) < wedge_plus_buffer] = 0

    # rows of the common LST grid covered by this baseline pair
    lst_inds = blp_to_lst_indices[blp]
    pI_sum[spw][lst_inds, :] += uvp.get_data(key) * wgt
    pQ_sum[spw][lst_inds, :] += uvp.get_data((spw, blp, ('pQ', 'pQ'))) * wgt
    wgts_sum[spw][lst_inds, :] += wgt

# normalize average
# Grid cells that received no weight divide 0 by 0 and come out as NaN; that result
# is kept, and only the accompanying RuntimeWarnings are silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    for spw in pI_sum:
        pI_sum[spw] /= wgts_sum[spw]
        pQ_sum[spw] /= wgts_sum[spw]

In [None]:
def plot_power_spectrum_avg_vs_LST():
    """Plot the baseline-averaged Re[P(k)] as LST-vs-delay waterfalls, one row per band.

    Each row corresponds to one entry of ``uvp.spw_array``; the left panel shows
    ``pI_sum`` and the right panel ``pQ_sum`` on a shared logarithmic color scale
    (1e5 to 1e13). Reads the notebook-level ``uvp``, ``pI_sum``, ``pQ_sum``,
    ``lst_grid``, ``zs`` and ``WEDGE_BUFFER_NS``; returns nothing.
    """
    n_spws = len(uvp.spw_array)
    # squeeze=False keeps `axes` two-dimensional, so a file with a single band still
    # yields one (pI, pQ) row instead of a flat pair of axes.
    fig, axes = plt.subplots(n_spws, 2, figsize=(8, n_spws * 2), sharey=True, sharex=True,
                             squeeze=False, gridspec_kw={'wspace': 0, 'hspace': 0})

    # `row` is the position on the figure (and in `zs`); `spw` is the key into the
    # per-band dictionaries. They are kept separate rather than assumed equal.
    for row, (spw, ax) in enumerate(zip(uvp.spw_array, axes)):
        dlys = uvp.get_dlys(spw) * 1e9
        # y extent runs from the last grid LST down to the first, scaled by 12 / pi
        extent = [dlys[0], dlys[-1], lst_grid[-1] * 12 / np.pi, lst_grid[0] * 12 / np.pi]

        for a, avg in zip(ax, (pI_sum[spw], pQ_sum[spw])):
            im = a.imshow(avg.real, aspect='auto', interpolation='none', cmap='turbo',
                          norm=matplotlib.colors.LogNorm(vmin=1e5, vmax=1e13), extent=extent)

        for a in ax:
            a.set_xticks([-3000, -1500, 0, 1500, 3000])
            if row == n_spws - 1:
                a.set_xlabel('Delay (ns)')
        # pin the tick positions, then relabel them modulo 24 so wrapped (negative)
        # LSTs read as hours; the first label is left blank
        ax[0].set_yticks(ax[0].get_yticks())
        ax[0].set_yticklabels(['',] + list(ax[0].get_yticks() % 24)[1:])
        ax[0].set_ylabel('LST (hours)')

        for a, pol in zip(ax, ['pI', 'pQ']):
            a.text(a.get_xlim()[0] + 300, a.get_ylim()[-1] + .4, f'{pol} Band {spw + 1}\nz = {zs[row]:.1f}', ha='left', va='top',
                   bbox=dict(facecolor='w', edgecolor='black', alpha=.75, boxstyle='round', ls='-'))
            a.tick_params(axis='x', direction='in')

    plt.tight_layout()
    # one colorbar across the top of all panels; every panel uses the same norm limits
    plt.colorbar(im, ax=axes, pad=.02, aspect=40, extend='both', location='top', label=f'Baselined-Averaged Outside Wedge + {WEDGE_BUFFER_NS} ns\n' + r'Re[$P(k)$] (mK$^2$ $h^{-3}$ Mpc$^3$)')

### Figure 1: P(k) Averaged Over Baseline vs. LST

In [None]:
# Render Figure 1 from the baseline-averaged pI and pQ power spectra computed above.
plot_power_spectrum_avg_vs_LST()

## Metadata

In [None]:
# Print the version of each package relevant to this analysis.
import importlib

for repo in ['numpy', 'scipy', 'astropy', 'hera_cal', 'hera_qm', 'pandas',
             'hera_filters', 'hera_pspec', 'hera_notebook_templates', 'pyuvdata']:
    # import_module looks the package up by name directly, instead of building an
    # import statement as a string and running it through exec
    __version__ = importlib.import_module(repo).__version__
    print(f'{repo}: {__version__}')

In [None]:
# Report the total wall-clock runtime, measured from tstart (set in the first code cell).
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')