# Goal: Calibrate a File as Fast as Possible
The goal of this notebook is to demonstrate techniques for quickly getting as much diagnostic information about a file as possible. This involves running a full suite of calibration, using shortcuts where possible to converge quickly to absolute calibration.

Current calibration shortcuts include:
- using autocorrelations for first-pass relative gain calibration
- using an empirically determined scaling applied to autocorrelations to approximate absolute gain calibration
- using empirically determined RFI station headings to approximate absolute phase calibration
- using DPSS filters to derive RFI flagging from autocorrelations

Important speed-ups come from:
- general I/O speed-ups from hera_cal.io.read_hera_hdf5 (~20x)
- speed-ups to DPSS fitting from hera_filters (~100x)
- speed-up of firstcal by using RFI channels and fewer channels (~10x)
- obtaining sufficient firstcal accuracy to skip logcal (saves 15s)
- capping Omnical iterations at 100 (~20x) relative to 10,000 used in current pipeline. Obtains equivalent $\chi^2$.

With plotting turned off, current notebook runs in ~60s.

In [1]:
import glob
PLOT = False
#%matplotlib notebook
%matplotlib inline
filenames = glob.glob('/lustre/aoc/projects/hera/aparsons/2459114/zen.2459114.6*.sum.uvh5')[100:101]
#filenames = glob.glob('/lustre/aoc/projects/hera/erath/2459639/zen.2459639.45*.sum.uvh5')[:1]

In [2]:
import numpy as np
import hera_cal
from hera_cal.utils import split_bl, join_bl
import uvtools
import matplotlib.pyplot as plt
import time
import hera_filters
import linsolve
from copy import deepcopy
_ = np.seterr(all='ignore')  # get rid of red warnings

In [3]:
class Timer:
    '''Keep track of run-time through various stages and print nicely
    formatted deltas.

    Each call to clock() records a (name, timestamp) pair. str(timer)
    reports the elapsed time from the first to the latest stage and, once
    more than two stages exist, the elapsed time of the latest stage alone.'''

    def __init__(self):
        # Per-instance history; a class-level list would be shared (and
        # silently accumulated) across every Timer ever created.
        self.order = []

    def clock(self, name):
        '''Record the current wall-clock time under the label name.'''
        self.order.append((name, time.time()))

    def __str__(self):
        if not self.order:
            # Nothing clocked yet; avoid an IndexError when printing
            return '(no stages clocked)'
        first, last = self.order[0], self.order[-1]
        t_full = '%5.2f s' % (last[-1] - first[-1])
        s = f'{first[0]}->{last[0]}: {t_full}'
        if len(self.order) <= 2:
            return s
        prev = self.order[-2]
        t_last = '%5.2f s' % (last[-1] - prev[-1])
        return s + f', {prev[0]}->{last[0]}: {t_last}'

In [4]:
# Create the run-time tracker and record the starting timestamp; later cells
# call timer.clock(<stage>) and print the elapsed times.
timer = Timer()
timer.clock('start')

In [5]:
# Pick an input file and get header information
print('FILE:', filenames)
hc = hera_cal.io.HERADataFastReader(filenames)
# Read with data, flags and nsamples all switched off; the result is discarded
# and only the attributes populated on hc are used below
_ = hc.read(read_data=False, read_flags=False, read_nsamples=False)

print('NANTS:', len(hc.data_ants))
print('NFREQS:', len(hc.freqs), (hc.freqs[0], hc.freqs[-1]))
print('NTIMES:', len(hc.times), (hc.times[0], hc.times[-1]))
print('LSTS:', (hc.lsts[0], hc.lsts[-1]))
print('NPOLS:', len(hc.pols), hc.pols)

# Median spacing of hc.times scaled by 24 * 3600
# (presumably days -> seconds, i.e. hc.times are Julian dates -- TODO confirm)
inttime = 24 * 3600 * np.median(np.diff(hc.times))  # XXX get directly
# Median spacing of hc.freqs, used as the channel width
chan_res = np.median(np.diff(hc.freqs))  # XXX get directly
intcnt = int(inttime * chan_res)  # number of samples per integration in correlator

FILE: ['/lustre/aoc/projects/hera/aparsons/2459114/zen.2459114.65411.sum.uvh5']
NANTS: 104
NFREQS: 1536 (46920776.3671875, 234298706.0546875)
NTIMES: 2 (2459114.654052777, 2459114.6541646253)
LSTS: (1.367506215348899, 1.3682109019040458)
NPOLS: 4 ['nn', 'ee', 'ne', 'en']


# Check Autocorrelation Levels

In [6]:
# Load only the same-antenna, same-feed visibilities and re-key them by
# (antenna, pol) instead of by baseline
auto_pols = ['ee', 'nn']
auto_bls = [
    (ant, ant, pol)
    for ant in hc.data_ants
    for pol in hc.pols
    if pol in auto_pols
]
autos = hc.read(bls=auto_bls, read_data=True, read_flags=False, read_nsamples=False)[0]
# split_bl returns one key per antenna of the baseline; for an auto the first is enough
autos = {split_bl(bl)[0]: vis for bl, vis in autos.items()}
# keep positions only for antennas that actually have data in this file
antpos = {ant: xyz for ant, xyz in hc.antpos.items() if ant in hc.data_ants}

## Sort antennas based on autocorrelation spectra

Cuts are made on:
- absolute power range, dividing out by correlator accumulation to get the 4b real/4b imag RMS levels. Should nominally be ~10 for well-trimmed RF inputs
- spectral slope across band, computed by medians on either side of the center frequency. Deviations from flatness are signs of antennas not seeing sky emission
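- RFI occupancy. Each channel is compared with the mean of its two neighbors, i.e. convolved with a (-0.5, 1, -0.5) kernel; outliers (in absolute value) above the specified fraction of the channel's mean power are counted as RFI, and the cut is made on the fraction of channels so flagged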

In [7]:
def within(val, bounds):
    '''Return whether val lies in the closed interval [bounds[0], bounds[1]].'''
    lower = bounds[0]
    upper = bounds[1]
    return lower <= val <= upper

class Bounds:
    '''Sort keys into good/suspect/bad sets according to two nested ranges.

    absolute is the outer (min, max) range: values outside it are bad.
    good is the inner (min, max) range: values inside absolute but outside
    good are suspect; values inside both are good.'''

    def __init__(self, absolute, good):
        self.abs_bound = absolute
        self.good_bound = good
        self.bad, self.suspect, self.good = set(), set(), set()

    def classify(self, k, val):
        '''Add key k to exactly one of the good/suspect/bad sets based on val.'''
        if not within(val, self.abs_bound):
            bucket = self.bad
        elif not within(val, self.good_bound):
            bucket = self.suspect
        else:
            bucket = self.good
        bucket.add(k)

def _antenna_str(ants):
    '''Format (ant, pol) keys as a sorted, comma-separated string such as
    "2n,10e": the antenna number followed by the last character of pol.'''
    labels = ('%d%s' % (key[0], key[1][-1]) for key in sorted(ants))
    return ','.join(labels)

class AntennaClassification:
    '''Ingests Bounds to create sets of good/suspect/bad antennas.

    The worst verdict wins: a key that is bad in any Bounds is bad, a key
    that is suspect in any Bounds (and bad in none) is suspect, and only
    keys that are never bad or suspect remain good.'''

    def __init__(self, *bounds_list):
        self.clear()
        for bounds in bounds_list:
            self.add_bounds(bounds)

    def clear(self):
        '''Reset the good/suspect/bad sets to empty.'''
        self.bad, self.suspect, self.good = set(), set(), set()

    def add_bounds(self, bound):
        '''Merge one Bounds into the running sets, then drop every key from
        any category better than its worst verdict.'''
        self.bad.update(bound.bad)
        self.suspect.update(bound.suspect)
        self.good.update(bound.good)
        # good loses anything bad or suspect; suspect loses anything bad
        self.good.difference_update(self.bad, self.suspect)
        self.suspect.difference_update(self.bad)

    def __str__(self):
        return '\n\n'.join([
            f'Good: {_antenna_str(self.good)}',
            f'Suspect: {_antenna_str(self.suspect)}',
            f'Bad: {_antenna_str(self.bad)}',
        ])

    def is_good(self, k):
        '''Return True if key k is currently classified good.'''
        return k in self.good

    def is_bad(self, k):
        '''Return True if key k is currently classified bad.'''
        return k in self.bad

In [8]:
# First-pass antenna classification based on auto levels

CEN_FREQ = 136e6  # Hz
RFI_THRESH = 1e-2  # fraction of mean

# Each Bounds takes an outer "absolute" range (outside it -> bad) and an inner
# "good" range (inside absolute but outside good -> suspect)
pwr_bound = Bounds(absolute=(1, 50), good=(5, 20))
slope_bound = Bounds(absolute=(-0.2, 0.2), good=(-0.12, 0.12))
rfi_bound = Bounds(absolute=(0, 0.15), good=(0, 0.1))

for k, v in autos.items():
    # average over axis 0 (presumably time -- TODO confirm) and divide out the
    # correlator accumulation count
    mean = np.mean(v, axis=0) / intcnt
    # median power above and below the center frequency
    hi_pwr = np.median(mean[hc.freqs > CEN_FREQ])
    lo_pwr = np.median(mean[hc.freqs <= CEN_FREQ])
    pwr = 0.5 * (hi_pwr + lo_pwr)
    # difference between the two half-band medians, as a fraction of their average
    slope = (hi_pwr - lo_pwr) / pwr
    # fractional deviation of each interior channel from the mean of its two neighbors
    rfi = np.abs(mean[1:-1] - 0.5 * (mean[:-2] + mean[2:])) / mean[1:-1]
    # fraction of interior channels whose deviation exceeds RFI_THRESH
    rfi_frac = np.mean(np.where(rfi > RFI_THRESH, 1, 0))
    pwr_bound.classify(k, pwr)
    slope_bound.classify(k, slope)
    rfi_bound.classify(k, rfi_frac)

# Combine the three cuts; an antenna takes its worst classification
ant_class = AntennaClassification(pwr_bound, slope_bound, rfi_bound)
print(ant_class)

Good: 45n,73n,85n,88e,88n,98n,100e,100n,101e,102e,103e,104n,107n,108n,118n,124e,135e,136e,140e,142e,143e,144e,145e,156e,156n,157e,157n,163e,164e,164n,165e,165n,166n,178e,179e,179n,183e,184n,185n,187n

Suspect: 0e,1e,1n,2e,11e,11n,13e,13n,14e,14n,24e,24n,25e,26e,26n,39e,39n,44e,44n,46e,46n,58e,58n,59n,73e,75e,82e,82n,83e,83n,85e,86e,87e,87n,89n,91e,91n,92e,92n,94e,94n,98e,99e,99n,102n,103n,104e,105e,105n,107e,108e,109e,109n,111e,111n,112n,117e,117n,118e,120e,120n,122n,124n,127e,127n,128e,128n,129e,129n,130e,130n,135n,136n,140n,141e,141n,143n,144n,158e,160e,160n,161e,162e,162n,163n,166e,176e,176n,177e,177n,178n,181e,181n,182n,183n,185e,186e,186n,187e

Bad: 0n,2n,12e,12n,23e,23n,25n,36e,36n,37e,37n,38e,38n,45e,50e,50n,51e,51n,52e,52n,53e,53n,59e,65e,65n,66e,66n,67e,67n,68e,68n,75n,81e,81n,84e,84n,86n,89e,90e,90n,93e,93n,101n,110e,110n,112e,116e,116n,119e,119n,121e,121n,122e,123e,123n,137e,137n,138e,138n,142n,145n,155e,155n,158n,161n,180e,180n,182e,184e


# Determine RFI Flagging

In [9]:
# First-pass RFI flagging done with channel differencing

SIG_THRESH = 20
ARRAY_FLAG_THRESH = 0.05

rfi_wgts = {}

for k, v in autos.items():
    # only use antennas whose two polarizations are both classified good
    if not ant_class.is_good((k[0], 'Jee')) or not ant_class.is_good((k[0], 'Jnn')):
        continue
    sig = v / np.sqrt(intcnt / 2)  # factor of 2 for autos
    w = np.ones(v.shape)

    # A priori RFI flags
    w[:, np.logical_and(222e6 < hc.freqs, hc.freqs < 224e6)] = 0

    # First pass: difference with average on either side and flag positive outliers
    for width in (1, 2, 4):
        ker = np.ones(2 * width + 1)
        # expected scatter, in units of sig, of a channel minus the mean of its
        # 2 * width neighbors; depends only on width, so computed once per width
        ker_std = np.sqrt(1**2 + 1 / (2 * width))
        wv = v * w
        f_res = np.zeros_like(v)
        for t in range(v.shape[0]):
            # weighted sum and weight total of the neighbors (center channel removed)
            d1 = np.convolve(wv[t], ker, mode='valid') - wv[t, width:-width]
            w1 = np.convolve( w[t], ker, mode='valid') -  w[t, width:-width]
            # clip the weight total at 1 so fully flagged neighborhoods do not divide
            # by zero (np.inf: the np.Inf alias was removed in NumPy 2.0)
            f_res[t, width:-width] = wv[t, width:-width] - d1 / w1.clip(1, np.inf)
            w[t, width:-width] = np.where(f_res[t, width:-width] > sig[t, width:-width] * ker_std * SIG_THRESH, 0, w[t, width:-width])

    rfi_wgts[k] = w

# Fraction of contributing antennas that flagged each sample; samples flagged
# by more than ARRAY_FLAG_THRESH of them are flagged array-wide
flags = sum([1 - v for k, v in rfi_wgts.items()]) / len(rfi_wgts)
data_wgts = np.where(flags > ARRAY_FLAG_THRESH, 0, 1)

In [10]:
# Second-pass RFI flagging done with DPSS filters

def dpss_filter(y, amat, fmat):
    '''Return the real part of the DPSS model of each row of y.

    Row i of y is projected with its own solution matrix fmat[i] and then
    expanded back through the shared operator amat.'''
    rows = [amat @ (fmat[idx] @ row) for idx, row in enumerate(y)]
    return np.array(rows).real

# NOTE: SIG_THRESH is redefined here; the first-pass cell used a different value
SIG_THRESH = 4
FILTER_WIDTH = 250e-9
CENTERS = [0, 2700e-9, -2700e-9]

# One filter window of half-width FILTER_WIDTH per entry in CENTERS
filter_kwargs = {'filter_centers': CENTERS,
                 'filter_half_widths': [FILTER_WIDTH] * len(CENTERS),
                 'eigenval_cutoff': [1e-9] * len(CENTERS)} 

amat, _ = hera_filters.dspec.dpss_operator(hc.freqs, **filter_kwargs)
# One solution matrix per row of data_wgts, since each row has its own weights
fmat = np.array([hera_filters.dspec.fit_solution_matrix(np.diag(w), amat) for w in data_wgts])

rfi_wgts = {}

for k, v in autos.items():
    if ant_class.is_bad(k):
        continue
    # smooth model fitted to the first-pass-unflagged data
    mdl = dpss_filter(v * data_wgts, amat, fmat)
    sig = mdl / np.sqrt(intcnt / 2)
    # zero weight where the data exceed the model by more than SIG_THRESH * sig
    rfi_wgts[k] = np.where(v - mdl > sig * SIG_THRESH, 0, 1)

# Fraction of non-bad antennas that flagged each sample
flags = sum([1 - v for k, v in rfi_wgts.items()]) / len(rfi_wgts)

# Array-wide RFI weights
data_wgts = np.where(flags > ARRAY_FLAG_THRESH, 0, 1)

In [11]:
# Plot RFI flags versus frequency
if PLOT:
    plt.figure(figsize=(8,4))
    # One color per integration. Iterating over the rows that exist (instead
    # of indexing rows 0 and 1 directly) keeps this cell working for files
    # holding a single integration; at most the first two rows are drawn.
    time_colors = ('c', 'm')
    for t, color in zip(range(flags.shape[0]), time_colors):
        plt.plot(hc.freqs / 1e6, flags[t], color + '.')
    for t, color in zip(range(data_wgts.shape[0]), time_colors):
        plt.fill_between(hc.freqs / 1e6, 1-data_wgts[t], color=color, alpha=0.5)

    # dotted line marks the array-wide flagging threshold
    plt.plot(hc.freqs / 1e6, np.ones(hc.freqs.size) * ARRAY_FLAG_THRESH, 'k:')
    plt.grid()
    plt.ylabel('Antenna Fraction')
    plt.title('RFI Flags')
    _ = plt.xlabel('Frequency [MHz]')

In [12]:
# Second-pass antenna classification based on RFI

RFI2_SIG_THRESH = 3  # sigma threshold for flagging RFI
# One-sided cut on a z-score: only antennas with unusually MANY flags are penalized.
# (np.inf is used because the np.Inf alias was removed in NumPy 2.0.)
rfi2_bound = Bounds(absolute=(-np.inf, 5), good=(-np.inf, 1))

# Rebuild the solution matrices because data_wgts changed in the second RFI pass
amat, _ = hera_filters.dspec.dpss_operator(hc.freqs, **filter_kwargs)
fmat = np.array([hera_filters.dspec.fit_solution_matrix(np.diag(w), amat) for w in data_wgts])

smooth_mdl = {}
rfi_wgts = {}

for k, v in autos.items():
    if ant_class.is_bad(k):
        continue
    smooth_mdl[k] = dpss_filter(v * data_wgts, amat, fmat)
    sig = smooth_mdl[k] / np.sqrt(intcnt / 2)
    rfi_wgts[k] = np.where(v - smooth_mdl[k] > sig * RFI2_SIG_THRESH, 0, 1)

# Per-antenna count of flagged samples among those NOT already flagged
# array-wide (a count, not a fraction, but only its z-score is used below)
flag_frac = {k: np.sum((1 - v) * data_wgts) for k, v in rfi_wgts.items()}
ff = np.array(list(flag_frac.values()))
ff_median = np.median(ff)
# robust scatter: median absolute deviation scaled to a Gaussian sigma
ff_std = np.median(np.abs(ff - ff_median)) / 0.675

for k, v in autos.items():
    if ant_class.is_bad(k):
        # antennas already bad had no model computed; carry the verdict forward
        rfi2_bound.bad.add(k)
    else:
        zscore = (flag_frac[k] - ff_median) / ff_std
        rfi2_bound.classify(k, zscore)

ant_class = AntennaClassification(pwr_bound, slope_bound, rfi_bound, rfi2_bound)
print(ant_class)

Good: 45n,73n,85n,88n,98n,100e,101e,103e,104n,118n,135e,136e,140e,142e,143e,144e,145e,156e,156n,157e,157n,163e,164e,164n,165e,165n,166n,178e,179e,179n,183e,184n,185n,187n

Suspect: 0e,2e,11e,11n,13n,14n,24e,24n,25e,39e,39n,44e,44n,46e,46n,58e,59n,75e,82e,82n,83e,83n,85e,86e,87e,87n,88e,89n,91e,91n,92e,92n,94e,94n,98e,99e,99n,100n,102e,102n,103n,104e,105e,105n,107e,107n,108e,108n,109e,109n,111e,111n,112n,117e,117n,118e,120e,120n,122n,124e,124n,127e,127n,128e,128n,129e,129n,130e,130n,135n,136n,140n,141e,141n,143n,144n,158e,160e,160n,161e,162e,162n,163n,166e,176e,176n,177e,177n,178n,181e,181n,182n,183n,185e,186e,186n,187e

Bad: 0n,1e,1n,2n,12e,12n,13e,14e,23e,23n,25n,26e,26n,36e,36n,37e,37n,38e,38n,45e,50e,50n,51e,51n,52e,52n,53e,53n,58n,59e,65e,65n,66e,66n,67e,67n,68e,68n,73e,75n,81e,81n,84e,84n,86n,89e,90e,90n,93e,93n,101n,110e,110n,112e,116e,116n,119e,119n,121e,121n,122e,123e,123n,137e,137n,138e,138n,142n,145n,155e,155n,158n,161n,180e,180n,182e,184e


In [13]:
# Show, per polarization, the noise-normalized residual (data - smooth model)
# of every non-bad auto at unflagged channels of the first integration
if PLOT:
    plot_pols = ['Jee', 'Jnn']
    fig, axes = plt.subplots(figsize=(8,6), ncols=1, nrows=len(plot_pols), sharex=True)
    mask = np.where(data_wgts[0])

    for k, v in autos.items():
        if not ant_class.is_bad(k):
            ax = axes[plot_pols.index(k[-1])]
            mdl = smooth_mdl[k][0]
            sig = mdl / np.sqrt(intcnt / 2)
            residual = (v[0] - mdl) / sig
            ax.plot(hc.freqs[mask] / 1e6, residual[mask], 'k', label=str(k[0]), alpha=0.1)

    for ax, pol in zip(axes, plot_pols):
        ax.set_title(f'Good Antennas, Polarization: {pol}')
        ax.set_ylabel('Z Score')
        ax.set_ylim(-5, 5)
    _ = axes[-1].set_xlabel('Frequency [MHz]')

In [14]:
# Draw the array layout once per polarization, marking bad antennas as
# excluded and good antennas as highlighted
if PLOT:
    fig, axes = plt.subplots(figsize=(7,14), ncols=1, nrows=2)
    for ax, pol in zip(axes, ('Jee', 'Jnn')):
        plt.sca(ax)
        ex_ants = [key[0] for key in ant_class.bad if key[-1] == pol]
        hl_ants = [key[0] for key in ant_class.good if key[-1] == pol]
        uvtools.plot.plot_antpos(antpos, ex_ants=ex_ants, hl_ants=hl_ants)
        plt.title(f'Polarization: {pol}')

# Estimate Absolute Amplitude from Autocorrelations

In [15]:
# First estimate of antenna gains from autos

# polynomial fit to x=log10(freq) y=log10(abscal_gain / auto) for H4C
abscal_loglog_poly = np.array([159.60511509346617, -6411.680706063783, 102993.61331972879, -826937.7537351248, 3318643.7541476665, -5325564.542530925])
auto_scalar = 10**np.polyval(abscal_loglog_poly, np.log10(hc.freqs))
# In-paint: keep the measured auto where data_wgts is 1 and substitute the
# smooth model where it is 0. (The previous expression iterated smooth_mdl for
# both terms, so the result was just the model and the data were never used.)
# Only the real part of the auto is taken, matching the real-valued model.
# NOTE(review): assumes the imaginary part of an autocorrelation is zero -- confirm.
inpainted_autos = {k: data_wgts * autos[k].real + (1 - data_wgts) * mdl for k, mdl in smooth_mdl.items()}
# per-sample noise estimate from the smooth model
noise_mdl = {k: v / np.sqrt(intcnt / 2) for k, v in smooth_mdl.items()}
#mean_pwr = np.mean([v for k, v in inpainted_autos.items() if k in good_ants], axis=0)
#auto_gains = {k: np.sqrt(auto_scalar * v / mean_pwr) for k, v in inpainted_autos.items()}
# gain amplitude estimate: square root of the empirically scaled auto power
auto_gains = {k: np.sqrt(auto_scalar * v) for k, v in inpainted_autos.items()}

In [16]:
# Plot shape of in-painted autos versus frequency
if PLOT:
    plot_pols = ['Jee', 'Jnn']
    fig, axes = plt.subplots(figsize=(8,6), ncols=1, nrows=len(plot_pols), sharex=True)

    for k, v in inpainted_autos.items():
        # NOTE(review): this skips the GOOD antennas, so only the remaining
        # entries of inpainted_autos are drawn; other plotting cells skip
        # bad antennas instead -- confirm which selection is intended.
        if ant_class.is_good(k):
            continue
        ax = axes[plot_pols.index(k[-1])]
        # spectrum averaged over axis 0, normalized by the overall mean so
        # only the spectral shape is compared between antennas
        ax.plot(hc.freqs / 1e6, np.mean(v, axis=0) / np.mean(v), label=str(k[0]))

    for cnt, pol in enumerate(plot_pols):
        ax = axes[plot_pols.index(pol)]
        ax.set_title(f'Polarization: {pol}')
        ax.set_ylabel('Calibrated Power')
        ax.grid()
    _ = axes[-1].set_xlabel('Frequency [MHz]')

In [17]:
# Mark the end of the auto-based flagging/classification stage and report timing
timer.clock('auto_flags')
print(timer)

start->auto_flags:  9.10


# Firstcal Delays from Stable RFI Transmitters

In [18]:
# Read the visibility data (no bls selection is passed); flags and nsamples
# are skipped and only the first element of the returned tuple is kept
data = hc.read(read_data=True, read_flags=False, read_nsamples=False)[0]

In [19]:
# Table of RFI transmitters used as phase references, keyed by channel index:
# ch: (fq, ang, chisq). Commented-out rows are stations that are not used.
phs_sol = {
     #359: ( 90744018.5546875, 0.785398, 23.39),
     360: ( 90866088.8671875, 0.785398, 10.85),
     369: ( 91964721.6796875, 0.106814, 34.81),
     386: ( 94039916.9921875, 0.785398, 18.12),
     #391: ( 94650268.5546875, 3.581415, 47.47),
     392: ( 94772338.8671875, 3.587698, 40.78),
     #399: ( 95626831.0546875, 6.063273, 36.57),
     400: ( 95748901.3671875, 6.063273, 24.07),
     441: (100753784.1796875, 0.785398, 21.72),
     447: (101486206.0546875, 3.587698, 43.82),
     #455: (102462768.5546875, 6.063273, 18.87),
     456: (102584838.8671875, 6.063273, 8.811),
     471: (104415893.5546875, 0.785398, 13.39),
     477: (105148315.4296875, 3.587698, 19.82),
     485: (106124877.9296875, 6.063273, 4.041),
    1182: (191207885.7421875, 0.785398, 27.06),
#    1444: (223190307.6171875, 1.426283, 54.68),
#    1445: (223312377.9296875, 2.607521, 52.55),
#    1494: (229293823.2421875, 5.560618, 51.34),
}

# channel indices in ascending order
chs = np.array(sorted(phs_sol))
# upwgt high-band station to offset FM overrepresentation
ch_wgts = np.where(hc.freqs[chs] > 150e6, 10, 1)
sum_ch_wgts = ch_wgts.sum()
# wavelength of each selected channel
lams = 3e8 / hc.freqs[chs]
# heading angle (second tuple entry) of each station, in channel order
_angs = np.array([phs_sol[ch][1] for ch in chs])
# heading unit vectors stacked as rows (cos, sin, 0), one column per station
fm_headings = np.stack((np.cos(_angs), np.sin(_angs), np.zeros_like(_angs)))

In [20]:
# Build redundancy lists from antenna position and filter out bad antennas
# reds keeps every antenna; freds drops anything currently classified bad
reds = hera_cal.redcal.get_reds(antpos, pols=['ee','nn'], pol_mode='2pol')
freds = hera_cal.redcal.filter_reds(reds, ex_ants=ant_class.bad)

In [21]:
# Attempt to geometrically phase baselines to RFI channels, and toss out
# baselines that don't phase (a sign of broken cross-correlation)

# np.inf is used throughout: the np.Inf alias was removed in NumPy 2.0
_phs_bl_bound = Bounds(absolute=(-np.inf, 0.3), good=(-np.inf, 0.1))
phs_bound = Bounds(absolute=(-np.inf, 0.5), good=(-np.inf, 0.1))

# Because freq sampling is sparse, a brute-force search for best delay is
# both faster and more robust
MAX_DLY = 250  # maximum delay to try, in ns
dlys_try = np.linspace(-MAX_DLY, MAX_DLY, 4 * MAX_DLY + 1) * 1e-9
dlys_try.shape = (-1, 1)
fqs = hc.freqs[chs]
fqs.shape = (1, -1)
phasor = np.exp(-2j * np.pi * fqs * dlys_try)  # brute force RFI phasors by delay

ant_cnt = {}  # counts how many times an antenna appears in baselines
bl_dly = {}  # stores best-fit delay for each baseline
# some antennas phase best with a 180-deg rotated phasor, a sign of swapped dipoles on the feed
ant_swapped = {}  # stores ants if best-fit phasor for a baseline was 180-deg rotated

for grp in freds:
    bl = grp[0]
    # generate predicted geometric phases for each RFI station
    # (computed once from the first baseline and reused for the whole group)
    bl_xyz = antpos[bl[1]] - antpos[bl[0]]
    phs = np.exp(-2j * np.pi * np.dot(bl_xyz, fm_headings) / lams)

    for bl in grp:
        a_i, a_j = split_bl(bl)
        # remove the geometric phase at the RFI channels and sum over axis 0
        d_phs = np.sum(data[bl][:,chs] * phs, axis=0)
        ant_cnt[a_i] = ant_cnt.get(a_i, 0) + 1
        ant_cnt[a_j] = ant_cnt.get(a_j, 0) + 1
        # scale to (at most) unit amplitude; the clip avoids amplifying
        # samples whose magnitude is below 1
        d_phs /= np.abs(d_phs).clip(1, np.inf)
        _chi = np.sum(ch_wgts * np.abs(d_phs * phasor - 1)**2, axis=1) / sum_ch_wgts
        _chi180 = np.sum(ch_wgts * np.abs(d_phs * phasor + 1)**2, axis=1) / sum_ch_wgts # 180-deg phase offset
        # if swapped 180-deg phasor is best fit, add a swap vote for both
        # antennas and continue with the rotated chi-square
        if _chi180.min() < _chi.min():
            ant_swapped[a_i] = ant_swapped.get(a_i, 0) + 1
            ant_swapped[a_j] = ant_swapped.get(a_j, 0) + 1
            _chi = _chi180
        i = np.argmin(_chi)
        _phs_bl_bound.classify(bl, _chi[i])
        #print(bl, chisq[i], dlys[i, 0] / 1e-9, mx)
        bl_dly[bl] = dlys_try[i, 0]

# Calculate fraction with bad chisq on phasing
bad_cnt = {}
for bl in _phs_bl_bound.bad:
    for ant in split_bl(bl):
        bad_cnt[ant] = bad_cnt.get(ant, 0) + 1

# Classify antennas based on fraction of baselines that have bad phasing
# (antennas with no bad baselines never enter phs_bound and keep their
# classification from the other bounds)
for ant, cnt in bad_cnt.items():
    bad_frac = cnt / ant_cnt[ant]
    phs_bound.classify(ant, bad_frac)

ant_class = AntennaClassification(pwr_bound, slope_bound, rfi_bound, rfi2_bound, phs_bound)
print(ant_class)

Good: 45n,73n,88n,98n,100e,101e,103e,104n,118n,135e,136e,140e,142e,143e,144e,145e,156e,157e,163e,164e,165e,178e,179e,183e,184n,185n,187n

Suspect: 0e,2e,11e,11n,13n,14n,24e,24n,25e,39e,39n,44e,44n,46e,46n,58e,59n,75e,82e,82n,83e,83n,85e,85n,86e,88e,89n,91e,91n,92e,92n,94e,94n,98e,99e,99n,100n,102e,102n,103n,104e,105e,105n,107e,107n,108e,108n,109e,109n,111e,111n,112n,117e,117n,118e,120e,120n,122n,124e,124n,127e,127n,128e,128n,129e,129n,130e,130n,135n,136n,140n,141e,141n,143n,144n,156n,157n,158e,161e,162e,162n,163n,164n,165n,166e,166n,176e,176n,177e,177n,178n,179n,181e,181n,182n,183n,185e,186e,186n,187e

Bad: 0n,1e,1n,2n,12e,12n,13e,14e,23e,23n,25n,26e,26n,36e,36n,37e,37n,38e,38n,45e,50e,50n,51e,51n,52e,52n,53e,53n,58n,59e,65e,65n,66e,66n,67e,67n,68e,68n,73e,75n,81e,81n,84e,84n,86n,87e,87n,89e,90e,90n,93e,93n,101n,110e,110n,112e,116e,116n,119e,119n,121e,121n,122e,123e,123n,137e,137n,138e,138n,142n,145n,155e,155n,158n,160e,160n,161n,180e,180n,182e,184e


In [22]:
# Identify antennas that have 180-deg dipole rotations; they will be corrected
# Both ranges are identical, so an antenna is either good or bad (never
# suspect): bad means more than half of its baselines preferred the rotated
# phasor. (np.inf: the np.Inf alias was removed in NumPy 2.0.)
_swap_bound = Bounds(absolute=(-np.inf, 0.5), good=(-np.inf, 0.5))

# Calculate fraction with swapped dipoles
for ant, cnt in ant_swapped.items():
    bad_frac = cnt / ant_cnt[ant]
    _swap_bound.classify(ant, bad_frac)

# swap_ants aliases _swap_bound.bad, which is modified in place here
swap_ants = _swap_bound.bad
swap_ants.difference_update(ant_class.bad)
print(f'Reversed: {_antenna_str(swap_ants)}')

Reversed: 11e,11n,25e,45n,58e,85e,88n,89n,100n,101e,102e,102n,104e,143e,143n,178n


In [23]:
# %%time
# # 2min 35s
# freds = hera_cal.redcal.filter_reds(reds, ex_ants=bad_ants)
# info = hera_cal.redcal.RedundantCalibrator(freds)
# meta0, sol0 = info.firstcal(data, hc.freqs)

In [24]:
# Solve firstcal delays for non-bad antennas

# Re-filter because ant_class gained the phasing cut since freds was last built
freds = hera_cal.redcal.filter_reds(reds, ex_ants=ant_class.bad)

# Encode equations for non-bad antennas
eqs1 = {}
for bl, dly in bl_dly.items():
    a_i, a_j = split_bl(bl)
    if ant_class.is_bad(a_i) or ant_class.is_bad(a_j):
        continue
    # a_i + a_j concatenates the two antenna keys to fill the four format
    # fields, giving one "dly_i - dly_j = baseline delay" equation per baseline
    eqs1['dly_%d_%s - dly_%d_%s' % (a_i + a_j)] = dly

ls = linsolve.LinearSolver(eqs1)
_sol1 = ls.solve()

In [25]:
# Fine-tune FM delays after first round of per-ant solutions

freqs = hc.freqs.copy()
freqs.shape = (1, -1)
dlys = {ai: _sol1['dly_%d_%s' % ai] for ai in ant_cnt.keys() if not ant_class.is_bad(ai)}
# delay-only gains; antennas flagged as swapped get a sign flip (180-deg rotation)
gains = {ai: np.exp(2j * np.pi * freqs * dly) for ai, dly in dlys.items()}
for ai in swap_ants:
    gains[ai] *= -1

# fine-tuning loop. multiple passes appear unnecessary
for max_dly in (15,):
    dlys_try = np.linspace(-max_dly, max_dly, 20 * max_dly + 1) * 1e-9
    dlys_try.shape = (-1, 1)
    fqs = hc.freqs[chs]
    fqs.shape = (1, -1)
    phasor = np.exp(-2j * np.pi * fqs * dlys_try)  # brute force RFI phasors by delay

    _eqs = {}
    for grp in freds:
        bl = grp[0]
        # generate predicted geometric phases for each RFI station
        bl_xyz = antpos[bl[1]] - antpos[bl[0]]
        phs = np.exp(-2j * np.pi * np.dot(bl_xyz, fm_headings) / lams)

        for bl in grp:
            a_i, a_j = split_bl(bl)
            # apply the current gain estimate before searching for the residual delay
            g_ij = gains[a_i][:,chs] * gains[a_j][:,chs].conj()
            d_phs = np.sum(data[bl][:,chs] * phs / g_ij, axis=0)
            # np.inf: the np.Inf alias was removed in NumPy 2.0
            d_phs /= np.abs(d_phs).clip(1, np.inf)
            # gain inversion above for swapped antennas means no need to check 180-deg swap here
            _chi = np.mean(np.abs(d_phs * phasor - 1)**2, axis=1)
            i = np.argmin(_chi)
            _eqs['dly_%d_%s - dly_%d_%s' % (a_i + a_j)] = dlys_try[i, 0]

    ls = linsolve.LinearSolver(_eqs)
    _sol2 = ls.solve()
    # add the residual delays to the running per-antenna delays and rebuild the gains
    dlys = {ai: dly + _sol2['dly_%d_%s' % ai] for ai, dly in dlys.items()}
    gains = {ai: np.exp(2j * np.pi * freqs * dly) for ai, dly in dlys.items()}
    for ai in swap_ants:
        gains[ai] *= -1

# Finalize Firstcal Delays from Sky

In [26]:
# Final polish on delays using full-band (non-RFI) data

CH_STEP = 200  # every nth channel to include in fit
_chs = np.arange(0, hc.freqs.size, CH_STEP)
fqs = hc.freqs[_chs]
fqs.shape = (1, -1)

# fine-tuning loop. multiple passes appear unnecessary
for dly_rng in (15,):
    dlys_try = np.linspace(-dly_rng, dly_rng, 20 * dly_rng + 1) * 1e-9
    dlys_try.shape = (-1, 1)

    phasor = np.exp(-2j * np.pi * fqs * dlys_try)  # brute force phasors by delay

    _eqs = {}
    for grp in freds:
        # np.inf: the np.Inf alias was removed in NumPy 2.0
        _chisq = np.inf
        # pick a representative baseline for a redundant group based on flatness of phase
        # XXX ARP: why is this the right metric?
        for bl in grp:
            a_i, a_j = split_bl(bl)
            g_ij = gains[a_i][:,_chs] * gains[a_j][:,_chs].conj()
            d_phs = np.sum(data[bl][:,_chs] / g_ij, axis=0)
            d_phs /= np.abs(d_phs).clip(1, np.inf)
            _chi = np.mean(np.abs(d_phs - 1)**2)
            if _chi < _chisq:
                min_bl = bl
                phs = d_phs.conj()
                _chisq = _chi
        # compute phase relative to representative baseline and encode 4-point phase equation
        ma_i, ma_j = split_bl(min_bl)
        for bl in grp:
            a_i, a_j = split_bl(bl)
            g_ij = gains[a_i][:,_chs] * gains[a_j][:,_chs].conj()
            d_phs = np.sum(data[bl][:,_chs] * phs / g_ij, axis=0)
            d_phs /= np.abs(d_phs).clip(1, np.inf)
            chisq = np.mean(np.abs(d_phs * phasor - 1)**2, axis=1)
            i = np.argmin(chisq)
            # delay of this baseline minus delay of the representative baseline
            _eqs['dly_%d_%s - dly_%d_%s - dly_%d_%s + dly_%d_%s' % (a_i + a_j + ma_i + ma_j)] = dlys_try[i, 0]

    ls = linsolve.LinearSolver(_eqs)
    _sol = ls.solve()
    dlys = {ai: dly + _sol['dly_%d_%s' % ai] for ai, dly in dlys.items()}
    # add auto_gains as first estimate of gain amplitudes
    sol0 = {ai: auto_gains[ai] * np.exp(2j * np.pi * freqs * dly) for ai, dly in dlys.items()}
    for ai in swap_ants:
        sol0[ai] *= -1

In [27]:
# With per-antenna firstcal gains in hand, solve for one visibility per
# redundant group and store it alongside the gains

# every ordered pairing of solved antenna keys (sol0 holds only gains at this point)
all_bls = {join_bl(ant_i, ant_j) for ant_i in sol0 for ant_j in sol0}
info = hera_cal.redcal.RedundantCalibrator(freds)
sol0.update(info.compute_ubls(data, sol0))

In [28]:
# Plot firstcal calibrated visibilities for a redundant group

def plot_red_gp(data, sol, gp, t=0, title=None):
    '''Plot phase and amplitude of every calibrated baseline in redundant
    group gp at time index t, with the group's solved visibility overlaid
    as a thick black line. The title defaults to the solution key.'''
    fig, axes = plt.subplots(figsize=(8,6), ncols=1, nrows=2, sharex=True)
    ax_phs, ax_amp = axes[0], axes[1]
    ubl = [bl for bl in gp if bl in sol][0]  # key under which this group is stored in sol
    mask = np.where(data_wgts[t])  # amplitudes are only drawn at unflagged channels
    u = sol[ubl][t]
    for bl in gp:
        a_i, a_j = split_bl(bl)
        g_ij = sol[a_i][t] * sol[a_j][t].conj()
        _dat = data[bl][t]
        ax_phs.plot(hc.freqs / 1e6, np.angle(_dat / g_ij), label=str(bl))
        ax_amp.semilogy(hc.freqs[mask] / 1e6, np.abs(_dat / g_ij)[mask], label=str(bl))
    ax_phs.plot(hc.freqs / 1e6, np.angle(u), 'k', linewidth=3, label=str(ubl))
    ax_amp.semilogy(hc.freqs[mask] / 1e6, np.abs(u)[mask], 'k', linewidth=3, label=str(ubl))
    ax_phs.set_title(str(ubl) if title is None else title)
    ax_phs.set_ylabel('Phase')
    ax_amp.set_ylabel('Amplitude')
    ax_amp.set_xlabel('Frequency [MHz]')
    ax_amp.grid()

# Show the first filtered redundant group calibrated with the firstcal solution
if PLOT:
    plot_red_gp(data, sol0, freds[0], title='Firstcal')

In [29]:
# Mark the end of the firstcal stage and report timing
timer.clock('firstcal')
print(timer)

start->firstcal: 22.02, auto_flags->firstcal: 12.91


In [30]:
# Skipping logcal as an unnecessary step
# %%time
# roughly 15s
# meta1, sol1 = info.logcal(data, {k: v for k, v in sol0.items() if len(k) == 2})
# sol1 = info.remove_degen(sol1, degen_sol=sol0)

# Omnical

In [31]:
# Establish inverse variance weighting estimated from smoothed autocorrelations

wgts = {}
for bl in all_bls:
    a_i, a_j = split_bl(bl)
    # geometric mean of the two antennas' noise models
    noise = np.sqrt(noise_mdl[a_i] * noise_mdl[a_j])
    wgts[bl] = 1 / (noise / np.sqrt(2))**2 # crosses have 1/2 variance of autos
wgts = hera_cal.io.DataContainer(wgts)

In [32]:
%%time
# Run Omnical

#def wgt_func(abs2):
#    return np.where(abs2 > 0, 5 * np.tanh(abs2 / 5) / abs2, 1)
#meta2, sol2 = info.omnical(use_data, deepcopy(sol1), wgts=use_wgts, conv_crit=1e-10, gain=.4, maxiter=10000,
#                         check_after=500, check_every=100) # standard pipeline, takes 15.5 min
# wgt func reduces sensitivity to outliers; unclear what impact is for preflagged antennas
#meta2, sol2 = info.omnical(use_data, deepcopy(sol1), wgts=use_wgts, conv_crit=1e-5, gain=.4, maxiter=100,
#                         check_after=50, check_every=10, wgt_func=wgt_func)
# Start from a copy of the firstcal solution. check_after equals maxiter, so
# (per the trailing comment) no convergence check happens before the cap.
meta2, sol2 = info.omnical(data, deepcopy(sol0), wgts=wgts, conv_crit=1e-5, gain=.4, maxiter=100,
                         check_after=100, check_every=10) # hardcoded to run 100 iterations w/o checking

CPU times: user 42.1 s, sys: 3.22 s, total: 45.4 s
Wall time: 45.5 s


In [33]:
# Replace degeneracies in omnical solutions with firstcal degeneracies, which
# inherited a nominal absolute calibration from H4C

sol2 = info.remove_degen(sol2, degen_sol=sol0)
# Slow
# (the chi-square evaluation below is left disabled; it refers to use_data /
# use_wgts, which are not defined in the cells above)
#vis_sols = {k: v for k, v in sol2.items() if len(k) == 3}
#gain_sols = {k: v for k, v in sol2.items() if len(k) == 2}
#chisq2_pol, chisq2_per_ant = hera_cal.redcal.normalized_chisq(use_data, use_wgts, freds, vis_sols, gain_sols)
#chisq2 = 0.5 * (chisq2_pol['Jee'] + chisq2_pol['Jnn'])

In [34]:
# Plot omnical calibrated visibilities for a redundant group
if PLOT:
    plot_red_gp(data, sol2, freds[0], title='Omnical')

In [35]:
# Examine how redundant solution for a group changed between firstcal & omnical
if PLOT:
    fig, axes = plt.subplots(figsize=(8,6), ncols=1, nrows=2, sharex=True)
    gp = freds[0]
    # key under which this group's visibility is stored in the solutions
    ubl = [bl for bl in gp if bl in sol0][0]
    mask = np.where(data_wgts[0])
    # 2*cnt maps the loop index (0, 1) onto the labels 'sol0' and 'sol2'
    for cnt, sol in enumerate((sol0, sol2)):
        u = sol[ubl][0]
        axes[0].plot(hc.freqs / 1e6, np.angle(u), label=f'sol{2*cnt}')
        axes[1].semilogy(hc.freqs[mask] / 1e6, np.abs(u[mask]), label=f'sol{2*cnt}')
    title = str(ubl)
    axes[0].set_title(title)
    axes[0].set_ylabel('Phase')
    axes[0].legend()
    axes[1].set_ylabel('Amplitude')
    axes[1].set_xlabel('Frequency [MHz]')
    axes[1].legend()
    axes[1].grid()

In [36]:
# Plot chisq and # of iterations from omnical
if PLOT:
    plt.figure()
    plt.semilogy(hc.freqs / 1e6, meta2['chisq'][0] / len(all_bls) * 4) # XXX fix this scaling
    # the iteration count shares the same (log) axis as chi-square
    plt.semilogy(hc.freqs / 1e6, meta2['iter'][0])
    plt.xlabel('Frequency [MHz]')
    # raw string: '\c' is not a valid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions
    plt.ylabel(r'$\chi_r^2$')
    plt.ylim(3e-1, 1e3)
    plt.grid()

In [37]:
# if PLOT:
#     plt.figure()
#     for k, _chi in chisq2_per_ant.items():
#         #print(k, np.median(_chi), np.where(_chi > 2)[0].size, np.median(chisq2b_per_ant[k]), np.where(chisq2b_per_ant[k] > 2)[0].size)
#         if np.where(_chi > 2)[0].size / _chi.size > 0.2:
#             continue
#         plt.semilogy(hc.freqs / 1e6, (_chi * data_wgts)[0])

In [38]:
# Mark the end of the omnical stage and report timing
timer.clock('omnical')
print(timer)

start->omnical: 69.20, firstcal->omnical: 47.18


In [39]:
# if PLOT:
#     plt.figure()
#     hist, bins = np.histogram([np.where(_chi > 2)[0].size / _chi.size for _chi in chisq2_per_ant.values()])
#     plt.plot(0.5 * (bins[1:] + bins[:-1]), hist)

In [40]:
# Tack on solutions to "bad" antennas given existing redundant bl solution

gsum = {}  # per-antenna weighted sum of data times conjugate model
gwgt = {}  # per-antenna weighted sum of squared model amplitude
sol3 = deepcopy(sol2)

# Iterate the unfiltered groups so baselines to excluded antennas are included
for grp in reds:
    try:
        ubl = [bl for bl in grp if bl in sol3][0]
    except(IndexError):
        # no visibility solution exists for this group; nothing to anchor to
        continue
    u = sol3[ubl]
    for bl in grp:
        a_i, a_j = split_bl(bl)
        noise = np.sqrt(autos[a_i] * autos[a_j])
        wgt = 1 / (noise / np.sqrt(2))**2 # crosses have 1/2 variance of autos
        if a_i not in sol3:
            if a_j not in sol3:
                # neither antenna is solved, so the baseline constrains nothing
                continue
            # weighted least-squares terms for g_i in data[bl] = g_i * (u * conj(g_j))
            gsum[a_i] = gsum.get(a_i, 0) + data[bl] * (u * sol3[a_j].conj()).conj() * wgt
            gwgt[a_i] = gwgt.get(a_i, 0) + np.abs(u)**2 * np.abs(sol3[a_j])**2 * wgt
        elif a_j not in sol3:
            # same estimate for g_j, using the conjugated equation
            # conj(data[bl]) = g_j * (conj(u) * conj(g_i))
            gsum[a_j] = gsum.get(a_j, 0) + data[bl].conj() * (u.conj() * sol3[a_i].conj()).conj() * wgt
            gwgt[a_j] = gwgt.get(a_j, 0) + np.abs(u)**2 * np.abs(sol3[a_i])**2 * wgt
# gains are added only after the loop, so newly solved antennas never feed
# other estimates; nan_to_num replaces non-finite ratios with finite numbers
sol3.update({k: np.nan_to_num(gsum[k] / gwgt[k]) for k in gsum.keys()})

In [41]:
# Mark the end of the bad-antenna gain stage and report timing
timer.clock('badcal')
print(timer)

start->badcal: 70.33, omnical->badcal:  1.13


In [42]:
# Plot omnical gains versus first-guess gains from auto-correlations
if PLOT:
    plt.figure()
    mask = np.where(data_wgts[0])
    for k, v in auto_gains.items():
        if k not in sol3:
            continue
            
        # one point per unflagged channel of the first integration
        plt.plot(v[0][mask], np.abs(sol3[k][0][mask]), ',', alpha=0.2)

    plt.grid()
    plt.xlabel('Auto Gain')
    plt.ylabel('Omnical Gain')

In [43]:
# Gain amplitude versus frequency for every antenna in sol3 (good and bad),
# 'Jee' gains in the top panel and everything else in the bottom panel
if PLOT:
    fig, axes = plt.subplots(figsize=(8,6), ncols=1, nrows=2, sharex=True)
    mask = np.where(data_wgts[0])
    for k, gain in sol3.items():
        if len(k) == 3:
            # three-element keys are baseline (visibility) solutions, not gains
            continue
        ax = axes[0] if k[-1] == 'Jee' else axes[1]
        ax.semilogy(hc.freqs[mask] / 1e6, np.abs(gain[0][mask]), label=str(k))
    for panel in (axes[0], axes[1]):
        panel.grid()
        panel.set_ylabel('Gain')
    axes[0].set_title('Omnical Gain Solutions')
    plt.xlabel('Frequency [MHz]')