In [1]:
from __future__ import print_function, division, absolute_import

import os

import numpy as np
from astropy import constants as const
from astropy import units
from astropy.coordinates import Angle

This notebook investigates the savings that can be had when using baseline-dependent averaging (BDA). We are assuming that the proposed layout for the full HERA-350 array is fixed, and that the correlator has the properties required for adequate imaging. Specifically, we are assuming a fringe-stopped correlator with a pre-fringe-stopped integration time of 0.1 seconds, and a post-fringe-stopped integration time of 10 seconds. We are also assuming that the magic "allowable" amount of decorrelation for a given baseline is 10% for a 10 degree field of view (FoV). We also assume that there are 8192 channels of output, but note that this choice does not significantly affect the possible savings versus 4096 (so in practice, the only change is a doubling of the base data rate).

In [2]:
# define correlator properties
# These module-level values are read as globals by the helper functions and
# the data-rate cells further down.

# maximum allowable fractional decorrelation for a baseline (0.1 == 10%)
max_decorr = 0.1
# observing frequency used to derive the wavelength (250 MHz, expressed in Hz)
frequency = (250 * 1e6 * units.Hz)
# wavelength corresponding to `frequency` (speed of light / frequency)
wavelength = const.c / frequency.to(1/units.s)
# apparent sky rotation rate: 360 degrees per sidereal day, in arcmin / s
earth_rot_speed = (Angle(360, units.deg) / units.sday).to(units.arcminute/units.s)
# field of view at which decorrelation is evaluated by the helper functions
corr_FoV_min = Angle(10., units.degree)
# NOTE(review): corr_FoV_max is not referenced by any cell visible here
corr_FoV_max = Angle(90., units.degree)
# latitude of the array; only |sin(latitude)| is used by the helpers
hera_latitude = Angle('-30:43:17.5', units.deg)
# pre-fringe-stopped integration time
corr_int_time = 0.1 * units.s
# default post-fringe-stopped integration time
corr_post_fs_int_time = 10. * units.s
# number of output frequency channels
n_channels = 8192
# width of one channel: 250 MHz split evenly across n_channels
corr_chan_width = (250 * units.MHz) / n_channels

Define helper functions:

In [3]:
def bl_resolution(ew_bl_len):
    """Return the E-W angular resolution of a baseline as an Angle in arcminutes.

    ew_bl_len is the E-W baseline length as a bare number, interpreted in
    meters. The resolution is arcsin(wavelength / length); the ratio is capped
    at 1 so baselines shorter than one wavelength stay inside arcsin's domain.
    """
    sin_res = min(1, wavelength / (ew_bl_len * units.m))
    resolution = Angle(np.arcsin(sin_res), units.radian)
    return resolution.to(units.arcminute)

def decorr_int_time(ew_bl_len):
    """Return the decorrelation fraction due to the pre-fringe-stopped integration time.

    Computed as the angle the sky rotates through during one correlator
    integration (corr_int_time * earth_rot_speed) divided by the baseline's
    E-W angular resolution. ew_bl_len is a bare number in meters; the result
    is a bare number.
    """
    resolution = bl_resolution(ew_bl_len).to(units.arcminute)
    sky_drift = corr_int_time * earth_rot_speed
    return (sky_drift / resolution).value

def decorr_chan_width(ew_bl_len):
    """Return the decorrelation fraction due to the finite channel width.

    Computed as channel width * E-W baseline length * sin(corr_FoV_min) / c.
    ew_bl_len is a bare number in meters; the result is a bare number.
    """
    chan_width = corr_chan_width.to(1/units.s)
    sin_fov = np.sin(corr_FoV_min.to(units.rad))
    smearing = chan_width * ew_bl_len * units.m * sin_fov / const.c
    return smearing.value

def decorr_fringe_stop(ew_bl_len, fs_int_time):
    """Return the decorrelation fraction from averaging for a post-fringe-stopped time.

    ew_bl_len is the E-W baseline length as a bare number in meters.
    fs_int_time is the post-fringe-stopped integration time; the visible
    caller passes a time Quantity (corr_post_fs_int_time). Inside this
    function the parameter name hides the module-level fs_int_time() function.
    The result is a bare number.
    """
    resolution = bl_resolution(ew_bl_len).to(units.arcminute)
    sin_fov = np.sin(corr_FoV_min.radian)
    sin_lat = np.abs(np.sin(hera_latitude))
    smearing = fs_int_time * sin_fov * earth_rot_speed * sin_lat / resolution
    return smearing.value

def fs_int_time(ew_bl_len, decorr_val):
    """Return the fringe-stopped integration time, in seconds, for a decorrelation value.

    This inverts the relation used by decorr_fringe_stop: given an E-W
    baseline length (bare number, meters) and a decorrelation fraction, it
    returns the integration time as a bare number of seconds.
    """
    resolution = bl_resolution(ew_bl_len).to(units.arcminute)
    sin_fov = np.sin(corr_FoV_min.radian)
    sin_lat = np.abs(np.sin(hera_latitude))
    duration = decorr_val * resolution / sin_fov / earth_rot_speed / sin_lat
    return duration.to(units.s).value

def decorr_pre_fs(ew_bl_len):
    """Return the combined pre-fringe-stopped decorrelation fraction.

    The integration-time and channel-width contributions are combined by
    multiplying the fractions that survive each effect and subtracting the
    product from one.
    """
    kept_after_time = 1 - decorr_int_time(ew_bl_len)
    kept_after_chan = 1 - decorr_chan_width(ew_bl_len)
    return 1 - kept_after_time * kept_after_chan

def decorr_total(ew_bl_len, fs_int_time):
    """Return the total decorrelation fraction for a post-fringe-stopped integration time.

    Combines the pre-fringe-stopped decorrelation with the post-fringe-stopped
    decorrelation for fs_int_time by multiplying the surviving fractions and
    subtracting the product from one. The fs_int_time parameter hides the
    module-level fs_int_time() function inside this body.
    """
    kept_pre = 1 - decorr_pre_fs(ew_bl_len)
    kept_post = 1 - decorr_fringe_stop(ew_bl_len, fs_int_time)
    return 1 - kept_pre * kept_post

def max_int_time(ew_bl_len, max_decorr):
    """Return the maximum post-fringe-stopped integration time, in seconds.

    ew_bl_len is the E-W baseline length in meters and max_decorr is the
    maximum total decorrelation fraction allowed. Assumes fringe stopping.
    """
    # decorrelation already spent before fringe stopping
    pre_fs = decorr_pre_fs(ew_bl_len)
    # post-fringe-stopped decorrelation that brings the total to max_decorr
    remaining = 1 - (1 - max_decorr)/(1 - pre_fs)
    return fs_int_time(ew_bl_len, remaining)

Get the file containing the proposed HERA-350 layout

In [4]:
# Location of the text file holding the proposed HERA-350 layout.
# The default is the original author's local checkout of hera_mc and will not
# exist on other machines; set the HERA_350_LAYOUT environment variable to the
# path of your own copy of HERA_350.txt rather than editing this cell.
hera_txt = os.environ.get(
    'HERA_350_LAYOUT',
    '/Users/plaplant/Documents/school/penn/software/hera_mc/hera_mc/data/HERA_350.txt')
# Parse the layout into a 2-D float array with one row per antenna.
hera_bls = np.genfromtxt(hera_txt)

We now compute the total savings for the array, which is driven primarily by the E-W component of each baseline. We assume that if a given baseline has less than the maximum decorrelation level, we can continue to average until we reach it. We compute a "perfect compression" factor, which would be achievable by compressing a given baseline by the maximum amount. We also compute a "simple" compression factor, where the averaging is only done in a power-of-two fashion. (That is, we either perform no averaging, or average two consecutive time samples, or four, or eight, etc., while we are still below the maximum amount of decorrelation.) This approach allows for a more straightforward implementation, though it obviously does not achieve the maximum savings available.

In [5]:
# compute the savings for each baseline in the array
# XXX: takes ~5 minutes; might be able to be more clever, but easier to shut up and calculate
# Both accumulators sum, over baselines, the fraction of the default data rate
# that is still needed after averaging (1 == no savings for that baseline).
max_bda_data_rate = 0.
simple_bda_data_rate = 0.

# Loop-invariant: the default post-fringe-stopped integration time as a bare
# number of seconds, so it is not re-extracted for every baseline.
default_int_time = corr_post_fs_int_time.value

nants = hera_bls.shape[0]
for iant in range(nants):
    # first column is antenna name; second--fourth columns are xyz positions, in meters
    # Only the x position is needed for the E-W separation.
    xi = hera_bls[iant, 1]
    for jant in range(iant + 1, nants):
        xj = hera_bls[jant, 1]

        # To first order, E-W separation is given by delta-x value
        # Prevent against division by zero
        ew_sep = max(np.abs(xj - xi), 1e-1)

        # compute total decorrelation for this baseline length and default correlator setting
        dt = decorr_total(ew_sep, corr_post_fs_int_time)

        if dt < max_decorr:
            # we can theoretically integrate this bl in time until we hit the max_decorr
            new_int_time = max_int_time(ew_sep, max_decorr)
            max_bda_data_rate += default_int_time / new_int_time

            # also compute the max power-of-two integration factor
            fac = np.floor(np.log2(new_int_time / default_int_time))
            simple_bda_data_rate += 2.**(-fac)

            # drive home the point that the default correlator output rates are overkill for most baselines
            if fac == 0:
                print("Maximum baseline separation found")
        else:
            # no savings
            max_bda_data_rate += 1
            simple_bda_data_rate += 1

# add factor for autos; assume no compression
# note that it doesn't really matter, since these are < 1% of baselines
max_bda_data_rate += nants
simple_bda_data_rate += nants

# normalize by the number of baselines (cross-correlations plus autos)
nbls = (nants * (nants + 1)) / 2
max_bda_data_rate /= nbls
simple_bda_data_rate /= nbls

Maximum baseline separation found
Maximum baseline separation found
Maximum baseline separation found
Maximum baseline separation found
Maximum baseline separation found


In [6]:
# Report both normalized data-rate fractions: a label line, then the value.
# The first value is followed by an extra "\n" argument so that a blank line
# separates the two reports; the second value is printed on its own.
for label, rate, trailer in (
        ("Theoretic maximum baseline-dependent averaging savings for HERA-350 array: ",
         max_bda_data_rate, ("\n",)),
        ("Power-of-2 savings: ", simple_bda_data_rate, ()),
):
    print(label)
    print(rate, *trailer)

Theoretic maximum baseline-dependent averaging savings for HERA-350 array: 
0.0659164291229 

Power-of-2 savings: 
0.0929365079365


Given the data rate above, we can compare the "default" data rate of the proposed correlator with the "actual" one that includes the effect of BDA. For this calculation, we take the value corresponding to the "simple" compression scheme, which represents a more realistic value that can be achieved. The calculation above can be re-run for a different number of channels, but the compression savings are virtually the same. Thus, to convert between, e.g., 8192 channels and 4096 channels, divide the rates and volumes below by a factor of 2 (or multiply by 2 to go the other way).

In [7]:
# compute data rate for season
# three quarters of the correlator's output channels are kept
channels_to_keep = n_channels * 3. / 4.
# NOTE(review): presumably accounts for writing both sum and difference
# visibilities -- confirm; it simply doubles the data rate
sum_diff_factor = 2
bytes_per_vis = 8 * units.byte
n_polarizations = 4
obs_hrs_per_day = 12 * units.hour / units.day
days_per_season = 120 * units.day

# data rate with every baseline written at the default post-fringe-stopped cadence
naive_data_rate = (channels_to_keep * nbls * n_polarizations * bytes_per_vis
                   * sum_diff_factor / corr_post_fs_int_time)
naive_data_vol = naive_data_rate * obs_hrs_per_day * days_per_season
# scale by the power-of-two ("simple") BDA fraction computed above
# (fixes the misspelled name `simle_bda_data_rate`, which raised NameError on a fresh kernel)
bda_data_rate = simple_bda_data_rate * naive_data_rate
bda_data_vol = bda_data_rate * obs_hrs_per_day * days_per_season
print("Naive data rate:      ", naive_data_rate.to(units.Gbyte/units.s))
print("Naive season volume:  ", naive_data_vol.to(units.Pbyte), "\n")
print("BDA data rate:        ", bda_data_rate.to(units.Gbyte/units.s))
print("BDA season volume:    ", bda_data_vol.to(units.Pbyte))

Naive data rate:       2.41532928 Gbyte / s
Naive season volume:   12.5210669875 Pbyte 

BDA data rate:         0.2244722688 Gbyte / s
BDA season volume:     1.16366424146 Pbyte
