In [1]:
%load_ext autoreload
%autoreload 2
from astropy.table import Table
import numpy as np
import tqdm

# Verify that old and new normalizations are equivalent


Old implementations (the legacy Stingray photon-count normalization and its Gaussian variant):

In [2]:
def normalize_crossspectrum_oldstingray(unnorm_power, tseg, nbins, nphots1, nphots2, norm="none", power_type="real"):
    """
    Legacy Stingray normalization of a cross spectrum from photon counts.

    Scales the selected component of the cross spectrum to Leahy,
    fractional rms^2 or absolute rms^2 units, or hands the input back
    untouched.

    Parameters
    ----------
    unnorm_power: numpy.ndarray
        The unnormalized cross spectrum.

    tseg: float
        The length of the Fourier segment, in seconds.

    nbins : int
        Number of bins in the light curve

    nphots1 : int
        Number of photons in the light curve no. 1

    nphots2 : int
        Number of photons in the light curve no. 2

    Other parameters
    ----------------
    norm : str
        One of `'leahy'` (Leahy+83), `'frac'` (fractional rms), `'abs'`
        (absolute rms) or `'none'`. The comparison is case-insensitive.

    power_type : str
        One of `'real'` (real part), `'all'` (all complex powers),
        `'absolute'` (absolute value)

    Returns
    -------
    power: numpy.ndarray
        The normalized power. With `norm='none'` the input is returned
        unchanged: `power_type` is validated but not applied.

    Raises
    ------
    ValueError
        If `power_type` or `norm` is not one of the accepted values.
    AssertionError
        In the `'frac'` case, if the geometric mean of the counts per bin
        is not positive.
    """
    # Geometric mean of the two photon counts, evaluated through logarithms.
    geom_nphots = np.float64(
        np.sqrt(np.exp(np.log(nphots1) + np.log(nphots2))))

    # Select which component of the complex powers gets normalized.
    if power_type == "real":
        selected = unnorm_power.real
    elif power_type == "absolute":
        selected = np.absolute(unnorm_power)
    elif power_type == "all":
        selected = unnorm_power
    else:
        raise ValueError("`power_type` not recognized!")

    norm_key = norm.lower()

    if norm_key == 'none':
        # No normalization: the raw input goes back, not the selection.
        return unnorm_power

    if norm_key == 'leahy':
        return selected * 2. / geom_nphots

    if norm_key == 'abs':
        # Geometric mean of the two count rates (counts per second).
        geom_rate = np.sqrt(nphots1 * nphots2) / tseg
        return selected * 2. * geom_rate / geom_nphots

    if norm_key == 'frac':
        counts_per_bin1 = nphots1 / nbins
        counts_per_bin2 = nphots2 / nbins
        geom_counts_per_bin = np.sqrt(counts_per_bin1 * counts_per_bin2)

        assert geom_counts_per_bin > 0.0, \
            "Mean count rate is <= 0. Something went wrong."

        scaled = selected / float(nbins ** 2.)
        return scaled * 2. * tseg / (geom_counts_per_bin ** 2.0)

    raise ValueError("Value for `norm` not recognized.")


def normalize_crossspectrum_gauss(
        unnorm_power, mean_flux, var, dt, N, norm="none", power_type="real"):
    """
    Legacy normalization of a cross spectrum from the mean and variance.

    Multiplies the selected component of the cross spectrum by a scalar
    factor that depends on `norm`:

    * `'leahy'`: ``2 / var / N``
    * `'frac'`: ``(2 * dt / N) / (mean_flux * dt)**2``
    * `'abs'`: ``2 * dt / N``
    * `'none'`: ``1``

    This is the legacy implementation, kept as it was for comparison. Its
    `'abs'` factor differs from the ``2 / (N * dt)`` applied by
    `normalize_abs`.

    Parameters
    ----------
    unnorm_power: numpy.ndarray
        The unnormalized cross spectrum.

    mean_flux: float
        The mean flux of the light curve (if a cross spectrum, the geometrical
        mean of the flux in the two channels). It is multiplied by `dt`
        before entering the `'frac'` factor.

    var: float
        The variance of the light curve (if a cross spectrum, the geometrical
        mean of the variance in the two channels)

    dt: float
        The sampling time of the light curve

    N: int
        The number of bins in the light curve

    Other parameters
    ----------------
    norm : str
        One of `'leahy'` (Leahy+83), `'frac'` (fractional rms), `'abs'`
        (absolute rms) or `'none'`. The comparison is case-insensitive.

    power_type : str
        One of `'real'` (real part), `'all'` (all complex powers),
        `'absolute'` (absolute value)

    Returns
    -------
    power: numpy.ndarray
        The selected component of the cross spectrum times the factor above.

    Raises
    ------
    ValueError
        If `power_type` or `norm` is not one of the accepted values.

    Examples
    --------
    >>> lc_c = np.random.poisson(10000, 10000)
    >>> lc_c_var = 10000
    >>> lc = lc_c / 17.3453
    >>> lc_var = (100 / 17.3453)**2
    >>> pds_c = np.absolute(np.fft.fft(lc_c))**2
    >>> pds = np.absolute(np.fft.fft(lc))**2
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), lc_c_var, 0.1, len(lc_c), norm='leahy')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='leahy')
    >>> np.allclose(norm, norm_c)
    True
    >>> np.isclose(np.mean(norm[1:]), 2, atol=0.1)
    True
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), np.mean(lc_c), 0.1, len(lc_c), norm='frac')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='frac')
    >>> np.allclose(norm, norm_c)
    True
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), np.mean(lc_c), 0.1, len(lc_c), norm='abs')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='abs')
    >>> np.allclose(norm / np.mean(lc)**2, norm_c / np.mean(lc_c)**2)
    True
    >>> np.isclose(np.mean(norm_c[2:]), 2 * np.mean(lc_c * 0.1), rtol=0.1)
    True
    """
    # Select which component of the complex powers gets normalized.
    if power_type == "real":
        selected = unnorm_power.real
    elif power_type == "absolute":
        selected = np.absolute(unnorm_power)
    elif power_type == "all":
        selected = unnorm_power
    else:
        raise ValueError("`power_type` not recognized!")

    # Both quantities are evaluated up front, whichever norm is requested.
    per_bin_factor = 2 * dt / N
    mean_per_bin = mean_flux * dt

    norm_key = norm.lower()
    if norm_key == 'none':
        scale = 1
    elif norm_key == 'abs':
        scale = per_bin_factor
    elif norm_key == 'frac':
        scale = per_bin_factor / mean_per_bin**2
    elif norm_key == 'leahy':
        scale = 2 / var / N
    else:
        raise ValueError("Value for `norm` not recognized.")

    return scale * selected




New implementations (one function per normalization, plus a wrapper that dispatches on `norm`):

In [3]:
def normalize_frac(power, dt, N, mean):
    """Apply the fractional rms normalization to unnormalized powers.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized powers.
    dt : float
        The sampling time of the light curve.
    N : int
        The number of bins in the light curve.
    mean : float
        The mean counts per bin of the light curve.

    Returns
    -------
    float or numpy.ndarray
        ``power * 2 * dt / (mean**2 * N)``

    Examples
    --------
    >>> mean = var = 1000000
    >>> N = 1000000
    >>> dt = 0.2
    >>> meanrate = mean / dt
    >>> lc = np.random.poisson(mean, N)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_frac(pds, dt, lc.size, mean)
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="frac", meanrate=meanrate), rtol=0.01)
    True
    """
    # Derivation: the fractional rms normalization is Leahy / meanrate.
    #     Nph = mean * N
    #     meanrate = mean / dt
    #     Nph / meanrate = (mean * N) / (mean / dt) = N * dt
    #     norm = 2 / (Nph * meanrate) = 2 * dt / (mean**2 * N)
    denominator = mean ** 2 * N
    return power * 2. * dt / denominator


def normalize_abs(power, dt, N):
    """Absolute rms normalization, from the variance of the lc.

    Examples
    --------
    >>> mean = var = 100000
    >>> N = 1000000
    >>> dt = 0.2
    >>> meanrate = mean / dt
    >>> lc = np.random.poisson(mean, N)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_abs(pds, dt, lc.size)
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="abs", meanrate=meanrate), rtol=0.01)
    True
    """
    #     It's frac * meanrate**2; Leahy / meanrate * meanrate**2
    #     Nph = mean * N
    #     meanrate = mean / dt
    #     norm = 2 / (Nph * meanrate) * meanrate**2 = 2 * dt / (mean**2 * N) * mean**2 / dt**2

    return power * 2. / N / dt


def normalize_leahy_from_variance(power, variance, N):
    """Leahy+83 normalization, computed from the variance of the lc.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized powers.
    variance : float
        The variance of the light curve.
    N : int
        The number of bins in the light curve.

    Returns
    -------
    float or numpy.ndarray
        ``power * 2 / (variance * N)``

    Examples
    --------
    >>> mean = var = 100000.
    >>> N = 1000000
    >>> lc = np.random.poisson(mean, N).astype(float)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_leahy_from_variance(pds, var, lc.size)
    >>> np.isclose(pdsnorm[0], 2 * np.sum(lc), rtol=0.01)
    True
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="leahy"), rtol=0.01)
    True
    """
    summed_variance = variance * N
    return power * 2. / summed_variance


def normalize_leahy_poisson(power, Nph):
    """Leahy+83 normalization, computed from the number of photons.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized powers.
    Nph : float
        The number of photons; the example below passes the sum of the
        light curve.

    Returns
    -------
    float or numpy.ndarray
        ``power * 2 / Nph``

    Examples
    --------
    >>> mean = var = 100000.
    >>> N = 1000000
    >>> lc = np.random.poisson(mean, N).astype(float)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_leahy_poisson(pds, np.sum(lc))
    >>> np.isclose(pdsnorm[0], 2 * np.sum(lc), rtol=0.01)
    True
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="leahy"), rtol=0.01)
    True
    """
    doubled = power * 2.
    return doubled / Nph


def normalize_crossspectrum(unnorm_power, dt, N, mean, variance=None, norm="abs", power_type="all"):
    """Wrapper around all the normalize_NORM methods.

    Parameters
    ----------
    unnorm_power : float or numpy.ndarray
        The unnormalized powers.
    dt : float
        The sampling time of the light curve.
    N : int
        The number of bins in the light curve.
    mean : float
        The mean counts per bin; ``N * mean`` is used as the number of
        photons for the Poisson Leahy normalization.
    variance : float, optional
        The variance of the light curve. Only used with `norm='leahy'`,
        where it selects `normalize_leahy_from_variance` over
        `normalize_leahy_poisson`.
    norm : str
        One of `'leahy'`, `'frac'`, `'abs'`, `'none'`. The comparison is
        case-insensitive, like in the legacy normalization functions.
    power_type : str
        One of `'all'` (complex powers as they are), `'real'` (real part),
        `'abs'` or `'absolute'` (absolute value).

    Returns
    -------
    float or numpy.ndarray
        The normalized powers, reduced according to `power_type`.

    Raises
    ------
    ValueError
        If `norm` or `power_type` is not one of the accepted values.
    """
    # The legacy functions lower-case `norm` before comparing; do the same so
    # that e.g. "Leahy" is accepted here too. Non-string values are left
    # alone and end up in the ValueError below.
    norm_key = norm.lower() if isinstance(norm, str) else norm

    if norm_key == "leahy" and variance is not None:
        pds = normalize_leahy_from_variance(unnorm_power, variance, N)
    elif norm_key == "leahy":
        pds = normalize_leahy_poisson(unnorm_power, N * mean)
    elif norm_key == "frac":
        pds = normalize_frac(unnorm_power, dt, N, mean)
    elif norm_key == "abs":
        pds = normalize_abs(unnorm_power, dt, N)
    elif norm_key == "none":
        pds = unnorm_power
    else:
        raise ValueError("Unknown value for the norm")

    if power_type == "all":
        pass
    elif power_type == "real":
        pds = pds.real
    elif power_type in ("abs", "absolute"):
        pds = np.abs(pds)
    else:
        # An unknown value used to fall through and return the complex
        # powers silently; fail loudly, as the legacy functions do.
        raise ValueError("`power_type` not recognized!")

    return pds


In [4]:
# Randomized comparison of the legacy and the new normalization functions.
# `tqdm` and `np` come from the import cell at the top of the notebook.
SEED = 42
ntrial = 100

# Seeded generator, re-created here so that rerunning this cell (or the whole
# notebook) reproduces exactly the same trials.
rng = np.random.default_rng(SEED)

rows = []
for _ in tqdm.tqdm(range(ntrial)):
    # M is only recorded in the table; none of the normalizations uses it.
    M = int(rng.integers(1, 1000))
    N = int(rng.integers(1, 10000))

    dt = rng.uniform(0.0001, 1)
    # At least one photon: with zero counts the legacy 'frac' branch trips
    # its `actual_mean > 0` assertion and the other branches divide by zero.
    nphots1 = int(rng.integers(1, 100_000_000))
    # Same counts in both channels; draw independently to test unequal ones.
    nphots2 = nphots1

    mean1 = nphots1 / N
    mean2 = nphots2 / N
    mean = np.sqrt(mean1 * mean2)

    unnorm_power = 1

    # Half of the trials have no variance (Poisson path), half have one
    # (Gaussian path).
    variance = rng.uniform(0, 1000) if rng.random() < 0.5 else None
    norm = str(rng.choice(["abs", "frac", "leahy", "none"]))
    power_type = str(rng.choice(["all", "real", "absolute"]))

    tseg = dt * N

    if variance is not None:
        # The Gaussian function multiplies its mean by dt internally, so it
        # is given mean / dt.
        old_power = normalize_crossspectrum_gauss(
            unnorm_power, mean / dt, variance, dt, N, norm=norm, power_type=power_type)
    else:
        old_power = normalize_crossspectrum_oldstingray(
            unnorm_power, tseg, N, nphots1, nphots2, norm=norm, power_type=power_type)

    new_power = normalize_crossspectrum(unnorm_power, dt, N, mean, variance=variance, norm=norm, power_type=power_type)

    is_close = np.isclose(new_power, old_power, rtol=0.00001)
    if not is_close:
        # Only disagreements are reported; agreeing trials stay silent.
        print(f"BAD: power={unnorm_power:g}, dt={dt:g}, M={M}, N={N}, mean1={mean1:g}, mean2={mean2:g}, "
              f"variance={variance}, norm={norm}, power_type={power_type}\n"
              f"old_power={old_power}, new_power={new_power}")

    # The table column must be numeric: a missing variance is stored as 0.
    variance = 0 if variance is None else variance
    rows.append([old_power, new_power, dt, M, N, mean1, mean2, mean, variance, norm, power_type, is_close])

table = Table(rows=rows, names=["old", "new", "dt", "M", "N", "mean1", "mean2", "mean", "variance", "norm", "power_type", "good?"])

100%|██████████| 100/100 [00:00<00:00, 8812.86it/s]

BAD: power=1, dt=0.41152, M=579, N=5015, mean1=1496.79, mean2=1496.79, variance=659.559661367017, norm=abs, power_type=real
old_power=0.0001641158088278564, new_power=0.0009690980041501762
BAD: power=1, dt=0.0439776, M=837, N=9732, mean1=1108.22, mean2=1108.22, variance=236.5161825487937, norm=abs, power_type=all
old_power=9.037722911601215e-06, new_power=0.0046730106272383715
BAD: power=1, dt=0.0677722, M=422, N=9557, mean1=3509.04, mean2=3509.04, variance=391.0115597135558, norm=abs, power_type=all
old_power=1.4182743379320322e-05, new_power=0.0030878526959168817
BAD: power=1, dt=0.0890723, M=207, N=589, mean1=137001, mean2=137001, variance=261.13462201397186, norm=abs, power_type=absolute
old_power=0.00030245259853935673, new_power=0.03812168440098623
BAD: power=1, dt=0.749326, M=381, N=5937, mean1=1837.55, mean2=1837.55, variance=883.000270186624, norm=abs, power_type=real
old_power=0.0002524257595454147, new_power=0.00044956471949520073
BAD: power=1, dt=0.0731319, M=336, N=7725, m




In [5]:
# Show all trials: old and new powers, the drawn inputs and the `good?` flag.
table

old,new,dt,M,N,mean1,mean2,mean,variance,norm,power_type,good?
float64,float64,float64,int32,int32,float64,float64,float64,float64,str5,str8,bool
8.220336924483433e-08,8.220336924483423e-08,0.6911600263710839,848,1721,14137.073213248112,14137.073213248112,14137.073213248112,0.0,leahy,real,True
4.0781736363112174e-13,4.0781736363112174e-13,0.6535791809803784,142,693,68008.72438672438,68008.72438672438,68008.72438672438,0.0,frac,absolute,True
5.307517296326984e-07,5.307517296326984e-07,0.7907856291475417,94,5280,18079.755492424243,18079.755492424243,18079.755492424243,713.6818546969507,leahy,all,True
1.0,1.0,0.34493473922039186,425,87,296742.0689655172,296742.0689655172,296742.0689655172,368.1315911382821,none,absolute,True
0.0001641158088278564,0.0009690980041501762,0.41152039063584994,579,5015,1496.7928215353938,1496.7928215353938,1496.7928215353938,659.559661367017,abs,real,False
3.2043109108482003e-07,3.2043109108482e-07,0.23297630026581714,903,8804,10879.237505679237,10879.237505679237,10879.237505679237,708.9495206422113,leahy,all,True
2.1477822464163293e-12,2.1477822464163293e-12,0.5026503226301102,627,8658,7352.652922152922,7352.652922152922,7352.652922152922,0.0,frac,absolute,True
4.053056599286416e-13,4.053056599286416e-13,0.8962715648380795,521,1462,55000.89876880985,55000.89876880985,55000.89876880985,300.40166755370615,frac,real,True
9.037722911601215e-06,0.0046730106272383715,0.04397755968785151,837,9732,1108.2200986436499,1108.2200986436499,1108.2200986436499,236.5161825487937,abs,all,False
3.1582863643512836e-07,3.1582863643512847e-07,0.5980381176374163,399,6398,989.769928102532,989.769928102532,989.769928102532,0.0,leahy,real,True


In [6]:
# Keep only the trials where the old and new powers disagree.
mismatch_mask = ~table["good?"]
table_bad = table[mismatch_mask]
table_bad

old,new,dt,M,N,mean1,mean2,mean,variance,norm,power_type,good?
float64,float64,float64,int32,int32,float64,float64,float64,float64,str5,str8,bool
0.0001641158088278,0.0009690980041501,0.4115203906358499,579,5015,1496.7928215353938,1496.7928215353938,1496.7928215353938,659.559661367017,abs,real,False
9.037722911601215e-06,0.0046730106272383,0.0439775596878515,837,9732,1108.22009864365,1108.22009864365,1108.22009864365,236.5161825487937,abs,all,False
1.4182743379320322e-05,0.0030878526959168,0.0677722392380821,422,9557,3509.0438422098987,3509.0438422098987,3509.0438422098987,391.0115597135558,abs,all,False
0.0003024525985393,0.0381216844009862,0.0890722902698405,207,589,137001.28692699492,137001.28692699492,137001.28692699492,261.13462201397186,abs,absolute,False
0.0002524257595454,0.0004495647194952,0.7493258672105635,381,5937,1837.5546572342932,1837.5546572342932,1837.5546572342932,883.000270186624,abs,real,False
1.8933831503744032e-05,0.0035401731769945,0.0731319241832113,336,7725,5260.163754045308,5260.163754045308,5260.163754045308,620.9235196924246,abs,real,False
4.6247661542309925e-05,0.0010433026560555,0.2105424791713659,964,9105,5424.89346512905,5424.89346512905,5424.89346512905,223.88216609762767,abs,absolute,False
0.0001513911294146,0.0004917582030106,0.5548484893046784,64,7330,3322.2129604365623,3322.2129604365623,3322.2129604365623,75.80389174687086,abs,real,False
0.0005109831796905,0.0022077645053628,0.4810906636786438,617,1883,16439.10408921933,16439.10408921933,16439.10408921933,668.7075947699643,abs,real,False
0.0010219896572744,0.0014411164511676,0.8421194775941847,980,1648,17005.509708737864,17005.509708737864,17005.509708737864,904.3703869314973,abs,all,False


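All the mismatches listed above have `norm="abs"` and a variance set, i.e. they come from `normalize_crossspectrum_gauss`. Looking at the actual normalizations, I realized that the _old_ stingray formula was wrong in the Gaussian case.
For the absolute rms normalization, the normalization factor should have been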

$$\frac{2}{dt\,N}$$

instead of 

$$\frac{2 dt}{N}$$
