In [1]:
%load_ext autoreload
%autoreload 2
from astropy.table import Table
import numpy as np
import tqdm

# Verify that old and new normalizations are equivalent


Old implementations (legacy Stingray: one based on photon counts, one based on mean flux and variance):

In [2]:
def normalize_crossspectrum_oldstingray(unnorm_power, tseg, nbins, nphots1, nphots2, norm="none", power_type="real"):
    """
    Legacy Stingray normalization of a cross spectrum from photon counts.

    The selected component of the cross spectrum is rescaled to Leahy,
    fractional rms^2 or absolute rms^2 units, or the input is handed back
    untouched.

    Parameters
    ----------
    unnorm_power : numpy.ndarray
        The unnormalized cross spectrum.

    tseg : float
        The length of the Fourier segment, in seconds.

    nbins : int
        Number of bins in the light curve.

    nphots1 : int
        Number of photons in light curve no. 1.

    nphots2 : int
        Number of photons in light curve no. 2.

    Other parameters
    ----------------
    norm : str
        One of ``'leahy'`` (Leahy+83), ``'frac'`` (fractional rms), ``'abs'``
        (absolute rms) or ``'none'``. The comparison is case-insensitive.

    power_type : str
        One of ``'real'`` (real part), ``'all'`` (complex powers, unchanged)
        or ``'absolute'`` (absolute value).

    Returns
    -------
    power : numpy.ndarray
        The normalized power. With ``norm='none'`` the original
        ``unnorm_power`` is returned as is, whatever ``power_type`` says.

    Raises
    ------
    ValueError
        If ``power_type`` or ``norm`` is not one of the values listed above.
    AssertionError
        With ``norm='frac'``, if the geometric mean of the counts per bin is
        not positive.
    """
    # "Effective" number of photons: geometric mean of the two totals,
    # evaluated through logarithms as in the historical code.
    geom_nphots = np.float64(
        np.sqrt(np.exp(np.log(nphots1) + np.log(nphots2))))

    # Pick the component of the cross spectrum that will be rescaled.
    if power_type == "real":
        selected = unnorm_power.real
    elif power_type == "absolute":
        selected = np.absolute(unnorm_power)
    elif power_type == "all":
        selected = unnorm_power
    else:
        raise ValueError("`power_type` not recognized!")

    kind = norm.lower()

    if kind == 'none':
        # No normalization: the raw input is returned, not `selected`.
        return unnorm_power

    if kind == 'leahy':
        return selected * 2. / geom_nphots

    if kind == 'abs':
        # Geometric mean of the two count rates over the segment.
        geom_rate = np.sqrt(nphots1 * nphots2) / tseg
        return selected * 2. * geom_rate / geom_nphots

    if kind == 'frac':
        counts_per_bin_1 = nphots1 / nbins
        counts_per_bin_2 = nphots2 / nbins
        geom_counts_per_bin = np.sqrt(counts_per_bin_1 * counts_per_bin_2)

        assert geom_counts_per_bin > 0.0, "Mean count rate is <= 0. Something went wrong."

        per_bin_squared = selected / float(nbins ** 2.)
        return per_bin_squared * 2. * tseg / (geom_counts_per_bin ** 2.0)

    raise ValueError("Value for `norm` not recognized.")


def normalize_crossspectrum_gauss(
        unnorm_power, mean_flux, var, dt, N, norm="none", power_type="real"):
    """
    Legacy normalization of a cross spectrum from mean flux and variance.

    The selected component of the cross spectrum is multiplied by a scalar
    factor that depends on the requested normalization.

    Parameters
    ----------
    unnorm_power : numpy.ndarray
        The unnormalized cross spectrum.

    mean_flux : float
        The mean flux of the light curve (if a cross spectrum, the geometrical
        mean of the flux in the two channels). Only used by ``norm='frac'``,
        after multiplication by ``dt``.

    var : float
        The variance of the light curve (if a cross spectrum, the geometrical
        mean of the variance in the two channels). Only used by
        ``norm='leahy'``.

    dt : float
        The sampling time of the light curve.

    N : int
        The number of bins in the light curve.

    Other parameters
    ----------------
    norm : str
        One of ``'leahy'`` (Leahy+83), ``'frac'`` (fractional rms), ``'abs'``
        (absolute rms) or ``'none'``. The comparison is case-insensitive.

    power_type : str
        One of ``'real'`` (real part), ``'all'`` (complex powers, unchanged)
        or ``'absolute'`` (absolute value).

    Returns
    -------
    power : numpy.ndarray
        The selected component of the cross spectrum times the normalization
        factor (the factor is 1 for ``norm='none'``).

    Raises
    ------
    ValueError
        If ``power_type`` or ``norm`` is not one of the values listed above.

    Notes
    -----
    The ``'abs'`` branch multiplies by ``2 * dt / N``. The closing remarks of
    this notebook conclude that the factor should have been ``2 / (dt * N)``;
    the code is kept unchanged here because it is the historical reference
    that the new functions are compared against.

    Examples
    --------
    >>> lc_c = np.random.poisson(10000, 10000)
    >>> lc_c_var = 10000
    >>> lc = lc_c / 17.3453
    >>> lc_var = (100 / 17.3453)**2
    >>> pds_c = np.absolute(np.fft.fft(lc_c))**2
    >>> pds = np.absolute(np.fft.fft(lc))**2
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), lc_c_var, 0.1, len(lc_c), norm='leahy')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='leahy')
    >>> np.allclose(norm, norm_c)
    True
    >>> np.isclose(np.mean(norm[1:]), 2, atol=0.1)
    True
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), np.mean(lc_c), 0.1, len(lc_c), norm='frac')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='frac')
    >>> np.allclose(norm, norm_c)
    True
    >>> norm_c = normalize_crossspectrum_gauss(pds_c, np.mean(lc_c), np.mean(lc_c), 0.1, len(lc_c), norm='abs')
    >>> norm = normalize_crossspectrum_gauss(pds, np.mean(lc), lc_var, 0.1, len(lc), norm='abs')
    >>> np.allclose(norm / np.mean(lc)**2, norm_c / np.mean(lc_c)**2)
    True
    >>> np.isclose(np.mean(norm_c[2:]), 2 * np.mean(lc_c * 0.1), rtol=0.1)
    True
    """
    # Pick the component of the cross spectrum that will be rescaled.
    if power_type == "real":
        selected = unnorm_power.real
    elif power_type == "absolute":
        selected = np.absolute(unnorm_power)
    elif power_type == "all":
        selected = unnorm_power
    else:
        raise ValueError("`power_type` not recognized!")

    # Both quantities are evaluated up front, whatever `norm` is.
    two_dt_over_n = 2 * dt / N
    mean_per_bin = mean_flux * dt

    kind = norm.lower()
    if kind == 'none':
        scale = 1
    elif kind == 'abs':
        scale = two_dt_over_n
    elif kind == 'frac':
        scale = two_dt_over_n / mean_per_bin**2
    elif kind == 'leahy':
        scale = 2 / var / N
    else:
        raise ValueError("Value for `norm` not recognized.")

    return scale * selected




New implementations (one small function per normalization, plus a wrapper that dispatches to them):

In [3]:
def normalize_frac(power, dt, N, mean):
    """Rescale unnormalized powers to fractional rms normalization.

    The powers are multiplied by ``2 * dt / (mean**2 * N)``.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized power(s).
    dt : float
        The sampling time of the light curve.
    N : int
        The number of bins in the light curve.
    mean : float
        The mean of the light curve, per bin.

    Returns
    -------
    float or numpy.ndarray
        The power(s) in fractional rms normalization.

    Examples
    --------
    The example relies on ``fft`` and ``poisson_level`` being in scope; they
    are not imported in the notebook cells shown here.

    >>> mean = var = 1000000
    >>> N = 1000000
    >>> dt = 0.2
    >>> meanrate = mean / dt
    >>> lc = np.random.poisson(mean, N)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_frac(pds, dt, lc.size, mean)
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="frac", meanrate=meanrate), rtol=0.01)
    True
    """
    # Derivation: this is the Leahy normalization divided by the mean rate.
    #     Nph      = mean * N
    #     meanrate = mean / dt
    #     factor   = 2 / (Nph * meanrate) = 2 * dt / (mean**2 * N)
    numerator = power * 2. * dt
    denominator = mean ** 2 * N
    return numerator / denominator


def normalize_abs(power, dt, N):
    """Rescale unnormalized powers to absolute rms normalization.

    The powers are multiplied by ``2 / (N * dt)``.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized power(s).
    dt : float
        The sampling time of the light curve.
    N : int
        The number of bins in the light curve.

    Returns
    -------
    float or numpy.ndarray
        The power(s) in absolute rms normalization.

    Examples
    --------
    The example relies on ``fft`` and ``poisson_level`` being in scope; they
    are not imported in the notebook cells shown here.

    >>> mean = var = 100000
    >>> N = 1000000
    >>> dt = 0.2
    >>> meanrate = mean / dt
    >>> lc = np.random.poisson(mean, N)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_abs(pds, dt, lc.size)
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="abs", meanrate=meanrate), rtol=0.01)
    True
    """
    # Derivation: fractional rms times meanrate**2, i.e.
    #     Nph      = mean * N
    #     meanrate = mean / dt
    #     factor   = 2 * dt / (mean**2 * N) * mean**2 / dt**2 = 2 / (N * dt)
    doubled = power * 2.
    per_bin = doubled / N
    return per_bin / dt


def normalize_leahy_from_variance(power, variance, N):
    """Leahy+83 normalization computed from the variance of the light curve.

    The powers are multiplied by ``2 / (variance * N)``.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized power(s).
    variance : float
        The variance of the light curve.
    N : int
        The number of bins in the light curve.

    Returns
    -------
    float or numpy.ndarray
        The power(s) in Leahy normalization.

    Examples
    --------
    The example relies on ``fft`` and ``poisson_level`` being in scope; they
    are not imported in the notebook cells shown here.

    >>> mean = var = 100000.
    >>> N = 1000000
    >>> lc = np.random.poisson(mean, N).astype(float)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_leahy_from_variance(pds, var, lc.size)
    >>> np.isclose(pdsnorm[0], 2 * np.sum(lc), rtol=0.01)
    True
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="leahy"), rtol=0.01)
    True
    """
    doubled = power * 2.
    total_variance = variance * N
    return doubled / total_variance


def normalize_leahy_poisson(power, Nph):
    """Leahy+83 normalization computed from the total number of photons.

    The powers are multiplied by ``2 / Nph``.

    Parameters
    ----------
    power : float or numpy.ndarray
        The unnormalized power(s).
    Nph : float
        The total number of photons in the light curve.

    Returns
    -------
    float or numpy.ndarray
        The power(s) in Leahy normalization.

    Examples
    --------
    The example relies on ``fft`` and ``poisson_level`` being in scope; they
    are not imported in the notebook cells shown here.

    >>> mean = var = 100000.
    >>> N = 1000000
    >>> lc = np.random.poisson(mean, N).astype(float)
    >>> pds = np.abs(fft(lc))**2
    >>> pdsnorm = normalize_leahy_poisson(pds, np.sum(lc))
    >>> np.isclose(pdsnorm[0], 2 * np.sum(lc), rtol=0.01)
    True
    >>> np.isclose(pdsnorm[1:N//2].mean(), poisson_level(norm="leahy"), rtol=0.01)
    True
    """
    doubled = power * 2.
    return doubled / Nph


def normalize_crossspectrum(unnorm_power, dt, N, mean, variance=None, norm="abs", power_type="all"):
    """Normalize a cross spectrum by dispatching to the ``normalize_*`` helpers.

    Parameters
    ----------
    unnorm_power : float or numpy.ndarray
        The unnormalized cross spectrum.
    dt : float
        The sampling time of the light curve.
    N : int
        The number of bins in the light curve.
    mean : float
        The mean of the light curve, per bin.
    variance : float or None, optional
        The variance of the light curve. Only consulted for ``norm='leahy'``:
        when given, ``normalize_leahy_from_variance`` is used; otherwise
        ``normalize_leahy_poisson`` is called with ``N * mean`` photons.
    norm : str, optional
        One of ``'leahy'``, ``'frac'``, ``'abs'`` or ``'none'``
        (case-insensitive, like the legacy functions in this notebook).
    power_type : str, optional
        One of ``'all'`` (complex powers, unchanged), ``'real'`` (real part),
        ``'abs'`` or ``'absolute'`` (absolute value); case-insensitive.

    Returns
    -------
    float or numpy.ndarray
        The normalized power, reduced according to ``power_type``.

    Raises
    ------
    ValueError
        If ``norm`` or ``power_type`` is not recognized.
    """
    # The legacy functions compare `norm.lower()`; do the same here so that
    # e.g. "Leahy" is accepted. Non-string values are left untouched and end
    # up in the ValueError branches below.
    norm_key = norm.lower() if isinstance(norm, str) else norm
    type_key = power_type.lower() if isinstance(power_type, str) else power_type

    if norm_key == "leahy" and variance is not None:
        pds = normalize_leahy_from_variance(unnorm_power, variance, N)
    elif norm_key == "leahy":
        pds = normalize_leahy_poisson(unnorm_power, N * mean)
    elif norm_key == "frac":
        pds = normalize_frac(unnorm_power, dt, N, mean)
    elif norm_key == "abs":
        pds = normalize_abs(unnorm_power, dt, N)
    elif norm_key == "none":
        pds = unnorm_power
    else:
        raise ValueError("Unknown value for the norm")

    if type_key == "all":
        return pds
    if type_key == "real":
        return pds.real
    if type_key in ("abs", "absolute"):
        return np.abs(pds)

    # A misspelled power_type used to fall through and silently return the
    # complex powers; the legacy functions raise instead, so do the same.
    raise ValueError("`power_type` not recognized!")


In [4]:
import tqdm

# Fixed seed so that "Restart & Run All" reproduces the same random trials.
SEED = 1234
rng = np.random.default_rng(SEED)
ntrial = 100

rows = []
for _ in tqdm.tqdm(range(ntrial)):
    # M is only recorded in the table; none of the normalizations use it.
    M = int(rng.integers(1, 1000))
    N = int(rng.integers(1, 10000))

    dt = float(rng.uniform(0.0001, 1))
    # Draw at least one photon: the old photon-count code takes log(nphots)
    # and divides by the mean counts, so 0 would yield inf/nan results.
    nphots1 = int(rng.integers(1, 100_000_000))
    nphots2 = nphots1  # same light curve in both channels

    mean1 = nphots1 / N
    mean2 = nphots2 / N
    mean = np.sqrt(mean1 * mean2)

    # With a power of exactly 1 the comparison isolates the scalar
    # normalization factor: 1 is its own real part and absolute value, so
    # `power_type` makes no difference to the numbers compared here.
    unnorm_power = 1

    # Half of the trials provide a variance (old "gauss" code path), the
    # other half do not (old photon-count code path).
    variance = float(rng.uniform(0, 1000)) if rng.random() < 0.5 else None
    norm = str(rng.choice(["abs", "frac", "leahy", "none"]))
    power_type = str(rng.choice(["all", "real", "absolute"]))

    tseg = dt * N

    if variance is not None:
        # The old function takes a flux (per unit time), hence mean / dt.
        old_power = normalize_crossspectrum_gauss(
            unnorm_power, mean / dt, variance, dt, N, norm=norm, power_type=power_type)
    else:
        old_power = normalize_crossspectrum_oldstingray(
            unnorm_power, tseg, N, nphots1, nphots2, norm=norm, power_type=power_type)

    new_power = normalize_crossspectrum(unnorm_power, dt, N, mean, variance=variance, norm=norm, power_type=power_type)

    is_close = np.isclose(new_power, old_power, rtol=0.00001)
    if not is_close:
        # Only mismatches are reported; matching trials just go to the table.
        msg = (f"power={unnorm_power:g}, dt={dt:g}, M={M}, N={N}, mean1={mean1:g}, mean2={mean2:g}, "
               f"variance={variance}, norm={norm}, power_type={power_type}\n"
               f"old_power={old_power}, new_power={new_power}")
        print(f"BAD: {msg}")

    # The table column is numeric, so "no variance" is stored as 0.
    variance_for_table = 0.0 if variance is None else variance
    rows.append([old_power, new_power, dt, M, N, mean1, mean2, mean, variance_for_table, norm, power_type, is_close])
table = Table(rows=rows, names=["old", "new", "dt", "M", "N", "mean1", "mean2", "mean", "variance", "norm", "power_type", "good?"])

100%|██████████| 100/100 [00:00<00:00, 4963.97it/s]

BAD: power=1, dt=0.898527, M=561, N=1320, mean1=38383.5, mean2=38383.5, variance=976.9199352745533, norm=abs, power_type=absolute
old_power=0.0013614051652651507, new_power=0.0016862607638327997
BAD: power=1, dt=0.66665, M=627, N=7181, mean1=8692.88, mean2=8692.88, variance=781.5211894773033, norm=abs, power_type=all
old_power=0.00018567044354993417, new_power=0.0004177797281562002
BAD: power=1, dt=0.615666, M=65, N=1647, mean1=31771.1, mean2=31771.1, variance=174.09922389759558, norm=abs, power_type=real
old_power=0.0007476213964438318, new_power=0.0019723821833813847
BAD: power=1, dt=0.0727525, M=748, N=199, mean1=242526, mean2=242526, variance=990.0484899918553, norm=abs, power_type=real
old_power=0.0007311813190525624, new_power=0.1381429580904335
BAD: power=1, dt=0.444013, M=703, N=1191, mean1=22037.5, mean2=22037.5, variance=61.77808527096529, norm=abs, power_type=all
old_power=0.0007456144979623862, new_power=0.003782005223872418
BAD: power=1, dt=0.507541, M=65, N=4706, mean1=11




In [5]:
# Display the old-vs-new comparison table built by the loop above
# (a bare last expression triggers the rich display).
table

old,new,dt,M,N,mean1,mean2,mean,variance,norm,power_type,good?
float64,float64,float64,int64,int64,float64,float64,float64,float64,str5,str8,bool
2.9806062812510466e-07,2.980606281251046e-07,0.776995735463288,657,9665,1817.767511639938,1817.767511639938,1817.767511639938,694.2622076469609,leahy,all,True
0.0013614051652651507,0.0016862607638327997,0.8985274090749995,561,1320,38383.482575757575,38383.482575757575,38383.482575757575,976.9199352745533,abs,absolute,False
0.001568906862221712,0.0015689068622217128,0.4386692813134602,193,2906,12043.213695801789,12043.213695801789,12043.213695801789,0.0,abs,all,True
5.411572785735735e-13,5.411572785735735e-13,0.823649888800288,324,3184,30919.896984924624,30919.896984924624,30919.896984924624,0.0,frac,all,True
5.879977513836387e-13,5.879977513836387e-13,0.5344521891262215,975,4736,19591.863597972973,19591.863597972973,19591.863597972973,12.24575026381891,frac,absolute,True
1.196458833686626e-06,1.1964588336866258e-06,0.41014302527390867,815,3578,11280.519005030743,11280.519005030743,11280.519005030743,467.18823641557026,leahy,absolute,True
4.427743193905621e-08,4.427743193905619e-08,0.19778072978322858,57,9305,4854.351209027404,4854.351209027404,4854.351209027404,0.0,leahy,all,True
1.0,1.0,0.8029256477987071,470,3488,27168.93377293578,27168.93377293578,27168.93377293578,0.0,none,all,True
1.0,1.0,0.8624865359111966,287,2081,2015.5689572321,2015.5689572321,2015.5689572321,0.0,none,absolute,True
0.00018567044354993417,0.0004177797281562002,0.6666497275660387,627,7181,8692.882328366522,8692.882328366522,8692.882328366522,781.5211894773033,abs,all,False


In [6]:
# Keep only the trials where the old and new normalizations disagree
# (rows whose "good?" flag is False), then display them.
table_bad = table[~table["good?"]]
table_bad

old,new,dt,M,N,mean1,mean2,mean,variance,norm,power_type,good?
float64,float64,float64,int64,int64,float64,float64,float64,float64,str5,str8,bool
0.0013614051652651,0.0016862607638327,0.8985274090749995,561,1320,38383.482575757575,38383.482575757575,38383.482575757575,976.9199352745532,abs,absolute,False
0.0001856704435499,0.0004177797281562,0.6666497275660387,627,7181,8692.882328366522,8692.882328366522,8692.882328366522,781.5211894773033,abs,all,False
0.0007476213964438,0.0019723821833813,0.6156662199714955,65,1647,31771.092896174865,31771.092896174865,31771.092896174865,174.09922389759558,abs,real,False
0.0007311813190525,0.1381429580904335,0.0727525412457299,748,199,242525.6783919598,242525.6783919598,242525.6783919598,990.0484899918551,abs,real,False
0.0007456144979623,0.0037820052238724,0.444013433536601,703,1191,22037.46011754828,22037.46011754828,22037.46011754828,61.77808527096529,abs,all,False
0.0002156996261548,0.0008373494767162,0.5075412203423726,65,4706,11359.3385040374,11359.3385040374,11359.3385040374,595.1883648463114,abs,absolute,False
0.0002268667634864,0.0010749272422884,0.4594051960600497,914,4050,12130.261728395062,12130.261728395062,12130.261728395062,394.9085770957451,abs,real,False
0.0021685080339119,0.0055790887041001,0.6234460597496911,664,575,44104.44695652174,44104.44695652174,44104.44695652174,497.5281648889309,abs,absolute,False
0.0002937876832272,0.0003148493476579,0.9659739024511068,330,6576,5020.665145985401,5020.665145985401,5020.665145985401,268.4617736720939,abs,all,False
0.0019560451111074,0.0064515539774099,0.5506266987767575,523,563,153667.86856127885,153667.86856127885,153667.86856127885,485.0786564351558,abs,real,False


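Looking at the actual normalizations, I realized that the _old_ Stingray formula was wrong in the Gaussian case (`normalize_crossspectrum_gauss`): all the mismatches listed above have `norm="abs"` and a variance specified.
For the absolute rms normalization, the factor should have been

$$\frac{2}{dt\,N}$$

instead of

$$\frac{2\,dt}{N}$$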
