
# Student-t vs Gaussian Likelihood — GWOSC Demo (PSD log–log, Residual Histograms, CBC Injection)

This notebook compares the **standard Gaussian** likelihood with a **Student-t (heavy-tailed)** generalisation on **real LIGO data** from **GWOSC**. It includes:

1. **Welch PSD** plots on **log–log** axes (the customary presentation in gravitational-wave data analysis).
2. **Histograms of whitened residuals** with overlays of **Normal** and **Student-t** PDFs.
3. A **CBC injection** (via **PyCBC**) and comparison of **ΔlogL = logL(h=template) − logL(h=0)** under Gaussian vs Student-t.
4. A **non-Gaussian noise** example (impulsive spikes) to show Student-t robustness.

> Requirements: `gwpy`, `pycbc`, `numpy`, `scipy`, `matplotlib`  
> Use the install cell below if needed.


In [None]:

# If needed:
# %pip install -q gwpy pycbc numpy scipy matplotlib


In [None]:

import numpy as np
import matplotlib.pyplot as plt

from gwpy.timeseries import TimeSeries
from scipy.signal import welch
from scipy.signal.windows import tukey
from scipy.special import gammaln, erfinv, gamma
from scipy.stats import t as student_t

# CBC injection (time-domain) via PyCBC
from pycbc.waveform import get_td_waveform


In [None]:

# ================= Configuration =================
# Every tunable of the notebook lives in this cell.

# --- Data selection: SPAN_SEC seconds centred on EVENT_GPS ---
EVENT_GPS = 1126259462.4
SPAN_SEC = 32.0
IFO = "L1"

# --- Preprocessing applied right after the fetch ---
HIGHPASS_HZ = 20.0     # high-pass corner frequency [Hz]
RESAMPLE_HZ = 2048.0   # target sample rate [Hz]; set None to keep native

# --- Segmentation used for the likelihood evaluation ---
SEG_LEN_S = 8.0            # segment length [s]
OVERLAP_FR = 0.5           # fractional overlap between consecutive segments
WINDOW_TUKEY_ALPHA = 0.1   # Tukey taper parameter

# --- Student-t shapes to test (ν → ∞ ≡ Gaussian) ---
NU_LIST = [6.0, 10.0, 30.0, np.inf]
NU_OVERLAY = 6.0   # ν of the Student-t curve drawn over the histograms

# --- Injection (IMRPhenomD, non-spinning) ---
DO_INJECTION = True
M1_Msun = 30.0
M2_Msun = 30.0
DIST_Mpc = 400.0   # lower => louder
F_LOWER = 30.0
APPROXIMANT = "IMRPhenomD"
INJ_INCL = 0.0
INJ_PHI = 0.0
INJ_GPS = EVENT_GPS

# --- Non-Gaussian augmentation (impulsive spikes) ---
MAKE_NON_GAUSSIAN = True
N_GLITCHES = 25          # number of spikes added
GLITCH_SCALE_SIG = 8.0   # spike amplitude in units of the robust sigma
RNG_SEED = 123


In [None]:

# ---------------- Likelihood core ----------------
def _student_t_logpdf(x, s, nu):
    """Log-density of a zero-mean Student-t distribution with scale ``s``.

    Parameters
    ----------
    x : array_like
        Points at which the density is evaluated.
    s : array_like
        Scale parameter(s), broadcast against ``x``.
    nu : float
        Degrees of freedom (scalar). ``nu = +inf`` is accepted and returns the
        Gaussian limit N(0, s**2); the finite-``nu`` formula would otherwise
        evaluate ``gammaln(inf) - gammaln(inf)`` and yield NaN.

    Returns
    -------
    ndarray or numpy scalar
        ``log p(x | s, nu)`` evaluated element-wise.
    """
    z2 = (x / s)**2
    if np.isposinf(nu):
        # ν → ∞ limit of the Student-t density is the normal density.
        return -0.5*z2 - 0.5*np.log(2*np.pi) - np.log(s)
    logC = gammaln(0.5*(nu+1)) - gammaln(0.5*nu) - 0.5*np.log(nu*np.pi) - np.log(s)
    return logC - 0.5*(nu+1)*np.log1p(z2/nu)

def gaussian_loglik_segment(D, H, Sn, df):
    """Gaussian log-likelihood of one frequency-domain segment.

    The real and imaginary parts of the residual ``D - H`` are each modelled
    as independent zero-mean normals with variance ``0.5 * Sn * df`` per bin,
    so ``D`` and ``H`` must be supplied in a normalisation consistent with
    that variance.

    Returns the log-likelihood summed over all bins as a Python float.
    """
    resid = D - H
    sigma2 = 0.5 * Sn * df
    log_norm = np.log(2*np.pi*sigma2)

    def _quadrature(u):
        # Normal log-density of one quadrature (real or imaginary part).
        return -0.5*((u**2)/sigma2 + log_norm)

    per_bin = _quadrature(resid.real) + _quadrature(resid.imag)
    return float(np.sum(per_bin))

def student_t_loglik_segment(D, H, Sn, df, nu):
    """Student-t log-likelihood of one frequency-domain segment.

    Each quadrature (real / imaginary part) of the residual ``D - H`` is
    modelled as an independent Student-t variable with ``nu`` degrees of
    freedom and scale ``sqrt(0.5 * Sn * df)``.

    Returns the log-likelihood summed over all bins as a Python float.
    """
    scale = np.sqrt(0.5 * Sn * df)
    resid = D - H
    logp_re, logp_im = (_student_t_logpdf(part, scale, nu)
                        for part in (resid.real, resid.imag))
    return float(np.sum(logp_re + logp_im))

def robust_loglik(segments, nu=None):
    """Total log-likelihood summed over ``segments``.

    ``segments`` is an iterable of ``(D, H, Sn, df)`` tuples. With ``nu`` equal
    to ``None`` or infinite the Gaussian likelihood is used; any other value
    selects the Student-t likelihood with ``nu`` degrees of freedom.
    """
    if nu is None or np.isinf(nu):
        terms = (gaussian_loglik_segment(D, H, Sn, df)
                 for D, H, Sn, df in segments)
    else:
        terms = (student_t_loglik_segment(D, H, Sn, df, nu)
                 for D, H, Sn, df in segments)
    # Start from 0.0 so an empty input still returns a float.
    return sum(terms, 0.0)


In [None]:

# ---------------- Utilities ----------------
def segment_indices(n, seglen, step):
    """Return ``(start, stop)`` index pairs of length-``seglen`` windows.

    Windows start every ``step`` samples and only those lying entirely inside
    ``[0, n)`` are returned; the list is empty when ``n < seglen``.
    """
    first = np.arange(0, n - seglen + 1, step, dtype=int)
    last = first + seglen
    return list(zip(first, last))

def segment_fft_and_psd(x, fs, seg_len_s, overlap_fr, win_alpha):
    """Cut ``x`` into overlapping tapered segments; FFT each and estimate its PSD.

    Parameters
    ----------
    x : ndarray
        Time series sampled at ``fs``.
    fs : float
        Sample rate [Hz].
    seg_len_s : float
        Segment length [s].
    overlap_fr : float
        Fractional overlap between consecutive segments.
    win_alpha : float
        ``alpha`` of the Tukey taper applied to every segment.

    Returns
    -------
    list of tuple
        One ``(D, f_psd, Pxx, df, f_fft, win, slc)`` per segment, where ``D`` is
        the raw (unnormalised) ``np.fft.rfft`` of the tapered segment,
        ``f_psd``/``Pxx`` the Welch estimate computed from that same tapered
        segment (floored at the smallest positive float), ``df`` the FFT bin
        width, ``f_fft`` the FFT frequencies, ``win`` the taper and ``slc`` the
        slice of ``x`` covered by the segment.

    Raises
    ------
    ValueError
        If the segment length is at most one sample or the hop is below one.
    """
    n_seg = int(round(seg_len_s * fs))
    hop = int(round(n_seg * (1-overlap_fr)))
    if not (n_seg > 1 and hop >= 1):
        raise ValueError("Bad segmentation parameters")

    taper = tukey(n_seg, alpha=win_alpha)
    df = fs / n_seg
    # Welch sub-segments: 2 s long (at least 8 samples), half-overlapping.
    welch_len = max(int(2.0*fs), 8)
    welch_overlap = int(0.5*welch_len)

    packs = []
    for lo, hi in segment_indices(len(x), n_seg, hop):
        chunk = x[lo:hi]
        if len(chunk) != n_seg:
            continue
        tapered = chunk * taper
        spectrum = np.fft.rfft(tapered)
        f_fft = np.fft.rfftfreq(n_seg, d=1.0/fs)
        f_psd, Pxx = welch(tapered, fs=fs, nperseg=welch_len,
                           noverlap=welch_overlap, detrend="constant")
        # Floor the PSD so later divisions and logs never see an exact zero.
        Pxx = np.maximum(Pxx, np.finfo(float).tiny)
        packs.append((spectrum, f_psd, Pxx, df, f_fft, taper, slice(lo, hi)))
    return packs

def interpolate_psd_to_fft_grid(f_psd, Pxx, f_fft):
    """Linearly interpolate the PSD ``Pxx(f_psd)`` onto the frequencies ``f_fft``.

    Frequencies outside ``[f_psd[0], f_psd[-1]]`` are clamped to the nearest
    edge first, so they receive the edge PSD value.
    """
    lo_edge, hi_edge = f_psd[0], f_psd[-1]
    clamped = np.minimum(np.maximum(f_fft, lo_edge), hi_edge)
    return np.interp(clamped, f_psd, Pxx)

def whitened_residuals(D, H, Sn, df, f_fft, fmin=20.0):
    """Whitened real and imaginary residuals for bins with ``f_fft >= fmin``.

    Each quadrature of ``D - H`` is divided by ``sqrt(0.5 * Sn * df)``; the
    real parts are followed by the imaginary parts in one 1-D array, and
    non-finite entries are dropped. The result has unit variance only if
    ``D`` and ``H`` are normalised consistently with that per-quadrature
    standard deviation.
    """
    band = f_fft >= fmin
    resid = D[band] - H[band]
    sigma = np.sqrt(0.5 * Sn[band] * df)
    stacked = np.concatenate((resid.real / sigma, resid.imag / sigma))
    finite = np.isfinite(stacked)
    return stacked[finite]

def add_impulsive_spikes(x, scale_sigma, n_spikes, rng):
    """Return a copy of ``x`` with ``n_spikes`` single-sample spikes added.

    The spike amplitude is ``scale_sigma`` times a robust standard deviation
    of ``x`` (1.4826 × median absolute deviation, with a 1e-12 floor on the
    MAD). Positions are drawn without replacement and signs uniformly from
    ±1 using ``rng``; the input array is left untouched.
    """
    centre = np.median(x)
    mad = np.median(np.abs(x - centre)) + 1e-12
    spike_amp = scale_sigma * (1.4826*mad)

    # Draw positions first, then signs, to keep the RNG stream order fixed.
    where = rng.choice(len(x), size=n_spikes, replace=False)
    polarity = rng.choice([-1.0, 1.0], size=n_spikes)

    spiked = x.copy()
    spiked[where] += polarity*spike_amp
    return spiked

def make_td_injection(fs, n, start_gps, inj_gps, m1, m2, dist_mpc, f_lower, incl, phi0, approximant):
    """Build a length-``n`` time series containing a CBC plus-polarisation waveform.

    The waveform is generated with ``get_td_waveform`` (spins fixed to zero)
    and placed so that its merger sample falls at ``inj_gps``. PyCBC
    time-domain waveforms carry their own time axis with t = 0 at merger, so
    ``hp.start_time`` (negative) gives the offset of the first sample from the
    merger. Aligning the *last* sample with ``inj_gps`` instead would put the
    merger earlier than requested by the length of the ringdown tail.

    Parameters
    ----------
    fs : float
        Sample rate [Hz] of the target series.
    n : int
        Number of samples of the target series.
    start_gps : float
        GPS time of sample 0 of the target series.
    inj_gps : float
        GPS time at which the merger is placed.
    m1, m2 : float
        Component masses, passed as ``mass1`` / ``mass2``.
    dist_mpc : float
        Passed as ``distance``.
    f_lower : float
        Starting frequency of the waveform.
    incl, phi0 : float
        Passed as ``inclination`` and ``coa_phase``.
    approximant : str
        Waveform approximant name.

    Returns
    -------
    ndarray
        Float array of length ``n``; zero wherever the waveform does not
        overlap ``[0, n)``. Only the plus polarisation is used and no detector
        response is applied.
    """
    dt = 1.0/fs
    hp, _hc = get_td_waveform(approximant=approximant,
                              mass1=m1, mass2=m2, spin1z=0, spin2z=0,
                              f_lower=f_lower, delta_t=dt, distance=dist_mpc,
                              inclination=incl, coa_phase=phi0)
    h = hp.numpy()

    # Sample of the target series that should hold the merger (waveform t = 0).
    inj_index = int(round((inj_gps - start_gps)*fs))
    # First waveform sample sits |start_time| seconds before the merger.
    first = inj_index + int(round(float(hp.start_time)*fs))
    last = first + len(h)

    x_model = np.zeros(n, dtype=float)
    # Clip the placement to the part of the waveform that overlaps [0, n).
    dst_lo, dst_hi = max(first, 0), min(last, n)
    if dst_hi > dst_lo:
        x_model[dst_lo:dst_hi] += h[dst_lo - first:dst_hi - first]
    return x_model


In [None]:

# ---------------- Fetch data ----------------
# Analyse SPAN_SEC seconds of IFO strain centred on EVENT_GPS.
half_span = SPAN_SEC/2
start, stop = EVENT_GPS - half_span, EVENT_GPS + half_span
print(f"Fetching {IFO} strain {SPAN_SEC:.0f}s from {start:.1f} to {stop:.1f} (GPS) ...")

# Download from GWOSC, high-pass, then optionally resample.
data = TimeSeries.fetch_open_data(IFO, start, stop).highpass(HIGHPASS_HZ)
if RESAMPLE_HZ is not None:
    data = data.resample(RESAMPLE_HZ)

# Plain numpy views used by the rest of the notebook.
t = data.times.value
x = data.value.astype(float)
fs = float(data.sample_rate.value)
n = len(x)
print(f"fs = {fs:.1f} Hz, N = {n}, duration = {n/fs:.1f} s")

fig, ax = plt.subplots(figsize=(11, 3.5))
ax.plot(t - t[0], x, lw=0.6)
ax.set_xlabel("Time since segment start [s]")
ax.set_ylabel("Strain (hp)")
ax.set_title(f"{IFO} strain — {SPAN_SEC:.0f}s, fs={fs:.0f} Hz")
fig.tight_layout()
plt.show()


In [None]:

# ---------------- Build variants: clean, non-Gaussian, and + injection ----------------
rng = np.random.default_rng(RNG_SEED)

# Reference copy of the preprocessed strain.
x_clean = x.copy()

# Variant with impulsive spikes (falls back to the clean series when disabled).
if MAKE_NON_GAUSSIAN:
    x_ng = add_impulsive_spikes(x_clean, GLITCH_SCALE_SIG, N_GLITCHES, rng)
else:
    x_ng = x_clean

# Variant with a CBC injection (all-zero template when disabled).
if DO_INJECTION:
    template_td = make_td_injection(fs, n, t[0], INJ_GPS, M1_Msun, M2_Msun,
                                    DIST_Mpc, F_LOWER, INJ_INCL, INJ_PHI, APPROXIMANT)
else:
    template_td = np.zeros_like(x)
x_inj = x_clean + template_td

# Segment each variant with identical settings.
packs_clean, packs_ng, packs_inj = (
    segment_fft_and_psd(series, fs, SEG_LEN_S, OVERLAP_FR, WINDOW_TUKEY_ALPHA)
    for series in (x_clean, x_ng, x_inj)
)

def build_segments(packs, template=None, fmin=20.0):
    """Turn segment packs into ``(D, H, Sn, df)`` tuples for the likelihoods.

    The likelihood functions model each quadrature of ``D - H`` with variance
    ``0.5 * Sn * df``, where ``Sn`` is a one-sided PSD. A raw ``np.fft.rfft``
    of a windowed segment has per-quadrature variance
    ``Sn * fs * sum(win**2) / 4`` instead, i.e. larger by the factor
    ``N * sum(win**2) / 2``. Data and template spectra are therefore rescaled
    by ``sqrt(2 / (N * sum(win**2)))`` so that ``|D|**2`` is the one-sided
    periodogram times ``df`` and the assumed variance holds.

    Parameters
    ----------
    packs : list of tuple
        ``(D, f_psd, Pxx, df, f_fft, win, slc)`` per segment, with ``D`` the
        raw rfft of the windowed segment.
    template : ndarray or None
        Time-domain model spanning the full series; ``None`` means ``H = 0``.
    fmin : float, default 20.0
        Only bins with ``f_fft >= fmin`` are kept.

    Returns
    -------
    list of tuple
        ``(D, H, Sn, df)`` per segment, restricted to the kept bins, with
        ``D`` and ``H`` in the normalisation described above.
    """
    segments = []
    for (D, f_psd, Pxx, df, f_fft, win, slc) in packs:
        Sn = interpolate_psd_to_fft_grid(f_psd, Pxx, f_fft)
        keep = f_fft >= fmin
        # Window-power normalisation: raw rfft -> sqrt(one-sided PSD * df).
        scale = np.sqrt(2.0 / (len(win) * np.sum(win**2)))
        if template is None:
            H = np.zeros_like(D)
        else:
            # Same window and same scaling as the data so D - H is consistent.
            H = np.fft.rfft(template[slc] * win)
        segments.append((scale * D[keep], scale * H[keep], Sn[keep], df))
    return segments

# Noise-only model (h = 0) for every data variant.
seg_clean_noise, seg_ng_noise, seg_inj_noise = (
    build_segments(variant_packs, template=None)
    for variant_packs in (packs_clean, packs_ng, packs_inj)
)
# Signal model (h = template) evaluated on the injected data.
seg_inj_signal = build_segments(packs_inj, template=template_td)


In [None]:

# ---------------- Likelihood comparisons ----------------
def print_ll_table(label, segments_A, segments_B=None):
    """Print total log-likelihoods for every ν in ``NU_LIST``.

    With only ``segments_A`` one value is printed per ν. When ``segments_B``
    is also given, each row shows the log-likelihood of ``segments_A``
    (labelled ``h=0``), of ``segments_B`` (labelled ``h=template``) and their
    difference ``B - A``.
    """
    def _row_tag(nu):
        # Row label: the Gaussian case is the ν = ∞ (or None) limit.
        if nu is None or np.isinf(nu):
            return "Gaussian (ν=∞)"
        return f"Student-t (ν={nu:g})"

    compare = segments_B is not None
    print(f"\n{label}")
    print("Models: H=0 vs H=template   (Δ = L(h=template) - L(h=0))" if compare else "Model: H=0")
    for nu in NU_LIST:
        tag = _row_tag(nu)
        ll0 = robust_loglik(segments_A, nu)
        if not compare:
            print(f"  {tag:18s}: {ll0: .3f}")
            continue
        ll1 = robust_loglik(segments_B, nu)
        print(f"  {tag:18s}:  h=0: {ll0: .3f}   h=template: {ll1: .3f}   Δ: {ll1-ll0: .3f}")

# Noise-only log-likelihoods for the clean and the spiked data.
print_ll_table("CLEAN data", segments_A=seg_clean_noise)
print_ll_table("NON-GAUSSIANIZED data (impulsive spikes)", segments_A=seg_ng_noise)
# Injected data: noise-only model versus template model.
print_ll_table("INJECTED data (compare models)",
               segments_A=seg_inj_noise, segments_B=seg_inj_signal)


In [None]:

# ---------------- PSD in log–log ----------------
# Welch PSDs of the first few segments: clean (solid) vs spiked (dashed).
fig, ax = plt.subplots(figsize=(10.8, 3.6))
for i, pack in enumerate(packs_clean[:3]):
    f_psd, Pxx = pack[1], pack[2]
    ax.loglog(f_psd, Pxx, alpha=0.9, label=f"Clean PSD (seg {i})")
for i, pack in enumerate(packs_ng[:2]):
    f_psd, Pxx = pack[1], pack[2]
    ax.loglog(f_psd, Pxx, alpha=0.6, linestyle="--", label=f"NG PSD (seg {i})")
ax.set_xlabel("Frequency [Hz]")
ax.set_ylabel("PSD [strain$^2$/Hz]")
ax.set_title("Welch PSD — log–log scale")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:

# ---------------- Histograms of whitened residuals ----------------
# We'll use the middle segment for each case
def mid_whitened(packs):
    """Whitened residuals (model H = 0) of the middle segment in ``packs``.

    ``whitened_residuals`` divides each quadrature by ``sqrt(0.5 * Sn * df)``,
    which is the noise standard deviation only when ``|D|**2`` equals the
    one-sided periodogram times ``df``. The packs hold the raw ``np.fft.rfft``
    of the windowed data, whose quadratures are larger by
    ``sqrt(N * sum(win**2) / 2)``, so the spectrum is rescaled by
    ``sqrt(2 / (N * sum(win**2)))`` first. Without this the residuals are
    orders of magnitude wider than the N(0, 1) curve they are compared with.

    Returns a 1-D array: real parts then imaginary parts of the bins at or
    above 20 Hz.
    """
    D, f_psd, Pxx, df, f_fft, win, _ = packs[len(packs)//2]
    Sn = interpolate_psd_to_fft_grid(f_psd, Pxx, f_fft)
    # Window-power normalisation: raw rfft -> sqrt(one-sided PSD * df).
    scale = np.sqrt(2.0 / (len(win) * np.sum(win**2)))
    D_norm = scale * D
    return whitened_residuals(D_norm, np.zeros_like(D_norm), Sn, df, f_fft, fmin=20.0)

z_clean = mid_whitened(packs_clean)
z_ng = mid_whitened(packs_ng)

# Reference densities drawn over both histograms
zz = np.linspace(-10, 10, 2001)
norm_pdf = (1/np.sqrt(2*np.pi))*np.exp(-0.5*zz**2)
t_pdf = student_t.pdf(zz, df=NU_OVERLAY)  # unit scale

# One figure per data variant: (residuals, histogram label, figure title)
for z_vals, hist_label, fig_title in (
    (z_clean, "Whitened residuals (clean)",
     "Histogram (clean) with Normal & Student-t overlays"),
    (z_ng, "Whitened residuals (non-Gaussianized)",
     "Histogram (with spikes) with Normal & Student-t overlays"),
):
    fig, ax = plt.subplots(figsize=(5.2, 3.9))
    ax.hist(z_vals, bins=100, density=True, alpha=0.7, label=hist_label)
    ax.plot(zz, norm_pdf, lw=2, label="Normal N(0,1)")
    ax.plot(zz, t_pdf, lw=2, linestyle="--", label=f"Student-t (ν={NU_OVERLAY:g})")
    ax.set_xlim(-8, 8)
    ax.set_xlabel("z (whitened residual)")
    ax.set_ylabel("Density")
    ax.set_title(fig_title)
    ax.legend()
    fig.tight_layout()
    plt.show()
