In [2]:
# ============================================================================
# SECTION 0: SETUP AND SIGNAL GENERATION
# ============================================================================
# Why: Create synthetic audio signals for advanced frequency domain analysis.
# What we'll build: Two pure tones at different frequencies + noise
# Key difference from basic.ipynb: We'll focus on phase and group delay analysis
# ============================================================================

from files import generate_tone
import numpy as np
import plotly.graph_objects as go

# --- Setup: Key audio parameters ---
duration = 3  # seconds
sample_rate = 16000  # Hz (samples per second). Standard for speech/phone audio.
noise_std = 0.1  # Standard deviation of the additive Gaussian noise
random_seed = 42  # Fixed seed so "Restart Kernel -> Run All" reproduces the same noise

# --- Create two component signals ---
# Low tone (like AC hum): 150 Hz - slow oscillations
# High tone (like birdsong): 3000 Hz - fast oscillations
# generate_tone returns a pair; the first element is kept as `t` and the second
# is the tone itself (presumably t is the time axis - confirm in files.generate_tone).
t, tone_low = generate_tone(150, duration, sample_rate)
_, tone_high = generate_tone(3000, duration, sample_rate)
clean_audio = tone_low + tone_high  # Mix them together (linear superposition)

# --- Add realistic noise ---
# Why: Real recordings are never clean. Noise affects phase coherence and timing estimates.
# A dedicated, seeded Generator is used instead of the global np.random state so
# that every downstream number and plot is identical from run to run.
rng = np.random.default_rng(random_seed)
noise = rng.normal(0, noise_std, clean_audio.shape)  # Gaussian noise: mean=0, std=noise_std
noisy_audio = clean_audio + noise  # Composite signal: tone_low + tone_high + noise

print(f"Signal Generated. shape: {noisy_audio.shape}, Sampling Rate: {sample_rate}")
print(f"  → {noisy_audio.shape[0]} samples at {sample_rate} Hz = {duration} seconds")

Signal Generated. shape: (48000,), Sampling Rate: 16000
  → 48000 samples at 16000 Hz = 3 seconds


In [3]:
# ============================================================================
# OPTIONAL: LOAD REAL AUDIO FILE
# ============================================================================
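# Uncomment below to load your own audio file instead of the synthetic signal.
# Note: Sections 3-5 still compare against the synthetic `clean_audio`, so a loaded
#       file must have the same number of samples (and should use the same sample
#       rate) as the synthetic signal for those comparisons to run and be meaningful.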
# ============================================================================

# import soundfile as sf
# import os

# audio_path = os.path.expanduser("~/Downloads/biluma.wav")  # replace with your file
# noisy_audio, sample_rate = sf.read(audio_path)

# if noisy_audio.ndim > 1:
#     noisy_audio = noisy_audio.mean(axis=1)  # Convert stereo to mono if needed

In [4]:
# ============================================================================
# SECTION 1: PHASE SPECTRUM ANALYSIS
# ============================================================================
# Why: Phase contains timing information about frequency components.
#      While magnitude tells us "what" frequencies exist, phase tells us "when"
#      they occur relative to each other.
# What we'll see: Phase values in radians, showing timing relationships between
#                 frequency components.
# Key insight: Only bins near 150 Hz and 3000 Hz have strong magnitude.
#              Everywhere else, magnitude is dominated by noise.
#              For noise-dominated bins, phase becomes uniformly random in [-π, π].
# ============================================================================

from files import compute_phase_spectrum

# --- Compute Phase Spectrum ---
# How: FFT decomposes signal into frequency components, each with magnitude and phase
# Math: X[k] = |X[k]| * e^(j*φ[k]) where φ[k] is the phase
# 
# Phase wrapping:
#   - np.angle() gives phase in [-π, π] (wrapped)
#   - If phase jumps by more than π, assume it's a wrap and add/subtract 2π
#   - Formula: φ_unwrap(k) = φ(k) + 2π*n_k
#   - Use np.unwrap() to study delay or system behavior
#
# Optional masking:
#   - Mask phase values where magnitude is below threshold
#   - Focus on strong frequency components (e.g., top 10% by magnitude)

# Unmasked, unwrapped phase for every frequency bin of the noisy signal.
freqs, phase, magnitude = compute_phase_spectrum(
    noisy_audio,
    sample_rate,
    unwrap=True,  # Unwrap phase to remove 2π discontinuities
    mask_threshold=None  # No masking here; the masked variant is computed below
)

# Masked variant for comparison: only bins whose magnitude reaches the 90th
# percentile are kept (strongest 10%).
# NOTE(review): masked-out bins are presumably returned as NaN (np.nanmin/np.nanmax
# are used in the summary below) - confirm in files.compute_phase_spectrum.
threshold = np.percentile(magnitude, 90)  # Keep strongest 10%
_, phase_masked, _ = compute_phase_spectrum(
    noisy_audio,
    sample_rate,
    unwrap=True,
    mask_threshold=threshold
)

# --- Interactive Visualization ---
fig = go.Figure()

# Full phase spectrum (all frequencies)
fig.add_trace(go.Scatter(
    x=freqs,
    y=phase,
    mode="lines",
    name="Phase Spectrum (Unwrapped)",
    line=dict(color='blue', width=1)
))

# Masked version overlaid on the same axes for comparison
fig.add_trace(go.Scatter(
    x=freqs,
    y=phase_masked,
    mode="lines",
    name="Phase (Masked: Top 10%)",
    line=dict(color='red', width=1, dash='dash')
))

fig.update_layout(
    title="Phase Spectrum: Timing Information in Frequency Domain",
    xaxis_title="Frequency (Hz)",
    yaxis_title="Phase (radians)",
    template="plotly_white",
    width=1000,
    height=400,
    hovermode='x unified'
)

fig.show()

# --- Summary ---
# The frequency span is read from `freqs` itself rather than assumed to be
# 0..sample_rate/2, so the printed range always matches the plotted axis.
# The last line distinguishes wrapped from unwrapped phase: the plotted phase is
# unwrapped, so its range is far wider than [-π, π].
print("Phase Spectrum computed:")
print(f"  → {len(freqs)} frequency bins from {np.min(freqs):.0f} Hz to {np.max(freqs):.0f} Hz")
print(f"  → Phase range: [{np.nanmin(phase):.2f}, {np.nanmax(phase):.2f}] radians")
print("  → Strong components at 150 Hz and 3000 Hz have stable phase")
print("  → Noise-dominated bins have random wrapped phase in [-π, π]; "
      "unwrapping accumulates it into the large drift shown above")

Phase Spectrum computed:
  → 24001 frequency bins from 0 Hz to 8000 Hz
  → Phase range: [-154.55, 442.85] radians
  → Strong components at 150 Hz and 3000 Hz have stable phase
  → Noise-dominated bins show random phase in [-π, π]


In [5]:
# ============================================================================
# SECTION 2: GROUP DELAY ANALYSIS (Basic)
# ============================================================================
# Why: Group delay measures frequency-dependent timing distortion.
#      It answers: "Does my processing introduce frequency-dependent timing distortion?"
# What we'll see: Group delay in seconds vs frequency, showing timing reliability
#                 at each frequency component.
# Key insight: Since there's no real delay in the generated audio signal,
#              this shows how unreliable the timing estimate is at each frequency.
#              Noise makes it look sloppy, indicating timing estimate unreliability.
# Use case: ONLY USEFUL IN DEBUGGING - best for comparing 2 audio files
#           (clean vs processed) to detect processing artifacts.
# ============================================================================

from files import compute_group_delay

# --- Compute Group Delay ---
# How: Take the derivative of phase spectrum with respect to frequency
# Math: τ_g(f) = -(1/2π) · dφ/df   (equivalently τ_g(ω) = -dφ/dω)
#       where φ is the unwrapped phase in radians, f is frequency in Hz, and ω = 2πf
#
# Intuition:
#   - Group delay measures how long different frequency components take to propagate
#   - For signals (not systems), group delay shows timing estimate reliability
#   - Computing group delay of a signal spectrum (hence jagged)
#   - Should ideally be applied to system/filter transfer functions
#
# Note: For generated signal with no delay, this shows timing estimate unreliability.
#       If delay were added, look for overall vertical shift (center of gravity moves above 0).
#       With noise, interpretation becomes difficult.

# Raw group delay of the noisy mix; phase smoothing is deliberately off so the
# unprocessed estimate is what gets plotted.
freqs, group_delay = compute_group_delay(noisy_audio, sample_rate, smooth_phase=False)

# Optional experiment: shift the signal by 0.1 s and recompute to see the effect
# delay_samples = int(0.1 * sample_rate)  # 0.1 second delay
# delayed_audio = np.concatenate([np.zeros(delay_samples), noisy_audio[:-delay_samples]])
# freqs, group_delay = compute_group_delay(delayed_audio, sample_rate, smooth_phase=False)

# --- Interactive Visualization ---
group_delay_trace = go.Scatter(
    x=freqs,
    y=group_delay,
    mode="lines",
    name="Group Delay",
    line={"color": "blue", "width": 1},
)

fig = go.Figure()
fig.add_trace(group_delay_trace)

layout_options = {
    "title": "Group Delay: Frequency-Dependent Timing Distortion",
    "xaxis_title": "Frequency (Hz)",
    "yaxis_title": "Group Delay (seconds)",
    "template": "plotly_white",
    "width": 1000,
    "height": 400,
    "hovermode": "x unified",
}
fig.update_layout(**layout_options)

# Default view: 0-500 Hz (the region around the 150 Hz tone)
fig.update_xaxes(range=[0, 500])

fig.show()

# --- Summary ---
gd_min = np.min(group_delay)
gd_max = np.max(group_delay)
print("Group Delay computed:")
print(f"  → Group delay range: [{gd_min:.4f}, {gd_max:.4f}] seconds")
print("  → Jagged appearance indicates timing estimate unreliability")
print("  → Noise makes interpretation difficult at each frequency")

Group Delay computed:
  → Group delay range: [-1.4848, 1.4956] seconds
  → Jagged appearance indicates timing estimate unreliability
  → Noise makes interpretation difficult at each frequency


In [6]:
# ============================================================================
# SECTION 3: GROUP DELAY COMPARISON (Clean vs Noisy)
# ============================================================================
# Why: Compare group delay between clean and noisy signals to understand
#      how noise affects timing structure and phase coherence.
# What we'll see: Clean signal has stable timing structure.
#                 Noisy signal has perturbed timing structure.
#                 Noise reduces phase coherence, making timing estimates unreliable.
# Key technique: Windowing before FFT to remove artificial timing distortion
#                caused by abrupt signal boundaries.
# ============================================================================

from files import compute_group_delay

# --- Apply Windowing ---
# Why: Windowing smoothly fades in and out the start and end of the signal
#      before FFT, removing artificial timing distortion caused by abrupt boundaries
# How: Multiply signal by a window function (e.g., Hanning window)
#      This ensures smooth transitions at signal edges
# Hann taper, one coefficient per sample of the clean signal
window = np.hanning(len(clean_audio))

# --- Compute Group Delay with Phase Smoothing ---
# Both signals go through identical settings so the curves are directly comparable:
#   - smooth_phase=True : smooth the phase before differentiation
#                         (Savitzky-Golay, per this notebook's description of the helper)
#   - window_length=101 : smoothing window size (must be odd)
#   - polyorder=3       : polynomial order used by the smoother
smoothing_options = dict(smooth_phase=True, window_length=101, polyorder=3)

freqs, gd_clean = compute_group_delay(clean_audio * window, sample_rate, **smoothing_options)
_, gd_noisy = compute_group_delay(noisy_audio * window, sample_rate, **smoothing_options)

# --- Interactive Comparison Visualization ---
fig = go.Figure()

# (curve, legend label, line colour) - clean first, then noisy
comparison_curves = [
    (gd_clean, "Clean (150 + 3000 Hz)", "green"),
    (gd_noisy, "Noisy (150 + 3000 Hz + noise)", "red"),
]
for delay_curve, curve_label, curve_colour in comparison_curves:
    fig.add_trace(
        go.Scatter(
            x=freqs,
            y=delay_curve,
            name=curve_label,
            line={"color": curve_colour, "width": 2},
        )
    )

comparison_layout = {
    "title": "Group Delay Comparison: Clean vs Noisy Signal",
    "xaxis_title": "Frequency (Hz)",
    "yaxis_title": "Group Delay (seconds)",
    "template": "plotly_white",
    "width": 1000,
    "height": 500,
    "hovermode": "x unified",
    "legend": {"x": 0.7, "y": 0.95},
}
fig.update_layout(**comparison_layout)

# Default view: 0-5500 Hz, which contains both the 150 Hz and 3000 Hz tones
fig.update_xaxes(range=[0, 5500])

fig.show()

# --- Summary ---
summary_lines = (
    "Group Delay Comparison:",
    "  → Clean signal: Stable timing structure, consistent group delay",
    "  → Noisy signal: Perturbed timing structure, noise reduces phase coherence",
    "  → Windowing removes artificial timing distortion from signal boundaries",
    "  → Phase smoothing reduces noise artifacts in group delay computation",
)
for summary_line in summary_lines:
    print(summary_line)

Group Delay Comparison:
  → Clean signal: Stable timing structure, consistent group delay
  → Noisy signal: Perturbed timing structure, noise reduces phase coherence
  → Windowing removes artificial timing distortion from signal boundaries
  → Phase smoothing reduces noise artifacts in group delay computation


In [7]:
# ============================================================================
# SECTION 4: GROUP DELAY DIFFERENCE (Noise Impact Analysis)
# ============================================================================
# Why: Quantify the impact of noise on timing structure by computing the difference
#      between noisy and clean group delay.
# What we'll see: The deviation in group delay caused by noise at each frequency.
# Key insight: Large differences indicate frequencies where noise significantly
#              degrades timing estimates and phase coherence.
# ============================================================================

# --- Compute Difference ---
# How: Subtract clean group delay from noisy group delay
# Math: Δτ_g(ω) = τ_g_noisy(ω) - τ_g_clean(ω)
#
# Interpretation:
#   - Δτ_g ≈ 0: Noise has minimal impact on timing at this frequency
#   - |Δτ_g| large: Noise significantly degrades timing estimate at this frequency
#   - Positive Δτ_g: Noisy signal has longer delay (slower propagation)
#   - Negative Δτ_g: Noisy signal has shorter delay (faster propagation)

# Per-bin deviation introduced by the noise: noisy minus clean group delay
gd_diff = gd_noisy - gd_clean

# --- Interactive Difference Visualization ---
difference_trace = go.Scatter(
    x=freqs,
    y=gd_diff,
    name="Noisy - Clean",
    line={"color": "purple", "width": 2},
    fill="tozeroy",  # Shade the area between the curve and zero
    fillcolor="rgba(128, 0, 128, 0.2)",  # Semi-transparent purple
)

fig = go.Figure()
fig.add_trace(difference_trace)

# Dashed zero line marks "noisy and clean agree"
fig.add_hline(
    y=0,
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="No difference",
)

difference_layout = {
    "title": "Group Delay Difference: Quantifying Noise Impact on Timing",
    "xaxis_title": "Frequency (Hz)",
    "yaxis_title": "Δ Group Delay (seconds)",
    "template": "plotly_white",
    "width": 1000,
    "height": 500,
    "hovermode": "x unified",
}
fig.update_layout(**difference_layout)

# Default view: 0-500 Hz (the region around the 150 Hz tone)
fig.update_xaxes(range=[0, 500])

fig.show()

# --- Summary statistics ---
# The absolute deviation is computed once and reused for the mean, the maximum
# and the frequency bin where that maximum occurs.
abs_diff = np.abs(gd_diff)
worst_bin = np.argmax(abs_diff)

print("Noise Impact Analysis:")
print(f"  → Mean absolute difference: {np.mean(abs_diff):.6f} seconds")
print(f"  → Max difference: {np.max(abs_diff):.6f} seconds at {freqs[worst_bin]:.1f} Hz")
print("  → Large differences indicate frequencies where noise degrades timing estimates")

Noise Impact Analysis:
  → Mean absolute difference: 0.175612 seconds
  → Max difference: 0.805203 seconds at 2173.0 Hz
  → Large differences indicate frequencies where noise degrades timing estimates


In [10]:
# =============================================================================
# SECTION 5: LINKED GROUP DELAY SUBPLOTS (Sections 2–4 Combined)
# =============================================================================
# Goal: Visualize Section 2 (basic group delay), Section 3 (clean vs noisy),
#       and Section 4 (difference) together in a single figure with
#       *shared, linked axes*.
#
# Why:
#   - Shared X-axis (frequency in Hz): zoom/pan in one subplot updates all.
#   - Shared Y-axis (group delay in seconds): vertical zoom is synchronized,
#     making it easy to compare scales across views.
#   - This mirrors the "linked subplot" experience from basic.ipynb.
#
# Requirement from the notebook design:
#   - Put Section 2, 3, and 4 under the same visual umbrella as subplots
#     so that interaction in one subplot is reflected in the others.
# =============================================================================

from plotly.subplots import make_subplots

# Create 3 vertically-stacked subplots and let make_subplots do the axis linking.
#   - shared_xaxes=True  : share the frequency axis among subplots in the same column
#   - shared_yaxes="all" : share the group-delay axis across every subplot in the grid
#     (shared_yaxes=True only shares within a ROW, which does nothing in a
#     single-column layout, so "all" is required to link the three rows)
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    shared_yaxes="all",
    vertical_spacing=0.06,
    subplot_titles=[
        "Section 2 – Group Delay (Noisy Signal)",
        "Section 3 – Group Delay: Clean vs Noisy",
        "Section 4 – Group Delay Difference (Noise Impact)",
    ],
)

# -------------------------------------------------------------------------
# Subplot 1 (Row 1): Section 2 – Basic group delay of noisy signal
# -------------------------------------------------------------------------
fig.add_trace(
    go.Scatter(
        x=freqs,
        y=group_delay,
        mode="lines",
        name="Noisy Group Delay",
        line=dict(color="royalblue", width=1),
    ),
    row=1,
    col=1,
)

# -------------------------------------------------------------------------
# Subplot 2 (Row 2): Section 3 – Clean vs noisy group delay (windowed + smoothed)
# -------------------------------------------------------------------------
fig.add_trace(
    go.Scatter(
        x=freqs,
        y=gd_clean,
        name="Clean (150 + 3000 Hz)",
        line=dict(color="green", width=2),
    ),
    row=2,
    col=1,
)

fig.add_trace(
    go.Scatter(
        x=freqs,
        y=gd_noisy,
        name="Noisy (150 + 3000 Hz + noise)",
        line=dict(color="red", width=2),
    ),
    row=2,
    col=1,
)

# -------------------------------------------------------------------------
# Subplot 3 (Row 3): Section 4 – Group delay difference (noise impact)
# -------------------------------------------------------------------------
fig.add_trace(
    go.Scatter(
        x=freqs,
        y=gd_diff,
        name="Noisy - Clean",
        line=dict(color="purple", width=2),
        fill="tozeroy",
        fillcolor="rgba(128, 0, 128, 0.18)",
    ),
    row=3,
    col=1,
)

# Horizontal reference line at zero in the bottom subplot
fig.add_hline(
    y=0,
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="No difference",
    row=3,
    col=1,
)

# -------------------------------------------------------------------------
# Axis labels and layout
# -------------------------------------------------------------------------
# No manual `matches=` calls are needed: the shared_* options above already link
# the axes, and pointing an axis at itself (matches="x" on the first x-axis)
# would not link anything.

# Only bottom subplot shows the shared X label
fig.update_xaxes(title_text="Frequency (Hz)", row=3, col=1)

# Shared Y label (group delay in seconds), placed on the middle subplot
fig.update_yaxes(title_text="Group Delay / Δ Group Delay (seconds)", row=2, col=1)

fig.update_layout(
    title="Sections 2–4: Linked Group Delay Subplots (Shared X & Y Axes)",
    height=950,
    width=1100,
    template="plotly_white",
    hovermode="x unified",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
)

# Default view: 0-500 Hz and ±1 s. The range is written to every axis (no row/col
# selector) so the initial view does not depend on how a range set on a single
# axis propagates through the linked group.
fig.update_xaxes(range=[0, 500])
fig.update_yaxes(range=[-1, 1])

fig.show()

print("Linked group delay subplots created:")
print("  → Top:   Section 2 – basic noisy group delay")
print("  → Middle:Section 3 – clean vs noisy (windowed + smoothed)")
print("  → Bottom:Section 4 – noisy minus clean (noise impact)")
print("  → Zoom or pan in ANY subplot to see all three update together (X & Y)")

Linked group delay subplots created:
  → Top:   Section 2 – basic noisy group delay
  → Middle:Section 3 – clean vs noisy (windowed + smoothed)
  → Bottom:Section 4 – noisy minus clean (noise impact)
  → Zoom or pan in ANY subplot to see all three update together (X & Y)
