In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import madmom

# Detect Beats + DownBeats w/ Madmom

In [None]:
# Load audio (resampled to a fixed rate and downmixed to mono)
## filename = "../Songs/bob_marley--redemption_song.mp3"  # alternative test track
filename = "../Songs/dev/tool--ticks_and_leaches.mp3"

SAMPLE_RATE = 44100               # Hz; target rate passed to librosa.load
BEATS_PER_BAR = [3, 4, 5, 7, 11]  # candidate meters handed to the DBN decoder
ACTIVATION_FPS = 100              # frames per second passed to the DBN decoder

y, sr = librosa.load(filename, sr=SAMPLE_RATE, mono=True)

# -------------------------------
# Beat + Downbeat tracking
# -------------------------------

# Run madmom's RNN downbeat processor. It is given the file path, so it
# reads the audio itself rather than reusing `y` from librosa above.
proc = madmom.features.downbeats.RNNDownBeatProcessor()(filename)

# Decode the RNN output with a DBN to get one row per detected beat
beats = madmom.features.downbeats.DBNDownBeatTrackingProcessor(
    beats_per_bar=BEATS_PER_BAR, fps=ACTIVATION_FPS)(proc)

# beats is an array of shape (N, 2):
#   [:,0] = beat time (s)
#   [:,1] = beat number inside the bar (1, 2, 3, ...); 1 marks a downbeat
# NOTE: column 1 is NOT a 0/1 flag, so non-downbeats must be selected with
# `!= 1`, never with `== 0`.

# Extract beat times and downbeat times
beat_times = beats[:,0]
downbeat_times = beats[beats[:,1] == 1, 0]

# Estimate tempo from the median inter-beat interval. At least two beats
# are needed to form an interval; otherwise tempo is left as NaN and
# downstream cells have to cope with that.
if len(beat_times) > 1:
    ib_intervals = np.diff(beat_times)
    tempo = 60.0 / np.median(ib_intervals)
else:
    tempo = float('nan')

print(f"Estimated tempo: {tempo:.2f} BPM")
print("First 20 beats (s):", beat_times[:20])
print("First 10 downbeats (s):", downbeat_times[:10])


# Visualize Beat Detection

In [None]:
# --- USER SETTINGS FOR VISUALIZATION WINDOW ---
start_time = 90.0  # where the window begins, in seconds
window_length = 20.0  # how long the window lasts, in seconds
end_time = start_time + window_length

print(f"\nVisualization window: {start_time}s to {end_time}s")

# Boolean masks selecting the events inside [start_time, end_time] (both ends inclusive)
beat_in_window_mask = np.logical_and(beat_times >= start_time, beat_times <= end_time)
downbeat_in_window_mask = np.logical_and(downbeat_times >= start_time,
                                         downbeat_times <= end_time)

# Apply the masks to keep only the beats / downbeats that fall in the window
beats_in_window = beat_times[beat_in_window_mask]
downbeats_in_window = downbeat_times[downbeat_in_window_mask]
print(f"Beats in window: {len(beats_in_window)}, Downbeats in window: {len(downbeats_in_window)}")


### PLOT 1 - Beat times overlaid on the raw audio waveform ###
fig, ax = plt.subplots(figsize=(16, 5))

# Extract the audio segment for the specified window. The start index is
# clamped at 0 so a negative start_time cannot wrap around to the end of the
# track; slicing already clamps the end, so y_window may be shorter than
# window_length (or empty) when the window runs past the end of the audio.
start_sample = max(0, int(start_time * sr))
end_sample = int(end_time * sr)
y_window = y[start_sample:end_sample]

# Time stamp of each plotted sample, derived from its sample index. Unlike
# linspace(start_time, end_time, n), this stays aligned with the beat times
# even when y_window was truncated by the end of the track.
time_axis_window = (start_sample + np.arange(len(y_window))) / sr
ax.plot(time_axis_window, y_window, alpha=1, color='blue', linewidth=0.8)

# Vertical extent of the beat markers: the waveform's range, or a nominal
# [-1, 1] when the window contains no audio (min/max of an empty array raise).
if len(y_window) > 0:
    marker_low, marker_high = np.min(y_window), np.max(y_window)
else:
    marker_low, marker_high = -1.0, 1.0

# Overlay regular beat markers (excluding downbeats)
regular_beats = np.setdiff1d(beats_in_window, downbeats_in_window)
ax.vlines(regular_beats, marker_low, marker_high,
          color='red', alpha=0.4, linestyle='-',
          linewidth=2, label='Beats')

# Overlay downbeat markers
ax.vlines(downbeats_in_window, marker_low, marker_high,
          color='green', alpha=0.7, linestyle='-',
          linewidth=2, label='Downbeats')

ax.set_xlim(start_time, end_time)
ax.set_ylim([marker_low * 1.1, marker_high * 1.1])  # 10% headroom around the markers
ax.set_xlabel('Time (s)')
ax.set_ylabel('Amplitude')
ax.set_title(f'Audio Waveform with Beat & Downbeat Detection ({start_time}-{end_time}s)')
ax.legend()
ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()


### PLOT 2 - Time intervals between consecutive beats ###
# beat_intervals[i] is the gap between beat i and beat i+1, so it is plotted
# against beat_times[1:] (the time at which each interval ends).
beat_intervals = np.diff(beat_times)
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(beat_times[1:], beat_intervals, 'o-', alpha=0.7, markersize=3)

# Highlight the window region
window_mask = (beat_times[1:] >= start_time) & (beat_times[1:] <= end_time)
if np.any(window_mask):
    ax.plot(beat_times[1:][window_mask], beat_intervals[window_mask],
            'ro-', alpha=0.9, markersize=4, label=f'Window ({start_time}-{end_time}s)')

# Reference line at the interval implied by the estimated tempo. tempo is NaN
# when fewer than two beats were detected; skip the line in that case rather
# than drawing an invisible line with a "nan" legend entry.
if np.isfinite(tempo) and tempo > 0:
    expected_interval = 60 / tempo
    ax.axhline(y=expected_interval, color='red', linestyle='--', alpha=0.8,
               label=f'Expected interval: {expected_interval:.3f}s')

ax.axvspan(start_time, end_time, alpha=0.2, color='yellow', label='Analysis window')
ax.set_xlabel('Time (s)')
ax.set_ylabel('Beat Interval (s)')
ax.set_title('Beat Interval Analysis (Full Song)')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()


# Add Metronome (detected) to Soundtrack

In [None]:
def overlay_clicks_dual(y, sr, beat_times, downbeat_times,
                        click_high_path="./perc_high.wav",
                        click_low_path="./perc_low.wav",
                        out_path=None,
                        click_gain=2.0):
    """
    Overlay high clicks on downbeats and low clicks on other beats.

    The input signal is not modified; the clicks are mixed into a copy.

    Parameters
    ----------
    y : np.ndarray
        Original audio signal (mono).
    sr : int
        Sample rate of the audio.
    beat_times : array-like of float
        All beat times in seconds.
    downbeat_times : array-like of float
        Downbeat times in seconds. A beat is treated as a downbeat when its
        time matches one of these after rounding to the nearest millisecond.
    click_high_path : str
        Path to a high click .wav file (downbeats).
    click_low_path : str
        Path to a low click .wav file (other beats).
    out_path : str or None
        If provided, saves the output audio to this path. A missing parent
        directory is created first.
    click_gain : float
        Linear gain applied to both click samples before mixing.

    Returns
    -------
    y_out : np.ndarray
        Audio with clicks overlaid. If the mixed peak exceeds 1.0 the whole
        signal is scaled down so the peak is exactly 1.0.
    """
    import os  # local import: only used to prepare the output directory

    # Load clicks at the song's sample rate so sample indices line up
    click_high, _ = librosa.load(click_high_path, sr=sr, mono=True)
    click_low, _ = librosa.load(click_low_path, sr=sr, mono=True)
    click_high = click_high * click_gain
    click_low = click_low * click_gain

    # Coerce to float arrays so plain lists/tuples work too (a Python list
    # multiplied by 1000 would be repeated, not scaled).
    beat_times = np.asarray(beat_times, dtype=float).ravel()
    downbeat_times = np.asarray(downbeat_times, dtype=float).ravel()

    y_out = np.copy(y)
    n_samples = len(y_out)

    # Classify each beat by comparing millisecond-rounded integer keys; both
    # sides use the same rounding, so equal times always produce equal keys.
    beat_keys = np.round(beat_times * 1000).astype(int)
    downbeat_keys = np.round(downbeat_times * 1000).astype(int)
    is_downbeat = np.isin(beat_keys, downbeat_keys)

    # Overlay clicks
    for t, downbeat in zip(beat_times, is_downbeat):
        start_sample = int(t * sr)
        # Skip beats outside the signal. A negative index must not reach the
        # slice below, where it would wrap around to the end of the track.
        if start_sample < 0 or start_sample >= n_samples:
            continue

        click = click_high if downbeat else click_low

        # Truncate the click if it would run past the end of the signal
        end_sample = min(start_sample + len(click), n_samples)

        # Mix click into signal
        y_out[start_sample:end_sample] += click[: end_sample - start_sample]

    # Normalize only when the mix would clip (an empty signal has no peak)
    max_val = np.max(np.abs(y_out)) if n_samples > 0 else 0.0
    if max_val > 1.0:
        y_out = y_out / max_val

    # Save if requested
    if out_path is not None:
        if isinstance(out_path, (str, os.PathLike)):
            out_dir = os.path.dirname(os.fspath(out_path))
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        sf.write(out_path, y_out, sr)

    return y_out


# Overlay dual clicks on the full track and write the mix to disk for listening checks
debug_out_path = "../Results/debug_with_dual_clicks.wav"
y_debug = overlay_clicks_dual(y, sr, beat_times, downbeat_times,
                              click_high_path="./perc_high.wav",
                              click_low_path="./perc_low.wav",
                              out_path=debug_out_path)

# Report the path that was actually written, not just the bare file name
print(f"Debug audio with high/low clicks saved to {debug_out_path}")
