# In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import ruptures as rpt
from IPython.display import Audio, display

def fig_ax(figsize=(15, 5), dpi=150):
    """Create a new matplotlib figure and return the ``(figure, axes)`` pair.

    `figsize` and `dpi` are forwarded unchanged to ``plt.subplots``.
    """
    figure, axes = plt.subplots(figsize=figsize, dpi=dpi)
    return figure, axes

# Audio input. `file_path` is a placeholder and must be edited before running.
file_path = "FILE_PATH" # file path here
duration = 120  # forwarded to librosa.load as `duration` (seconds of audio to read)
# No `sr` argument is passed, so librosa.load applies its default sampling-rate
# handling; `sampling_rate` is the rate it reports for the returned `signal`.
signal, sampling_rate = librosa.load(file_path, duration=duration)

# Inline audio player for the loaded excerpt.
display(Audio(data=signal, rate=sampling_rate))

# Plot the sound envelope
# (the raw samples of `signal`; x axis is sample index / sampling_rate, i.e. seconds).
fig, ax = fig_ax()
ax.plot(np.arange(signal.size) / sampling_rate, signal)
ax.set_xlim(0, signal.size / sampling_rate)
ax.set_xlabel("Time (s)")
_ = ax.set(title="Sound Envelope")

# Compute the onset strength and tempogram
# `hop_length_tempo` is the hop (in samples) used for both features; the same
# value is reused further down whenever frame indices are converted back to
# time so that the axes stay consistent.
hop_length_tempo = 256
oenv = librosa.onset.onset_strength(y=signal, sr=sampling_rate, hop_length=hop_length_tempo)
tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sampling_rate, hop_length=hop_length_tempo)

# Display the tempogram
fig, ax = fig_ax()
_ = librosa.display.specshow(tempogram, ax=ax, hop_length=hop_length_tempo, sr=sampling_rate, x_axis="s", y_axis="tempo")

# Segmentation with ruptures
# The tempogram is transposed before fitting — presumably so that time frames
# run along the first axis, which is the axis ruptures segments (confirm
# against the librosa / ruptures docs).
algo = rpt.KernelCPD(kernel="linear").fit(tempogram.T)
n_bkps_max = 20  # largest number of change points tried in the elbow plot
# Candidate numbers of change points: 1, 2, ..., n_bkps_max (inclusive).
array_of_n_bkps = np.arange(1, n_bkps_max + 1)

def get_sum_of_cost(algo, n_bkps) -> float:
    """Return the total segmentation cost when `algo` predicts `n_bkps` change points.

    The change points come from ``algo.predict(n_bkps=n_bkps)`` and are scored
    with ``algo.cost.sum_of_costs``.
    """
    return algo.cost.sum_of_costs(algo.predict(n_bkps=n_bkps))

# Elbow plot: total cost of the best segmentation for each candidate number
# of change points, used to pick `n_bkps` by eye.
fig, ax = fig_ax(figsize=(7, 4))
elbow_costs = [get_sum_of_cost(algo, k) for k in array_of_n_bkps]
ax.plot(array_of_n_bkps, elbow_costs, "-*", alpha=0.5)
ax.set_title("Sum of costs")
ax.set_xlabel("Number of change points")
ax.set_xticks(array_of_n_bkps)
ax.grid(axis="x")
ax.set_xlim(0, n_bkps_max + 1)

# Selected number of change points (adjust after inspecting the elbow plot);
# the matching point is highlighted in red on the curve.
n_bkps = 5
chosen_cost = get_sum_of_cost(algo, n_bkps)
_ = ax.scatter([n_bkps], [chosen_cost], color="r", s=100)

# Segment the signal
# `bkps` are tempogram frame indices. The final entry is treated as the end of
# the signal rather than a real change point, which is why it is skipped when
# drawing boundaries below.
bkps = algo.predict(n_bkps=n_bkps)
bkps_times = librosa.frames_to_time(bkps, sr=sampling_rate, hop_length=hop_length_tempo)

# Display segmented tempogram
fig, ax = fig_ax()
_ = librosa.display.specshow(tempogram, ax=ax, x_axis="s", y_axis="tempo", hop_length=hop_length_tempo, sr=sampling_rate)
for b in bkps_times[:-1]:
    ax.axvline(b, ls="--", color="white", lw=4)

# Play each segment
# Convert frame indices directly to integer sample indices. Going through
# float seconds and multiplying back by the sampling rate can land a hair
# below the exact integer and be truncated one sample short.
bkps_time_indexes = librosa.frames_to_samples(bkps, hop_length=hop_length_tempo).tolist()
# Pair consecutive boundaries (starting from sample 0) to get each segment's
# [start, end) slice; slicing clamps an end index past the signal length.
for segment_number, (start, end) in enumerate(rpt.utils.pairwise([0] + bkps_time_indexes), start=1):
    segment = signal[start:end]
    print(f"Segment n°{segment_number} (duration: {segment.size / sampling_rate:.2f} s)")
    display(Audio(data=segment, rate=sampling_rate))
