This notebook converts the segmented WAV files into log-mel spectrograms and saves them as fixed-width PNG image chunks.

In [60]:
# adapted from https://github.com/musikalkemist/AudioSignalProcessingForML/blob/master/16%20-%20Extracting%20Spectrograms%20from%20Audio%20with%20Python/Extracting%20Spectrograms%20from%20Audio%20with%20Python.ipynb
import os
import librosa
import librosa.display
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt

In [61]:
# Sanity check: render an inline audio player for one segmented clip.
# The path is relative to the notebook's working directory.
example = "segmented_wavs/cm/CM-BBC_CMI_1995_06_21-0001.wav"
ipd.Audio(example)

In [62]:
# Create one spectrogram output folder per class; a no-op when they already exist.
for _folder in ("spectrograms/cm", "spectrograms/non_cm"):
    os.makedirs(_folder, exist_ok=True)

In [63]:
# Feature-extraction settings taken from Karthikeyan et al. (2025):
# https://link.springer.com/article/10.1007/s11042-025-20694-5#citeas

# Sample rate in Hz; the segmented WAVs are already stored at 16 kHz.
SR = 16000
# STFT window and hop lengths in samples, derived from their durations in ms.
# librosa does not require n_fft to be a power of two; it only recommends one
# for FFT speed.
FRAME_SIZE = SR * 20 // 1000  # 20 ms -> 320 samples
HOP_SIZE = SR * 10 // 1000  # 10 ms -> 160 samples
# Number of mel bands, i.e. rows of the mel spectrogram.
N_MELS = 128
# Lowest and highest frequency (Hz) covered by the mel filter bank.
FMIN, FMAX = 20, 8000
# Target chunk size: 128 time frames wide by 128 mel bands tall.
# CHUNK_HEIGHT is not referenced by the code visible in this notebook.
CHUNK_WIDTH, CHUNK_HEIGHT = 128, 128

In [64]:
def make_mel_chunks(y, sr, filename, out_dir="spectrograms/non_cm"):
    """Save a clip's log-mel spectrogram as a series of fixed-width PNG chunks.

    The clip is converted to a mel power spectrogram (Hamming window, using the
    notebook-level FRAME_SIZE / HOP_SIZE / N_MELS / FMIN / FMAX settings),
    scaled to dB relative to the clip's own maximum, and cut along the time
    axis into consecutive, non-overlapping slices of CHUNK_WIDTH frames. Each
    slice is rendered with the 'magma' colormap, axes hidden, and written to
    ``<out_dir>/<filename>_chunk<i>.png``.

    Trailing frames that do not fill a whole chunk are dropped, so a clip
    shorter than CHUNK_WIDTH frames produces no images.

    Parameters
    ----------
    y : audio samples as accepted by ``librosa.feature.melspectrogram``.
    sr : sample rate of ``y`` in Hz.
    filename : base name (no extension) used as the prefix of each PNG.
    out_dir : folder the PNGs are written to; created if missing. Defaults to
        the previously hard-coded "spectrograms/non_cm"; pass
        "spectrograms/cm" to process the other class.

    Returns
    -------
    list of str
        Paths of the PNG files written, in chunk order (empty if the clip was
        too short).

    NOTE(review): the PNG pixel size is set by figsize/dpi and the tight
    bounding box, not by CHUNK_WIDTH -- confirm the downstream pipeline
    resizes to 128 x 128 if that is what the model expects.
    NOTE(review): the notebook output shows a warning raised from librosa's
    ``filters.mel`` call; presumably n_fft=320 is too coarse for 128 mel
    bands, leaving some filters empty -- confirm.
    """
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=FRAME_SIZE, hop_length=HOP_SIZE,
        n_mels=N_MELS, fmin=FMIN, fmax=FMAX, window='hamming'
    )
    # ref=np.max makes 0 dB correspond to the loudest bin of this clip.
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    os.makedirs(out_dir, exist_ok=True)

    # Floor division guarantees every slice below is exactly CHUNK_WIDTH wide,
    # so no per-chunk width check is needed.
    num_chunks = log_mel_spec.shape[1] // CHUNK_WIDTH

    saved_paths = []
    for i in range(num_chunks):
        start = i * CHUNK_WIDTH
        chunk = log_mel_spec[:, start:start + CHUNK_WIDTH]
        save_path = os.path.join(out_dir, f"{filename}_chunk{i}.png")

        fig, ax = plt.subplots(figsize=(3, 3))
        librosa.display.specshow(chunk, sr=sr, hop_length=HOP_SIZE,
                                 x_axis=None, y_axis=None, cmap='magma', ax=ax)
        ax.axis("off")
        fig.savefig(save_path, bbox_inches="tight", pad_inches=0, dpi=100)
        # Close explicitly: this runs once per chunk and would otherwise
        # accumulate open figures.
        plt.close(fig)
        saved_paths.append(save_path)

    return saved_paths

In [65]:
# Convert every WAV segment in the selected class folder into spectrogram chunks.
# Switch to the commented-out path to process the other class.
directory = 'segmented_wavs/non_cm' #'segmented_wavs/cm'
for filename in os.listdir(directory):
    # Ignore anything in the folder that is not a WAV file.
    if not filename.endswith('.wav'):
        continue
    base_filename = os.path.splitext(filename)[0]
    file_path = os.path.join(directory, filename)
    # Load at the project sample rate (librosa resamples if the file differs).
    y, sr = librosa.load(file_path, sr=SR)
    make_mel_chunks(y, sr, base_filename)

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
