In [20]:
from pathlib import Path

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf

In [21]:
# Input audio directory and output directory for rendered spectrograms.
DATA_PATH = Path("data")
SPECTROGRAM_OUTPUT_PATH = Path("spectrogram")

# Keyword arguments forwarded to ``savefig`` for every exported figure.
SAVE_PARAMS = dict(dpi=300, bbox_inches="tight", transparent=True)

# Frequency-axis ticks: nine octave steps starting at 31.25 Hz (31.25 ... 8000),
# paired one-to-one with their display labels.
TICKS = 31.25 * 2 ** np.arange(9)
TICK_LABELS = np.array(
    ["31.25", "62.5", "125", "250", "500", "1k", "2k", "4k", "8k"]
)

In [22]:
def plot_spectrogram(signal, sample_rate, output: Path, fft_size=2048, hop_size=None, window_size=None) -> None:
    """Render a log-frequency dB spectrogram of ``signal`` and save it as an image.

    The image is written next to ``output``, with the STFT parameters appended
    to the file stem:
    ``<stem>_spectrogram_win_length=<w>_hop_length=<h>_n_fft=<n><suffix>``.
    Missing parent directories are created.

    Parameters
    ----------
    signal : array-like
        One-dimensional (mono) audio samples.
    sample_rate : int or float
        Sampling rate of ``signal`` in Hz; used to scale the plot axes.
    output : Path
        Base path of the image; its stem is extended as described above.
    fft_size : int, default 2048
        FFT length passed to ``librosa.stft`` as ``n_fft``.
    hop_size : int, optional
        Hop between frames in samples. A falsy value (``None`` or ``0``)
        selects ``fft_size // 4``.
    window_size : int, optional
        Analysis window length in samples. A falsy value (``None`` or ``0``)
        selects ``fft_size``.

    Raises
    ------
    ValueError
        If ``signal`` is not one-dimensional.
    """
    # Fail early with a clear message instead of an obscure error from deep
    # inside librosa/matplotlib when a multi-channel array is passed in.
    if np.ndim(signal) != 1:
        raise ValueError(
            f"plot_spectrogram expects a 1-D (mono) signal, got shape {np.shape(signal)}; "
            "downmix or select a channel first"
        )

    # Compute default parameters
    if not window_size:
        window_size = fft_size
    if not hop_size:
        hop_size = fft_size // 4

    # Short-time Fourier transform -> magnitude -> decibels.
    # NOTE(review): frames are not centre-padded (center=False); confirm that the
    # resulting offset of the plotted time axis is acceptable.
    stft = librosa.stft(signal, n_fft=fft_size, hop_length=hop_size, win_length=window_size, center=False)
    spectrogram = np.abs(stft)
    # ref=np.max makes 0 dB the loudest bin of THIS spectrogram, so the values
    # are peak-relative even though the colorbar label below reads "dBFS".
    spectrogram_db = librosa.amplitude_to_db(spectrogram, ref=np.max)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(
            spectrogram_db,
            sr=sample_rate,
            x_axis='time',
            y_axis='log',
            hop_length=hop_size,
            cmap='inferno',
            ax=ax,
        )

        # Plot settings
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Frequency (Hz)")
        ax.set_yticks(TICKS)
        ax.set_yticklabels(TICK_LABELS)
        fig.colorbar(img, ax=ax, format="%+2.f dBFS")

        # Save plot
        output.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(
            output.with_stem(
                f"{output.stem}_spectrogram_win_length={window_size}_hop_length={hop_size}_n_fft={fft_size}"
            ),
            **SAVE_PARAMS,
        )
    finally:
        # Always release the figure, even if plotting or saving failed, so that
        # repeated calls in a long-lived kernel do not accumulate open figures.
        plt.close(fig)

In [23]:
# soundfile returns shape (frames,) for mono files and (frames, channels) otherwise.
signal, sample_rate = sf.read(DATA_PATH / 'sound_to_spec_test.mp3')
if signal.ndim > 1:
    # librosa.stft frames along the LAST axis, so a (frames, channels) array would
    # be analysed along the channel axis; average the channels down to mono first.
    signal = signal.mean(axis=1)
print(f'Sample rate: {sample_rate}')
plot_spectrogram(signal, sample_rate, SPECTROGRAM_OUTPUT_PATH / "sound_to_spec_test.png")

Sample rate: 48000
