In [200]:
import librosa
import numpy as np
from PIL import Image
import librosa.display
from librosa import feature
import matplotlib.pyplot as plt

In [194]:
# Define source and destination paths
input_dir = '../data/preprocessed/not-gunshot'
output_dir = '../data/processed/not-gunshot'
file_prefix = 'not_gunshot_'

In [195]:
SAMPLE_RATE = 16000
FRAME_SIZE = 2048
HOP_SIZE = FRAME_SIZE // 4
SEGMENT_LENGTH = SAMPLE_RATE * 2
SEGMENT_HOP = SEGMENT_LENGTH // 2

In [239]:
test_path_1_g = '../data/preprocessed/gunshot/gunshot_0.wav'
test_path_2_g = '../data/preprocessed/gunshot/gunshot_1.wav'
test_path_3_g = '../data/preprocessed/gunshot/gunshot_2.wav'

test_path_4_ng = '../data/preprocessed/not-gunshot/not_gunshot_0_0.wav'
test_path_5_ng = '../data/preprocessed/not-gunshot/not_gunshot_1_0.wav'
test_path_6_ng = '../data/preprocessed/not-gunshot/not_gunshot_2_0.wav'
test_path_7_ng = '../data/preprocessed/not-gunshot/not_gunshot_3_0.wav'

In [301]:
def mel_spectrogram_generator(path, sr = 16000, duration = 2.0, n_fft = 2560, hop_length = 128, n_mels = 512, fmin = 4000, fmax = 8000, power = 2.0, figsize = (5,5), target_shape = (256, 256), show = False, save = False):
    # Load audio data from path
    data, sr = librosa.load(path, sr = sr, duration = duration)

    # Compute spectrogram
    spectrogram = librosa.feature.melspectrogram(
        y = data,
        sr = sr,
        n_fft = n_fft,
        hop_length = hop_length,
        n_mels = n_mels,
        fmin = fmin,
        fmax = fmax,
        power = power
    )

    # Convert to decibel
    spectrogram_decibel = librosa.power_to_db(spectrogram)

    # Open and configure plot
    fig, ax = plt.subplots(figsize=figsize, dpi=100)
    ax.set_position([0, 0, 1, 1])
    ax.set_frame_on(False)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    fig.patch.set_alpha(0)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.ioff()

    # Plot spectrogram
    librosa.display.specshow(
        spectrogram_decibel,
        sr = sr,
        hop_length = hop_length,
        x_axis = "time",
        y_axis = "mel",
        fmin = fmin,
        fmax = fmax,
        vmin = -20,
        vmax = 10,
        cmap = 'magma'
    )

     # Copy spectrogram graph to image, then delete graph
    fig.canvas.draw()
    image = Image.frombytes('RGB', fig.canvas.get_width_height(), fig.canvas.tostring_rgb())
    plt.close(fig)

    # When True: Image of spectrogram is saved to cwd
    if save:
        image.save("spectrogram.png")

    # When False: Return numpy array for CNN input
    if not show:
        # Resize image to CNN input layer
        image = image.resize(target_shape)

        # Convert to array
        image_array = np.array(image)

        # Normalize [0, 1]
        image_array = image_array.astype(np.float32) / 255.0

        return image_array

    # When True: Print spectrogram to console
    else:
        image.show()

In [302]:
mel_spectrogram_generator(test_path_1_g, show = True, save = True)

  image = Image.frombytes('RGB', fig.canvas.get_width_height(), fig.canvas.tostring_rgb())
