In [200]:
import librosa
import numpy as np
from PIL import Image
import librosa.display
from librosa import feature
import matplotlib.pyplot as plt

In [194]:
# Define source and destination paths
input_dir = '../data/preprocessed/not-gunshot'
output_dir = '../data/processed/not-gunshot'
file_prefix = 'not_gunshot_'

In [195]:
SAMPLE_RATE = 16000
FRAME_SIZE = 2048
HOP_SIZE = FRAME_SIZE // 4
SEGMENT_LENGTH = SAMPLE_RATE * 2
SEGMENT_HOP = SEGMENT_LENGTH // 2

In [196]:
test_path_1 = '../data/preprocessed/gunshot/gunshot_0.wav'
test_path_2 = '../data/preprocessed/gunshot/gunshot_1.wav'
test_path_3 = '../data/preprocessed/gunshot/gunshot_2.wav'
test_path_4 = '../data/preprocessed/not-gunshot/not_gunshot_0_0.wav'

In [228]:
def mel_spectrogram_generator(path, n_fft = 2560, hop_length = 128, n_mels = 512, fmin = 4000, fmax = 8000, power = 2.0, target_shape = (256, 256), show = False):
    # Load audio data from path
    data, sr = librosa.load(path)

    # Compute spectrogram
    spectrogram = librosa.feature.melspectrogram(
        y = data,
        sr = sr,
        n_fft = n_fft,
        hop_length = hop_length,
        n_mels = n_mels,
        fmin = fmin,
        fmax = fmax,
        power = power
    )

    # Convert to decibel
    spectrogram_decibel = librosa.power_to_db(spectrogram)

    # Open and configure plot
    fig, ax = plt.subplots(figsize = (10, 5))
    fig.tight_layout(pad = 0)
    ax.axis('off')
    plt.ioff()

    # Plot spectrogram
    librosa.display.specshow(
        spectrogram_decibel,
        sr = sr,
        hop_length = hop_length,
        x_axis = "time",
        y_axis = "mel",
        fmin = fmin,
        fmax = fmax,
        vmin = -20,
        vmax = 10,
        cmap = 'magma',
    )

    # When False: Return numpy array for CNN input
    if not show:
        # Copy spectrogram graph to image, then delete graph
        fig.canvas.draw()
        image = Image.frombytes('RGB', fig.canvas.get_width_height(), fig.canvas.buffer_rgba())
        plt.close(fig)

        # Resize image to CNN input layer
        image = image.resize(target_shape)

        # Convert to array
        image_array = np.array(image)

        # Normalize [0, 1]
        image_array = image_array.astype(np.float32) / 255.0

        return image_array

    # When True: Print spectrogram to console
    else:
        plt.show()

In [230]:
print(mel_spectrogram_generator(test_path_1))

[[0.6039216  0.58431375 0.54509807]
 [0.58431375 0.58431375 0.5882353 ]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.58431375 0.58431375 0.58431375]
 [0.54901963 0.54509807 0.50980395]
 [0.28235295 0.2509804  0.25490198]
 [0.2509804  0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25490198]
 [0.25490198 0.25490198 0.25