In [8]:
import os
import matplotlib.pyplot as plt
import librosa
import numpy as np

In [9]:
ROOT_PATH = "../"

DATASET_FOLDER = ROOT_PATH + "Dataset/Audios/"

In [None]:
def create_spectrogram(audio_file):
    # Define the frequency range
    fmin = 0  # Minimum frequency (0 Hz)
    fmax = 16000  # Maximum frequency (32000 Hz)

    y, sr = librosa.load(audio_file, sr=None)

    # if audio is less than 3 seconds, pad it with zeros
    if len(y) < 3 * sr:
        y = np.pad(y, int(np.ceil((3 * sr - len(y)) / 2)), mode='constant')
        # Create the output path for the image
        output_image_path = audio_file.replace('Audios', 'Images').replace(".WAV", ".PNG")

        # Ensure the output folder exists
        os.makedirs(os.path.dirname(output_image_path), exist_ok=True)

    # if audio is more than 3 seconds, create as many 3-second clips as possible and add _i to the filename
    elif len(y) > 3 * sr:
        for i in range(int(len(y) / (3 * sr))):
            y_clip = y[i * 3 * sr:(i + 1) * 3 * sr]
            output_image_path = audio_file.replace('Audios', 'Images').replace(".WAV", f"_{i}.PNG")
            create_spectrogram(y_clip, output_image_path)
            # Ensure the output folder exists
            os.makedirs(os.path.dirname(output_image_path), exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 6))  # Set the background color to black
    D = librosa.amplitude_to_db(librosa.stft(y), ref=np.max)
    librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log", fmin=fmin, fmax=fmax, ax=ax)  # Specify frequency range
    ax.axis('off')  # Remove axes

    # Save the figure using the output_image_path
    fig.savefig(output_image_path, bbox_inches='tight', pad_inches=0, transparent=True)
    
    # Close the figure to release memory resources
    plt.close(fig)

# Recursively iterate through all subdirectories and audio files
for root, _, files in os.walk(DATASET_FOLDER):
    for file in files:
        if file.endswith('.WAV'):
            audio_file = os.path.join(root, file)
            output_image_path = audio_file.replace('Audios', 'Images').replace(".WAV", ".PNG")
            if not os.path.exists(output_image_path): # Skip if the image already exists
                create_spectrogram(audio_file)

TypeError: Invalid file: array([ 1.4953613e-03,  9.1552734e-05,  0.0000000e+00, ...,
       -9.1552734e-05,  3.6621094e-04, -2.1057129e-03], dtype=float32)

In [None]:
# Count number of .WAV files in Dataset Folder and Count number of .PNG files in Images Folder
audio_files = sum([len(files) for _, _, files in os.walk(DATASET_FOLDER)])
image_files = sum([len(files) for _, _, files in os.walk(DATASET_FOLDER.replace('Audios', 'Images'))])

print(f"Number of audio files: {audio_files}")
print(f"Number of image files: {image_files}")