Importing the Necessary Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import soundfile as sf
from glob import glob
from itertools import cycle
import random
import gc
import queue

# Plot styling: seaborn "white" theme, with no seaborn palette applied
sns.set_theme(style="white", palette=None)
# Colours of the active matplotlib property cycle: once as a plain list,
# once as an endless iterator built from an independent copy of that list
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(list(color_pal))

Build the list of audio file paths and create the output directories.

In [None]:
# Root of the nested dataset folder (presumably one sub-folder per genre)
DATA_DIR = "./genres_original"
# Upper bound on the number of clips sampled from each sub-folder
SAMPLES_PER_GENRE = 10
# Fixed seed so that Restart & Run All selects the same clips every time
SEED = 42

# Private RNG instance: reproducible without touching the global `random` state
sampler = random.Random(SEED)

audio_files = []
# Sorted because os.listdir returns entries in arbitrary order
for subdirectory in sorted(os.listdir(DATA_DIR)):
    subdirectoryPath = os.path.join(DATA_DIR, subdirectory)
    if not os.path.isdir(subdirectoryPath):
        continue
    # Keep only .wav clips so stray files are never handed to the audio loader
    wav_names = sorted(
        name for name in os.listdir(subdirectoryPath)
        if name.lower().endswith(".wav")
    )
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of clips actually present
    selected_files = sampler.sample(wav_names, min(SAMPLES_PER_GENRE, len(wav_names)))
    audio_files.extend(os.path.join(subdirectoryPath, name) for name in selected_files)
print(audio_files[:10])

# Output directories for saving plots and MFCC features
output_dir = 'output'
spectrogram_dir = os.path.join(output_dir, 'spectrogram_plots')
mel_spectrogram_dir = os.path.join(output_dir, 'mel_spectrogram_plots')
mfcc_dir = os.path.join(output_dir, 'mfcc_features')
for directory in (spectrogram_dir, mel_spectrogram_dir, mfcc_dir):
    os.makedirs(directory, exist_ok=True)

This function augments the audio by pitch shifting, time stretching, and adding noise. Augmentation is performed to improve model accuracy.

In [None]:
# Function to augment audio
def augment_audio(y, sr=22050, n_steps=4, rate=1.5, noise_level=0.005):
    """Create three augmented variants of an audio signal.

    Parameters
    ----------
    y : np.ndarray
        Audio samples to augment.
    sr : int, default 22050
        Sampling rate of ``y``, forwarded to the pitch shifter. The default
        is the value that used to be hard-coded here.
    n_steps : float, default 4
        Pitch-shift amount passed to ``librosa.effects.pitch_shift``.
    rate : float, default 1.5
        Stretch factor passed to ``librosa.effects.time_stretch``.
    noise_level : float, default 0.005
        Multiplier applied to standard-normal noise before it is added to ``y``.

    Returns
    -------
    list
        ``[y_pitch_shifted, y_time_stretched, y_noisy]``, in that order.
    """
    y_pitch_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    y_time_stretched = librosa.effects.time_stretch(y=y, rate=rate)
    # Draw noise with the full shape of the signal so the addition is
    # element-wise even when y has more than one dimension
    noise = np.random.randn(*np.shape(y))
    y_noisy = y + noise_level * noise
    return [y_pitch_shifted, y_time_stretched, y_noisy]

This function plots the Short-Time Fourier Transform (STFT) spectrogram and saves it as an image.

In [None]:
def plotSTFT(y, sr, file_basename, suffix):
    """Plot the STFT spectrogram of a signal and save it as a PNG.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    sr : int
        Sampling rate of ``y``; used to scale the time and frequency axes.
    file_basename : str
        Base name used in the plot title and the output file name.
    suffix : str
        Text appended to ``file_basename`` (empty for the original signal).

    The image is written to ``spectrogram_dir`` as
    ``{file_basename}{suffix}.png``.
    """
    # Magnitude spectrogram in dB, relative to the loudest bin
    D = librosa.stft(y)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    fig, ax = plt.subplots(figsize=(8, 4))  # Reduced figure size
    try:
        # sr must be forwarded, otherwise specshow falls back to its own default
        img = librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='log', ax=ax)
        ax.set_title(f'Spectrogram Waveform of {file_basename}{suffix}', fontsize=14)
        fig.colorbar(img, ax=ax, format='%0.2f')
        spectrogram_filename = os.path.join(spectrogram_dir, f'{file_basename}{suffix}.png')
        # Save this specific figure rather than whatever pyplot considers current
        fig.savefig(spectrogram_filename)
    finally:
        # Close even when plotting or saving fails, so figures do not pile up
        plt.close(fig)
    gc.collect()

This function plots the Mel spectrogram and saves it as an image.

In [None]:
def plotMelSpectrogram(y, sr, file_basename, suffix):
    """Plot the Mel spectrogram of a signal and save it as a PNG.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    sr : int
        Sampling rate of ``y``; used for the Mel filter bank and the axes.
    file_basename : str
        Base name used in the plot title and the output file name.
    suffix : str
        Text appended to ``file_basename`` (empty for the original signal).

    The image is written to ``mel_spectrogram_dir`` as
    ``{file_basename}{suffix}.png``.
    """
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    # melspectrogram yields a power spectrogram by default, so the matching
    # conversion is power_to_db; amplitude_to_db would double every dB value
    S_db_mel = librosa.power_to_db(S, ref=np.max)
    fig, ax = plt.subplots(figsize=(8, 4))  # Reduced figure size
    try:
        # Rows are Mel bands, so label the frequency axis on the Mel scale
        img = librosa.display.specshow(S_db_mel, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.set_title(f'Mel Spectrogram Waveform of {file_basename}{suffix}', fontsize=14)
        fig.colorbar(img, ax=ax, format='%0.2f')
        mel_spectrogram_filename = os.path.join(mel_spectrogram_dir, f'{file_basename}{suffix}.png')
        # Save this specific figure rather than whatever pyplot considers current
        fig.savefig(mel_spectrogram_filename)
    finally:
        # Close even when plotting or saving fails, so figures do not pile up
        plt.close(fig)
    gc.collect()

This function computes the MFCCs (Mel-frequency cepstral coefficients) and saves them to a CSV file.

In [None]:
def computeMFCC(y, sr, file_basename, suffix):
    """Extract 13 MFCCs from a signal and store them as CSV.

    The coefficient matrix returned by ``librosa.feature.mfcc`` is written,
    without the DataFrame index, to ``{file_basename}{suffix}.csv`` inside
    ``mfcc_dir``.
    """
    csv_path = os.path.join(mfcc_dir, f'{file_basename}{suffix}.csv')
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    pd.DataFrame(coefficients).to_csv(csv_path, index=False)
    # Release the feature matrix before forcing a collection pass
    del coefficients
    gc.collect()

This is the wrapper function that processes a single audio signal.

In [None]:
# Run every feature-extraction step on one audio signal
def process_audio_file(y, sr, file_basename, augment_index=None):
    """Produce the STFT plot, the Mel spectrogram plot and the MFCC CSV for one signal.

    ``augment_index`` distinguishes augmented variants: when it is given
    (including ``0``), outputs are named ``{file_basename}aug{augment_index}``;
    when it is ``None`` the bare ``file_basename`` is used.
    """
    if augment_index is None:
        suffix = ''
    else:
        suffix = f'aug{augment_index}'
    # Same order as before: STFT plot, Mel plot, then MFCC export
    for step in (plotSTFT, plotMelSpectrogram, computeMFCC):
        step(y, sr, file_basename, suffix)
    gc.collect()

This wrapper function loads one audio file and processes both the original signal and its augmented versions.

In [None]:
def doStuff(wav_file):
    """Load one audio file and process the original plus its augmented variants.

    Parameters
    ----------
    wav_file : str
        Path to the audio file to load.

    For the original signal and for each variant returned by
    ``augment_audio``, ``process_audio_file`` is called. Each variant is also
    written to a transient ``.wav`` in ``output_dir`` that is removed again
    once the variant has been processed, including when processing fails.
    """
    file_basename = os.path.splitext(os.path.basename(wav_file))[0]
    # Load the audio file
    y, sr = librosa.load(wav_file)

    # Process the original audio file
    process_audio_file(y, sr, file_basename)
    # Augment the audio and process augmented versions
    for i, aug_y in enumerate(augment_audio(y)):
        augmented_filename = os.path.join(output_dir, f'{file_basename}aug{i}.wav')
        try:
            sf.write(augmented_filename, aug_y, sr)  # Save the augmented audio file
            process_audio_file(aug_y, sr, file_basename, augment_index=i)
        finally:
            # Always clean up the transient file; the existence check avoids
            # masking an error raised by the write itself
            if os.path.exists(augmented_filename):
                os.remove(augmented_filename)
    gc.collect()

Main Loop

In [None]:

# Enqueue every selected clip, then drain the queue one file at a time
fileQueue = queue.Queue()
for wav_file in audio_files:
    fileQueue.put_nowait(wav_file)

while True:
    try:
        wav_file = fileQueue.get_nowait()
    except queue.Empty:
        # Nothing left to process
        break
    doStuff(wav_file)
    gc.collect()