In [1]:
import os
import librosa
import librosa.display
import IPython.display as ipd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from scipy.signal import butter, filtfilt, lfilter
import pywt
import gc

In [2]:
#audio_files=glob("C:/Users/User/EDD_audio/EATD-Corpus/*/*.wav")
##ipd.Audio(audio_files[0])
#y, sr=librosa.load(audio_files[0]) #The raw output is y, sample rate is sr

##pd.Series(y).plot(figsize=(10,5), lw=1, title="Raw audio example")
#y_trimmed, _= librosa.effects.trim(y, top_db=20)
##pd.Series(y_trimmed).plot(figsize=(10,5), lw=1, title="Raw audio trimmed example", color=color_pal[1])
#pd.Series(y_trimmed[5000:10000]).plot(figsize=(10,5), lw=1, title="Raw audio zoomed in example")

In [2]:
#Functions for pre-processing
def load_audio(audio_path, target_sr=16000):
    """Read an audio file with librosa, resampled to ``target_sr``.

    Args:
        audio_path: Path of the audio file to read.
        target_sr: Sampling rate passed to ``librosa.load`` (default 16000).

    Returns:
        A ``(samples, sample_rate)`` tuple as produced by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(audio_path, sr=target_sr)
    return samples, sample_rate
    
def butter_lowpass_filter(y, sample_rate, low_freq=300, high_freq=3400, order=4):
    """Apply a Butterworth band-pass filter to a signal.

    Despite the function name, the filter is designed with ``btype='band'``,
    so it keeps the range ``low_freq``..``high_freq`` rather than acting as a
    pure low-pass.

    Args:
        y: Input signal (1-D array-like).
        sample_rate: Sampling rate of ``y`` in Hz.
        low_freq: Lower band edge in Hz.
        high_freq: Upper band edge in Hz.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The filtered signal, computed with a single forward pass (``lfilter``).
    """
    nyquist = 0.5 * sample_rate
    # butter() takes the band edges as fractions of the Nyquist frequency.
    band_edges = [edge / nyquist for edge in (low_freq, high_freq)]
    numerator, denominator = butter(order, band_edges, btype='band')
    return lfilter(numerator, denominator, y)

def pad_or_trim(y, target_length):
    """Force a 1-D signal to exactly ``target_length`` samples.

    Longer signals are cut after ``target_length`` samples. Shorter (or
    equal-length) signals are zero-padded on both sides; when the padding is
    odd, the extra sample goes at the end.

    Args:
        y: Input signal.
        target_length: Desired number of samples.

    Returns:
        The trimmed slice of ``y`` or a zero-padded copy.
    """
    missing = target_length - len(y)
    if missing < 0:
        return y[:target_length]
    leading = missing // 2
    trailing = missing - leading
    return np.pad(y, (leading, trailing), mode='constant')

# Load audio file using librosa & resample them to a constant sampling rate
def preprocess_audio(file_path, target_length=220500, sr=22050):
    """Load one audio file, band-pass filter it and fix its length.

    The amount of audio read from disk is derived from ``target_length`` and
    ``sr`` so that the loaded clip can actually fill the output. Previously
    the load was capped at 4 seconds regardless of ``target_length``, which
    meant the default 220500-sample output (10 s at 22050 Hz) was mostly
    zero padding.

    Args:
        file_path: Path of the audio file.
        target_length: Number of samples in the returned signal.
        sr: Sampling rate to resample to. If falsy (e.g. ``None``), no load
            duration is imposed and the rate reported by ``librosa.load`` is
            used for filtering.

    Returns:
        ``(y, sr)`` where ``y`` has exactly ``target_length`` samples and
        ``sr`` is the sampling rate reported by ``librosa.load``.
    """
    # Read only as many seconds as the output can hold; pad_or_trim() below
    # still guarantees the exact sample count.
    duration = target_length / sr if sr else None
    y, loaded_sr = librosa.load(file_path, sr=sr, duration=duration)
    y = butter_lowpass_filter(y, sample_rate=loaded_sr)
    y = pad_or_trim(y, target_length)
    return y, loaded_sr

def process_batch(file_paths, target_length=220500, sr=22050):
    """Preprocess several audio files and stack the results.

    Each path is passed through ``preprocess_audio`` (load, filter, pad or
    trim); the returned sample rate is discarded.

    Args:
        file_paths: Iterable of audio file paths.
        target_length: Number of samples per processed signal.
        sr: Sampling rate forwarded to ``preprocess_audio``.

    Returns:
        A numpy array built from the list of processed signals.
    """
    signals = [preprocess_audio(path, target_length, sr)[0] for path in file_paths]
    return np.array(signals)

def process_all_files_in_batches(file_paths, batch_size, target_length=220500, sr=22050):
    """Lazily preprocess audio files, ``batch_size`` files at a time.

    This is a generator: nothing (including the emptiness check) runs until
    the first item is requested.

    Args:
        file_paths: Sequence of audio file paths.
        batch_size: Maximum number of files per yielded batch.
        target_length: Number of samples per processed signal.
        sr: Sampling rate forwarded to ``process_batch``.

    Yields:
        The array returned by ``process_batch`` for each consecutive slice of
        ``file_paths``.

    Raises:
        ValueError: If ``file_paths`` is empty.
    """
    if not file_paths:
        raise ValueError("No .wav files found in the specified directory.")

    for start in range(0, len(file_paths), batch_size):
        chunk = file_paths[start:start + batch_size]
        yield process_batch(chunk, target_length, sr)

In [3]:
#Functions to plot spectrograms
def plot_waveform(y, sr, title="Waveform", save_path=None):
    """Draw the time-domain waveform of a signal.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y``.
        title: Figure title.
        save_path: If given, the figure is written to this path; otherwise it
            is shown.

    The figure is closed afterwards and a garbage collection is requested.
    """
    plt.figure(figsize=(10, 4))
    librosa.display.waveshow(y, sr=sr)

    # Title and axis labels, applied in a fixed order.
    for apply_label, text in ((plt.title, title), (plt.xlabel, "Time"), (plt.ylabel, "Amplitude")):
        apply_label(text)
    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path)

    plt.close()
    gc.collect()  # release figure memory promptly

def plot_fourier_transform(y, sr, title="Fourier Transform", save_path=None):
    """Plot the magnitude spectrum of a signal.

    The full FFT is computed and only the first ``len(y) // 2`` bins (the
    non-negative frequencies) are drawn.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz; used to label the frequency axis.
        title: Figure title.
        save_path: If given, the figure is written to this path; otherwise it
            is shown.

    The figure is closed afterwards and a garbage collection is requested.
    """
    n_samples = len(y)
    half = n_samples // 2
    sample_period = 1.0 / sr

    spectrum = np.fft.fft(y)
    frequencies = np.fft.fftfreq(n_samples, sample_period)
    magnitude = np.abs(spectrum)

    plt.figure(figsize=(12, 6))
    plt.plot(frequencies[:half], magnitude[:half])
    plt.title(title)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Magnitude')
    plt.grid()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path)

    plt.close()
    gc.collect()  # release figure memory promptly

def plot_stft_spectro(y, sr, title="Spectrogram", save_path=None):
    """Plot an STFT magnitude spectrogram in dB on a log-frequency axis.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y``.
        title: Figure title.
        save_path: If given, the figure is written to this path; otherwise it
            is shown.

    The figure is closed afterwards and a garbage collection is requested.
    """
    plt.figure(figsize=(10, 4))

    # Magnitude of the STFT, expressed in dB relative to its maximum.
    magnitude = np.abs(librosa.stft(y))
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path)

    plt.close()
    gc.collect()  # release figure memory promptly

def plot_cwt_spectro(y, sr, wavelet='cmor', scales=None, title="CWT Spectrogram", save_path=None):
    """Plot the power of a continuous wavelet transform as a scalogram.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz.
        wavelet: Wavelet name passed to ``pywt.cwt``.
            NOTE(review): PyWavelets appears to prefer complex Morlet names
            with explicit bandwidth/centre frequency (e.g. ``'cmor1.5-1.0'``)
            -- confirm against the installed version.
        scales: Scales to evaluate; defaults to ``np.arange(1, 128)``.
        title: Figure title.
        save_path: If given, the figure is written to this path; otherwise it
            is shown.

    The y-axis range is taken from the first and last entry of ``scales``
    (previously hard-coded to 1..128, which mislabelled custom scales), and
    the colorbar is labelled as plain power because the plotted values are
    ``|coefficients| ** 2`` and are not converted to dB.

    The figure is closed afterwards and a garbage collection is requested.
    """
    if scales is None:
        # Default range of scales
        scales = np.arange(1, 128)
    # Accept lists and scalars as well as arrays so the extent can be indexed.
    scales = np.atleast_1d(scales)

    # Compute the Continuous Wavelet Transform
    coefficients, _ = pywt.cwt(y, scales, wavelet, sampling_period=1/sr)
    power = np.abs(coefficients) ** 2

    # Plot the CWT scalogram: x spans the clip duration, y spans the scales used.
    plt.figure(figsize=(12, 6))
    extent = [0, len(y)/sr, float(scales[0]), float(scales[-1])]
    plt.imshow(power, extent=extent, aspect='auto', cmap='jet', origin='lower')
    plt.colorbar(label='Power')
    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Scale')

    if save_path:
        plt.savefig(save_path)
    else:
        plt.show()
    plt.close()
    gc.collect()  # Explicitly request garbage collection

def preprocess_and_plot_batches(file_paths, batch_size=5, sr=22050, wavelet='cmor', scales=None):
    """Save waveform, STFT and Fourier-transform plots for each audio file.

    Files are handled in groups of ``batch_size`` and a progress line is
    printed after each group. For a file ``<dir>/<name>.wav`` the images are
    written next to it as ``<name>-<dir>_waveform.png``,
    ``<name>-<dir>_STFT_spectro.png`` and ``<name>-<dir>_FT_spectro.png``.

    Args:
        file_paths: Sequence of audio file paths.
        batch_size: Number of files per progress group.
        sr: Sampling rate passed to ``librosa.load`` for every file.
        wavelet: Accepted for interface compatibility; currently unused
            because CWT plotting is not performed here.
        scales: Accepted for interface compatibility; currently unused.
    """
    # Ceiling division: the previous ``len // batch_size + 1`` reported one
    # batch too many whenever the file count was a multiple of batch_size.
    total_batches = (len(file_paths) + batch_size - 1) // batch_size

    for i in range(0, len(file_paths), batch_size):
        batch_files = file_paths[i:i + batch_size]
        for audio_file in batch_files:
            # Keep the requested ``sr`` untouched so that every file is loaded
            # with the same argument (matters when sr is None).
            y, file_sr = librosa.load(audio_file, sr=sr)

            # Output images live beside the audio file and carry the name of
            # its parent directory.
            parent_dir = os.path.basename(os.path.dirname(audio_file))
            image_stem = os.path.splitext(audio_file)[0] + '-' + parent_dir
            waveform_image_path = image_stem + '_waveform.png'
            ft_image_path = image_stem + '_FT_spectro.png'
            stft_image_path = image_stem + '_STFT_spectro.png'

            plot_waveform(y, file_sr, title="Waveform", save_path=waveform_image_path)
            plot_stft_spectro(y, file_sr, title="STFT Spectrogram", save_path=stft_image_path)
            plot_fourier_transform(y, file_sr, title="FT Spectrogram", save_path=ft_image_path)
        print(f"Processed batch {i // batch_size + 1} of {total_batches}")

In [4]:
# Pre-process the training audio files in batches of 16.
# Collects every '*out.wav' file below the 'EATD-Corpus/t_*' folders
# (back-slashes in the pattern are normalised to forward slashes).
# `train_files` is reused by the CWT plotting cell further down.
train_files = glob(os.path.join("EATD-Corpus/t_*", '**/*out.wav').replace('\\','/'), recursive=True)
# Each yielded batch is only inspected for its shape here; the processed
# arrays themselves are not stored.
for batch in process_all_files_in_batches(train_files, batch_size=16):
    print(f'Processed batch of shape: {batch.shape}')

Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (9, 220500)


In [9]:
# Save waveform, STFT and Fourier-transform images for the '*out.wav' files
# in the training folders whose names match 't_9*'.
file_paths = glob(os.path.join("EATD-Corpus/t_9*", '**/*out.wav').replace('\\','/'), recursive=True)
preprocess_and_plot_batches(file_paths, batch_size=5)

Processed batch 1 of 5
Processed batch 2 of 5
Processed batch 3 of 5
Processed batch 4 of 5
Processed batch 5 of 5


In [5]:
# Pre-process the validation audio files in batches of 16.
# Collects every '*out.wav' file below the 'EATD-Corpus/v_*' folders.
val_files = glob(os.path.join("EATD-Corpus/v_*", '**/*out.wav').replace('\\','/'), recursive=True)
# Each yielded batch is only inspected for its shape here; the processed
# arrays themselves are not stored.
for batch in process_all_files_in_batches(val_files, batch_size=16):
    print(f'Processed batch of shape: {batch.shape}')

Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (16, 220500)
Processed batch of shape: (13, 220500)


In [None]:
# Save waveform, STFT and Fourier-transform images for the '*out.wav' files
# in the validation folders whose names match 'v_9*'.
# NOTE: this rebinds `file_paths`, which the training plot cell also assigns.
file_paths = glob(os.path.join("EATD-Corpus/v_9*", '**/*out.wav').replace('\\','/'), recursive=True)
preprocess_and_plot_batches(file_paths, batch_size=5)

In [None]:
# Generate one CWT spectrogram image per training file.
# Depends on `train_files` from the training pre-processing cell; the
# commented-out lines below are alternative subsets that can be enabled instead.
#files_in_directory = os.listdir("EATD-Corpus")
#train_files = glob(os.path.join("EATD-Corpus/t_3*", '**/*out.wav').replace('\\','/'), recursive=True)
#train_files = glob(os.path.join("EATD-Corpus/t_4*", '**/*out.wav').replace('\\','/'), recursive=True)

for audio_file in train_files:
    # Load audio file (load_audio resamples to its default of 16000 Hz,
    # unlike the 22050 Hz default used by the other plotting helpers)
    audio, sr = load_audio(audio_file)
    # The image is written next to the audio file and carries the name of
    # its parent directory.
    parent_dir = os.path.basename(os.path.dirname(audio_file))
    cwt_image_path = os.path.splitext(audio_file)[0] +'-'+ parent_dir+ '_CWT_spectro.png'
    plot_cwt_spectro(audio, sr, title="CWT Spectrogram", save_path=cwt_image_path)
    # Print the output path as a progress indicator.
    print(cwt_image_path)

In [None]:
#files = librosa.util.find_files('EATD-Corpus')
# Despite its name, `data_dir` holds the list of every '*out.wav' path found
# below 'EATD-Corpus', not a directory.
data_dir = glob(os.path.join("EATD-Corpus", '**/*out.wav').replace('\\','/'), recursive=True)
# Prints the complete list of matched paths.
print(data_dir)
# NOTE(review): `preprocess_dataset` is not defined in any cell shown here, so
# this raises NameError on a fresh kernel unless it is defined or imported
# elsewhere -- confirm. The `.to_csv` call below presumes it returns a pandas
# DataFrame.
preprocessed_data = preprocess_dataset(data_dir)
preprocessed_data.to_csv('preprocessed_dataset.csv', index=False)