# Preprocessing

### Initialization

In [1]:
import os
import gc
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import splitfolders

In [2]:
# Input/output locations, all relative to the notebook's working directory.
AUDIO_PATH = "./augmented_data_v2/"                 # per-class audio folders read by the Mel spectrogram step
MEL_SPECTROGRAM_PATH = "./mel_spectrogram_data_v2"  # per-class spectrogram images are written here
MFCC_PATH = "./mfcc_data"                           # NOTE(review): presumably the MFCC output root; its use is not shown in this section

In [3]:
# os.listdir() returns names in arbitrary order; sort so the class list
# (and anything later indexed by position in it) is identical on every
# run and every machine.
BIRDS = sorted(os.listdir(AUDIO_PATH))

### Mel Spectrogram

In [4]:
# Mel spectrogram parameters based on base paper
SAMPLE_RATE = 32000   # Hz; target sampling rate handed to librosa.load (sr)
N_MELS = 48           # number of Mel bands (n_mels)
WIN_SIZE = 1024       # FFT window length in samples (n_fft)
HOP_SIZE = 512        # samples between successive frames (hop_length)
WINDOW_TYPE = "hann"  # window function name (window)
FMIN = 500            # Hz; lowest frequency of the Mel filter bank (fmin)
FMAX = 12500          # Hz; highest frequency of the Mel filter bank (fmax)

In [5]:
def _visible_entries(folder):
    """Return the non-hidden names inside ``folder`` in sorted order."""
    # os.listdir() gives no ordering guarantee; sorting keeps the numbering of
    # the generated images reproducible. Dot-prefixed entries (for example
    # .ipynb_checkpoints or .DS_Store) are never classes or audio clips.
    return sorted(name for name in os.listdir(folder) if not name.startswith('.'))


def _render_mel_image(audio_file_path, save_path):
    """Render one audio file as a log-scaled Mel spectrogram image.

    Loads at most the first 10 seconds of ``audio_file_path`` at
    ``SAMPLE_RATE``, computes a Mel power spectrogram with the module-level
    parameters, converts it to decibels relative to its own maximum and saves
    the axis-free plot to ``save_path``.
    """
    # Load audio file
    signal, sr = librosa.load(audio_file_path, sr=SAMPLE_RATE, duration=10)

    # Generate Mel spectrogram
    S = librosa.feature.melspectrogram(y=signal,
                                       sr=sr,
                                       n_fft=WIN_SIZE,
                                       hop_length=HOP_SIZE,
                                       n_mels=N_MELS,
                                       fmin=FMIN,
                                       fmax=FMAX,
                                       window=WINDOW_TYPE)

    # Convert power spectrogram to decibel (log scale)
    mel_spec = librosa.power_to_db(S, ref=np.max)

    # Explicit figure/axes so the figure can be closed reliably below.
    fig, ax = plt.subplots(figsize=(5, 5))  # You can adjust this for different image sizes
    try:
        # NOTE(review): y_axis='linear' is kept as in the original call even
        # though the rows are Mel bands; confirm this is the intended layout.
        librosa.display.specshow(mel_spec, sr=sr, fmin=FMIN, fmax=FMAX,
                                 y_axis='linear', ax=ax)
        ax.axis('off')  # Remove axes for cleaner image
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails,
        # so an error does not leave open figures behind in the kernel.
        plt.close(fig)


def save_mel_spectrograms(audio_path, mel_path):
    """Convert every audio clip under ``audio_path`` into a spectrogram image.

    ``audio_path`` is expected to contain one sub-folder per bird class. For
    each class folder a folder of the same name is created under ``mel_path``
    and every file inside the class folder is rendered to
    ``<mel_path>/<class>/<class>_<n>.jpg``, where ``n`` starts at 1 and
    follows the sorted order of the file names.

    Entries of ``audio_path`` that are not directories, and hidden
    (dot-prefixed) entries at either level, are skipped.

    Args:
        audio_path: Root folder holding one sub-folder of audio files per class.
        mel_path: Root folder that receives the generated images.

    Returns:
        None. The function works through its side effects (created folders,
        written image files, one progress line printed per class).
    """
    for bird_class in _visible_entries(audio_path):
        bird_audio_folder = os.path.join(audio_path, bird_class)
        if not os.path.isdir(bird_audio_folder):
            # A stray file next to the class folders is not a class.
            continue

        print(f"Processing {bird_class} ...")
        bird_mel_folder = os.path.join(mel_path, bird_class)
        os.makedirs(bird_mel_folder, exist_ok=True)

        audio_files = [
            name for name in _visible_entries(bird_audio_folder)
            if os.path.isfile(os.path.join(bird_audio_folder, name))
        ]

        for idx, audio_file in enumerate(audio_files):
            audio_file_path = os.path.join(bird_audio_folder, audio_file)

            # Save image to target folder
            save_path = os.path.join(bird_mel_folder, f"{bird_class}_{idx + 1}.jpg")
            _render_mel_image(audio_file_path, save_path)

            # Kept from the original loop: collect after every file,
            # presumably to bound kernel memory over thousands of clips.
            gc.collect()




In [6]:
# Render every audio clip under AUDIO_PATH to an image under MEL_SPECTROGRAM_PATH.
save_mel_spectrograms(audio_path=AUDIO_PATH, mel_path=MEL_SPECTROGRAM_PATH)

Processing Swamp Francolin ...
Processing Swamp Grass Babbler ...
Processing White-throated Bush Chat ...
Processing Wood Snipe ...


#### Split

In [7]:
# Output root handed to splitfolders for the split copy of the spectrogram images.
MEL_SPECTROGRAM_DATASET = "./mel_spectrogram_dataset_v2"

In [8]:
# Copy (move=False) the per-class images into the split dataset folder.
# The seed is fixed so the 0.7 / 0.2 / 0.1 partition is repeatable.
splitfolders.ratio(
    input=MEL_SPECTROGRAM_PATH,
    output=MEL_SPECTROGRAM_DATASET,
    seed=2024,
    ratio=(0.7, 0.2, 0.1),
    group_prefix=None,
    move=False,
)

Copying files: 20500 files [02:46, 123.38 files/s]


### MFCC