# Feature Engineering

In [2]:
import os
import glob
import librosa
import soundfile as sf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa.display

# Input manifest and audio locations, plus the directory that receives the
# rendered spectrogram images (created up front if it does not exist yet).
input_csv = '../data/cleaned/70_15_15_cleaned_train.csv'
input_audio_dir = '../data/raw/audio/xeno_canto'
output_spectrogram_dir = '../data/processed/spectrograms/xeno_canto'
os.makedirs(output_spectrogram_dir, exist_ok=True)

SAMPLE_RATE = 16000  # Hz; clips are resampled to this rate before processing
TARGET_DB_LEVEL = -20  # dB level handed to normalize_audio
SEGMENT_DURATION = 10  # seconds; multiplied by SAMPLE_RATE to get the clip length in samples

# Manifest rows; the processing loop reads the 'id' column to locate audio files.
df = pd.read_csv(input_csv)

def normalize_audio(y, target_db_level):
    """Scale audio so its overall RMS level equals ``target_db_level`` (dBFS).

    The level is measured in absolute terms (relative to full scale, 1.0), so
    clips recorded at different volumes end up at the same loudness. Measuring
    against the clip's own maximum instead would make the adjustment
    independent of the input volume and leave loud and quiet recordings at
    different levels.

    Parameters:
    - y: 1-D array of audio samples.
    - target_db_level: Desired RMS level in dB relative to full scale.

    Returns:
    A scaled copy of ``y`` (floating-point input keeps its dtype). Empty or
    completely silent input is returned unscaled because it has no level
    that could be adjusted.
    """
    y = np.asarray(y)
    if y.size == 0:
        return y.copy()

    # Accumulate in float64 so long float32 clips do not lose precision.
    rms = float(np.sqrt(np.mean(np.square(y, dtype=np.float64))))
    if rms == 0.0:
        return y.copy()

    current_db = 20.0 * np.log10(rms)
    db_adjustment = target_db_level - current_db
    # A plain Python float keeps the array's dtype (e.g. float32) intact.
    gain = float(10 ** (db_adjustment / 20))
    return y * gain

def apply_gain(y, gain_range=(13, 17)):
    """Boost the signal by a gain in dB drawn uniformly from ``gain_range``."""
    sampled_db = np.random.uniform(*gain_range)
    # Convert the dB figure to a linear amplitude multiplier.
    linear_factor = 10 ** (sampled_db / 20)
    return y * linear_factor

def add_gaussian_noise(y, noise_level=0.005):
    """Return ``y`` plus zero-mean Gaussian noise whose standard deviation is ``noise_level``."""
    return y + np.random.normal(loc=0, scale=noise_level, size=y.shape)

def add_background_noise(y, background, snr=10):
    """
    Mix background environmental noise into the audio at a given SNR.

    The background is looped when it is shorter than the signal and cut when
    it is longer. Its power is measured on exactly the portion that gets
    mixed in, so the requested SNR holds for the returned clip.

    Parameters:
    - y: 1-D array holding the foreground signal.
    - background: 1-D array holding a sample of environmental sound.
    - snr (Signal-to-Noise Ratio): Signal power over noise power, in dB.

    Returns:
    A new array with the same length as ``y``. When there is nothing to mix
    in (empty signal, empty background, or silent background) an unmodified
    copy of ``y`` is returned.
    """
    y = np.asarray(y)
    background = np.asarray(background)
    if y.size == 0 or background.size == 0:
        return y.copy()

    # Repeat the background until it covers the signal, then cut to length.
    if len(background) < len(y):
        repeats = int(np.ceil(len(y) / len(background)))
        background = np.tile(background, repeats)
    background = background[:len(y)]

    signal_power = np.mean(y ** 2)
    noise_power = np.mean(background ** 2)
    if noise_power == 0:
        # A silent background cannot be scaled to any SNR; avoid dividing by zero.
        return y.copy()

    scaling_factor = np.sqrt(signal_power / (10 ** (snr / 10)) / noise_power)
    return y + background * scaling_factor

def process_audio_file(audio_path, background_noise_path=None):
    """Load an audio file and return a fixed-length, augmented waveform.

    Processing order: load at SAMPLE_RATE, trim silence, normalize to
    TARGET_DB_LEVEL, apply random gain, add Gaussian noise, optionally mix
    in a background recording, then fit the result to SEGMENT_DURATION
    seconds.

    Parameters:
    - audio_path: Path of the recording to process.
    - background_noise_path: Optional path of an environmental recording to
      mix in; any falsy value (None, '') skips the mixing step.

    Returns:
    A 1-D array with exactly SEGMENT_DURATION * SAMPLE_RATE samples.

    Raises:
    ValueError: If no samples remain after silence trimming, since there is
    nothing left to normalize or repeat.
    """
    # librosa resamples while loading when a target rate is given, matching
    # how the background recording is loaded below.
    y, _ = librosa.load(audio_path, sr=SAMPLE_RATE)

    # Trim silence and normalize to target dB level
    y, _ = librosa.effects.trim(y, top_db=20)
    if len(y) == 0:
        raise ValueError(f"No audio left after trimming silence: {audio_path}")
    y = normalize_audio(y, TARGET_DB_LEVEL)

    # Apply gain augmentation to simulate various recording intensities
    y = apply_gain(y)

    # Add Gaussian noise to increase model robustness to noisy backgrounds
    y = add_gaussian_noise(y)

    if background_noise_path:
        # Mix in environmental noise
        background, _ = librosa.load(background_noise_path, sr=SAMPLE_RATE)
        y = add_background_noise(y, background)

    # Fit to a consistent segment length: short clips are looped (not
    # zero-padded) until they fill the segment, long clips are cut.
    segment_length = SEGMENT_DURATION * SAMPLE_RATE
    if len(y) < segment_length:
        repeats = int(np.ceil(segment_length / len(y)))
        y = np.tile(y, repeats)
    return y[:segment_length]

def create_log_mel_spectrogram(y, sr):
    """Compute a 128-band Mel power spectrogram of ``y`` and express it in dB relative to its peak."""
    mel_power = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_fft=2048,
        hop_length=512,
        n_mels=128,
    )
    log_mel = librosa.power_to_db(mel_power, ref=np.max)
    return log_mel

def apply_spectrogram_augmentation(S_db, time_mask_range=(50, 100),
                                   freq_mask_range=(1, 7), mask_value=None):
    """
    Apply one random time mask and one random frequency mask to a spectrogram.

    Parameters:
    - S_db: 2-D array indexed as (frequency bin, time frame).
    - time_mask_range: (low, high) for the time-mask width in frames; the
      upper bound is exclusive, so the default yields widths of 50-99.
    - freq_mask_range: (low, high) for the frequency-mask height in bins;
      the upper bound is exclusive, so the default yields heights of 1-6.
    - mask_value: Value written into the masked regions. Defaults to the
      spectrogram's minimum so masked areas read as silence. In a dB
      spectrogram referenced to its own peak, 0 is the loudest value, and
      filling with it would paint the masks as maximum energy instead.

    Returns:
    A masked copy of ``S_db``; the input array is left untouched.
    """
    masked = np.array(S_db, copy=True)
    if masked.size == 0:
        return masked

    n_bins, n_frames = masked.shape[0], masked.shape[1]

    # max(..., 1) keeps randint's range valid when the mask is wider than the
    # spectrogram; slicing then simply clips the mask to the array bounds.
    time_mask = np.random.randint(*time_mask_range)
    t0 = np.random.randint(0, max(n_frames - time_mask, 1))

    freq_mask = np.random.randint(*freq_mask_range)
    f0 = np.random.randint(0, max(n_bins - freq_mask, 1))

    if mask_value is None:
        mask_value = masked.min()

    # Apply the masks
    masked[:, t0:t0 + time_mask] = mask_value
    masked[f0:f0 + freq_mask, :] = mask_value

    return masked


def save_spectrogram_image(S_db, output_path):
    """Render the Log-Mel Spectrogram without axes or margins and save it as an image.

    Parameters:
    - S_db: 2-D spectrogram array to draw.
    - output_path: Destination file path; the extension selects the format.

    The figure is closed even when drawing or saving fails, so repeated
    calls in a long batch do not accumulate open figures.
    """
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(S_db, sr=SAMPLE_RATE, hop_length=512, x_axis=None, y_axis=None)
        plt.axis('off')
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

# Main processing loop
# Only the first MAX_ROWS manifest rows are processed; raise the limit to
# cover more of the dataset.
MAX_ROWS = 20

# No environmental recording is configured, so background mixing is skipped.
background_noise_path = None

# Iterating the column directly keeps each ID's original dtype; iterrows()
# builds a per-row Series and can upcast values on the way.
for audio_id in df['id'].head(MAX_ROWS):
    audio_files = glob.glob(os.path.join(input_audio_dir, f"{audio_id}.*"))

    if not audio_files:
        print(f"No audio file found for ID {audio_id}. Skipping.")
        continue
    if len(audio_files) > 1:
        print(f"Multiple audio files found for ID {audio_id}. Skipping.")
        continue

    audio_path = audio_files[0]
    spectrogram_path = os.path.join(output_spectrogram_dir, f"{audio_id}_logmel.png")

    try:
        y = process_audio_file(audio_path, background_noise_path)
        S_db = create_log_mel_spectrogram(y, SAMPLE_RATE)

        # Apply spectrogram augmentation with 50% chance
        if np.random.rand() < 0.5:
            S_db = apply_spectrogram_augmentation(S_db)

        save_spectrogram_image(S_db, spectrogram_path)
    except Exception as e:
        # Batch boundary: report the failing file and keep going so one bad
        # recording does not abort the whole run.
        print(f"Error processing {audio_id}: {e}")

print("Log-Mel spectrogram generation with augmentations complete.")


Log-Mel spectrogram generation with augmentations complete.
