# Resampling and Normalization

In [4]:
import pandas as pd
import librosa
import soundfile as sf
import os
import numpy as np
import glob

In [6]:


input_csv = '../data/cleaned/70_15_15_cleaned_train.csv'
input_audio_dir = '../data/raw/audio/xeno_canto'
output_audio_dir = '../data/processed/audio/xeno_canto'


os.makedirs(output_audio_dir, exist_ok=True)


sample_rate = 16000  # Set sample rate the same as used in the pretrained model
target_db_level = -20  


df = pd.read_csv(input_csv)


for index, row in df.iterrows():
    audio_id = row['id']
    

    audio_path_pattern = os.path.join(input_audio_dir, f"{audio_id}.*")
    audio_files = glob.glob(audio_path_pattern)
    
    if len(audio_files) == 0:
        print(f"No audio file found for ID {audio_id}. Skipping.")
        continue
    elif len(audio_files) > 1:
        print(f"Multiple audio files found for ID {audio_id}. Skipping to avoid ambiguity.")
        continue
    
    audio_path = audio_files[0]
    
    try:
        y, sr = librosa.load(audio_path, sr=None)
        
        if sr != sample_rate:
            y = librosa.resample(y, orig_sr=sr, target_sr=sample_rate)
            sr = sample_rate
        
        rms = librosa.feature.rms(y=y)[0]
        current_db = librosa.amplitude_to_db(rms, ref=np.max)
        db_adjustment = target_db_level - np.mean(current_db)
        y = y * (10 ** (db_adjustment / 20))

        output_path = os.path.join(output_audio_dir, f"{audio_id}.wav")
        sf.write(output_path, y, sr)
        
    except Exception as e:
        print(f"Error processing {audio_id}: {e}")

print("Processing complete.")

  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing 375211: 
No audio file found for ID 899960. Skipping.
No audio file found for ID 930482. Skipping.
Processing complete.


# Trim and Segment Audio

In [8]:
input_audio_dir = '../data/processed/audio/xeno_canto'  # Assuming previous output directory as input here
output_audio_dir = '../data/processed/segmented_audio/xeno_canto'

os.makedirs(output_audio_dir, exist_ok=True)


sample_rate = 16000  
segment_duration = 5  # Segment duration in seconds
overlap_duration = 1  # Overlap duration in seconds
target_db_level = -20  

df = pd.read_csv(input_csv)

for index, row in df.iterrows():
    audio_id = row['id']
    audio_path = os.path.join(input_audio_dir, f"{audio_id}.wav")
    
    try:
        y, sr = librosa.load(audio_path, sr=sample_rate)
        
        # Trim silence from the beginning and end of the audio
        y, _ = librosa.effects.trim(y, top_db=20) 

        # Normalize the audio to target dB level
        rms = librosa.feature.rms(y=y)[0]
        current_db = librosa.amplitude_to_db(rms, ref=np.max)
        db_adjustment = target_db_level - np.mean(current_db)
        y = y * (10 ** (db_adjustment / 20))

        # Calculate segment and overlap lengths in samples
        segment_length = segment_duration * sr
        overlap_length = overlap_duration * sr
        step = segment_length - overlap_length  

        # Split audio into overlapping segments
        total_segments = int(np.ceil((len(y) - overlap_length) / step))

        for i in range(total_segments):
            start = i * step
            end = min(start + segment_length, len(y))
            segment = y[start:end]

            if len(segment) < segment_length and end != len(y):
                continue
            
            segment_filename = f"{audio_id}_segment_{i+1}.wav"
            output_path = os.path.join(output_audio_dir, segment_filename)
            sf.write(output_path, segment, sr)
        
    except Exception as e:
        print(f"Error processing {audio_id}: {e}")

print("Audio trimming and segmentation complete with overlapping segments.")


  y, sr = librosa.load(audio_path, sr=sample_rate)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing 375211: [Errno 2] No such file or directory: '../data/processed/audio/xeno_canto\\375211.wav'
Error processing 899960: [Errno 2] No such file or directory: '../data/processed/audio/xeno_canto\\899960.wav'
Error processing 930482: [Errno 2] No such file or directory: '../data/processed/audio/xeno_canto\\930482.wav'
Audio trimming and segmentation complete with overlapping segments.


# Mel Spectrograms

In [15]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

input_audio_dir = '../data/processed/segmented_audio/xeno_canto'
output_spectrogram_dir = '../data/processed/spectrograms/xeno_canto'
os.makedirs(output_spectrogram_dir, exist_ok=True)

sample_rate = 16000  
audio_files = [f for f in os.listdir(input_audio_dir) if f.endswith('.wav')]

for audio_file in audio_files:
    audio_path = os.path.join(input_audio_dir, audio_file)
    audio_id, segment_num = audio_file.split('_segment_')
    segment_num = segment_num.split('.')[0]

    try:
        y, sr = librosa.load(audio_path, sr=sample_rate)

        S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
        S_db = librosa.power_to_db(S, ref=np.max)

        # Plot the mel-spectrogram without extra text or labels
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(S_db, sr=sr, hop_length=512, x_axis=None, y_axis=None)
        plt.axis('off')  
        spectrogram_filename = f"{audio_id}_segment_{segment_num}.png"
        spectrogram_path = os.path.join(output_spectrogram_dir, spectrogram_filename)
        plt.savefig(spectrogram_path, bbox_inches='tight', pad_inches=0) 
        plt.close()

    except Exception as e:
        print(f"Error processing {audio_file}: {e}")

print("Mel-spectrogram generation complete.")


Mel-spectrogram generation complete.
