In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go



from concurrent.futures import ThreadPoolExecutor
import random

from sklearn.preprocessing import MinMaxScaler, StandardScaler # only MinMaxScaler was used; it seems to work better
from sklearn.decomposition import PCA # I tried PCA but didn't spend much time on it. The reduced version had no visualization benefits
from sklearn.manifold import TSNE

from scipy.ndimage import gaussian_filter


import librosa
import librosa.display
import noisereduce as nr
import logging


from scipy.signal import butter, filtfilt, iirnotch
from scipy.io.wavfile import write

In [4]:

# Route INFO-and-above messages from the root logger to the notebook output.
logging.basicConfig(level=logging.INFO)

# Dataset layout: every root directory is expected to contain one
# sub-directory per recording location code listed below.
root_dirs = ["~/Documents/Data/Birds/2021/"]
locations = [
    "BRY", "CAL", "FIO", "HAR", "KEA", "LAW",
    "LIF", "MCK", "PEN", "SYL", "WAT",
]

# STFT settings. The hop equals the FFT size, so consecutive frames advance
# by a full FFT length.
n_fft = 256
hop_length = n_fft

# Band of interest in Hz, used both for the band-pass filter and for trimming
# spectrogram rows.
lowcut = 1024
highcut = 5000

# Sample rate (Hz) requested when the audio files are loaded.
sr_downsampled = 11025

def detect_constant_frequencies(audio, sr, duration=10):
    """Return the frequencies of STFT bins that dominate the recording on average.

    The magnitude STFT of ``audio`` is averaged along axis 1, and every bin
    whose average exceeds 70% of the largest average is reported.

    Args:
        audio: 1-D audio signal passed straight to ``librosa.stft``.
        sr: Sample rate in Hz, used to map bin indices to frequencies.
        duration: Accepted for interface compatibility; currently unused.

    Returns:
        Array of frequencies (Hz) for the selected bins. Both the STFT and the
        frequency grid rely on librosa's default ``n_fft``, so they line up.
    """
    mean_per_bin = np.abs(librosa.stft(audio)).mean(axis=1)
    cutoff = 0.7 * np.max(mean_per_bin)
    dominant_bins = np.flatnonzero(mean_per_bin > cutoff)
    bin_frequencies = librosa.fft_frequencies(sr=sr)
    return bin_frequencies[dominant_bins]

def apply_notch_filters(audio, frequencies, sr, quality=30):
    """Remove narrow-band tones from ``audio`` with cascaded zero-phase notch filters.

    Args:
        audio: 1-D signal to filter.
        frequencies: Iterable of centre frequencies in Hz to notch out.
        sr: Sample rate of ``audio`` in Hz.
        quality: Quality factor handed to ``scipy.signal.iirnotch``.

    Returns:
        The filtered signal. If no frequency is usable (or ``frequencies`` is
        empty) the input object is returned unchanged.

    Notes:
        ``iirnotch`` only accepts a normalised frequency strictly between 0
        and 1. Frequencies at or below 0 Hz, at or above Nyquist, or NaN
        cannot be notched and are skipped instead of raising ``ValueError``.
    """
    nyquist = 0.5 * sr
    filtered_audio = audio
    for freq in frequencies:
        # Chained comparison is False for NaN as well, so those are skipped too.
        if not 0 < freq < nyquist:
            continue
        b, a = iirnotch(freq / nyquist, quality)
        # filtfilt runs the filter forwards and backwards for zero phase shift.
        filtered_audio = filtfilt(b, a, filtered_audio)
    return filtered_audio

def butter_bandpass(lowcut, highcut, fs, order=6):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        Tuple ``(b, a)`` of numerator and denominator coefficients.
    """
    half_rate = 0.5 * fs
    # scipy expects cutoffs as fractions of the Nyquist frequency.
    band_edges = [lowcut / half_rate, highcut / half_rate]
    numerator, denominator = butter(order, band_edges, btype='band')
    return numerator, denominator

def apply_bandpass_filter(data, lowcut, highcut, fs, order=6):
    """Band-pass ``data`` between ``lowcut`` and ``highcut`` with zero phase shift.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.filtfilt`` (forward and backward pass).

    Args:
        data: Signal to filter.
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Filter order forwarded to ``butter_bandpass``.

    Returns:
        The filtered signal.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    filtered = filtfilt(numerator, denominator, data)
    return filtered

def trim_spectrogram_to_frequency_range(spectrogram, sr, lowcut, highcut, n_fft):
    """Keep only the spectrogram rows whose bin frequency lies in [lowcut, highcut].

    Args:
        spectrogram: 2-D array indexed as ``[frequency_bin, frame]``.
        sr: Sample rate in Hz used to compute the bin frequencies.
        lowcut: Lower bound in Hz; raised to the first bin frequency if smaller.
        highcut: Upper bound in Hz; lowered to the last bin frequency if larger.
        n_fft: FFT size that produced ``spectrogram``.

    Returns:
        A new array holding only the selected rows.

    Raises:
        ValueError: If no bin frequency falls inside the (clamped) range.
    """
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Clamp the requested band to what the FFT grid can represent.
    lowcut = max(lowcut, bin_freqs[0])
    highcut = min(highcut, bin_freqs[-1])

    in_band = (bin_freqs >= lowcut) & (bin_freqs <= highcut)
    if not in_band.any():
        raise ValueError(f"No valid frequencies in range {lowcut}-{highcut} for n_fft={n_fft} and sr={sr}")

    selected_rows = np.flatnonzero(in_band)
    return spectrogram[selected_rows, :]

def _min_max_normalize(spectrogram):
    """Linearly rescale ``spectrogram`` to [0, 1]; a constant input maps to all zeros."""
    floor = spectrogram.min()
    span = spectrogram.max() - floor
    if span == 0:
        # A flat spectrogram has no dynamic range; dividing would yield NaNs.
        return np.zeros_like(spectrogram)
    return (spectrogram - floor) / span


def process_file(args):
    """Turn one audio file into a normalised, band-limited dB spectrogram.

    Pipeline: load at ``sr_downsampled`` -> noise reduction -> notch out
    dominant constant tones -> band-pass ``lowcut``..``highcut`` -> STFT in dB
    -> keep only rows inside the band -> min-max scale to [0, 1].

    Args:
        args: Tuple ``(audio_path, label_row)``. ``audio_path`` may start with
            ``~``. ``label_row`` is a pandas Series whose first element is
            dropped (presumably the file-name column; verify against the CSV)
            and whose remaining elements are returned as the labels.

    Returns:
        ``(spectrogram, labels)`` on success, or ``(None, None)`` if any step
        raised; the error is logged and not propagated, so one bad file does
        not abort a whole batch.
    """
    audio_path, label_row = args
    try:
        audio_path = os.path.expanduser(audio_path)
        x, sr = librosa.load(audio_path, sr=sr_downsampled)

        # The recording itself is supplied as the noise profile.
        denoised_audio = nr.reduce_noise(y=x, sr=sr, y_noise=x, prop_decrease=0.95)

        constant_frequencies = detect_constant_frequencies(denoised_audio, sr)
        filtered_denoised_audio = apply_notch_filters(denoised_audio, constant_frequencies, sr)

        final_filtered_audio = apply_bandpass_filter(filtered_denoised_audio, lowcut, highcut, sr)

        stft_final = librosa.stft(final_filtered_audio, n_fft=n_fft, hop_length=hop_length)
        spectrogram_final = librosa.amplitude_to_db(np.abs(stft_final))

        trimmed_spectrogram = trim_spectrogram_to_frequency_range(spectrogram_final, sr, lowcut, highcut, n_fft)

        spec_normalized = _min_max_normalize(trimmed_spectrogram)

        # Positional slice: everything after the first column of the label row.
        label_data = label_row.iloc[1:].to_numpy()

        return spec_normalized, label_data
    except Exception as e:
        logging.error(f"Error processing {audio_path}: {e}")
        return None, None

# Number of audio files converted and written per output .npy pair.
batch_size = 50
# Running counter shared across all locations and root directories, so output
# file names never collide even if a location appears under several roots.
batch_count = 0

for root_dir in root_dirs:
    for location in locations:
        location_dir = os.path.join(os.path.expanduser(root_dir), location)
        audio_dir = os.path.join(location_dir, "MP3")
        labels_file = os.path.join(location_dir, "train_labels.csv")

        if not os.path.exists(labels_file):
            logging.warning(f"Labels file not found: {labels_file}")
            continue

        labels_df = pd.read_csv(labels_file)

        # One task per labelled file: (path to audio, full label row).
        tasks = [
            (os.path.join(audio_dir, row['fname']), row)
            for _, row in labels_df.iterrows()
        ]

        for i in range(0, len(tasks), batch_size):
            batch = tasks[i:i + batch_size]

            with ThreadPoolExecutor(max_workers=4) as executor:
                results = list(executor.map(process_file, batch))

            # Drop files that failed to process (process_file returns None, None).
            spectrograms = []
            labels = []
            for spec_data, label_data in results:
                if spec_data is not None and label_data is not None:
                    spectrograms.append(spec_data)
                    labels.append(label_data)

            if spectrograms:
                np.save(f'{location}_spectrograms_batch_{batch_count}.npy', np.array(spectrograms))
                np.save(f'{location}_labels_batch_{batch_count}.npy', np.array(labels))
                logging.info(f"Saved batch {batch_count} with {len(spectrograms)} spectrograms")
            else:
                # Saving a zero-length array would produce a file of shape (0,)
                # that cannot be concatenated with the 3-D batches later on.
                logging.warning(f"Skipping batch {batch_count} for {location}: no file could be processed")

            batch_count += 1


INFO:root:Saved batch 0 with 50 spectrograms
INFO:root:Saved batch 1 with 50 spectrograms
INFO:root:Saved batch 2 with 50 spectrograms
INFO:root:Saved batch 3 with 50 spectrograms
INFO:root:Saved batch 4 with 22 spectrograms
INFO:root:Saved batch 5 with 50 spectrograms
INFO:root:Saved batch 6 with 50 spectrograms
INFO:root:Saved batch 7 with 50 spectrograms
INFO:root:Saved batch 8 with 50 spectrograms
INFO:root:Saved batch 9 with 50 spectrograms
INFO:root:Saved batch 10 with 16 spectrograms
INFO:root:Saved batch 11 with 50 spectrograms
INFO:root:Saved batch 12 with 50 spectrograms
INFO:root:Saved batch 13 with 50 spectrograms
INFO:root:Saved batch 14 with 50 spectrograms
INFO:root:Saved batch 15 with 50 spectrograms
INFO:root:Saved batch 16 with 30 spectrograms
INFO:root:Saved batch 17 with 50 spectrograms
INFO:root:Saved batch 18 with 50 spectrograms
INFO:root:Saved batch 19 with 50 spectrograms
INFO:root:Saved batch 20 with 50 spectrograms
INFO:root:Saved batch 21 with 50 spectrograms

In [5]:
import glob

# Lexicographic order of the batch files found in the working directory.
spectrogram_files = sorted(glob.glob("*_spectrograms_batch_*.npy"))
# Derive each label path from its spectrogram path instead of globbing and
# sorting the two lists independently: a missing or stale file would otherwise
# shift the pairing and silently attach labels to the wrong spectrograms.
label_files = [
    spec_file.replace("_spectrograms_batch_", "_labels_batch_")
    for spec_file in spectrogram_files
]

spectrogram_batches = []
label_batches = []

for spec_file, label_file in zip(spectrogram_files, label_files):
    if not os.path.exists(label_file):
        raise FileNotFoundError(f"Missing labels file {label_file} for {spec_file}")

    spectrograms_batch = np.load(spec_file)
    # NOTE(security): allow_pickle=True can execute arbitrary code when loading
    # an untrusted file. Only load label files produced by this notebook.
    labels_batch = np.load(label_file, allow_pickle=True)

    if len(spectrograms_batch) != len(labels_batch):
        raise ValueError(
            f"Row count mismatch between {spec_file} ({len(spectrograms_batch)}) "
            f"and {label_file} ({len(labels_batch)})"
        )
    if len(spectrograms_batch) == 0:
        # Zero-length batches have the wrong rank for concatenation; skip them.
        continue

    spectrogram_batches.append(spectrograms_batch)
    label_batches.append(labels_batch)

if not spectrogram_batches:
    raise FileNotFoundError(
        "No non-empty '*_spectrograms_batch_*.npy' files found in the working directory"
    )

# Stack every batch along the sample axis.
all_spectrograms = np.concatenate(spectrogram_batches, axis=0)
all_labels = np.concatenate(label_batches, axis=0)

print(f"Loaded all spectrograms with shape: {all_spectrograms.shape}")
print(f"Loaded all labels with shape: {all_labels.shape}")


Loaded all spectrograms with shape: (2714, 93, 2584)
Loaded all labels with shape: (2714, 10)


In [6]:
# Persist the merged dataset as two .npy files in the working directory.
# NOTE(review): all_labels was loaded with allow_pickle=True, so it is
# presumably an object array and will need allow_pickle=True to reload; confirm.
for out_path, merged in (
    ("spectrograms-3.npy", all_spectrograms),
    ("labels-3.npy", all_labels),
):
    np.save(out_path, merged)