In [11]:
import os
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import glob
import librosa
import soundfile as sf
from typing import Optional, List, Dict

In [60]:
# Root of the project checkout, read from the environment so the notebook is
# not tied to a single machine's directory layout.
base_dir = os.getenv('BASE_DIR')
if not base_dir:
    raise ValueError("BASE_DIR environment variable is not set.")

# Country whose data is processed; it selects the input and output folders below.
country = 'indonesia'

project_directory = os.path.join(base_dir, 'marrs_acoustics/data/output_dir_' + country)
agile_results = os.path.join(project_directory, 'agile_outputs')
# Glob pattern (not a directory) matching raw recordings with any-case .wav extension.
raw_audio = '/media/mars_5tb_drive/mars_global_acoustic_study/' + country + '_acoustics/raw_audio/*.[wW][aA][vV]'
output_dir = os.path.join(base_dir, 'marrs_acoustics/code/manuscript_docs/example_queries/')
sounds_to_plot = os.path.join(output_dir, country)
# Create the per-country clip folder (and any missing parents) if it is absent.
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(sounds_to_plot, exist_ok=True)



## Create plots of sounds
I went to the results CSV and found the samples with the highest logit score. In theory these should be nice, clear examples of the target which are good for plotting. However, in practice I find some are a bit iffy, so I used a mixture of these and some of the original target sounds I created.

In [None]:
def get_wav_files(folder_path: str) -> List[str]:
    """
    Collect the .wav files that sit directly inside a folder.

    The extension check is case-insensitive and sub-folders are not searched.

    :param folder_path: Folder to list.
    :return: Full paths (folder_path joined with each file name), sorted ascending.
    """
    wav_paths = []
    for entry in os.listdir(folder_path):
        # Lower-case the name so '.WAV' and '.Wav' are accepted as well.
        if entry.lower().endswith('.wav'):
            wav_paths.append(os.path.join(folder_path, entry))
    wav_paths.sort()
    return wav_paths


def compute_log_mel_spectrogram(file_path: str, sr: int = 16000, duration: float = 5.0) -> np.ndarray:
    """
    Load the start of an audio file and return its log-scaled Mel spectrogram.

    :param file_path: Path to the audio file.
    :param sr: Sampling rate passed to librosa when loading and analysing the audio.
    :param duration: Maximum length (in seconds) of audio taken from the start of the file.
    :return: 2D NumPy array of dB values (128 Mel bands, fmax 8000) relative to the spectrogram's peak.
    """
    waveform, _ = librosa.load(file_path, sr=sr, duration=duration)

    # Trim any samples beyond the requested duration.
    max_samples = sr * duration
    if len(waveform) > max_samples:
        waveform = waveform[:int(max_samples)]

    # Mel power spectrogram, then convert to dB using the maximum as reference.
    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128, fmax=8000)
    log_spec = librosa.power_to_db(power_spec, ref=np.max)
    return log_spec


def plot_spectrogram_grid(
    spectrograms: List[np.ndarray], 
    titles: List[str], 
    output_file: str, 
    ncols: int = 2, 
    subplot_size: tuple = (6, 3),
    label_fontsize: int = 18,
    sr: int = 16000,
    fmax: float = 8000,
) -> None:
    """
    Plot a grid of spectrograms with given titles and save the plot.

    :param spectrograms: List of spectrogram arrays.
    :param titles: List of titles for the spectrograms (paired with spectrograms by position).
    :param output_file: File path to save the output plot.
    :param ncols: Number of columns in the grid.
    :param subplot_size: Tuple (width, height) for each subplot in inches.
    :param label_fontsize: Font size for the axis labels.
    :param sr: Sampling rate the spectrograms were computed at; used to scale the axes.
    :param fmax: Highest Mel frequency used when the spectrograms were computed.
    :raises ValueError: If `spectrograms` is empty.
    """
    n_files = len(spectrograms)
    if n_files == 0:
        # Fail with a clear message instead of matplotlib's error about a zero-row grid.
        raise ValueError("No spectrograms to plot.")

    nrows = (n_files + ncols - 1) // ncols  # Ceiling division: rows needed for n_files

    # Overall figure size scales with the grid so every subplot keeps subplot_size.
    fig_width = ncols * subplot_size[0]
    fig_height = nrows * subplot_size[1]

    # squeeze=False always returns a 2D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(fig_width, fig_height),
        constrained_layout=True, squeeze=False
    )
    axes = axes.flatten()

    try:
        for i, (spec, title) in enumerate(zip(spectrograms, titles)):
            ax = axes[i]
            librosa.display.specshow(
                spec, sr=sr, x_axis='time', y_axis='mel',
                fmax=fmax, ax=ax, cmap='Greys'
            )
            ax.set_title(title.capitalize(), fontsize=15)
            ax.label_outer()  # Only show outer labels

            # Set tick label size
            ax.tick_params(axis='both', which='major', labelsize=15)

        # Set axis labels for the bottom-most row and left-most column only
        for idx in range(n_files):
            row, col = divmod(idx, ncols)
            if row == nrows - 1:  # Bottom-most row
                axes[idx].set_xlabel("Time (s)", fontsize=label_fontsize)
            if col == 0:  # Left-most column
                axes[idx].set_ylabel("Frequency (Hz)", fontsize=label_fontsize)

        # Hide unused axes in a partially filled last row
        for unused_ax in axes[n_files:]:
            fig.delaxes(unused_ax)

        # Save through the figure object rather than pyplot's implicit current figure
        fig.savefig(output_file, dpi=300)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
    print(f"Spectrogram grid saved to {output_file}")



def plot_spectrograms_from_folder(folder_path: str, output_dir: str, country: str, sr: int = 16000, duration: float = 5.0) -> None:
    """
    Generate and save a grid of spectrograms from all .wav files in a folder.
    
    The figure is written to `<output_dir>/<country>_spectrograms.png`; each
    subplot is titled with the file name without its extension.

    :param folder_path: Path to the folder containing .wav files.
    :param output_dir: Directory to save the generated plot in (created if missing).
    :param country: Country name for output file naming.
    :param sr: Sampling rate for audio files.
    :param duration: Duration (in seconds) to load for each audio file.
    :raises ValueError: If the folder contains no .wav files.
    """
    # Get the .wav files
    wav_files = get_wav_files(folder_path)
    if not wav_files:
        raise ValueError(f"No .wav files found in {folder_path}")

    # Compute spectrograms
    spectrograms = [compute_log_mel_spectrogram(file, sr=sr, duration=duration) for file in wav_files]
    titles = [os.path.splitext(os.path.basename(file))[0] for file in wav_files]

    # Save the grid plot; pass sr through so the axes match the rate the
    # spectrograms were computed at.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"{country}_spectrograms.png")
    plot_spectrogram_grid(spectrograms, titles, output_file, sr=sr)


In [64]:
# Render every clip in the per-country folder into a single grid figure saved in output_dir.
plot_spectrograms_from_folder(folder_path=sounds_to_plot, output_dir=output_dir, country=country)

Spectrogram grid saved to /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia_spectrograms.png


### This is the old code to find the samples with the highest logit scores and write these out:

In [58]:

def find_row_with_highest_logit(csv_path: str) -> Optional[Dict[str, str]]:
    """
    Finds the row with the highest logit in the given CSV file.

    The first line is treated as a header and skipped. Columns are read by
    position: 0 = filename, 1 = timestamp_s, 2 = label, 3 = logit. Blank lines
    are ignored. On ties the earliest row wins.
    
    :param csv_path: Path to the CSV file.
    :return: A dictionary with keys 'filename', 'timestamp_s', 'label', 'logit'
        (all values kept as strings), or None if the file has no data rows.
    :raises IndexError: If a non-blank data row has fewer than four columns.
    :raises ValueError: If a logit value cannot be parsed as a float.
    """
    best_row = None
    # Start at -inf rather than 0: logits can be negative, and a zero start
    # would wrongly discard files whose scores are all negative or zero.
    max_logit = float('-inf')

    with open(csv_path) as f:
        next(f, None)  # Skip header (no-op on an empty file)

        for line in f:
            line = line.strip()
            if not line:  # Tolerate blank lines, e.g. a trailing newline
                continue

            parts = line.split(',')
            logit = float(parts[3])  # Index 3 for 'logit'
            if logit > max_logit:
                max_logit = logit
                best_row = {
                    "filename": parts[0],
                    "timestamp_s": parts[1],
                    "label": parts[2],
                    "logit": parts[3],
                }

    return best_row


def extract_and_save_audio(
    row: Dict[str, str],
    raw_audio_pattern: str,
    sounds_to_plot: str,
    duration: float = 5.0,
) -> None:
    """
    Extracts an audio segment starting from `timestamp_s` and saves it in `sounds_to_plot`.

    The source recording is looked up among the files matching
    `raw_audio_pattern`. A file whose base name equals the row's file name is
    preferred; if there is none, the first path (in sorted order) containing
    the file name is used. The clip is written as
    `<label>_<file name without extension>.wav` at the recording's native
    sampling rate.
    
    :param row: The row dictionary with keys 'filename', 'timestamp_s', 'label'.
    :param raw_audio_pattern: Glob pattern to search for raw audio files.
    :param sounds_to_plot: Directory to save the extracted audio file.
    :param duration: Length of the extracted segment in seconds.
    """
    filename = row['filename'].split('/')[-1]  # Get the filename after the last '/'
    timestamp_s = float(row['timestamp_s'])
    label = row['label']

    # Find the corresponding audio file. Sorting makes the choice deterministic
    # when several paths qualify.
    audio_files = sorted(glob.glob(raw_audio_pattern))
    matching_file = next((f for f in audio_files if os.path.basename(f) == filename), None)
    if matching_file is None:
        # Fall back to a substring match, which may hit a longer file name or
        # a directory component, so it is only used when no exact match exists.
        matching_file = next((f for f in audio_files if filename in f), None)

    if matching_file is None:
        print(f"Audio file {filename} not found in raw audio path.")
        return

    # Load the audio file at its native sampling rate
    y, sr = librosa.load(matching_file, sr=None)

    # Calculate start and end samples
    start_sample = int(timestamp_s * sr)
    end_sample = int((timestamp_s + duration) * sr)

    # Extract the segment
    segment = y[start_sample:end_sample]
    if len(segment) == 0:
        # The timestamp lies beyond the end of the recording; do not write an empty file.
        print(f"No audio at {timestamp_s}s in {matching_file}; nothing saved.")
        return

    # Ensure the output directory exists
    os.makedirs(sounds_to_plot, exist_ok=True)

    # Save the extracted segment
    output_file = os.path.join(sounds_to_plot, f"{label}_{os.path.splitext(filename)[0]}.wav")
    sf.write(output_file, segment, sr)

    print(f"Saved: {output_file}")


# Main processing loop
def process_csv_files(agile_results: str, raw_audio: str, sounds_to_plot: str) -> None:
    """
    Walk every entry of the agile_results directory, and for each file whose name
    ends in "inference.csv" save the audio segment of its highest-logit row.

    Files located directly in `agile_results` (not inside one of its entries)
    are not visited. CSVs for which no best row is found are skipped.
    
    :param agile_results: Base directory containing CSV files.
    :param raw_audio: Glob pattern to locate raw audio files.
    :param sounds_to_plot: Directory to save extracted audio segments.
    """
    for entry in os.listdir(agile_results):
        entry_path = os.path.join(agile_results, entry)

        for current_dir, _subdirs, filenames in os.walk(entry_path):
            csv_names = (name for name in filenames if name.endswith("inference.csv"))

            for csv_name in csv_names:
                top_hit = find_row_with_highest_logit(os.path.join(current_dir, csv_name))
                if not top_hit:
                    continue
                extract_and_save_audio(top_hit, raw_audio, sounds_to_plot)


# Example usage: reuse the raw-audio glob pattern from the configuration cell
# rather than rebuilding the same string here. `output_dir` is intentionally not
# reassigned: this cell never used it, and overwriting it with a hard-coded
# home-directory path would change where the plotting cell saves its figure on
# a later re-run.
raw_audio_pattern = raw_audio

process_csv_files(agile_results, raw_audio_pattern, sounds_to_plot)


Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/wipe_ind_H3_20220908_204400.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/oink_ind_H1_20220906_182200.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/laugh_ind_R1_20220916_172600.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/rattle_ind_R3_20220829_221200.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/scrape_fullband_ind_D2_20220915_095200.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/foghorn_ind_R1_20220830_030200.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscript_docs/example_queries/indonesia/croak_ind_H6_20220901_042600.wav
Saved: /home/bwilliams/ucl_projects/marrs_acoustics/code/manuscri