# Supplementary Material: Practical benchmarking (Python)

## Tool: scikit-maad (parallel processing across CPU cores)

In [None]:
# Install if needed
!pip install scikit-maad

**Imports**

In [1]:
from maad import sound, features
import glob, time
import numpy as np
import os
import glob
import tarfile
import tempfile
import requests

### 1) Download + unpack dataset (cached)

In [2]:
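# To force a fresh download, delete the cached archive (uncomment to run).
# The path is built from the system temp directory, matching the download cell below.
# import os, tempfile
# os.remove(os.path.join(tempfile.gettempdir(), "amistosa_audio_session", "Amistosa_soundscape.tar"))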

In [3]:
TAR_URL = "https://zenodo.org/records/18778160/files/Amistosa_soundscape.tar"

# 1) Working folder inside the system temp directory; it is reused on later
#    runs so the archive is not downloaded again.
base_temp = tempfile.gettempdir()
tmp_dir = os.path.join(base_temp, "amistosa_audio_session")
os.makedirs(tmp_dir, exist_ok=True)

tar_path = os.path.join(tmp_dir, "Amistosa_soundscape.tar")

# 2) Download only if the archive is not cached yet.
#    The stream is written to a ".part" file and renamed only after it has
#    completed, so an interrupted download is never mistaken for a valid
#    cached archive on the next run.
if not os.path.exists(tar_path):
    print("Descargando archivo .tar...")
    partial_path = tar_path + ".part"
    try:
        with requests.get(TAR_URL, stream=True, timeout=60 * 10) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, tar_path)
    finally:
        # After a successful rename the partial file no longer exists;
        # after a failure this removes the incomplete data.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Descarga completada.")

# 3) Extract only once; an empty marker file records a completed extraction.
extract_flag = os.path.join(tmp_dir, ".extracted")

if not os.path.exists(extract_flag):
    print("Extrayendo archivos...")
    with tarfile.open(tar_path, "r:*") as tar:
        # The archive comes from the network: use the "data" extraction
        # filter when this Python provides it, so members cannot escape
        # tmp_dir. Older interpreters fall back to the unfiltered call.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=tmp_dir, filter="data")
        else:
            tar.extractall(path=tmp_dir)
    with open(extract_flag, "w"):
        pass  # Marker only; the file content is irrelevant.
    print("Extracción completada.")

# 4) List every WAV file below the working folder (recursive search).
wav_files = glob.glob(os.path.join(tmp_dir, "**", "*.wav"), recursive=True)

print(f"Total de archivos WAV encontrados: {len(wav_files)}")
print("Directorio activo para análisis:")
print(tmp_dir)


Descargando archivo .tar...
Descarga completada.
Extrayendo archivos...


  tar.extractall(path=tmp_dir)


Extracción completada.
Total de archivos WAV encontrados: 180
Directorio activo para análisis:
/tmp/amistosa_audio_session


### 2) scikit-maad feature computation (timing-only)

In [4]:
import os
import glob
import time
import pandas as pd
from concurrent.futures import ProcessPoolExecutor

from maad import sound, features

# Folder scanned for WAV files: the working directory of the download cell.
WAV_DIR = tmp_dir
# Worker counts to benchmark, one timed run per entry.
CORES_TO_TEST = [1, 2, 4, 6, 8]

# Spectrogram settings forwarded to sound.spectrogram.
WINDOW = "hann"
NPERSEG = 1024
NOVERLAP = 512

# Frequency limits given as (low, high) tuples, presumably in Hz — TODO confirm.
# FLIM_LOW / FLIM_MID are the two bands handed to features.soundscape_index;
# BI_FLIM is the band handed to features.bioacoustics_index.
FLIM_LOW = (0, 2000)
FLIM_MID = (2000, 8000)
BI_FLIM  = (2000, 8000)

# Search recursively, like the file listing in the download cell, so the
# benchmark runs on the same set of WAV files that was reported there even
# if the archive unpacks into sub-folders. Sorting keeps the order stable.
files = sorted(glob.glob(os.path.join(WAV_DIR, "**", "*.wav"), recursive=True))
if not files:
    raise RuntimeError(f"No se encontraron .wav en {WAV_DIR}")

def compute_only_time(path):
    """Load one audio file and run the benchmarked scikit-maad indices on it.

    The index values are deliberately discarded: the function exists so that
    a caller can time the load + spectrogram + index computation for a file.

    Parameters
    ----------
    path : str
        Path of the audio file to process.

    Returns
    -------
    str
        The base name of ``path``, used as a minimal return value.
    """
    wave, fs = sound.load(
        filename=path,
        channel="left",
        detrend=False,
        verbose=False
    )

    Sxx_power, tn, fn, ext = sound.spectrogram(
        wave, fs,
        window=WINDOW,
        nperseg=NPERSEG,
        noverlap=NOVERLAP,
        detrend=False,
        verbose=False,
        display=False
    )

    # Run the index functions; their outputs are not post-processed.
    features.acoustic_complexity_index(Sxx_power)

    # Biophony is the higher band and anthrophony the lower one, matching the
    # ordering of scikit-maad's own defaults for soundscape_index. The two
    # bands were previously passed the other way round.
    features.soundscape_index(
        Sxx_power, fn,
        flim_bioPh=FLIM_MID,
        flim_antroPh=FLIM_LOW,
    )

    features.acoustic_diversity_index(Sxx_power, fn)
    # "eveness" is the spelling used by the function name as called here.
    features.acoustic_eveness_index(Sxx_power, fn)

    features.bioacoustics_index(
        Sxx_power, fn,
        flim=BI_FLIM,
    )

    # Return something minimal.
    return os.path.basename(path)



### 3) Benchmark runner

In [5]:
def run_benchmark(files, n_workers):
    """Time one full pass of ``compute_only_time`` over ``files``.

    With ``n_workers == 1`` the files are processed sequentially in the
    current process, without creating a pool. For any other value a
    ``ProcessPoolExecutor`` with ``max_workers=n_workers`` is used, and the
    measured time includes creating and shutting down that pool.

    Parameters
    ----------
    files : iterable of str
        Paths handed one by one to ``compute_only_time``.
    n_workers : int
        Number of worker processes to use.

    Returns
    -------
    float
        Elapsed wall-clock time in seconds, measured with
        ``time.perf_counter``.
    """
    started = time.perf_counter()

    if n_workers != 1:
        with ProcessPoolExecutor(max_workers=n_workers) as executor:
            # Drain the iterator so every task has finished (and any worker
            # exception has been re-raised) before the clock is stopped.
            for _ in executor.map(compute_only_time, files):
                pass
    else:
        for wav_path in files:
            compute_only_time(wav_path)

    finished = time.perf_counter()
    return finished - started

if __name__ == "__main__":
    # Time every configured worker count first. Speedups are computed
    # afterwards so they do not depend on the order of CORES_TO_TEST.
    timings = [(n, run_benchmark(files, n_workers=n)) for n in CORES_TO_TEST]

    # The speedup column is labelled "vs 1 core", so the reference must be the
    # run that actually used one worker, not simply the first run measured.
    # If no 1-worker run was configured, the speedup is reported as NaN
    # instead of a value relative to some other worker count.
    baseline = next((elapsed for n, elapsed in timings if n == 1), None)

    rows = []
    for n, elapsed in timings:
        if baseline is None:
            speedup = float("nan")
        else:
            speedup = round(baseline / elapsed, 3)

        rows.append({
            "Tool": "scikit-maad",
            "N_files": len(files),
            "Cores": n,
            "Elapsed_sec": round(elapsed, 3),
            "Sec_per_file": round(elapsed / len(files), 4),
            "Speedup_vs_1core": speedup,
        })

    # One row per worker count, ordered by number of workers.
    bench_df = pd.DataFrame(rows).sort_values("Cores").reset_index(drop=True)
    print(bench_df)
    bench_df.to_csv("benchmark_parallel_indices_scikit_maad.csv", index=False)
    print("\nGuardado: benchmark_parallel_indices_scikit_maad.csv")

          Tool  N_files  Cores  Elapsed_sec  Sec_per_file  Speedup_vs_1core
0  scikit-maad      180      1       30.507        0.1695             1.000
1  scikit-maad      180      2       17.567        0.0976             1.737
2  scikit-maad      180      4       12.954        0.0720             2.355
3  scikit-maad      180      6       11.684        0.0649             2.611
4  scikit-maad      180      8       11.376        0.0632             2.682

Guardado: benchmark_parallel_indices_scikit_maad.csv
