In [1]:
import pandas as pd
import librosa
import soundfile as sf
import os

## YOLO predictions

In [2]:
# Cut YOLOv8 predictions
import os
from pydub import AudioSegment
from PIL import Image

def desnormalizar_y_recortar_audio(prediccion_txt_path, output_folder, image_duration_sec=60):
    """Export one audio clip per detection listed in a YOLO prediction file.

    The prediction file name (without extension) must have the form
    ``<recorder>_<YYYYMMDD>_<...>``. It is used to locate the source recording
    at ``../Data/Dataset/Audios/<recorder>/<YYYY>_<MM>_<DD>/<base_name>.WAV``.

    Every non-blank line of the prediction file must contain six
    whitespace-separated numbers. The 2nd is read as the normalized X centre of
    the box, the 4th as its normalized width and the 6th as the detection score;
    the remaining fields are ignored.

    Args:
        prediccion_txt_path: Path to the prediction ``.txt`` file.
        output_folder: Directory that receives the clips. It is created if it
            does not exist; a trailing separator is optional.
        image_duration_sec: Number of seconds represented by the full width of
            the image the boxes were predicted on. Defaults to 60, the value
            that was previously hard-coded.

    Returns:
        int: Number of clips written.

    Raises:
        IndexError: If the file name contains no ``_`` separator.
        ValueError: If a non-blank line does not hold exactly six numbers.
        Any error raised by ``AudioSegment.from_wav`` when the recording cannot
        be read.
    """
    # splitext strips only the real extension; str.replace would also remove a
    # '.txt' appearing in the middle of the name.
    base_name = os.path.splitext(os.path.basename(prediccion_txt_path))[0]

    name_parts = base_name.split('_')
    recorder = name_parts[0]
    date = name_parts[1]
    year = date[:4]
    month = date[4:6]
    day = date[6:]

    # Path of the original recording the predictions refer to.
    audio_path = f"../Data/Dataset/Audios/{recorder}/{year}_{month}_{day}/{base_name}.WAV"

    # Blank lines (e.g. a trailing newline at the end of the file) carry no
    # prediction and would break the six-field unpacking below.
    with open(prediccion_txt_path, 'r') as file:
        predictions = [line for line in file if line.strip()]

    audio = AudioSegment.from_wav(audio_path)

    # len() of the loaded audio is treated as milliseconds; convert to seconds.
    audio_duration_sec = len(audio) / 1000

    # Create the destination once, without the check-then-create race.
    os.makedirs(output_folder, exist_ok=True)

    predictions_processed = 0

    for i, line in enumerate(predictions):
        _, x_center, _, width, _, score = map(float, line.split())

        # Normalized X coordinates map linearly onto the time axis. The pixel
        # width of the image cancels out of the conversion
        # (x * WIDTH * duration / WIDTH), so the image does not need to be read.
        half_width = width / 2
        start_sec = (x_center - half_width) * image_duration_sec
        end_sec = (x_center + half_width) * image_duration_sec

        # Keep both bounds inside the recording.
        start_sec = max(0, min(start_sec, audio_duration_sec))
        end_sec = max(0, min(end_sec, audio_duration_sec))

        # The audio object is sliced in milliseconds.
        start_msec = start_sec * 1000
        end_msec = end_sec * 1000
        segment = audio[start_msec:end_msec]

        # os.path.join works whether or not output_folder ends with a separator.
        output_path = os.path.join(
            output_folder,
            f"{base_name}_{start_sec:.2f}_{end_sec:.2f}_{score:.2f}.WAV",
        )

        segment.export(output_path, format="wav")

        print(f"Segmento {i} guardado: {output_path} ({start_sec:.2f}s - {end_sec:.2f}s)")

        predictions_processed += 1

    return predictions_processed

In [4]:
# Alternative configuration kept for reference:
# prediction_folder = "../runs/detect/predict__val_model11_conf0/labels"
# output_folder = "../BirdNET/Audios/predict_val_model11_conf0/"

prediction_folder = "../runs/detect/predict_test_model12_conf015/labels"
output_folder = "../BirdNET/Audios/BirdSongDetectorSegments/"

# Intended for the TXT files located directly in the labels directory
# (original note referred to ../run/predict/labels); subfolders are not visited.

total_segments = 0

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order and the printed log reproducible across runs and filesystems.
for file in sorted(os.listdir(prediction_folder)):
    if file.endswith(".txt"):
        segments = desnormalizar_y_recortar_audio(os.path.join(prediction_folder, file), output_folder)

        total_segments += segments

print(f"Proceso completado. Se han guardado {total_segments} segmentos de audio.")

Segmento 0 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_0.20_5.52_0.71.WAV (0.20s - 5.52s)
Segmento 1 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_3.38_60.00_0.54.WAV (3.38s - 60.00s)
Segmento 2 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_50.07_59.94_0.36.WAV (50.07s - 59.94s)
Segmento 3 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_26.75_28.70_0.34.WAV (26.75s - 28.70s)
Segmento 4 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_9.10_11.13_0.33.WAV (9.10s - 11.13s)
Segmento 5 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_46.17_49.91_0.28.WAV (46.17s - 49.91s)
Segmento 6 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_34.96_42.66_0.26.WAV (34.96s - 42.66s)
Segmento 7 guardado: ../BirdNET/Audios/BirdSongDetectorSegments/AM8_20230304_093000_31.77_35.04_0.24.WAV (31.77s - 35.04s)
Segmento 8 guardado: ../