In [2]:
import os
import shutil

import librosa
import pandas as pd
import soundfile as sf

In [3]:
# Root of the data tree (alternative location: "../../../desarrollo/Data/")
PATH = "../Data/"
# Folder that receives one subfolder of training clips per species
OUTPUT_FOLDER = f"{PATH}Dataset/Audios/BirdNet/train/"

# CSV with the training annotations that is loaded into `df`
train_csv = f"{PATH}Dataset/CSVs/train_with_bg.csv"

In [4]:
# Load the training annotations. The functions below read the 'path',
# 'start_time', 'end_time' and 'specie' columns of this table.
df = pd.read_csv(train_csv)

In [5]:
def crop_audio(annotations=None, audio_root=None, output_folder=None):
    """Cut every annotated interval out of its recording and save it by species.

    For each row of the annotation table, the recording named in the ``path``
    column is loaded at its native sample rate, the samples between
    ``start_time`` and ``end_time`` (both in seconds) are sliced out, and the
    clip is written to ``<output_folder>/<specie>/``.

    The clip name carries the interval (``<stem>_<start>_<end><ext>``).
    Naming the clip after the recording alone made several annotations of the
    same species in one recording overwrite each other, so only the last one
    survived.

    Args:
        annotations: DataFrame with ``path``, ``start_time``, ``end_time`` and
            ``specie`` columns. Defaults to the notebook-level ``df``.
        audio_root: Folder the ``path`` column is relative to. Defaults to
            ``PATH + "Dataset/Audios/"``.
        output_folder: Folder that receives one subfolder per species.
            Defaults to the notebook-level ``OUTPUT_FOLDER``.
    """
    if annotations is None:
        annotations = df
    if audio_root is None:
        audio_root = PATH + "Dataset/Audios/"
    if output_folder is None:
        output_folder = OUTPUT_FOLDER

    # Remember the last decoded recording: consecutive rows that point at the
    # same file reuse it instead of decoding it again.
    loaded_path = None
    audio = None
    sr = None

    for _, row in annotations.iterrows():
        audio_path = os.path.join(audio_root, row['path'])
        start_time = float(row['start_time'])
        end_time = float(row['end_time'])
        specie = row['specie']

        # Create the species folder if it does not exist yet
        specie_folder = os.path.join(output_folder, specie)
        os.makedirs(specie_folder, exist_ok=True)

        # Load the recording (sr=None keeps its native sample rate)
        if audio_path != loaded_path:
            audio, sr = librosa.load(audio_path, sr=None)
            loaded_path = audio_path

        # Convert the interval to sample indices; a negative index would
        # silently slice from the end of the array, so clamp at zero.
        start_sample = max(0, int(start_time * sr))
        end_sample = max(0, int(end_time * sr))

        audio_cropped = audio[start_sample:end_sample]

        # One file per annotation: the interval makes the name unique
        stem, extension = os.path.splitext(os.path.basename(audio_path))
        output_filename = os.path.join(
            specie_folder, f"{stem}_{start_time:.2f}_{end_time:.2f}{extension}"
        )

        sf.write(output_filename, audio_cropped, sr)

    print("Proceso completado.")

In [6]:
# Background:
# Copy every recording labelled "No audio" into the Background class folder.
def background(annotations=None, audio_root=None, output_folder=None):
    """Copy the recordings labelled ``"No audio"`` into ``<output_folder>/Background``.

    Each row whose ``specie`` column equals ``"No audio"`` has the whole file
    named in its ``path`` column copied, keeping its base name. The source,
    destination folder and destination file are printed for every copy, and a
    summary with the number of files copied is printed at the end.

    The counter is initialised once before the loop. It used to be reset on
    every row, so the summary could never report more than one file and the
    function failed on an empty table. Files are copied with ``shutil.copy``
    instead of a shell ``cp`` command, so paths containing spaces or shell
    metacharacters work. A failed copy is reported and not counted.

    Args:
        annotations: DataFrame with ``path`` and ``specie`` columns. Defaults
            to the notebook-level ``df``.
        audio_root: Folder the ``path`` column is relative to. Defaults to
            ``PATH + "Dataset/Audios/"``.
        output_folder: Folder in which ``Background`` is created. Defaults to
            the notebook-level ``OUTPUT_FOLDER``.
    """
    if annotations is None:
        annotations = df
    if audio_root is None:
        audio_root = PATH + "Dataset/Audios/"
    if output_folder is None:
        output_folder = OUTPUT_FOLDER

    specie_folder = os.path.join(output_folder, "Background")
    num_bg = 0

    for _, row in annotations.iterrows():
        if row['specie'] != "No audio":
            continue

        audio_path = os.path.join(audio_root, row['path'])

        # Create the Background folder if it does not exist yet
        os.makedirs(specie_folder, exist_ok=True)

        print(audio_path)
        print(specie_folder)

        # The copy keeps the base name of the source recording
        output_filename = os.path.join(specie_folder, os.path.basename(audio_path))
        print(output_filename)

        # Copy the recording; report a failure instead of counting it
        try:
            shutil.copy(audio_path, output_filename)
        except OSError as error:
            print(f"No se pudo copiar {audio_path}: {error}")
            continue

        print("\n")

        num_bg += 1

    print("Proceso completado.")
    print(f"Se han guardado {num_bg} audios de background.")

In [7]:
# BirdNET label list. It is read without a header row, so its first column is
# addressed as species[0] when folder names are looked up further down.
Species_names_file = "../BirdNET_GLOBAL_6K_V2.4_Labels.txt"
species = pd.read_csv(Species_names_file, sep="\t", header=None)

In [8]:
# Folder whose species subfolders are renamed by change_subfolder_name()

SPECIES_FOLDER = f"{PATH}Dataset/Audios/BirdNet/train/"

# Rename each species subfolder to the matching entry of the label list
def change_subfolder_name(species_folder=None, species_labels=None):
    """Rename the species subfolders to the matching entry of the label list.

    Every subdirectory name is normalised to a lookup key: first letter
    upper-case, the rest lower-case, and only the first two space-separated
    words kept. The key then decides what happens:

    * keys in the general-category list are left untouched;
    * ``"No audio"`` is renamed to ``Background``;
    * otherwise the folder is renamed to the first label in column ``0`` of
      ``species_labels`` that contains the key, or reported as not found.

    Fixes over the previous version: the rename uses the folder's real on-disk
    name as its source (the normalised key was used before, so any folder whose
    case differed could not be found); the key is matched as plain text rather
    than as a regular expression; missing label values do not break the lookup;
    and a folder is skipped with a message when the target name already exists
    instead of raising.

    Args:
        species_folder: Folder containing the subfolders to rename. Defaults
            to the notebook-level ``SPECIES_FOLDER``.
        species_labels: DataFrame whose column ``0`` holds the labels.
            Defaults to the notebook-level ``species``.
    """
    if species_folder is None:
        species_folder = SPECIES_FOLDER
    if species_labels is None:
        species_labels = species

    # General categories (and the already-renamed Background) keep their name
    no_change = {"Bird", "Curruca", "Alaudidae", "Fringilla", "Lanius", "Passer", "Sturnus", "Sylvia", "Background"}

    for folder in os.listdir(species_folder):
        source = os.path.join(species_folder, folder)
        if not os.path.isdir(source):
            continue

        # Lookup key: capitalised first word plus lower-case second word
        name = folder.lower().capitalize()
        if " " in name:
            words = name.split(" ")
            name = words[0] + " " + words[1]

        if name in no_change:
            print(f"Folder {name} not renamed because it is a general category. +++++++++++++++")
            continue

        if name == "No audio":
            target_name = "Background"
        else:
            # Plain substring match; rows with a missing label never match
            matches = species_labels[species_labels[0].str.contains(name, regex=False, na=False)]
            if matches.empty:
                print(f"Species {name} not found in the species list. ----------------")
                continue
            target_name = matches[0].iloc[0]

        target = os.path.join(species_folder, target_name)
        if os.path.exists(target):
            # os.rename onto an existing folder would fail or clobber it
            print(f"Folder {name} not renamed because {target_name} already exists.")
            continue

        os.rename(source, target)
        if name != "No audio":
            print(f"Folder {name} renamed to {target_name}")

In [9]:
# Cut YOLOv8 predictions
import os
from pydub import AudioSegment
from PIL import Image

def desnormalizar_y_recortar_audio(prediccion_txt_path, output_folder,
                                   audio_root="../Data/Dataset/Audios",
                                   clip_duration_sec=60):
    """Cut one audio segment per detection listed in a prediction text file.

    The prediction file name (``<recorder>_<YYYYMMDD>_...``) locates the
    recording at ``<audio_root>/<recorder>/<YYYY>_<MM>_<DD>/<base_name>.WAV``.
    Every non-blank line must hold six whitespace-separated numbers, of which
    the 2nd, 4th and 6th are read as normalised x-centre, normalised width and
    score. The horizontal extent of each box is mapped to seconds by
    multiplying by ``clip_duration_sec``, clamped to the audio length, cut out
    and exported to
    ``<output_folder>/<base_name>_<start>_<end>_<score>.WAV``.

    Changes over the previous version:

    * blank lines in the prediction file are skipped instead of raising;
    * the output path is joined with ``os.path.join``, so ``output_folder``
      no longer needs a trailing slash;
    * the output folder is created once, before the loop;
    * the extension is stripped with ``os.path.splitext`` instead of removing
      every ``.txt`` occurrence in the name;
    * the spectrogram image is no longer opened. Its width was multiplied in
      and divided out again, so it never influenced the result.

    Args:
        prediccion_txt_path: Path of the prediction ``.txt`` file.
        output_folder: Folder that receives the segments (created if missing).
        audio_root: Root folder of the original recordings.
        clip_duration_sec: Seconds spanned by the full normalised x axis.

    Returns:
        Number of segments exported.
    """
    # Base name of the prediction file, without its extension
    base_name = os.path.splitext(os.path.basename(prediccion_txt_path))[0]

    name_parts = base_name.split('_')
    recorder = name_parts[0]
    date = name_parts[1]
    year = date[:4]
    month = date[4:6]
    day = date[6:]

    # Path of the original recording
    audio_path = os.path.join(audio_root, recorder, f"{year}_{month}_{day}", f"{base_name}.WAV")

    # Read the predictions, ignoring blank lines
    with open(prediccion_txt_path, 'r') as file:
        predictions = [line for line in file if line.strip()]

    # Load the original recording; len() of the segment is its length in ms
    audio = AudioSegment.from_wav(audio_path)
    audio_duration_sec = len(audio) / 1000

    os.makedirs(output_folder, exist_ok=True)

    predictions_processed = 0

    for i, line in enumerate(predictions):
        _, x_center, _, width, _, score = map(float, line.split())

        # Normalised horizontal extent of the box -> seconds
        half_width = width / 2
        start_sec = (x_center - half_width) * clip_duration_sec
        end_sec = (x_center + half_width) * clip_duration_sec

        # Keep both ends inside the recording
        start_sec = max(0, min(start_sec, audio_duration_sec))
        end_sec = max(0, min(end_sec, audio_duration_sec))

        # The audio object is sliced in milliseconds
        segment = audio[start_sec * 1000:end_sec * 1000]

        output_path = os.path.join(
            output_folder,
            f"{base_name}_{start_sec:.2f}_{end_sec:.2f}_{score:.2f}.WAV",
        )

        segment.export(output_path, format="wav")

        print(f"Segmento {i} guardado: {output_path} ({start_sec:.2f}s - {end_sec:.2f}s)")

        predictions_processed += 1

    return predictions_processed

In [13]:
# Folder with the prediction label files, and folder that receives the segments
prediction_folder = "../runs/detect/predict__val_model12_conf0/labels"
output_folder = "../BirdNET/Audios/predict__val_model12_conf0/"

# Meant for the TXT files located directly under ../run/predict/labels

# Only the .txt entries of the folder are prediction files
txt_files = [entry for entry in os.listdir(prediction_folder) if entry.endswith(".txt")]

# Cut the segments of every prediction file and add up how many were saved
total_segments = sum(
    desnormalizar_y_recortar_audio(os.path.join(prediction_folder, entry), output_folder)
    for entry in txt_files
)

print(f"Proceso completado. Se han guardado {total_segments} segmentos de audio.")

Segmento 0 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_6.38_7.41_0.01.WAV (6.38s - 7.41s)
Segmento 1 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_0.00_2.17_0.01.WAV (0.00s - 2.17s)
Segmento 2 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_4.36_6.38_0.01.WAV (4.36s - 6.38s)
Segmento 3 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_29.36_29.95_0.01.WAV (29.36s - 29.95s)
Segmento 4 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_45.65_48.22_0.01.WAV (45.65s - 48.22s)
Segmento 5 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_3.25_4.45_0.00.WAV (3.25s - 4.45s)
Segmento 6 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_29.18_31.47_0.00.WAV (29.18s - 31.47s)
Segmento 7 guardado: ../BirdNET/Audios/predict__val_model12_conf0/AM1_20230530_093000_0.00_0.59_0.00.WAV (0.00s - 0.59s)
Segmento 8 guardado: