In [16]:
import numpy as np
import os

from sklearn.ensemble import RandomForestClassifier
# import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

## Load Embeddings

In [55]:
# Load saved embeddings and labels
train_embeddings = np.load("../../../Data/Dataset/Embeddings/train_embeddings.npy", allow_pickle=True)
train_labels = np.load("../../../Data/Dataset/Embeddings/train_labels.npy", allow_pickle=True)

validation_embeddings = np.load("../../../Data/Dataset/Embeddings/validation_embeddings.npy", allow_pickle=True)
validation_labels = np.load("../../../Data/Dataset/Embeddings/validation_labels.npy", allow_pickle=True)

test_embeddings = np.load("../../../Data/Dataset/Embeddings/test_embeddings.npy", allow_pickle=True)
test_file_names = np.load("../../../Data/Dataset/Embeddings/test_file_names.npy", allow_pickle=True)
test_files_idx = np.load("../../../Data/Dataset/Embeddings/test_file_idx.npy", allow_pickle=True)

bsd_test_embeddings = np.load("../../../Data/Dataset/Embeddings/bsd_test_embeddings.npy", allow_pickle=True)
bsd_test_file_names = np.load("../../../Data/Dataset/Embeddings/bsd_test_file_names.npy", allow_pickle=True)
bsd_test_files_idx = np.load("../../../Data/Dataset/Embeddings/bsd_files_idx.npy", allow_pickle=True)

## Train Classifier with embeddings

In [23]:
# Encode the labels
label_encoder = LabelEncoder()
train_encoded_labels = label_encoder.fit_transform(train_labels)
validation_encoded_labels = label_encoder.transform(validation_labels)

# Reshape embeddings to 2D
train_embeddings_reshaped = train_embeddings.reshape(train_embeddings.shape[0], -1)
validation_embeddings_reshaped = validation_embeddings.reshape(validation_embeddings.shape[0], -1)

# Train various RandomForest models with different configurations and take the best one
best_model = None
best_accuracy = 0

for n_estimators in [100, 200, 300, 400, 500]:
    for max_depth in [None, 10, 20, 30, 40, 50]:
        for min_samples_split in [2, 5, 10]:
            for min_samples_leaf in [1, 2, 4]:
                model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                            min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)
                model.fit(train_embeddings_reshaped, train_encoded_labels)

                validation_predictions = model.predict(validation_embeddings_reshaped)
                accuracy = accuracy_score(validation_encoded_labels, validation_predictions)

                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_model = model

print("Best model accuracy: ", best_accuracy)
rf_best_model = best_model

Best model accuracy:  0.6243654822335025


In [24]:
# print best model configuration
print(rf_best_model)

RandomForestClassifier(max_depth=50, min_samples_leaf=2, min_samples_split=10)


In [26]:
!ls ../../

BirdNET  Scripts  models


In [27]:
# Save best model
np.save("../../models/RandomForest/best_model.npy", rf_best_model)

## Test Model

In [56]:
# Predecir etiquetas de los audios de prueba
test_predictions = rf_best_model.predict(test_embeddings.reshape(test_embeddings.shape[0], -1))

# Decodificar las etiquetas predichas
test_predictions = label_encoder.inverse_transform(test_predictions)

# Mostrar las predicciones para cada archivo
for file_name, pred, idx in zip(test_file_names, test_predictions, test_files_idx):
    print(f"Archivo: {file_name}, Predicción: {pred}, Índice: {idx}")

Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 0
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 1
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 2
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 3
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 4
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 5
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 6
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 7
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 8
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 9
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 10
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 11
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 12
Archivo: AM1_20230530_073000.WAV, Predicción: Background, Índice: 13
Archivo: AM1_20230530_073000.WAV, Predicción

In [59]:
import os

# Save predictions in a folder named "predictions" for each prediction, one file with the name of the audio and the prediction
predictions_folder = "../../RandomForest/predictions/test"
os.makedirs(predictions_folder, exist_ok=True)

start_s = 0
end_s = 3.0
current_file_name = None

# sort filenames and predictions by filename alphabetically
test_file_names, test_predictions, test_files_idx = zip(*sorted(zip(test_file_names, test_predictions, test_files_idx)))

for file_name, pred, idx in zip(test_file_names, test_predictions, test_files_idx):
    filename = file_name.replace(".WAV", ".BirdNET.results").replace(".wav", ".BirdNET.results")
    file_path = os.path.join(predictions_folder, filename + ".csv")

    # Reset time if a new file starts
    if current_file_name != filename:
        current_file_name = filename
        # write_header = True  # Flag to write header only once
        with open(file_path, "a") as f:
            f.write("Start (s),End (s),Scientific name,Common name,Confidence\n")

    start_s = 0 + 3.0 * idx
    end_s = 3.0 + 3.0 * idx

    if end_s > 60:
        print(f"Skipping prediction for {file_name}: {pred}, {end_s}")

    # Only write predictions if not "Background"
    if pred != "Background":
        # print(f"Saving prediction for {file_name}: {pred}")
        
        # Extract scientific and common name
        if "_" in pred:
            scientific_name, common_name = pred.split("_", 1)
        else:
            scientific_name = pred
            common_name = pred

        # Append to the file
        with open(file_path, "a") as f:
            f.write(f"{start_s},{end_s},{scientific_name},{common_name},1\n")

print("Predictions saved successfully.")

Predictions saved successfully.


In [61]:
# Predecir etiquetas de los audios de prueba
bsd_test_predictions = rf_best_model.predict(bsd_test_embeddings.reshape(bsd_test_embeddings.shape[0], -1))

# Decodificar las etiquetas predichas
bsd_test_predictions = label_encoder.inverse_transform(bsd_test_predictions)

# Mostrar las predicciones para cada archivo
for file_name, pred, idx in zip(bsd_test_file_names, bsd_test_predictions, bsd_test_files_idx):
    print(f"Archivo: {file_name}, Predicción: {pred}, Índice: {idx}")

Archivo: AM1_20230510_083000_28.74_30.30_0.40.WAV, Predicción: Galerida theklae_Thekla's Lark, Índice: 0
Archivo: AM10_20230523_070000_0.00_1.70_0.61.WAV, Predicción: Emberiza calandra_Corn Bunting, Índice: 0
Archivo: AM1_20230512_090000_20.60_22.01_0.80.WAV, Predicción: Sylvia, Índice: 0
Archivo: AM10_20230523_070000_58.76_59.99_0.22.WAV, Predicción: Emberiza calandra_Corn Bunting, Índice: 0
Archivo: AM8_20230523_064000_52.48_53.54_0.17.WAV, Predicción: Sturnus, Índice: 0
Archivo: AM1_20230512_090000_31.75_33.38_0.20.WAV, Predicción: Sylvia, Índice: 0
Archivo: AM1_20230512_090000_40.38_42.01_0.50.WAV, Predicción: Sylvia, Índice: 0
Archivo: AM8_20230523_101000_44.08_44.68_0.22.WAV, Predicción: Luscinia megarhynchos_Common Nightingale, Índice: 0
Archivo: AM10_20230718_070000_23.95_24.99_0.28.WAV, Predicción: Carduelis carduelis_European Goldfinch, Índice: 0
Archivo: AM10_20230523_063000_45.84_47.01_0.18.WAV, Predicción: Background, Índice: 0
Archivo: AM1_20230511_090000_6.04_7.29_0.17.W

In [62]:
import os

# Save predictions in a folder named "predictions" for each prediction, one file with the name of the audio and the prediction
predictions_folder = "../../RandomForest/predictions/BirdSongDetectorTest"
os.makedirs(predictions_folder, exist_ok=True)

start_s = 0
end_s = 3.0
current_file_name = None

# sort filenames and predictions by filename alphabetically
bsd_test_file_names, bsd_test_predictions, bsd_test_files_idx = zip(*sorted(zip(bsd_test_file_names, bsd_test_predictions, bsd_test_files_idx)))

for file_name, pred, idx in zip(bsd_test_file_names, bsd_test_predictions, bsd_test_files_idx):
    filename = file_name.replace(".WAV", ".BirdNET.results").replace(".wav", ".BirdNET.results")
    file_path = os.path.join(predictions_folder, filename + ".csv")

    # Reset time if a new file starts
    if current_file_name != filename:
        current_file_name = filename
        # write_header = True  # Flag to write header only once
        with open(file_path, "a") as f:
            f.write("Start (s),End (s),Scientific name,Common name,Confidence\n")

    start_s = 0 + 3.0 * idx
    end_s = 3.0 + 3.0 * idx

    if end_s > 60:
        print(f"Skipping prediction for {file_name}: {pred}, {end_s}")

    # Only write predictions if not "Background"
    if pred != "Background":
        # print(f"Saving prediction for {file_name}: {pred}")
        
        # Extract scientific and common name
        if "_" in pred:
            scientific_name, common_name = pred.split("_", 1)
        else:
            scientific_name = pred
            common_name = pred

        # Append to the file
        with open(file_path, "a") as f:
            f.write(f"{start_s},{end_s},{scientific_name},{common_name},1\n")

print("Predictions saved successfully.")

Predictions saved successfully.


In [58]:
# Save predictions in a folder named "predictions" for each prediction, one file with the name of the audio and the prediction, start_time = 0 and end_time = 3
predictions_folder = "BirdSongDetectorRGBPredictions"
os.makedirs(predictions_folder, exist_ok=True)

for file_name, pred in zip(test_file_names, test_predictions):
    filename = file_name.replace("WAV", "BirdNET.results")
    with open(os.path.join(predictions_folder, filename + ".csv"), "w") as f:
        # first line is header: Start (s),End (s),Scientific name,Common name,Confidence
        f.write("Start (s),End (s),Scientific name,Common name,Confidence\n")
        if pred != "Background":
            # if pred contains _ then split it and use the first part as scientific name and the second part as common name
            if "_" in pred:
                scientific_name, common_name = pred.split("_", 1)
            else:
                scientific_name = pred
                common_name = pred
            f.write(f"0,3.0,{scientific_name},{common_name},1\n")

print("Predictions saved to the 'predictions' folder")

Predictions saved to the 'predictions' folder


In [None]:
# print all csv filename in predict_test_model12_conf015 sorted alphabetically
for file_name in sorted(os.listdir(test_folder)):
    if file_name.endswith(".WAV"):
        print(file_name)

AM10_20230509_063000_21.77_23.40_0.18.WAV
AM10_20230509_090000_0.01_1.03_0.16.WAV
AM10_20230509_090000_1.79_3.22_0.19.WAV
AM10_20230509_090000_13.00_14.38_0.20.WAV
AM10_20230509_090000_19.78_20.83_0.16.WAV
AM10_20230509_090000_23.78_25.38_0.16.WAV
AM10_20230509_090000_31.95_32.46_0.22.WAV
AM10_20230509_090000_36.93_38.23_0.15.WAV
AM10_20230509_090000_49.04_50.29_0.18.WAV
AM10_20230509_090000_5.47_6.95_0.30.WAV
AM10_20230509_090000_52.85_53.90_0.28.WAV
AM10_20230509_090000_58.66_59.97_0.19.WAV
AM10_20230523_063000_0.00_0.88_0.17.WAV
AM10_20230523_063000_11.74_13.34_0.40.WAV
AM10_20230523_063000_18.33_19.56_0.62.WAV
AM10_20230523_063000_2.42_3.88_0.18.WAV
AM10_20230523_063000_24.40_25.89_0.73.WAV
AM10_20230523_063000_30.41_31.32_0.23.WAV
AM10_20230523_063000_32.95_34.51_0.79.WAV
AM10_20230523_063000_45.84_47.01_0.18.WAV
AM10_20230523_063000_5.74_6.93_0.30.WAV
AM10_20230523_063000_50.13_51.67_0.55.WAV
AM10_20230523_063000_51.25_53.21_0.25.WAV
AM10_20230523_063000_53.93_55.37_0.45.WAV
AM10

In [None]:
!ls ../Data/Dataset/Audios/test/

 AM1   AM10   AM11   AM15   AM4   AM8  'Data Augmentation'


In [None]:
segment_duration = 3  # Duración de cada segmento en segundos

# Función para preprocesar y ajustar un segmento de audio
def preprocess_segment(audio_segment):
    if len(audio_segment) < target_length:
        padding = target_length - len(audio_segment)
        audio_segment = np.pad(audio_segment, (0, padding), 'constant')
    return audio_segment[:target_length]

# Función para dividir el audio en segmentos de 3 segundos
def split_audio_into_segments(audio_path):
    audio, sr = librosa.load(audio_path, sr=sample_rate)
    segments = []
    for start in range(0, len(audio), target_length):
        segment = audio[start:start + target_length]
        segment = preprocess_segment(segment)
        segments.append(segment)
    return segments

# Carpeta de audios de prueba organizados en subcarpetas
test_folder = "../Data/Dataset/Audios/test/"

# Carpeta de salida para las predicciones
# predictions_folder = "RFPredictions"
predictions_folder = "RGBPredictions"
os.makedirs(predictions_folder, exist_ok=True)

# Procesar cada archivo de audio en cada subcarpeta
for root, _, files in os.walk(test_folder):
    for file_name in files:
        if file_name.endswith(".WAV"):
            audio_path = os.path.join(root, file_name)
            segments = split_audio_into_segments(audio_path)

            # Obtener embeddings y predicciones para cada segmento
            segment_predictions = []
            for i, segment in enumerate(segments):
                embedding = get_embedding(segment)
                # prediction = rf_clf.predict([embedding])[0]  # Predecir usando RandomForest
                prediction = xgb_clf.predict([embedding])[0]

                # decode prediction
                prediction = label_encoder.inverse_transform([prediction])[0]
                
                # Solo agregar predicción si no es "background"
                if prediction != "Background":
                    start_time = i * segment_duration
                    end_time = start_time + segment_duration
                    segment_predictions.append((start_time, end_time, prediction))

            # Guardar predicciones en un archivo CSV replicando la estructura de carpetas
            relative_path = os.path.relpath(audio_path, test_folder)
            output_path = os.path.join(predictions_folder, os.path.dirname(relative_path))
            os.makedirs(output_path, exist_ok=True)
            output_filename = os.path.splitext(file_name)[0] + ".BirdNET.results.csv"
            with open(os.path.join(output_path, output_filename), "w") as f:
                f.write("Start (s),End (s),Scientific name,Common name,Confidence\n")
                for start, end, pred in segment_predictions:
                    if "_" in pred:
                        scientific_name, common_name = pred.split("_", 1)
                    else:
                        scientific_name = common_name = pred
                    f.write(f"{start},{end},{scientific_name},{common_name},1\n")

print("Predictions saved to the 'RFPredictions' folder")

Predictions saved to the 'RFPredictions' folder
