In [33]:
import glob
import os

import pandas as pd

In [34]:
# Ground Truth CSV
PATH = "../Data/"
gt_csv = PATH + "Dataset/CSVs/test_with_bg.csv"

# Read the Ground Truth CSV
gt_df = pd.read_csv(gt_csv)
print("Número de Ground Truths: ", len(gt_df))

Número de Ground Truths:  542


### Functions to group predictions and ground truth annotations (if same specie and overlapping times)

In [35]:
def group_consecutive_predictions_birdnet(predictions_df):
    grouped_predictions = []
    current_group = []
    last_end = None
    for _, row in predictions_df.iterrows():
        if current_group and (row['Start (s)'] != last_end or row['Scientific name'] != current_group[-1]['Scientific name']):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['End (s)']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)
    # Combine groups in unique predictions
    combined_predictions = []
    for group in grouped_predictions:
        combined_prediction = {
            'Start (s)': group[0]['Start (s)'],
            'End (s)': group[-1]['End (s)'],
            'Scientific name': group[0]['Scientific name'],
            'Confidence': max(item['Confidence'] for item in group)  # conf = max confidence in group
        }
        combined_predictions.append(combined_prediction)
    return combined_predictions

def group_consecutive_annotations_gt(annotations_df):
    annotations_df.sort_values(by=['path', 'specie', 'start_time'], inplace=True)
    grouped_predictions = []
    current_group = []
    last_end = None
    last_path = None
    last_specie = None

    for _, row in annotations_df.iterrows():
        if current_group and (row['path'] != last_path or row['specie'] != last_specie or row['start_time'] - last_end >= 1):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['end_time']
        last_path = row['path']
        last_specie = row['specie']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)

    # Combine groups into unique predictions
    combined_annotations = []
    for group in grouped_predictions:
        combined_prediction = {
            'path': group[0]['path'],
            'start_time': group[0]['start_time'],
            'end_time': group[-1]['end_time'],
            'specie': group[0]['specie'],
            # Assuming bbox or confidence needs to be handled here. Adjust as necessary.
            # 'Confidence': max(item['Confidence'] for item in group)  # Example for confidence
        }
        combined_annotations.append(combined_prediction)
    
    return combined_annotations

In [36]:
gt_df = group_consecutive_annotations_gt(gt_df)
gt_df = pd.DataFrame(gt_df)  # Convert list dict to DataFrame
print("Número de Ground Truths agrupados: ", len(gt_df))

Número de Ground Truths agrupados:  469


### Auxiliar Functions

In [44]:
# Function to calculate the IoU
def calculate_iou(interval1, interval2):
    start_max = max(interval1[0], interval2[0])
    end_min = min(interval1[1], interval2[1])
    intersection = max(0, end_min - start_max)
    union = (interval1[1] - interval1[0]) + (interval2[1] - interval2[0]) - intersection
    return intersection / union if union != 0 else 0

In [47]:
def is_detection_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0):
    for prediction in grouped_predictions:
        iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_row['start_time'], gt_row['end_time']))
        if iou >= iou_threshold and prediction['Confidence'] >= confidence_threshold:
            return True
    return False

def is_detection_classification_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0):
    for prediction in grouped_predictions:
        iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_row['start_time'], gt_row['end_time']))
        if (prediction['Scientific name'].lower() == gt_row['specie'].lower() and
            iou >= iou_threshold and
            prediction['Confidence'] >= confidence_threshold):
            return True
    return False

'''def is_detection_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0): 
    for prediction in grouped_predictions: 
        if not (prediction['End (s)'] < gt_row['start_time'] or prediction['Start (s)'] > gt_row['end_time']) and prediction['Confidence'] >= confidence_threshold: 
            return True 
        return False

def is_detection_classification_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0): 
    for prediction in grouped_predictions: 
        if (prediction['Scientific name'].lower() == gt_row['specie'].lower() and not (prediction['End (s)'] < gt_row['start_time'] or prediction['Start (s)'] > gt_row['end_time']) and prediction['Confidence'] >= confidence_threshold): 
            return True 
    return False'''

'''# Function to check if a prediction is correct
def is_prediction_correct_detector_birdnet(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    return iou >= iou_threshold

# Function to check if a prediction is correct
def is_prediction_correct_birdnet(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    prediction_class = prediction['Scientific name'].lower()
    gt_class = gt_annotation['specie'].lower()
    scientific_name_matches = prediction_class == gt_class
    return iou >= iou_threshold and scientific_name_matches'''

"# Function to check if a prediction is correct\ndef is_prediction_correct_detector_birdnet(prediction, gt_annotation, iou_threshold):\n    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))\n    return iou >= iou_threshold\n\n# Function to check if a prediction is correct\ndef is_prediction_correct_birdnet(prediction, gt_annotation, iou_threshold):\n    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))\n    prediction_class = prediction['Scientific name'].lower()\n    gt_class = gt_annotation['specie'].lower()\n    scientific_name_matches = prediction_class == gt_class\n    return iou >= iou_threshold and scientific_name_matches"

In [82]:
def analyze_predictions_BirdNET(analysis_name, prediction_conf_score=0.6, iou_threshold=0.1):
    # Variables for metrics
    correct_predictions = 0
    total_predictions = 0
    total_predictions_score = 0

    total_gt = len(gt_df)

    # Metrics
    true_positives_detector = 0
    true_positives_classifier = 0
    false_positives = 0
    false_negatives_detector = 0
    false_negatives_classifier = 0
    true_negatives = 0

    # Process the predictions
    for _, gt_annotation in gt_df.iterrows():
        # Load the predictions
        prediction_path = f"../BirdNET/Predictions/{analysis_name}/{gt_annotation['path'].replace('.WAV', '.BirdNET.results.csv')}"

        try:
            predictions_df = pd.read_csv(prediction_path)
            grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
            predictions_df = pd.DataFrame(grouped_predictions)  # Convert list dict to DataFrame

            total_predictions += len(grouped_predictions)
            total_predictions_score += len([p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score])

            # If the annotation if not a background
            if gt_annotation['specie'] != 'No audio':
                # Check if the GT is detected by the detector
                if is_detection_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    true_positives_detector += 1
                else:
                    false_negatives_detector += 1
                
                if is_detection_classification_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    correct_predictions += 1
                    true_positives_classifier += 1
                else:
                    false_negatives_classifier += 1
            else: # Annotation is a background
                # Take only grouped_predictions with confidence >= prediction_conf_score
                grouped_predictions = [p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score]
                if len(grouped_predictions) == 0:
                    true_negatives += 1
                else: # Background is unique file, with no annotations, so all predictions on that file are false positives
                    false_positives += len(grouped_predictions)

        except FileNotFoundError:
            print(f"Prediction file not found: {prediction_path}")

    # Calculate and display the metrics
    print("================== Metrics ==================\n")
    print(f"Total Predictions: {total_predictions}")
    print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
    print(f"Total GT: {total_gt}")
    print(f"Correct Predictions Detector: {true_positives_detector}")
    print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

    # Additional calculations for precision, recall, and F1-score
    print("\n================== Detector Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_detector = (true_positives_detector + true_negatives) / (true_positives_detector + true_negatives + false_positives + false_negatives_detector) if true_positives_detector + true_negatives + false_positives + false_negatives_detector != 0 else 0
    precision_detector = true_positives_detector / (true_positives_detector + false_positives) if true_positives_detector + false_positives != 0 else 0
    recall_detector = true_positives_detector / (true_positives_detector + false_negatives_detector) if true_positives_detector + false_negatives_detector != 0 else 0
    f1_score_detector = 2 * precision_detector * recall_detector / (precision_detector + recall_detector) if precision_detector + recall_detector != 0 else 0
    print(f"Accuracy: {accuracy_detector}")
    print(f"Precision: {precision_detector}")
    print(f"Recall: {recall_detector}")
    print(f"F1-Score: {f1_score_detector}")

    print("\n================== Detector + Classifier Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_classifier = (true_positives_classifier + true_negatives) / (true_positives_classifier + true_negatives + false_positives + false_negatives_detector) if true_positives_classifier + true_negatives + false_positives + false_negatives_classifier != 0 else 0
    precision_classifier = true_positives_classifier / (true_positives_classifier + false_positives) if true_positives_classifier + false_positives != 0 else 0
    recall_classifier = true_positives_classifier / (true_positives_classifier + false_negatives_detector) if true_positives_classifier + false_negatives_detector != 0 else 0
    f1_score_classifier = 2 * precision_classifier * recall_classifier / (precision_classifier + recall_classifier) if precision_classifier + recall_classifier != 0 else 0
    print(f"Accuracy: {accuracy_classifier}")
    print(f"Precision: {precision_classifier}")
    print(f"Recall: {recall_classifier}")
    print(f"F1-Score: {f1_score_classifier}")

    print("\n================== Other ==================\n")
    print(f"False Positives: {false_positives}")

## Métricas de BirdNET Base con la lista de especies de Doñana de BIRDeep

In [83]:
analyze_predictions_BirdNET("0_BirdNet_Base_AllTest_DonanaSpecies", 0.6, 0.1)


Total Predictions: 2341
Total Predictions with Score >= 0.6: 527
Total GT: 469
Correct Predictions Detector: 60
Correct Predictions Detector + Classifier: 13


Accuracy: 0.17270788912579957
Precision: 1.0
Recall: 0.13392857142857142
F1-Score: 0.23622047244094488


Accuracy: 0.08056872037914692
Precision: 1.0
Recall: 0.032418952618453865
F1-Score: 0.06280193236714976


False Positives: 0


## Métricas de BirdNET Base con la lista de especies del customClassifier

In [84]:
analyze_predictions_BirdNET("1_BirdNet_Base_AllTest_ClassifierSpecies", 0.6, 0.1)


Total Predictions: 2798
Total Predictions with Score >= 0.6: 751
Total GT: 469
Correct Predictions Detector: 79
Correct Predictions Detector + Classifier: 26


Accuracy: 0.21108742004264391
Precision: 0.9875
Recall: 0.17633928571428573
F1-Score: 0.29924242424242425


Accuracy: 0.11057692307692307
Precision: 0.9629629629629629
Recall: 0.06582278481012659
F1-Score: 0.12322274881516589


False Positives: 1


In [85]:
analyze_predictions_BirdNET("1_BirdNet_Base_AllTest_ClassifierSpecies", 0.1, 0.1)


Total Predictions: 2798
Total Predictions with Score >= 0.1: 2798
Total GT: 469
Correct Predictions Detector: 175
Correct Predictions Detector + Classifier: 66


Accuracy: 0.4088983050847458
Precision: 0.9668508287292817
Recall: 0.390625
F1-Score: 0.5564387917329094


Accuracy: 0.23140495867768596
Precision: 0.9166666666666666
Recall: 0.19469026548672566
F1-Score: 0.32116788321167883


False Positives: 6


In [None]:
# Hacer con las predicciones de yolo como detectorr

# Correr inference.py desde la VPN, intentar encontrar mejores valores de iou y conf y poner esos para las métricas aquí, luego coger esos valores y ponerlos

# predictionms de val de yolov8:
'''
[{"image_id": "AM4_20230531_110000", "category_id": 0, "bbox": [301.605, 0.0, 31.041, 459.45], "score": 0.5276}, {"image_id": "AM15_20230712_074000", "category_id": 0, "bbox": [61.011, 0.439, 175.454, 460.228], "score": 0.48407}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [57.914, 0.573, 30.844, 461.131], "score": 0.65271}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [231.294, 1.093, 31.147, 460.902], "score": 0.53149}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [620.696, 2.401, 34.881, 459.599], "score": 0.44922}, {"image_id": "AM8_20230304_093000", "category_id": 0, "bbox": [765.516, 0.0, 162.609, 460.58], "score": 0.63678}, 
'''

# Y si se hace predict sobre la carpeta test??


# Copilot:
'''
Ahoar quiero modificar estas funciones:

def group_consecutive_predictions_birdnet(predictions_df):
    grouped_predictions = []
    current_group = []
    last_end = None
    for _, row in predictions_df.iterrows():
        if current_group and (row['Start (s)'] != last_end or row['Scientific name'] != current_group[-1]['Scientific name']):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['End (s)']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)
    # Combine groups in unique predictions
    combined_predictions = []
    for group in grouped_predictions:
        combined_prediction = {
            'Start (s)': group[0]['Start (s)'],
            'End (s)': group[-1]['End (s)'],
            'Scientific name': group[0]['Scientific name'],
            'Confidence': max(item['Confidence'] for item in group)  # conf = max confidence in group
        }
        combined_predictions.append(combined_prediction)
    return combined_predictions

# Function to calculate the IoU
def calculate_iou(interval1, interval2):
    start_max = max(interval1[0], interval2[0])
    end_min = min(interval1[1], interval2[1])
    intersection = max(0, end_min - start_max)
    union = (interval1[1] - interval1[0]) + (interval2[1] - interval2[0]) - intersection
    return intersection / union if union != 0 else 0

# Function to check if a prediction is correct
def is_prediction_correct_detector(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    return iou >= iou_threshold

# Function to check if a prediction is correct
def is_prediction_correct(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    prediction_class = prediction['Scientific name'].lower()
    gt_class = gt_annotation['specie'].lower()
    scientific_name_matches = prediction_class == gt_class
    return iou >= iou_threshold and scientific_name_matches

    Y este codigo:

    # Variables for metrics
correct_predictions = 0
total_predictions = 0
total_predictions_score = 0
correct_predictions_detector = 0
iou_threshold = 0.4  # This value is editable
prediction_conf_score = 0.4  # This value is editable

# Process the predictions
for _, gt_annotation in gt_df.iterrows():
    prediction_path = PATH + f"Dataset/BirdNET_Predictions/{gt_annotation['path'].replace('.WAV', '.BirdNET.results.csv')}"
    try:
        predictions_df = pd.read_csv(prediction_path)
        grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
        predictions_df = pd.DataFrame(grouped_predictions)  # Convert list dict to DataFrame

        for _, prediction in predictions_df.iterrows():
            total_predictions += 1
            if prediction['Confidence'] >= prediction_conf_score:
                total_predictions_score += 1
                if is_prediction_correct(prediction, gt_annotation, iou_threshold):
                    correct_predictions += 1
                if is_prediction_correct_detector(prediction, gt_annotation, iou_threshold):
                    correct_predictions_detector += 1
    except FileNotFoundError:
        print(f"Prediction file not found: {prediction_path}")

# Calculate and display the metrics
print("================== Metrics ==================\n")
print(f"Total Predictions: {total_predictions}")
print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
print(f"Total GT: {len(gt_df)}")
print(f"Correct Predictions Detector: {correct_predictions_detector}")
print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

# Additional calculations for precision, recall, and F1-score
print("\n================== Detector Metrics ==================\n")
true_positives = correct_predictions_detector
false_positives = total_predictions_score - correct_predictions_detector
false_negatives = len(gt_df) - correct_predictions_detector
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

print("\n================== Detector + Classifier Metrics ==================\n")
true_positives = correct_predictions
false_positives = total_predictions_score - correct_predictions
false_negatives = len(gt_df) - correct_predictions
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

Para que funcione sobre el fichero predictions.json con la estructura de datos:
[{"image_id": "AM4_20230531_110000", "category_id": 0, "bbox": [301.605, 0.0, 31.041, 459.45], "score": 0.5276}, {"image_id": "AM15_20230712_074000", "category_id": 0, "bbox": [61.011, 0.439, 175.454, 460.228], "score": 0.48407}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [57.914, 0.573, 30.844, 461.131], "score": 0.65271}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [231.294, 1.093, 31.147, 460.902], "score": 0.53149}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [620.696, 2.401, 34.881, 459.599], "score": 0.44922}, {"image_id": "AM8_20230304_093000", "category_id": 0, "bbox": [765.516, 0.0, 162.609, 460.58], "score": 0.63678}, ...

En la que 
'''

## Métricas de BirdNET Preentrenado con la lista de especies de Doñana de BIRDeep

In [86]:
analyze_predictions_BirdNET("2_BirdNet_FineTuning_AllTest", 0.6, 0.1)


Total Predictions: 7559
Total Predictions with Score >= 0.6: 167
Total GT: 469
Correct Predictions Detector: 17
Correct Predictions Detector + Classifier: 6


Accuracy: 0.08102345415778252
Precision: 1.0
Recall: 0.03794642857142857
F1-Score: 0.07311827956989246


Accuracy: 0.05895196506550218
Precision: 1.0
Recall: 0.013729977116704805
F1-Score: 0.02708803611738149


False Positives: 0


In [87]:
analyze_predictions_BirdNET("2_BirdNet_FineTuning_AllTest", 0.1, 0.1)


Total Predictions: 7559
Total Predictions with Score >= 0.1: 7559
Total GT: 469
Correct Predictions Detector: 280
Correct Predictions Detector + Classifier: 104


Accuracy: 0.5868263473053892
Precision: 0.877742946708464
Recall: 0.625
F1-Score: 0.7301173402868317


Accuracy: 0.3630769230769231
Precision: 0.7272727272727273
Recall: 0.38235294117647056
F1-Score: 0.5012048192771085


False Positives: 39


## Métricas de BirdNET customClassifier con los recortes de YOLOv8 para test

In [10]:
# Hacer con las predicciones de BIRDNET despues de entrenar sobre los recortes del detector de yolov8

# Hacer con las predicciones de BirdNet después de haber entrenado sobre el conjunto de test total
# Variables for metrics
correct_predictions = 0
total_predictions = 0
total_predictions_score = 0
correct_predictions_detector = 0
iou_threshold = 0.1  # This value is editable
prediction_conf_score = 0.6  # This value is editable

# Process the predictions
for _, gt_annotation in gt_df.iterrows():
    gt_basename = gt_annotation['path'].split('/')[-1]
    # Remove the extension and potential suffix _X
    gt_basename_no_ext = os.path.splitext(gt_basename)[0]
    if "_" in gt_basename_no_ext:
        gt_basename_no_ext = "_".join(gt_basename_no_ext.split("_")[:-1])
    prediction_pattern = f"../BirdNET/Predictions/3_BirdNET_FineTuning_DetectorTest/{gt_basename_no_ext}_*.BirdNET.results.csv"
    
    # Find all matching prediction files
    prediction_files = glob.glob(prediction_pattern)
    for prediction_path in prediction_files:
        try:
            predictions_df = pd.read_csv(prediction_path)
            grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
            predictions_df = pd.DataFrame(grouped_predictions)  # Convert list dict to DataFrame

            for _, prediction in predictions_df.iterrows():
                total_predictions += 1
                if prediction['Confidence'] >= prediction_conf_score:
                    total_predictions_score += 1
                    if is_prediction_correct_birdnet(prediction, gt_annotation, iou_threshold):
                        correct_predictions += 1
                    if is_prediction_correct_detector_birdnet(prediction, gt_annotation, iou_threshold):
                        correct_predictions_detector += 1
        except FileNotFoundError:
            print(f"Prediction file not found: {prediction_path}")
        except pd.errors.EmptyDataError:
            print(f"No data in file: {prediction_path}")

# Calculate and display the metrics
print("================== Metrics ==================\n")
print(f"Total Predictions: {total_predictions}")
print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
print(f"Total GT: {len(gt_df)}")
print(f"Correct Predictions Detector: {correct_predictions_detector}")
print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

# Additional calculations for precision, recall, and F1-score
print("\n================== Detector Metrics ==================\n")
true_positives = correct_predictions_detector
false_positives = total_predictions_score - correct_predictions_detector
false_negatives = len(gt_df) - correct_predictions_detector
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

print("\n================== Detector + Classifier Metrics ==================\n")
true_positives = correct_predictions
false_positives = total_predictions_score - correct_predictions
false_negatives = len(gt_df) - correct_predictions
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

'''
Quiero editar esta función para que funcione con predicciones que están estructuradas diferente. Esta es la función:

def analyze_predictions_BirdNET(analysis_name, prediction_conf_score=0.6, iou_threshold=0.1):
    # Variables for metrics
    correct_predictions = 0
    total_predictions = 0
    total_predictions_score = 0

    total_gt = len(gt_df)

    # Metrics
    true_positives_detector = 0
    true_positives_classifier = 0
    false_positives = 0
    false_negatives_detector = 0
    false_negatives_classifier = 0
    true_negatives = 0

    # Process the predictions
    for _, gt_annotation in gt_df.iterrows():
        # Load the predictions
        prediction_path = f"../BirdNET/Predictions/{analysis_name}/{gt_annotation['path'].replace('.WAV', '.BirdNET.results.csv')}"

        try:
            predictions_df = pd.read_csv(prediction_path)
            grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
            predictions_df = pd.DataFrame(grouped_predictions)  # Convert list dict to DataFrame

            total_predictions += len(grouped_predictions)
            total_predictions_score += len([p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score])

            # If the annotation if not a background
            if gt_annotation['specie'] != 'No audio':
                # Check if the GT is detected by the detector
                if is_detection_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    true_positives_detector += 1
                else:
                    false_negatives_detector += 1
                
                if is_detection_classification_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    correct_predictions += 1
                    true_positives_classifier += 1
                else:
                    false_negatives_classifier += 1
            else: # Annotation is a background
                # Take only grouped_predictions with confidence >= prediction_conf_score
                grouped_predictions = [p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score]
                if len(grouped_predictions) == 0:
                    true_negatives += 1
                else: # Background is unique file, with no annotations, so all predictions on that file are false positives
                    false_positives += len(grouped_predictions)

        except FileNotFoundError:
            print(f"Prediction file not found: {prediction_path}")

    # Calculate and display the metrics
    print("================== Metrics ==================\n")
    print(f"Total Predictions: {total_predictions}")
    print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
    print(f"Total GT: {total_gt}")
    print(f"Correct Predictions Detector: {true_positives_detector}")
    print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

    # Additional calculations for precision, recall, and F1-score
    print("\n================== Detector Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_detector = (true_positives_detector + true_negatives) / (true_positives_detector + true_negatives + false_positives + false_negatives_detector) if true_positives_detector + true_negatives + false_positives + false_negatives_detector != 0 else 0
    precision_detector = true_positives_detector / (true_positives_detector + false_positives) if true_positives_detector + false_positives != 0 else 0
    recall_detector = true_positives_detector / (true_positives_detector + false_negatives_detector) if true_positives_detector + false_negatives_detector != 0 else 0
    f1_score_detector = 2 * precision_detector * recall_detector / (precision_detector + recall_detector) if precision_detector + recall_detector != 0 else 0
    print(f"Accuracy: {accuracy_detector}")
    print(f"Precision: {precision_detector}")
    print(f"Recall: {recall_detector}")
    print(f"F1-Score: {f1_score_detector}")

    print("\n================== Detector + Classifier Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_classifier = (true_positives_classifier + true_negatives) / (true_positives_classifier + true_negatives + false_positives + false_negatives_detector) if true_positives_classifier + true_negatives + false_positives + false_negatives_classifier != 0 else 0
    precision_classifier = true_positives_classifier / (true_positives_classifier + false_positives) if true_positives_classifier + false_positives != 0 else 0
    recall_classifier = true_positives_classifier / (true_positives_classifier + false_negatives_detector) if true_positives_classifier + false_negatives_detector != 0 else 0
    f1_score_classifier = 2 * precision_classifier * recall_classifier / (precision_classifier + recall_classifier) if precision_classifier + recall_classifier != 0 else 0
    print(f"Accuracy: {accuracy_classifier}")
    print(f"Precision: {precision_classifier}")
    print(f"Recall: {recall_classifier}")
    print(f"F1-Score: {f1_score_classifier}")

    print("\n================== Other ==================\n")
    print(f"False Positives: {false_positives}")


    Y la estructura de las predicciones es, dentro de la carpeta analysis_name estan directamente los ficheros con el nombre gt_basename_no_ext:

    for _, gt_annotation in gt_df.iterrows():
    gt_basename = gt_annotation['path'].split('/')[-1]
    # Remove the extension and potential suffix _X
    gt_basename_no_ext = os.path.splitext(gt_basename)[0]

Pero hay varios ficheros para un mismo audios
'''


Total Predictions: 2652
Total Predictions with Score >= 0.6: 106
Total GT: 542
Correct Predictions Detector: 15
Correct Predictions Detector + Classifier: 3


Accuracy: 0.023696682464454975
Precision: 0.14150943396226415
Recall: 0.027675276752767528
F1-Score: 0.0462962962962963


Accuracy: 0.004651162790697674
Precision: 0.02830188679245283
Recall: 0.005535055350553505
F1-Score: 0.009259259259259257
