In [185]:
import glob
import os

import pandas as pd

In [186]:
# Ground Truth CSV
PATH = "../Data/"
gt_csv = PATH + "Dataset/CSVs/test_with_bg.csv"

# Read the Ground Truth CSV
gt_df = pd.read_csv(gt_csv)
print("Número de Ground Truths: ", len(gt_df))

Número de Ground Truths:  542


### Functions to group predictions and ground truth annotations (if same specie and overlapping times)

In [187]:
def group_consecutive_predictions_birdnet(predictions_df):
    grouped_predictions = []
    current_group = []
    last_end = None
    for _, row in predictions_df.iterrows():
        if current_group and (row['Start (s)'] != last_end or row['Scientific name'] != current_group[-1]['Scientific name']):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['End (s)']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)
    # Combine groups in unique predictions
    combined_predictions = []
    for group in grouped_predictions:
        combined_prediction = {
            'Start (s)': group[0]['Start (s)'],
            'End (s)': group[-1]['End (s)'],
            'Scientific name': group[0]['Scientific name'],
            'Confidence': max(item['Confidence'] for item in group)  # conf = max confidence in group
        }
        combined_predictions.append(combined_prediction)
    return combined_predictions

def group_consecutive_annotations_gt(annotations_df):
    annotations_df.sort_values(by=['path', 'specie', 'start_time'], inplace=True)
    grouped_predictions = []
    current_group = []
    last_end = None
    last_path = None
    last_specie = None

    for _, row in annotations_df.iterrows():
        if current_group and (row['path'] != last_path or row['specie'] != last_specie or row['start_time'] - last_end >= 1):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['end_time']
        last_path = row['path']
        last_specie = row['specie']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)

    # Combine groups into unique predictions
    combined_annotations = []
    for group in grouped_predictions:
        combined_prediction = {
            'path': group[0]['path'],
            'start_time': group[0]['start_time'],
            'end_time': group[-1]['end_time'],
            'specie': group[0]['specie'],
            # Assuming bbox or confidence needs to be handled here. Adjust as necessary.
            # 'Confidence': max(item['Confidence'] for item in group)  # Example for confidence
        }
        combined_annotations.append(combined_prediction)
    
    return combined_annotations

In [188]:
gt_df = group_consecutive_annotations_gt(gt_df)
gt_df = pd.DataFrame(gt_df)  # Convert list dict to DataFrame
print("Número de Ground Truths agrupados: ", len(gt_df))

Número de Ground Truths agrupados:  469


### Auxiliar Functions

In [189]:
# Function to calculate the IoU
def calculate_iou(interval1, interval2):
    start_max = max(interval1[0], interval2[0])
    end_min = min(interval1[1], interval2[1])
    intersection = max(0, end_min - start_max)
    union = (interval1[1] - interval1[0]) + (interval2[1] - interval2[0]) - intersection
    return intersection / union if union != 0 else 0

In [190]:
def is_detection_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0):
    for prediction in grouped_predictions:
        if prediction['Confidence'] >= confidence_threshold:
            iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_row['start_time'], gt_row['end_time']))
            if iou >= iou_threshold:
                return True
    return False

def is_detection_classification_birdnet(gt_row, grouped_predictions, confidence_threshold, iou_threshold=0):
    for prediction in grouped_predictions:
        if prediction['Confidence'] >= confidence_threshold:
            iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_row['start_time'], gt_row['end_time']))
            if (prediction['Scientific name'].lower() == gt_row['specie'].lower() and
                iou >= iou_threshold):
                return True
    return False

In [191]:
def display_metrics(total_predictions, prediction_conf_score, total_predictions_score, total_gt, true_positives_detector, true_positives_classifier, correct_predictions, false_positives, false_negatives_detector, false_negatives_classifier, true_negatives):
    print("================== Metrics ==================\n")
    print(f"Total Predictions: {total_predictions}")
    print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
    print(f"Total GT: {total_gt}")
    print(f"Correct Predictions Detector: {true_positives_detector}")
    print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

    # Additional calculations for precision, recall, and F1-score
    print("\n================== Detector Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_detector = (true_positives_detector + true_negatives) / (true_positives_detector + true_negatives + false_positives + false_negatives_detector) if true_positives_detector + true_negatives + false_positives + false_negatives_detector != 0 else 0
    precision_detector = true_positives_detector / (true_positives_detector + false_positives) if true_positives_detector + false_positives != 0 else 0
    recall_detector = true_positives_detector / (true_positives_detector + false_negatives_detector) if true_positives_detector + false_negatives_detector != 0 else 0
    f1_score_detector = 2 * precision_detector * recall_detector / (precision_detector + recall_detector) if precision_detector + recall_detector != 0 else 0
    print(f"Accuracy: {accuracy_detector}")
    print(f"Precision: {precision_detector}")
    print(f"Recall: {recall_detector}")
    print(f"F1-Score: {f1_score_detector}")

    print("\n================== Detector + Classifier Metrics ==================\n")

    # Calculate and display the metrics
    accuracy_classifier = (true_positives_classifier + true_negatives) / (true_positives_classifier + true_negatives + false_positives + false_negatives_detector) if true_positives_classifier + true_negatives + false_positives + false_negatives_classifier != 0 else 0
    precision_classifier = true_positives_classifier / (true_positives_classifier + false_positives) if true_positives_classifier + false_positives != 0 else 0
    recall_classifier = true_positives_classifier / (true_positives_classifier + false_negatives_detector) if true_positives_classifier + false_negatives_detector != 0 else 0
    f1_score_classifier = 2 * precision_classifier * recall_classifier / (precision_classifier + recall_classifier) if precision_classifier + recall_classifier != 0 else 0
    print(f"Accuracy: {accuracy_classifier}")
    print(f"Precision: {precision_classifier}")
    print(f"Recall: {recall_classifier}")
    print(f"F1-Score: {f1_score_classifier}")

    print("\n================== Other ==================\n")
    print(f"False Positives: {false_positives}")

def analyze_predictions_BirdNET(analysis_name, prediction_conf_score=0.6, iou_threshold=0.1):
    # Variables for metrics
    correct_predictions = 0
    total_predictions = 0
    total_predictions_score = 0

    total_gt = len(gt_df)

    # Metrics
    true_positives_detector = 0
    true_positives_classifier = 0
    false_positives = 0
    false_negatives_detector = 0
    false_negatives_classifier = 0
    true_negatives = 0

    last_file = None

    # Process the predictions
    for _, gt_annotation in gt_df.iterrows():
        # Load the predictions
        prediction_path = f"../BirdNET/Predictions/{analysis_name}/{gt_annotation['path'].replace('.WAV', '.BirdNET.results.csv')}"

        current_file = gt_annotation['path']

        try:
            predictions_df = pd.read_csv(prediction_path)
            grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions

            if current_file != last_file:
                total_predictions += len(grouped_predictions)
                current_predictions_score = len([p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score])
                total_predictions_score += current_predictions_score

            # If the annotation if not a background
            if gt_annotation['specie'] != 'No audio':
                # Check if the GT is detected by the detector
                if is_detection_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    true_positives_detector += 1
                else:
                    false_negatives_detector += 1
                
                if is_detection_classification_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    correct_predictions += 1
                    true_positives_classifier += 1
                else:
                    false_negatives_classifier += 1
            else: # Annotation is a background
                # Take only grouped_predictions with confidence >= prediction_conf_score
                if current_predictions_score == 0:
                    true_negatives += 1
                else: # Background is unique file, with no annotations, so all predictions on that file are false positives
                    false_positives += current_predictions_score

            last_file = current_file

        except FileNotFoundError:
            print(f"Prediction file not found: {prediction_path}")

    # Calculate and display the metrics
    display_metrics(total_predictions, prediction_conf_score, total_predictions_score, total_gt, true_positives_detector, true_positives_classifier, correct_predictions, false_positives, false_negatives_detector, false_negatives_classifier, true_negatives)

In [192]:
def analyze_predictions_YOLO_BirdNET(analysis_name, prediction_conf_score=0.6, iou_threshold=0.1):
    # Variables for metrics
    correct_predictions = 0
    total_predictions = 0
    total_predictions_score = 0
    total_gt = len(gt_df)
    # Metrics
    true_positives_detector = 0
    true_positives_classifier = 0
    false_positives = 0
    false_negatives_detector = 0
    false_negatives_classifier = 0
    true_negatives = 0

    last_file = None

    # Process the predictions
    for _, gt_annotation in gt_df.iterrows():
        gt_basename = gt_annotation['path'].split('/')[-1]
        gt_basename_no_ext = os.path.splitext(gt_basename)[0]
        # Assuming the structure of the file name is consistent with the example given
        prediction_files = os.listdir(f"../BirdNET/Predictions/{analysis_name}")
        prediction_files_filtered = [file for file in prediction_files if file.startswith(gt_basename_no_ext)]

        current_file = gt_annotation['path']
        
        # predictions_df initialization
        predictions_df = pd.DataFrame(columns=['Start (s)', 'End (s)', 'Scientific name', 'Common name', 'Confidence'])

        for prediction_file in prediction_files_filtered:
            start_time, end_time = prediction_file.split('_')[-2:]
            end_time = end_time.split('.BirdNET')[0]
            start_time = float(start_time)
            end_time = float(end_time)
            prediction_path = f"../BirdNET/Predictions/{analysis_name}/{prediction_file}"

            # Create prediction DataFrame and append all of current prediction file
            current_prediction_df = pd.read_csv(prediction_path)

            # Set start and end times
            current_prediction_df['Start (s)'] = float(start_time)
            current_prediction_df['End (s)'] = float(end_time)

            # If current_prediction_df is not empty, concatenate it to predictions_df
            if not current_prediction_df.empty:
                if predictions_df.empty:
                    predictions_df = current_prediction_df
                else:
                    predictions_df = pd.concat([predictions_df, current_prediction_df], ignore_index=True)

        grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
        
        try:
            if current_file != last_file:
                total_predictions += len(grouped_predictions)
                current_predictions_score = len([p for p in grouped_predictions if p['Confidence'] >= prediction_conf_score])
                total_predictions_score += current_predictions_score
            # If the annotation if not a background
            if gt_annotation['specie'] != 'No audio':
                # Check if the GT is detected by the detector
                if is_detection_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    true_positives_detector += 1
                else:
                    false_negatives_detector += 1
                
                if is_detection_classification_birdnet(gt_annotation, grouped_predictions, prediction_conf_score, iou_threshold):
                    correct_predictions += 1
                    true_positives_classifier += 1
                else:
                    false_negatives_classifier += 1
            else: # Annotation is a background
                # Take only grouped_predictions with confidence >= prediction_conf_score
                if current_predictions_score == 0:
                    true_negatives += 1
                else: # Background is unique file, with no annotations, so all predictions on that file are false positives
                    false_positives += current_predictions_score
            
            last_file = current_file

        except FileNotFoundError:
            print(f"Prediction file not found: {prediction_path}")

    # Calculate and display the metrics
    display_metrics(total_predictions, prediction_conf_score, total_predictions_score, total_gt, true_positives_detector, true_positives_classifier, correct_predictions, false_positives, false_negatives_detector, false_negatives_classifier, true_negatives)

## Métricas de BirdNET Base con la lista de especies de Doñana de BIRDeep

In [193]:
analyze_predictions_BirdNET("0_BirdNet_Base_AllTest_DonanaSpecies", 0.6, 0.2)


Total Predictions: 194
Total Predictions with Score >= 0.6: 27
Total GT: 469
Correct Predictions Detector: 35
Correct Predictions Detector + Classifier: 9


Accuracy: 0.11940298507462686
Precision: 1.0
Recall: 0.078125
F1-Score: 0.14492753623188406


Accuracy: 0.06772009029345373
Precision: 1.0
Recall: 0.02132701421800948
F1-Score: 0.04176334106728538


False Positives: 0


## Métricas de BirdNET Base con la lista de especies del customClassifier

In [194]:
analyze_predictions_BirdNET("1_BirdNet_Base_AllTest_ClassifierSpecies", 0.6, 0.2)


Total Predictions: 160
Total Predictions with Score >= 0.6: 39
Total GT: 469
Correct Predictions Detector: 48
Correct Predictions Detector + Classifier: 19


Accuracy: 0.14498933901918976
Precision: 0.9795918367346939
Recall: 0.10714285714285714
F1-Score: 0.19315895372233396


Accuracy: 0.08863636363636364
Precision: 0.95
Recall: 0.045346062052505964
F1-Score: 0.08656036446469248


False Positives: 1


In [195]:
analyze_predictions_BirdNET("1_BirdNet_Base_AllTest_ClassifierSpecies", 0.1, 0.2)


Total Predictions: 160
Total Predictions with Score >= 0.1: 160
Total GT: 469
Correct Predictions Detector: 119
Correct Predictions Detector + Classifier: 54


Accuracy: 0.2902542372881356
Precision: 0.952
Recall: 0.265625
F1-Score: 0.41535776614310643


Accuracy: 0.1769041769041769
Precision: 0.9
Recall: 0.1409921671018277
F1-Score: 0.24379232505643345


False Positives: 6


## Métricas de BirdNET Fine Tuned

In [196]:
analyze_predictions_BirdNET("2_BirdNet_FineTuning_AllTest", 0.6, 0.2)


Total Predictions: 527
Total Predictions with Score >= 0.6: 10
Total GT: 469
Correct Predictions Detector: 11
Correct Predictions Detector + Classifier: 5


Accuracy: 0.06823027718550106
Precision: 1.0
Recall: 0.024553571428571428
F1-Score: 0.04793028322440087


Accuracy: 0.056155507559395246
Precision: 1.0
Recall: 0.011312217194570135
F1-Score: 0.02237136465324385


False Positives: 0


In [197]:
analyze_predictions_BirdNET("2_BirdNet_FineTuning_AllTest", 0.1, 0.2)


Total Predictions: 527
Total Predictions with Score >= 0.1: 527
Total GT: 469
Correct Predictions Detector: 215
Correct Predictions Detector + Classifier: 85


Accuracy: 0.45708582834331335
Precision: 0.8464566929133859
Recall: 0.4799107142857143
F1-Score: 0.6125356125356126


Accuracy: 0.2668463611859838
Precision: 0.6854838709677419
Recall: 0.2672955974842767
F1-Score: 0.3846153846153846


False Positives: 39


Baja los scores de las clases con las que ha sido entrenado, teniendo que bajar la threshold mucho. Haciendo que haya muchos más Falsos Positivos (en los casos anteriores con 0.6 -> 0, 6... FP, ahora hay que bajar hasta 0.1 el accuracy es mejor pero tenemos muchos Falsos Positivos)

## Métricas de BirdNET customClassifier con los recortes de YOLOv8 para test

In [198]:
analyze_predictions_YOLO_BirdNET("3_BirdNET_FineTuning_DetectorTest", 0.2, 0.2)


Total Predictions: 79
Total Predictions with Score >= 0.2: 37
Total GT: 469
Correct Predictions Detector: 15
Correct Predictions Detector + Classifier: 7


Accuracy: 0.0767590618336887
Precision: 1.0
Recall: 0.033482142857142856
F1-Score: 0.06479481641468683


Accuracy: 0.06073752711496746
Precision: 1.0
Recall: 0.015909090909090907
F1-Score: 0.03131991051454139


False Positives: 0


In [199]:
# Hacer con las predicciones de yolo como detectorr

# Correr inference.py desde la VPN, intentar encontrar mejores valores de iou y conf y poner esos para las métricas aquí, luego coger esos valores y ponerlos

# predictionms de val de yolov8:
'''
[{"image_id": "AM4_20230531_110000", "category_id": 0, "bbox": [301.605, 0.0, 31.041, 459.45], "score": 0.5276}, {"image_id": "AM15_20230712_074000", "category_id": 0, "bbox": [61.011, 0.439, 175.454, 460.228], "score": 0.48407}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [57.914, 0.573, 30.844, 461.131], "score": 0.65271}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [231.294, 1.093, 31.147, 460.902], "score": 0.53149}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [620.696, 2.401, 34.881, 459.599], "score": 0.44922}, {"image_id": "AM8_20230304_093000", "category_id": 0, "bbox": [765.516, 0.0, 162.609, 460.58], "score": 0.63678}, 
'''

# Y si se hace predict sobre la carpeta test??


# Copilot:
'''
Ahoar quiero modificar estas funciones:

def group_consecutive_predictions_birdnet(predictions_df):
    grouped_predictions = []
    current_group = []
    last_end = None
    for _, row in predictions_df.iterrows():
        if current_group and (row['Start (s)'] != last_end or row['Scientific name'] != current_group[-1]['Scientific name']):
            # New group starts here
            grouped_predictions.append(current_group)
            current_group = []
        current_group.append(row)
        last_end = row['End (s)']
    if current_group:  # Add last group
        grouped_predictions.append(current_group)
    # Combine groups in unique predictions
    combined_predictions = []
    for group in grouped_predictions:
        combined_prediction = {
            'Start (s)': group[0]['Start (s)'],
            'End (s)': group[-1]['End (s)'],
            'Scientific name': group[0]['Scientific name'],
            'Confidence': max(item['Confidence'] for item in group)  # conf = max confidence in group
        }
        combined_predictions.append(combined_prediction)
    return combined_predictions

# Function to calculate the IoU
def calculate_iou(interval1, interval2):
    start_max = max(interval1[0], interval2[0])
    end_min = min(interval1[1], interval2[1])
    intersection = max(0, end_min - start_max)
    union = (interval1[1] - interval1[0]) + (interval2[1] - interval2[0]) - intersection
    return intersection / union if union != 0 else 0

# Function to check if a prediction is correct
def is_prediction_correct_detector(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    return iou >= iou_threshold

# Function to check if a prediction is correct
def is_prediction_correct(prediction, gt_annotation, iou_threshold):
    iou = calculate_iou((prediction['Start (s)'], prediction['End (s)']), (gt_annotation['start_time'], gt_annotation['end_time']))
    prediction_class = prediction['Scientific name'].lower()
    gt_class = gt_annotation['specie'].lower()
    scientific_name_matches = prediction_class == gt_class
    return iou >= iou_threshold and scientific_name_matches

    Y este codigo:

    # Variables for metrics
correct_predictions = 0
total_predictions = 0
total_predictions_score = 0
correct_predictions_detector = 0
iou_threshold = 0.4  # This value is editable
prediction_conf_score = 0.4  # This value is editable

# Process the predictions
for _, gt_annotation in gt_df.iterrows():
    prediction_path = PATH + f"Dataset/BirdNET_Predictions/{gt_annotation['path'].replace('.WAV', '.BirdNET.results.csv')}"
    try:
        predictions_df = pd.read_csv(prediction_path)
        grouped_predictions = group_consecutive_predictions_birdnet(predictions_df)  # Group predictions
        predictions_df = pd.DataFrame(grouped_predictions)  # Convert list dict to DataFrame

        for _, prediction in predictions_df.iterrows():
            total_predictions += 1
            if prediction['Confidence'] >= prediction_conf_score:
                total_predictions_score += 1
                if is_prediction_correct(prediction, gt_annotation, iou_threshold):
                    correct_predictions += 1
                if is_prediction_correct_detector(prediction, gt_annotation, iou_threshold):
                    correct_predictions_detector += 1
    except FileNotFoundError:
        print(f"Prediction file not found: {prediction_path}")

# Calculate and display the metrics
print("================== Metrics ==================\n")
print(f"Total Predictions: {total_predictions}")
print(f"Total Predictions with Score >= {prediction_conf_score}: {total_predictions_score}")
print(f"Total GT: {len(gt_df)}")
print(f"Correct Predictions Detector: {correct_predictions_detector}")
print(f"Correct Predictions Detector + Classifier: {correct_predictions}")

# Additional calculations for precision, recall, and F1-score
print("\n================== Detector Metrics ==================\n")
true_positives = correct_predictions_detector
false_positives = total_predictions_score - correct_predictions_detector
false_negatives = len(gt_df) - correct_predictions_detector
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

print("\n================== Detector + Classifier Metrics ==================\n")
true_positives = correct_predictions
false_positives = total_predictions_score - correct_predictions
false_negatives = len(gt_df) - correct_predictions
true_negatives = 0

# Calculate and display the metrics
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives) if true_positives + true_negatives + false_positives + false_negatives != 0 else 0
precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

Para que funcione sobre el fichero predictions.json con la estructura de datos:
[{"image_id": "AM4_20230531_110000", "category_id": 0, "bbox": [301.605, 0.0, 31.041, 459.45], "score": 0.5276}, {"image_id": "AM15_20230712_074000", "category_id": 0, "bbox": [61.011, 0.439, 175.454, 460.228], "score": 0.48407}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [57.914, 0.573, 30.844, 461.131], "score": 0.65271}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [231.294, 1.093, 31.147, 460.902], "score": 0.53149}, {"image_id": "AM15_20230330_070000", "category_id": 0, "bbox": [620.696, 2.401, 34.881, 459.599], "score": 0.44922}, {"image_id": "AM8_20230304_093000", "category_id": 0, "bbox": [765.516, 0.0, 162.609, 460.58], "score": 0.63678}, ...

En la que 
'''

