# DER calculation
This file is used to calculate Diarization Error Rate. It is calculated as follows:
1. For each speaker, we calculate the number of false alarms, missed detections and overlapped speech.
2. We sum up the number of false alarms, missed detections and overlapped speech.
3. We divide the sum by the total number of speaker speech segments.
4. The result is the DER.

The formula is as follows:
$$
DER = \frac{FA + MISS + OVER}{N_{spk}}
$$

Where:
- $FA$ is the number of false alarms
- $MISS$ is the number of missed detections
- $OVER$ is the number of overlapped speech
- $N_{spk}$ is the total number of speaker speech segments

The code below is used to calculate the DER.

In [22]:
import pandas as pd
from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate
import os
from tqdm import tqdm
import sys

# Paths to the different RTTM files
RTTM_TRUTH = "../Dataset/RTTMs/Test"
RTTM_ORACLE_VAD = "../Results/Oracle_vad/Test"
RTTM_ORACLE_DECODER = "../Results/Oracle_decoder/Test"


# Load the RTTM files
def load_rttm(filename):
    """Load RTTM file and convert it to pyannote.core.Annotation"""
    annotation = Annotation()
    with open(filename, "r") as file:
        for line in file:
            parts = line.strip().split()
            if len(parts) < 9:
                continue
            file_id, channel, start_time, duration, _, _, *speaker_parts = parts
            speaker = " ".join(speaker_parts)
            start_time = float(start_time)
            duration = float(duration)
            end_time = start_time + duration
            segment = Segment(start_time, end_time)
            annotation[segment] = speaker
    return annotation


def calculate_der_metrics(ground_truth_path, prediction_path, output_csv):
    """
    This function calculates DER metrics for diarization and saves them to a CSV file.

    Args:
        ground_truth_path: Path to the directory containing ground truth RTTM files.
        prediction_path: Path to the directory containing prediction RTTM files.
        output_csv: The name (including path) for the CSV file to store results.
    """

    # Initialize the diarization error rate metric
    der_metric = DiarizationErrorRate()

    # Store results
    results = []

    # Iterate over files in prediction directory
    for file_name in tqdm(os.listdir(prediction_path)):
        truth_file = os.path.join(ground_truth_path, file_name)
        prediction_file = os.path.join(prediction_path, file_name)

        if os.path.exists(truth_file) and os.path.exists(prediction_file):
            ground_truth = load_rttm(truth_file)
            prediction = load_rttm(prediction_file)

            # Calculate DER and components manually
            detailed = der_metric(ground_truth, prediction, detailed=True)
            print(detailed)

            der = detailed["diarization error rate"]

            # Assuming "confusion" represents combined missed speech and speaker error
            confusion_duration = detailed["confusion"]

            # Calculate False Alarm (FA) duration
            false_alarm = detailed["false alarm"]

            # Missed Speech (OVER) can be approximated by subtracting FA from confusion
            missed_speech = confusion_duration - false_alarm

            total_duration = detailed["total"]

            # Append results
            results.append(
                {
                    "File": file_name,
                    "DER": der,
                    "Miss Duration": missed_speech / total_duration,
                    "Speaker Error Duration": confusion_duration / total_duration
                    - missed_speech / total_duration,
                    "False Alarm Duration": false_alarm / total_duration,
                }
            )
        else:
            print(f"File {file_name} not found")
            sys.exit(1)  # Exit with an error if a file is missing

    # Convert results to DataFrame and save to CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv(output_csv, index=False)




  2%|▏         | 4/232 [00:00<00:09, 23.13it/s]

{'missed detection': 0.0, 'confusion': 32.65, 'false alarm': 59.300000000000004, 'correct': 2430.3499999999995, 'total': 2463.0, 'diarization error rate': 0.03733252131546894}
{'missed detection': 0.040000000000020464, 'confusion': 790.1100000000001, 'false alarm': 303.5400000000001, 'correct': 3838.48, 'total': 4628.629999999999, 'diarization error rate': 0.23628805931776795}
{'missed detection': 0.0, 'confusion': 15.319999999999993, 'false alarm': 817.0699999999998, 'correct': 3930.1600000000003, 'total': 3945.48, 'diarization error rate': 0.2109730628465991}
{'missed detection': 0.0, 'confusion': 14402.389999999996, 'false alarm': 12819.560000000003, 'correct': 36996.07000000002, 'total': 51398.45999999999, 'diarization error rate': 0.5296257903446913}
{'missed detection': 863.6400000000001, 'confusion': 769.55, 'false alarm': 0.11000000000001364, 'correct': 6591.9400000000005, 'total': 8225.13, 'diarization error rate': 0.19857436903732834}
{'missed detection': 1441.7600000000004, 

  3%|▎         | 7/232 [00:03<01:55,  1.95it/s]

{'missed detection': 832.28, 'confusion': 19332.67999999999, 'false alarm': 6709.069999999997, 'correct': 119470.10000000003, 'total': 139635.0599999999, 'diarization error rate': 0.19245904287934565}
{'missed detection': 2900.4700000000003, 'confusion': 228.2799999999999, 'false alarm': 0.0, 'correct': 12714.040000000005, 'total': 15842.790000000014, 'diarization error rate': 0.19748731126272565}


  3%|▎         | 8/232 [00:04<02:02,  1.83it/s]


KeyboardInterrupt: 