In [1]:
import pandas as pd

# -----------------------------
# Load and preprocess files, grouped by audio_name
# -----------------------------
def load_and_group(file_path):
    df = pd.read_csv(file_path)
    grouped = {}
    for audio_name, group in df.groupby("audio_name"):
        intervals = [(row["start"], row["end"], row["label"]) for _, row in group.iterrows()]
        max_end = max(row["end"] for _, row in group.iterrows())
        grouped[audio_name] = (intervals, max_end)
    return grouped

In [None]:
# -----------------------------
# Find matching time between intervals of one audio
# -----------------------------
def find_matching_times(file1_intervals, file2_intervals):
    non_speech_time = 0.0  # Both labels = 0
    language_time = 0.0    # Both labels = 1
    non_matching_time = 0.0
    i = j = 0
    
    while i < len(file1_intervals) and j < len(file2_intervals):
        # Current intervals
        start1, end1, label1 = file1_intervals[i]
        start2, end2, label2 = file2_intervals[j]
        
        # Find overlapping interval
        overlap_start = max(start1, start2)
        overlap_end = min(end1, end2)
        
        if overlap_start < overlap_end:  # If there is an overlap
            if label1 == label2:
                if label1 == 0:  # NON_SPEECH match
                    non_speech_time += overlap_end - overlap_start
                else:            # Language match
                    language_time += overlap_end - overlap_start
            else:
                non_matching_time+=overlap_end - overlap_start
            # Move the pointer that ends first
            if end1 < end2:
                i += 1
            else:
                j += 1
        else:
            # No overlap, move the pointer that starts first
            if start1 < start2:
                i += 1
            else:
                j += 1
                
    return non_speech_time, language_time



In [None]:
# -----------------------------
# Global matching across all audios
# -----------------------------
def calculate_global_match(true_grouped, pred_grouped):
    total_non_speech = 0
    total_language = 0
    total_duration = 0

    for audio_name in true_grouped:
        if audio_name not in pred_grouped:
            print(f"⚠️ Skipping {audio_name}: not found in predictions.")
            continue
        
        true_intervals, true_duration = true_grouped[audio_name]
        pred_intervals, pred_duration = pred_grouped[audio_name]
        non_speech, language = find_matching_times(true_intervals, pred_intervals)
        
        total_non_speech += non_speech
        total_language += language
        total_duration += max(true_duration, pred_duration)

    return total_non_speech, total_language, total_duration



In [4]:
# -----------------------------
# File paths
# -----------------------------
true_file = "/home/teaching/Desktop/priyam/labels/TrueLabelUp.csv"
pred_file = "/home/teaching/Desktop/priyam/labels/vadPred.csv"



In [5]:
# -----------------------------
# Load and calculate
# -----------------------------
true_grouped = load_and_group(true_file)
pred_grouped = load_and_group(pred_file)



In [9]:
pred_grouped

{'TTS_P10040TT_VCST_ECxxx_01_AO_35259847_v001_R004_CRR_MERLIon-CCS.wav': ([(0.0,
    2060.0,
    0),
   (2060.0, 2930.0, 1),
   (2930.0, 4870.0, 0),
   (4870.0, 5230.0, 1),
   (5230.0, 5890.0, 0),
   (5890.0, 11340.0, 1),
   (11340.0, 12300.0, 0),
   (12300.0, 13530.0, 1),
   (13530.0, 14260.0, 0),
   (14260.0, 15250.0, 1),
   (15250.0, 17430.0, 0),
   (17430.0, 18540.0, 1),
   (18540.0, 18800.0, 0),
   (18800.0, 20600.0, 1),
   (20600.0, 20970.0, 0),
   (20970.0, 22040.0, 1),
   (22040.0, 30070.0, 0),
   (30070.0, 31500.0, 1),
   (31500.0, 32630.000000000004, 0),
   (32630.000000000004, 33780.0, 1),
   (33780.0, 34420.0, 0),
   (34420.0, 34880.0, 1),
   (34880.0, 36280.0, 0),
   (36280.0, 37850.0, 1),
   (37850.0, 38050.0, 0),
   (38050.0, 38250.0, 1),
   (38250.0, 38730.0, 0),
   (38730.0, 39210.0, 1),
   (39210.0, 39720.0, 0),
   (39720.0, 40160.0, 1),
   (40160.0, 40500.0, 0),
   (40500.0, 41160.0, 1),
   (41160.0, 41320.0, 0),
   (41320.0, 45040.0, 1),
   (45040.0, 46300.0, 0),
  

In [10]:
non_speech_match, language_match, total_duration = calculate_global_match(true_grouped, pred_grouped)



In [11]:
# -----------------------------
# Report
# -----------------------------
print("=== Aggregated Results Across All Files ===")
print(f"NON_SPEECH matching time: {non_speech_match:.2f} ms")
print(f"Language matching time: {language_match:.2f} ms")
print(f"Combined matching time: {non_speech_match + language_match:.2f} ms")
print(f"\nMatching ratio (of total duration): {100*(non_speech_match + language_match)/total_duration:.1f}%")
print(f"LDER: {100 - 100*(non_speech_match + language_match)/total_duration:.3f}%")


=== Aggregated Results Across All Files ===
NON_SPEECH matching time: 21838916.00 ms
Language matching time: 48074880.00 ms
Combined matching time: 69913796.00 ms

Matching ratio (of total duration): 67.9%
LDER: 32.067%
