In [6]:
# Merge the diarization with the transcription: label each transcribed segment
# with the speakers whose diarization turns overlap it
from pathlib import Path


def process_diarization(dia: list[str]) -> list[tuple[int, int, str]]:
    """Parse raw diarization lines into ``(start, stop, speaker)`` tuples.

    Each line has the ``ms`` suffix and the ``start=``, ``stop=`` and
    ``speaker_SPEAKER_`` markers removed, and is then split on whitespace
    into three fields. Blank lines (for example a trailing empty line at
    the end of the file) are skipped instead of failing on unpacking.

    Args:
        dia: Lines read from the diarization file.

    Returns:
        One ``(start, stop, speaker)`` tuple per non-blank line, with both
        bounds converted to ``int`` and the speaker label kept as a string.

    Raises:
        ValueError: If a non-blank line does not yield exactly three fields
            or one of the bounds is not a valid integer.
    """
    segments = []
    for line in dia:
        # Whitespace-only lines carry no segment; unpacking them would raise.
        if not line.strip():
            continue
        start, stop, speaker = (
            line.replace("ms", "")
            .replace("start=", "")
            .replace("stop=", "")
            .replace("speaker_SPEAKER_", "")
            .strip()
            .split()
        )
        segments.append((int(start), int(stop), speaker))
    return segments


def process_transcription(trans: list[str]):
    """Parse tab-separated transcription rows into ``(start, stop, text)``.

    Every row is stripped of surrounding whitespace and split on tabs.
    Rows that do not produce exactly three fields are dropped; for the
    rest, the first two fields are converted to ``int`` and the third is
    kept as the text.
    """
    parsed = []
    for raw in trans:
        fields = raw.strip().split("\t")
        if len(fields) != 3:
            # Malformed or empty-text row: ignore it.
            continue
        begin, end, text = fields
        parsed.append((int(begin), int(end), text))
    return parsed


def intersection_1d(start_a: int, stop_a: int, start_b: int, stop_b: int) -> int:
    """Return the length of the overlap of two 1-D intervals, or 0 if none."""
    earliest_stop = min(stop_a, stop_b)
    latest_start = max(start_a, start_b)
    overlap = earliest_stop - latest_start
    # A non-positive difference means the intervals only touch or are disjoint.
    return overlap if overlap > 0 else 0


def union_1d(
    start_a: int, stop_a: int, start_b: int, stop_b: int, intersection: int
) -> int:
    """Return the combined length of two 1-D intervals.

    The result is the sum of both interval lengths minus ``intersection``,
    which the caller supplies (the overlap is not recomputed here).
    """
    length_a = stop_a - start_a
    length_b = stop_b - start_b
    return length_a + length_b - intersection


def iou_1d(start_a: int, stop_a: int, start_b: int, stop_b: int) -> float:
    """Return the intersection-over-union of two 1-D intervals.

    Args:
        start_a: Start of the first interval.
        stop_a: End of the first interval.
        start_b: Start of the second interval.
        stop_b: End of the second interval.

    Returns:
        ``intersection / union`` as computed by ``intersection_1d`` and
        ``union_1d``, or ``0.0`` when the union is not positive (for
        example when both intervals have zero length), which would
        otherwise raise ``ZeroDivisionError``.
    """
    intersection = intersection_1d(start_a, stop_a, start_b, stop_b)
    union = union_1d(start_a, stop_a, start_b, stop_b, intersection)
    # Degenerate intervals have nothing to overlap; avoid dividing by zero.
    if union <= 0:
        return 0.0
    return intersection / union


# Directory holding the diarization output and the transcription TSV; the TSV
# file is named after the directory's stem.
root = Path("transcricoes/rpguaxa/o_corvo_rpguaxa_02")

diarization_path = root / "diarization.txt"
transcription_path = root / (root.stem + ".tsv")

# Both files are read as UTF-8 explicitly so that decoding does not depend on
# the platform's default locale encoding.
with open(diarization_path, encoding="utf-8") as f:
    diarization = process_diarization(f.readlines())

with open(transcription_path, encoding="utf-8") as f:
    # The first line of the TSV is skipped (presumably a header row).
    transcription = process_transcription(f.readlines()[1:])

# For every transcribed segment, collect the speakers whose diarization
# segments overlap it by a positive amount, then print them with the segment.
for start_t, stop_t, text in transcription:
    speakers = set()
    for start_d, stop_d, speaker in diarization:
        if intersection_1d(start_t, stop_t, start_d, stop_d) > 0:
            speakers.add(speaker)

    print(speakers, start_t, stop_t, text)