Задание для выполнения в классе:

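1. Определите паузы в файле cta0003.wav с помощью Praat. Используйте команду To TextGrid (silences).
2. Используя разметку на уровне .seg_R2 в качестве эталона, определите accuracy, HR1 (долю верно распознанных отсчётов речи) и HR0 (долю верно распознанных отсчётов паузы) с шагом в 10 мс.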

In [None]:
# Download the recording and its segmentation file; both are read from the
# current working directory by the cells below.
!wget https://pkholyavin.github.io/signal/cta0003.wav
!wget https://pkholyavin.github.io/signal/cta0003.seg_R2

In [None]:
# Install the package that provides the `parselmouth` module imported below.
!pip install praat-parselmouth

In [1]:
from itertools import product
# Level names are a letter followed by a digit ("G1", "B1", "R1", "Y1", "G2", ...);
# the digit varies slowest. Each level gets its own power-of-two code, assigned
# in that order starting from 1.
letters = "GBRY"
nums = "1234"
levels = [letter + digit for digit, letter in product(nums, letters)]
level_codes = [1 << position for position, _ in enumerate(levels)]
# Lookup tables in both directions between numeric codes and level names.
code_to_level = dict(zip(level_codes, levels))
level_to_code = dict(zip(levels, level_codes))


def detect_encoding(file_path):
    """Guess the text encoding of a file by trial decoding.

    The whole file is decoded as UTF-8 first. If that succeeds and the text
    starts with a byte order mark, ``"utf-8-sig"`` is reported so that the
    mark is stripped when the file is read again. If UTF-8 decoding fails,
    UTF-16 is tried; if that fails too, ``"cp1251"`` is returned as an
    unverified last-resort fallback.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        One of ``"utf-8"``, ``"utf-8-sig"``, ``"utf-16"`` or ``"cp1251"``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Every probe uses a context manager so the file handle is closed
    # right away, including when decoding raises.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        pass
    else:
        # "\ufeff" at the start is the byte order mark.
        return "utf-8-sig" if text.startswith("\ufeff") else "utf-8"

    try:
        with open(file_path, "r", encoding="utf-16") as f:
            f.read()
    except UnicodeError:
        return "cp1251"
    return "utf-16"


def read_seg(filename: str, encoding: str = "utf-8-sig") -> tuple[dict, list[dict]]:
    """Parse a .seg annotation file into its parameters and labels.

    The file is expected to contain a ``[PARAMETERS]`` section of
    ``KEY=integer`` lines followed by a ``[LABELS]`` section of
    ``position,level_code,name`` lines.

    Args:
        filename: Path of the .seg file.
        encoding: Text encoding used to read the file.

    Returns:
        A ``(params, labels)`` pair. ``params`` maps parameter names to
        integers. ``labels`` is a list of dicts with keys ``"position"``
        (the stored position divided by ``BYTE_PER_SAMPLE`` and
        ``N_CHANNEL``), ``"level"`` (level name looked up in
        ``code_to_level``) and ``"name"`` (the label text).

    Raises:
        ValueError: If a section header is missing, or a parameter line
            has no ``=`` or a non-integer value.
        KeyError: If a label uses an unknown level code, or the
            ``BYTE_PER_SAMPLE`` / ``N_CHANNEL`` parameters are absent
            while labels are present.
    """
    with open(filename, encoding=encoding) as f:
        lines = [line.strip() for line in f]

    # Locate the first content line of each section.
    header_start = lines.index("[PARAMETERS]") + 1
    data_start = lines.index("[LABELS]") + 1

    # Read the parameters; data_start - 1 is the "[LABELS]" line itself.
    params = {}
    for line in lines[header_start:data_start - 1]:
        if not line:
            # Tolerate blank lines inside the parameter section.
            continue
        # Split on the first "=" only, so the unpacking cannot fail on
        # a value that itself contains "=".
        key, value = line.split("=", maxsplit=1)
        params[key.strip()] = int(value)

    # Read the labels.
    labels = []
    for line in lines[data_start:]:
        # Fewer than two commas means this is not a label line, so the
        # label list has ended.
        if line.count(",") < 2:
            break
        # maxsplit keeps any commas inside the label name intact.
        pos, level, name = line.split(",", maxsplit=2)
        label = {
            "position": int(pos) // params["BYTE_PER_SAMPLE"] // params["N_CHANNEL"],
            "level": code_to_level[int(level)],
            "name": name
        }
        labels.append(label)
    return params, labels

In [2]:
# Load the manual segmentation: `params` holds the header values and `labels`
# the list of label dicts. The default "utf-8-sig" encoding is used here.
params, labels = read_seg("cta0003.seg_R2")

In [3]:
import parselmouth

In [6]:
# Run Praat's automatic silence detection on the recording.
# Per the Praat manual, the arguments of "To TextGrid (silences)" are, in order:
# minimum pitch (100 Hz), time step (0 = automatic), silence threshold (-25 dB),
# minimum silent interval duration (0.1 s), minimum sounding interval duration
# (0.1 s), and the labels given to silent and sounding intervals.
sound = parselmouth.Sound("cta0003.wav")
silences = parselmouth.praat.call(sound, "To TextGrid (silences)", 100, 0, -25, 0.1, 0.1, "silent", "sounding")

In [7]:
# Sanity check: look up which interval of tier 1 contains time 0 and print
# the label the detector assigned to it.
time = 0
num = parselmouth.praat.call(silences, "Get interval at time", 1, time)
label = parselmouth.praat.call(silences, "Get label of interval", 1, num)
print(label)

sounding


In [11]:
# Build a reference TextGrid from the manual labels, spanning the same time
# range as the automatic one. Each label starts an interval at its position;
# labels whose name starts with "p" (presumably pause marks -- confirm against
# the corpus conventions) get an empty text, all others keep their name.
ground_truth = parselmouth.TextGrid(start_time=0, end_time=silences.xmax, tier_names=["silences"])
# Index of the interval that the current label's text belongs to. It is
# advanced only when a boundary is actually inserted, so the texts stay
# aligned even if the first label does not sit at time 0.
# NOTE(review): assumes a fresh tier holds exactly one interval -- confirm.
interval_index = 1
for label in labels:
    # Label positions are in samples; convert them to seconds.
    time = label["position"] / params["SAMPLING_FREQ"]
    if time > 0:
        # Time 0 is already the start of the tier, so no boundary is needed there.
        parselmouth.praat.call(ground_truth, "Insert boundary", 1, time)
        interval_index += 1
    name = "" if label["name"].startswith("p") else label["name"]
    parselmouth.praat.call(ground_truth, "Set interval text", 1, interval_index, name)

In [16]:
# Compare the automatic and the reference segmentation frame by frame.
# A frame counts as class 1 when the reference label is non-empty and as
# class 0 when it is empty; a hit is a frame where the detector agrees
# ("sounding" for class 1, "silent" for class 0).
step = 0.01  # frame step in seconds (10 ms)
frame = 0
time = 0.0
num_total = 0
num_hr1 = 0
num_true1 = 0
num_true0 = 0
num_hr0 = 0
while time <= silences.xmax:
    num_total += 1
    num = parselmouth.praat.call(silences, "Get interval at time", 1, time)
    label = parselmouth.praat.call(silences, "Get label of interval", 1, num)
    num = parselmouth.praat.call(ground_truth, "Get interval at time", 1, time)
    true_label = parselmouth.praat.call(ground_truth, "Get label of interval", 1, num)
    if true_label:
        num_true1 += 1
        if label == "sounding":
            num_hr1 += 1
    else:
        num_true0 += 1
        if label == "silent":
            num_hr0 += 1
    # Derive the time from an integer frame counter instead of repeatedly
    # adding 0.01, so floating-point rounding error does not accumulate.
    frame += 1
    time = frame * step

In [18]:
# Report the hit rate for each class (hits divided by reference frames of that
# class) and the overall accuracy (all hits divided by all frames).
print(f"HR1 = {num_hr1 / num_true1}")
print(f"HR0 = {num_hr0 / num_true0}")
print(f"Accuracy = {(num_hr0 + num_hr1) / num_total}")

HR1 = 0.8894472361809045
HR0 = 1.0
Accuracy = 0.908256880733945
