## imports and setting up the environment

In [24]:
# database related imports
from pyannote.database import registry, FileFinder

# training related imports
from pyannote.audio import Pipeline, Model
from pyannote.audio import Inference
from pyannote.audio.tasks import SpeakerDiarization, Segmentation

# metrics related imports
from pyannote.metrics.diarization import DiarizationErrorRate

import os

huggingface_token = os.getenv("HUGGINGFACE_TOKEN")


## Data preparation

In [28]:
registry.load_database("database.yml")

protocol = registry.get_protocol("BP.SpeakerDiarization.VlaamseAudio", {"audio":FileFinder()})

for file in protocol.train():
   print(file["uri"])
   assert "annotation" in file
   assert "annotated" in file


file1
file2
file3
file4




In [29]:
database = registry.get_database("BP")

database.get_tasks()

['SpeakerDiarization']

In [26]:
protocol

<pyannote.database.custom.BP__SpeakerDiarization__VlaamseAudio at 0x24f4e9f3590>

## pretrained pyannote pipeline

In [17]:
# get the model from the pipeline
pretrained_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=huggingface_token)


current error rate of the pipeline

In [20]:
metric = DiarizationErrorRate()

for file in protocol.test():
    print(file)
    file["pretrained pipeline"] = pretrained_pipeline(file)
    metric(file["annotation"], file["pretrained pipeline"], uem=file["annotated"])

print(f"Diarization error rate is {100 * abs(metric):.1f}% for the pretrained model")

<pyannote.database.protocol.protocol.ProtocolFile object at 0x0000024F5E1CA7E0>
<pyannote.database.protocol.protocol.ProtocolFile object at 0x0000024F4EA1F6E0>
<pyannote.database.protocol.protocol.ProtocolFile object at 0x0000024F7C24F9E0>
<pyannote.database.protocol.protocol.ProtocolFile object at 0x0000024F4E51E420>
Diarization error rate is 25.0% for the pretrained model


## train the model

In [None]:
pretrained_segm_model = pretrained_pipeline.segmentation_model
output_dir = "./models"

task = Segmentation(
    protocol,
    # pretrained_segm_model.specifications.duration,
    vad_loss="bce",
    loss="bce"
)

TypeError: __str__ returned non-string (type NoneType)