In [2]:
import os
from huggingface_hub import login

# To save your Huggingface token, run your terminal:
# echo 'export HF_TOKEN="hf_*******************************"' >> $HOME/.bashrc

# Otherwise, the login function will prompt a login interface
login(token=os.environ.get("HF_TOKEN"))

# FROM https://github.com/yinruiqing/pyannote-whisper
from pyannote.audio import Pipeline
import torch
import utils # CREDIT: https://github.com/yinruiqing/pyannote-whisper
import whisper
from IPython.display import clear_output

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
%%time

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")

# Initialize Pyannote pipeline
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1").to(device)

# Load audio file
audio_file = "241118_1543.wav"

model = whisper.load_model("base.en")
asr_result = model.transcribe(audio_file)
diarization_result = pipeline(audio_file)
final_result = utils.diarize_text(asr_result, diarization_result)

clear_output()
for seg, spk, sent in final_result:
    line = f'{seg.start:.2f} {seg.end:.2f} {spk} {sent}'
    print(line)

0.00 14.50 SPEAKER_00  So welcome to this interview today. I'm sitting here with Natalie and we're going to have just a little interview with two questions. My name is Lars and I'm handing over to my interviewee to introduce yourself.
14.50 21.00 SPEAKER_01  Okay, thank you. My name is Natalie. I'm an employee at GAG and we are testing this new device.
21.00 29.00 SPEAKER_00  Okay, thank you. My first question to you is when is your next field trip?
29.00 34.00 SPEAKER_01  That is a great question Lars. I am not sure. We are hoping for me.
34.00 39.00 SPEAKER_00  Can you open for me? When was your last field trip?
39.00 41.00 SPEAKER_01  I last went in July.
41.00 44.00 SPEAKER_00  Okay, thank you very much for the interview.
CPU times: user 15.3 s, sys: 2.48 s, total: 17.8 s
Wall time: 9.11 s
