In [1]:
import os, sys
from pathlib import Path
import pandas as pd

# Project root: when the kernel is started inside notebooks/, step up one
# level; otherwise the current working directory is taken to be the root.
if Path.cwd().name == "notebooks":
    ROOT = Path.cwd().resolve().parent
else:
    ROOT = Path.cwd()

# Input locations, both derived from the project root.
AUDIO_DIR = ROOT.joinpath("audios-wav")
DATASET_CSV = ROOT.joinpath("data", "cleaned_dataset.csv")

# Quick sanity report so a wrong working directory is visible immediately.
print("ROOT:", ROOT)
print("AUDIO_DIR exists:", AUDIO_DIR.exists())
print("DATASET_CSV exists:", DATASET_CSV.exists())


ROOT: /Users/s.n.h/Voice-AI/Audio-AI
AUDIO_DIR exists: True
DATASET_CSV exists: True


In [2]:
# Load the cleaned dataset and show its size plus the first five rows.
df = pd.read_csv(DATASET_CSV)
print("Total rows:", df.shape[0])
display(df.head(5))


Total rows: 60


Unnamed: 0,audio,speaker_count,language
0,../audios-wav/audios-ar/3_speakers_ar/three_sp...,3 Speakers,ar
1,../audios-wav/audios-en/2_speakers_en/two_spea...,2 Speakers,en
2,../audios-wav/audios-ar/2_speakers_ar/two_spea...,2 Speakers,ar
3,../audios-wav/audios-ar/3_speakers_ar/three_sp...,3 Speakers,ar
4,../audios-wav/audios-en/2_speakers_en/two_spea...,2 Speakers,en


In [8]:
from dotenv import load_dotenv
load_dotenv()

from pathlib import Path
import os
from pyannote.audio import Pipeline as PNA_Pipeline

# Smoke test: diarize the first dataset row and compare the predicted number
# of speakers with the label before committing to the full batch run.
row = df.iloc[0]

# The dataset stores paths of the form "../audios-wav/...", i.e. relative to
# the notebooks/ directory. Anchor them there instead of on the current
# working directory so the cell also works when the kernel is started from the
# project root (a case the ROOT setup explicitly supports). Absolute paths in
# the CSV pass through unchanged.
audio_path = (ROOT / "notebooks" / row["audio"]).resolve()
if not audio_path.is_file():
    # Fail before the (slow) model download/load if the input is missing.
    raise FileNotFoundError(f"Audio file not found: {audio_path}")
print(f"Testing: {audio_path.name} | language={row['language']} | true={row['speaker_count']}")

# Same token validation as the guarded pipeline cell: a missing token should
# fail here with a clear message rather than somewhere inside the model hub.
token = os.environ.get("HUGGINGFACE_TOKEN")
if not token:
    raise RuntimeError("HUGGINGFACE_TOKEN not found. Put it in .env and rerun load_dotenv().")

pipe = PNA_Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=token
)
# from_pretrained may hand back None instead of raising (e.g. a gated model
# whose user conditions were not accepted); surface that explicitly instead of
# failing later with "'NoneType' object is not callable".
if pipe is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'. Check that the "
        "token is valid and that the model's user conditions were accepted."
    )

diarization = pipe(str(audio_path))

# Each track carries a speaker label; the number of distinct labels is the
# predicted speaker count.
speakers = {spk for _, _, spk in diarization.itertracks(yield_label=True)}
print("Predicted speaker count:", len(speakers))


Testing: three_speakers7_ar.wav | language=ar | true=3 Speakers


  std = sequences.std(dim=-1, correction=1)


Predicted speaker count: 3


In [5]:
from dotenv import load_dotenv
load_dotenv()

import os
from pyannote.audio import Pipeline as PNA_Pipeline

# Build the shared diarization pipeline only when no usable `pipe` exists yet
# (name undefined, or defined but None); otherwise keep the loaded one.
if globals().get("pipe") is None:
    token = os.environ.get("HUGGINGFACE_TOKEN")
    if not token:
        # Without a token the pretrained pipeline cannot be requested.
        raise RuntimeError("HUGGINGFACE_TOKEN not found. Put it in .env and rerun load_dotenv().")
    pipe = PNA_Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=token)

print("PyAnnote pipeline ready:", type(pipe))


  from .autonotebook import tqdm as notebook_tqdm


PyAnnote pipeline ready: <class 'pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization'>


In [6]:
from pathlib import Path
import time, re
import numpy as np
import pandas as pd

# Results go to <project>/results: one level up when running from notebooks/,
# otherwise directly under the current working directory.
if Path.cwd().name == "notebooks":
    RESULTS_DIR = Path.cwd().parents[0] / "results"
else:
    RESULTS_DIR = Path.cwd() / "results"

# Create the directory (and any missing parents); a no-op if it already exists.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
OUT_CSV = RESULTS_DIR.joinpath("pyannote_predictions.csv")

# The batch run below relies on the pipeline object created by an earlier cell.
assert "pipe" in globals(), "Run the PyAnnote smoke-test cell first to create `pipe`."

def true_count(s):
    """Parse the ground-truth speaker count from a label such as '3 Speakers'.

    The argument is converted to a string and the first run of digits found in
    it is returned as an int. When the text contains no digit at all, NaN is
    returned instead.
    """
    match = re.search(r"\d+", str(s))
    if match is None:
        return np.nan
    return int(match.group(0))

# Batch run: diarize every dataset row with the shared `pipe` and collect the
# predicted speaker count, the parsed true count and the wall-clock runtime.
#
# - A file that fails is kept in the table with pred_speakers = NaN, and the
#   reason is printed instead of being silently discarded.
# - If the run is interrupted (or crashes), the rows finished so far are saved
#   to a separate "*.partial.csv" next to OUT_CSV, so a long run is not lost
#   and an earlier complete OUT_CSV is not overwritten by partial data. The
#   interrupt/exception still propagates after saving.
rows = []
failures = 0
completed = False
total = len(df)

try:
    for n, (_, r) in enumerate(df.iterrows(), start=1):
        # Dataset paths look like "../audios-wav/...", i.e. relative to the
        # notebooks/ directory; anchor them there so the result does not
        # depend on the current working directory. Absolute paths are kept.
        audio_path = (ROOT / "notebooks" / r["audio"]).resolve()

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        t0 = time.perf_counter()
        try:
            dia = pipe(str(audio_path))
            # Number of distinct speaker labels across all tracks.
            pred = len({spk for _, _, spk in dia.itertracks(yield_label=True)})
        except Exception as e:
            # Best effort: keep going, but make the failure and cause visible.
            pred = np.nan
            failures += 1
            print(f"[{n}/{total}] FAILED {audio_path.name}: {type(e).__name__}: {e}")
        dt = time.perf_counter() - t0

        rows.append({
            "audio": str(audio_path),
            "language": r["language"],               # untouched
            "true_speakers": true_count(r["speaker_count"]),
            "pred_speakers": pred,
            "runtime_sec": dt,
        })
    completed = True
finally:
    # Explicit columns keep the header stable even when no row was produced.
    pred_df = pd.DataFrame(
        rows,
        columns=["audio", "language", "true_speakers", "pred_speakers", "runtime_sec"],
    )
    if completed:
        pred_df.to_csv(OUT_CSV, index=False)
        print(f"Saved: {OUT_CSV}")
    elif rows:
        partial_csv = OUT_CSV.with_suffix(".partial.csv")
        pred_df.to_csv(partial_csv, index=False)
        print(f"Run stopped after {len(rows)}/{total} files; partial results saved: {partial_csv}")

print(f"Total rows: {len(pred_df)} | Failures: {failures}")
display(pred_df.head(10))


KeyboardInterrupt: 