In [None]:
# 🧱 BLOCK 1: Setup + Google Drive Connection
# Run this block first in Google Colab before doing anything else.

# ✅ Install dependencies
# Includes:
# - yt-dlp: for downloading audio from YouTube
# - pydub + ffmpeg: for audio trimming and conversion
# - pyannote.audio: for speaker diarization and overlap detection (takes ~2–3 mins)

!pip install --quiet --upgrade pip
!pip install --quiet librosa ffmpeg pydub yt-dlp
!pip install --quiet git+https://github.com/pyannote/pyannote-audio.git
!apt-get -qq install ffmpeg

# ✅ Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ✅ Standard Libraries
import os
import sys
import tempfile
import subprocess
import logging
import traceback
from pathlib import Path

# ✅ Audio / Diarization Libraries
from pyannote.core import Segment
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

# ✅ Google Drive Output Folder
print("📂 Mounting Google Drive...")
drive_export_path = Path("/content/drive/MyDrive/YouTubeAudio")
drive_export_path.mkdir(parents=True, exist_ok=True)

if drive_export_path.exists():
    print("✅ Google Drive mounted successfully!")
    print(f"📁 Folder for audio output is ready: {drive_export_path}\n")
else:
    print("❌ ERROR: Failed to access or create the export folder in Google Drive.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pyannote.audio (setup.py) ... [?25l[?25hdone
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l[?25hdone
  Building wheel for docopt (setup.py) ... [?25l[?25hdone
  Building wheel for julius (setup.py) ... [?25l[?25hdone
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
📂 Mounting Google Drive...
✅ Goo

In [None]:
# 🧱 BLOCK 2: Hugging Face Token Setup + Logger
# Prompts the user to enter their Hugging Face access token.
# Also sets up structured logging across the notebook.

from getpass import getpass

# 🔐 Prompt for token
HUGGINGFACE_TOKEN = getpass("🔐 Enter your Hugging Face token:")

# 🪵 Logger setup
def setup_logger(log_level=logging.INFO, log_to_file=False, log_file_path="diarization_log.txt"):
    """
    Sets up a global logger for the notebook.

    Args:
        log_level (int): Logging level, e.g., logging.INFO or logging.DEBUG.
        log_to_file (bool): Whether to also log to a file.
        log_file_path (str): Path to the log file (if log_to_file is True).

    Returns:
        Logger object.
    """
    logger = logging.getLogger("diarization_logger")
    logger.setLevel(log_level)

    # Avoid duplicate handlers in Colab reruns
    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        if log_to_file:
            file_handler = logging.FileHandler(log_file_path)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    return logger

# 🎯 Initialize logger
logger = setup_logger()
logger.info("Logger initialized and ready.")

# ✅ Confirm token was entered
if not HUGGINGFACE_TOKEN:
    raise ValueError("❌ Hugging Face token not provided. Please run this cell again and enter it.")


🔐 Enter your Hugging Face token:··········
2025-04-05 21:10:00,166 - INFO - Logger initialized and ready.


INFO:diarization_logger:Logger initialized and ready.


In [None]:
# 🧱 BLOCK 3: Logging Setup
# Creates a global logger object that prints timestamped logs to the console.
# Also optionally supports logging to a file for later review.
# Replaces print() with structured logging via logger.info(), logger.error(), etc.



from pyannote.audio import Pipeline

# 🧠 Load diarization and overlap detection pipelines using Hugging Face token
# Make sure HUGGINGFACE_TOKEN is already defined in an earlier block

logger.info("🔐 Loading diarization and overlap detection models...")
try:
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HUGGINGFACE_TOKEN)
    osd_pipeline = Pipeline.from_pretrained("pyannote/overlapped-speech-detection", use_auth_token=HUGGINGFACE_TOKEN)
    logger.info("✅ Diarization models loaded successfully.")
except Exception as e:
    logger.error("❌ Failed to load pyannote pipelines", exc_info=True)


2025-04-05 21:10:28,685 - INFO - 🔐 Loading diarization and overlap detection models...


INFO:diarization_logger:🔐 Loading diarization and overlap detection models...


config.yaml:   0%|          | 0.00/500 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/318 [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.


hyperparams.yaml:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

  wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)


embedding_model.ckpt:   0%|          | 0.00/83.3M [00:00<?, ?B/s]

mean_var_norm_emb.ckpt:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

classifier.ckpt:   0%|          | 0.00/5.53M [00:00<?, ?B/s]

label_encoder.txt:   0%|          | 0.00/129k [00:00<?, ?B/s]


Could not download 'pyannote/overlapped-speech-detection' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/overlapped-speech-detection',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/overlapped-speech-detection to accept the user conditions.
2025-04-05 21:10:34,192 - INFO - ✅ Diarization models loaded successfully.


INFO:diarization_logger:✅ Diarization models loaded successfully.


In [None]:
# 🧱 BLOCK 4: Core Utility Functions
# Includes helper functions used throughout the app:
# - download_audio(): Downloads the best audio stream from a YouTube URL.
# - convert_audio(): Converts .webm to 48kHz mono .wav using ffmpeg.
# - trim_and_normalize(): Removes leading/trailing silence and normalizes volume.
# - get_youtube_title(): Gets a clean title from the video for use in filenames.
# - chunk_and_export(): Splits audio into chunks and uploads to Google Drive.

from pydub.silence import detect_nonsilent  # make sure this is in your imports in Block 1

def download_audio(url: str, output_path: str):
    """
    Download the best audio stream from a YouTube URL using yt-dlp.
    """
    logger.info(f"⬇️ Starting download for: {url}")
    try:
        subprocess.run(
            ["yt-dlp", "-f", "bestaudio", "-o", output_path, url],
            check=True
        )
        logger.info(f"✅ Audio downloaded to: {output_path}")
    except subprocess.CalledProcessError:
        logger.error(f"❌ yt-dlp failed to download: {url}", exc_info=True)
        raise

def convert_audio(input_path: str, output_path: str):
    """
    Convert downloaded .webm audio to 48kHz mono .wav format using ffmpeg.
    """
    logger.info(f"🎧 Converting audio: {input_path} → {output_path}")
    try:
        subprocess.run(
            [
                "ffmpeg", "-i", input_path,
                "-ar", "48000", "-ac", "1",
                output_path
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True
        )
        logger.info("✅ Conversion complete")
    except subprocess.CalledProcessError:
        logger.error(f"❌ ffmpeg conversion failed for: {input_path}", exc_info=True)
        raise

def trim_and_normalize(wav_path: str):
    """
    Remove leading/trailing silence and normalize volume.

    Args:
        wav_path (str): Path to WAV file to trim and normalize.

    Side Effects:
        Overwrites the original WAV file.
    """
    logger.info(f"✨ Trimming and normalizing audio: {wav_path}")
    try:
        audio = AudioSegment.from_wav(wav_path)

        # Detect nonsilent regions
        nonsilent_ranges = detect_nonsilent(audio, min_silence_len=500, silence_thresh=-40)

        if not nonsilent_ranges:
            logger.warning("⚠️ No nonsilent audio found. Skipping trimming.")
            return

        # Trim to first-to-last nonsilent part
        start_trim = nonsilent_ranges[0][0]
        end_trim = nonsilent_ranges[-1][1]
        trimmed_audio = audio[start_trim:end_trim]

        # Normalize volume
        change_in_dBFS = -trimmed_audio.max_dBFS
        normalized_audio = trimmed_audio.apply_gain(change_in_dBFS)

        normalized_audio.export(wav_path, format="wav")
        logger.info(f"✅ Audio trimmed and normalized: {wav_path}")

    except Exception:
        logger.error(f"❌ Error while trimming/normalizing: {wav_path}", exc_info=True)
        raise

def get_youtube_title(url: str) -> str:
    """
    Extract video title using yt-dlp for cleaner filenames.
    """
    logger.info(f"🔍 Getting title for: {url}")
    try:
        result = subprocess.run(
            ["yt-dlp", "--get-title", url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        title = result.stdout.strip().replace(" ", "_")
        logger.info(f"✅ Title extracted: {title}")
        return title
    except subprocess.CalledProcessError:
        logger.warning("⚠️ Could not extract title, using fallback.")
        return None

def chunk_and_export(wav_path: str, speaker_label: str = "unknown"):
    """
    Chunk audio file into smaller segments (~30 sec) and export to Google Drive.
    """
    logger.info(f"📤 Exporting chunk for speaker: {speaker_label}")
    try:
        base_output_path = "/content/drive/MyDrive/YouTubeAudio"
        os.makedirs(base_output_path, exist_ok=True)

        audio = AudioSegment.from_wav(wav_path)
        filename = Path(wav_path).stem
        output_path = f"{base_output_path}/{filename}_{speaker_label}.wav"
        audio.export(output_path, format="wav")

        logger.info(f"✅ Uploaded chunk to Google Drive: {output_path}")
    except Exception:
        logger.error(f"❌ Failed to export chunk: {wav_path}", exc_info=True)
        raise


In [None]:
# 🧱 BLOCK 4.5: Load Diarization Pipelines
# Loads speaker diarization and (optionally) overlapped speech detection models from Hugging Face.
# If the OSD model fails to load, diarization will still run without overlap filtering.

from pyannote.audio import Pipeline

logger.info("🔐 Loading diarization and overlap detection models...")

try:
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=HUGGINGFACE_TOKEN
    )
    logger.info("✅ Diarization model loaded.")

    try:
        osd_pipeline = Pipeline.from_pretrained(
            "pyannote/overlapped-speech-detection",
            use_auth_token=HUGGINGFACE_TOKEN
        )
        logger.info("✅ Overlap detection model loaded.")
    except Exception:
        osd_pipeline = None
        logger.warning("⚠️ OSD model not loaded. Overlapping speech will NOT be filtered.")

except Exception:
    logger.error("❌ Failed to load diarization pipeline — cannot continue.", exc_info=True)
    raise  # hard fail if diarization model doesn't load


2025-04-05 21:17:58,112 - INFO - 🔐 Loading diarization and overlap detection models...


INFO:diarization_logger:🔐 Loading diarization and overlap detection models...
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.
2025-04-05 21:17:59,630 - INFO - ✅ Diarization model loaded.


INFO:diarization_logger:✅ Diarization model loaded.



Could not download 'pyannote/overlapped-speech-detection' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/overlapped-speech-detection',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/overlapped-speech-detection to accept the user conditions.
2025-04-05 21:17:59,713 - INFO - ✅ Overlap detection model loaded.


INFO:diarization_logger:✅ Overlap detection model loaded.


In [None]:
# 🧱 BLOCK 5: Main Processing Engine — YouTube to Clean Speaker Segments
# Runs the full pipeline: download → convert → trim → diarize (+ optional OSD) → export

# 🎯 CONFIGURATION
MIDDLE_SEGMENT_DURATION = 180  # ⏱️ Duration (in seconds) to extract from middle of audio

# 🎯 URL(s) to process
YOUTUBE_URLS = [
    "https://www.youtube.com/watch?v=gu-GvzSvykY",  # 👈👈👈 Add or replace YouTube links here
]

# 🚀 Main Processing Loop
for url_index, url in enumerate(YOUTUBE_URLS):
    logger.info(f"\n📥 Processing URL {url_index+1}/{len(YOUTUBE_URLS)}")
    title = get_youtube_title(url)
    if not title:
        logger.warning("⚠️ Could not extract title from YouTube URL.")
        continue

    webm_path = f"audio_{url_index}.webm"
    wav_path = f"audio_{url_index}.wav"

    logger.info(f"📥 Downloading best audio from: {url}")
    try:
        download_audio(url, webm_path)
        logger.info(f"✅ Audio downloaded: {webm_path}")
    except Exception:
        logger.error(f"❌ Failed to download audio from: {url}", exc_info=True)
        continue

    logger.info(f"🎧 Converting {webm_path} to 48000Hz WAV")
    convert_audio(webm_path, wav_path)

    logger.info("✨ Trimming silence and normalizing volume")
    trim_and_normalize(wav_path)

    logger.info("🧠 Running diarization and saving speaker-separated files...")
    try:
        diarization = diarization_pipeline(wav_path)

        # Handle overlap detection (optional)
        if osd_pipeline:
            osd = osd_pipeline(wav_path)
            osd_timeline = osd.get_timeline()
            logger.info("✅ Overlap detection applied.")
        else:
            osd_timeline = []
            logger.info("⚠️ No overlap detection — using all segments.")

        # 🧠 Filter out overlapping speech regions
        diarized_segments = []
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            if any(turn.overlaps(overlap) for overlap in osd_timeline):
                continue
            if float(turn.end - turn.start) < 3.0:
                continue
            diarized_segments.append((turn, speaker))

        # 🧠 Extract only the middle 3 minutes of the audio
        audio = AudioSegment.from_wav(wav_path)
        total_duration = len(audio) / 1000  # ms → seconds
        middle_start = max(0, (total_duration - MIDDLE_SEGMENT_DURATION) / 2)
        middle_end = middle_start + MIDDLE_SEGMENT_DURATION

        for turn, speaker in diarized_segments:
            try:
                turn_start = float(turn.start)
                turn_end = float(turn.end)
            except Exception:
                logger.warning("⚠️ Skipping segment due to cast error", exc_info=True)
                continue

            if turn_end <= middle_start or turn_start >= middle_end:
                continue

            start_ms = max(turn_start, middle_start) * 1000
            end_ms = min(turn_end, middle_end) * 1000
            segment_audio = audio[int(start_ms):int(end_ms)]

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
                segment_audio.export(tmpfile.name, format="wav")
                chunk_and_export(tmpfile.name, speaker)
                os.unlink(tmpfile.name)

    except Exception:
        logger.error("❌ Diarization failed. Proceeding with full audio export.", exc_info=True)
        chunk_and_export(wav_path)

logger.info("✅ All YouTube URLs have been processed!")


2025-04-05 21:20:03,351 - INFO - 
📥 Processing URL 1/1


INFO:diarization_logger:
📥 Processing URL 1/1


2025-04-05 21:20:03,352 - INFO - 🔍 Getting title for: https://www.youtube.com/watch?v=gu-GvzSvykY


INFO:diarization_logger:🔍 Getting title for: https://www.youtube.com/watch?v=gu-GvzSvykY


2025-04-05 21:20:06,980 - INFO - ✅ Title extracted: INSIDE_|__Dilian_Francisca_Toro_Torres_Gobernadora_del_Valle_del_Cauca.


INFO:diarization_logger:✅ Title extracted: INSIDE_|__Dilian_Francisca_Toro_Torres_Gobernadora_del_Valle_del_Cauca.


2025-04-05 21:20:06,981 - INFO - 📥 Downloading best audio from: https://www.youtube.com/watch?v=gu-GvzSvykY


INFO:diarization_logger:📥 Downloading best audio from: https://www.youtube.com/watch?v=gu-GvzSvykY


2025-04-05 21:20:06,982 - INFO - ⬇️ Starting download for: https://www.youtube.com/watch?v=gu-GvzSvykY


INFO:diarization_logger:⬇️ Starting download for: https://www.youtube.com/watch?v=gu-GvzSvykY


2025-04-05 21:20:12,688 - INFO - ✅ Audio downloaded to: audio_0.webm


INFO:diarization_logger:✅ Audio downloaded to: audio_0.webm


2025-04-05 21:20:12,689 - INFO - ✅ Audio downloaded: audio_0.webm


INFO:diarization_logger:✅ Audio downloaded: audio_0.webm


2025-04-05 21:20:12,690 - INFO - 🎧 Converting audio_0.webm to 48000Hz WAV


INFO:diarization_logger:🎧 Converting audio_0.webm to 48000Hz WAV


2025-04-05 21:20:12,691 - INFO - 🎧 Converting audio: audio_0.webm → audio_0.wav


INFO:diarization_logger:🎧 Converting audio: audio_0.webm → audio_0.wav


2025-04-05 21:20:15,937 - INFO - ✅ Conversion complete


INFO:diarization_logger:✅ Conversion complete


2025-04-05 21:20:15,938 - INFO - ✨ Trimming silence and normalizing volume


INFO:diarization_logger:✨ Trimming silence and normalizing volume


2025-04-05 21:20:15,939 - INFO - ✨ Trimming and normalizing audio: audio_0.wav


INFO:diarization_logger:✨ Trimming and normalizing audio: audio_0.wav


2025-04-05 21:21:24,692 - INFO - ✅ Audio trimmed and normalized: audio_0.wav


INFO:diarization_logger:✅ Audio trimmed and normalized: audio_0.wav


2025-04-05 21:21:24,709 - INFO - 🧠 Running diarization and saving speaker-separated files...


INFO:diarization_logger:🧠 Running diarization and saving speaker-separated files...


2025-04-05 21:51:52,654 - INFO - ⚠️ No overlap detection — using all segments.


INFO:diarization_logger:⚠️ No overlap detection — using all segments.


2025-04-05 21:51:52,785 - INFO - 📤 Exporting chunk for speaker: SPEAKER_01


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_01


2025-04-05 21:51:52,809 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxjk3pxwl_SPEAKER_01.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxjk3pxwl_SPEAKER_01.wav


2025-04-05 21:51:52,815 - INFO - 📤 Exporting chunk for speaker: SPEAKER_01


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_01


2025-04-05 21:51:52,851 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpegkyotlm_SPEAKER_01.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpegkyotlm_SPEAKER_01.wav


2025-04-05 21:51:52,854 - INFO - 📤 Exporting chunk for speaker: SPEAKER_03


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_03


2025-04-05 21:51:52,861 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpp227skq__SPEAKER_03.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpp227skq__SPEAKER_03.wav


2025-04-05 21:51:52,864 - INFO - 📤 Exporting chunk for speaker: SPEAKER_01


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_01


2025-04-05 21:51:52,871 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp8zc4gdbu_SPEAKER_01.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp8zc4gdbu_SPEAKER_01.wav


2025-04-05 21:51:52,874 - INFO - 📤 Exporting chunk for speaker: SPEAKER_03


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_03


2025-04-05 21:51:52,880 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpra6l2vfo_SPEAKER_03.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpra6l2vfo_SPEAKER_03.wav


2025-04-05 21:51:52,884 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-05 21:51:52,895 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmptgq0fihg_SPEAKER_02.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmptgq0fihg_SPEAKER_02.wav


2025-04-05 21:51:52,901 - INFO - 📤 Exporting chunk for speaker: SPEAKER_01


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_01


2025-04-05 21:51:52,914 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmppcpr8tzn_SPEAKER_01.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmppcpr8tzn_SPEAKER_01.wav


2025-04-05 21:51:52,915 - INFO - ✅ All YouTube URLs have been processed!


INFO:diarization_logger:✅ All YouTube URLs have been processed!
