<a href="https://colab.research.google.com/github/ClintonAImadscience/YouTube-audio-cleaner/blob/main/YouTube_Terror_Forge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🧱 BLOCK 1: Setup + Google Drive Connection
# Run this block first in Google Colab before doing anything else.

# ✅ Install dependencies
# Includes:
# - yt-dlp: for downloading audio from YouTube
# - pydub + ffmpeg: for audio trimming and conversion
# - pyannote.audio: for speaker diarization and overlap detection (takes ~2–3 mins)

!pip install --quiet --upgrade pip
!pip install --quiet librosa ffmpeg pydub yt-dlp
!pip install --quiet git+https://github.com/pyannote/pyannote-audio.git
!apt-get -qq install ffmpeg

# ✅ Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ✅ Standard Libraries
import os
import sys
import tempfile
import subprocess
import logging
import traceback
from pathlib import Path
import shutil


# ✅ Audio / Diarization Libraries
from pyannote.core import Segment
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
from pyannote.core import Timeline


# ✅ Google Drive Output Folder
# Creates (if needed) the folder inside the mounted Drive where exported audio is written.
print("📂 Mounting Google Drive...")
drive_export_path = Path("/content/drive/MyDrive/YouTubeAudio")

try:
    # mkdir() raises OSError on failure (e.g. the path exists as a regular file or
    # is not writable), so the failure message has to be emitted from an except
    # branch -- an `exists()` check after a successful mkdir() can never be False.
    drive_export_path.mkdir(parents=True, exist_ok=True)
except OSError as exc:
    print("❌ ERROR: Failed to access or create the export folder in Google Drive.")
    print(f"   Reason: {exc}")
    raise  # later cells cannot export without this folder
else:
    print("✅ Google Drive mounted successfully!")
    print(f"📁 Folder for audio output is ready: {drive_export_path}\n")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m68.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m113.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m818.9/818.9 kB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m864.1/864.1 kB[0m [31m49.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 M

In [None]:
# 🧱 BLOCK 2: Hugging Face Token Setup + Logger
# Prompts the user to enter their Hugging Face access token.
# Also sets up structured logging across the notebook.

from getpass import getpass

# 🔐 Prompt for token
HUGGINGFACE_TOKEN = getpass("🔐 Enter your Hugging Face token:")

# 🪵 Logger setup
def setup_logger(log_level=logging.INFO, log_to_file=False, log_file_path="diarization_log.txt"):
    """
    Sets up (or reconfigures) the global notebook logger "diarization_logger".

    Safe to call repeatedly: handlers are only attached when they are missing,
    so re-running the cell never duplicates console or file output.

    Args:
        log_level (int): Logging level, e.g., logging.INFO or logging.DEBUG.
        log_to_file (bool): Whether to also log to a file.
        log_file_path (str): Path to the log file (if log_to_file is True).

    Returns:
        Logger object.
    """
    logger = logging.getLogger("diarization_logger")
    logger.setLevel(log_level)

    # Stop records from also reaching root-logger handlers, which would print
    # every message a second time in the default "LEVEL:name:message" format.
    logger.propagate = False

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # FileHandler subclasses StreamHandler, so exclude it when looking for the
    # console handler.
    has_console_handler = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers
    )
    if not has_console_handler:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    # Checked independently of the console handler so that a later call with
    # log_to_file=True still takes effect after the logger was first created
    # without file logging.
    has_file_handler = any(
        isinstance(handler, logging.FileHandler) for handler in logger.handlers
    )
    if log_to_file and not has_file_handler:
        file_handler = logging.FileHandler(log_file_path)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger

# 🎯 Initialize logger
# Uses setup_logger()'s defaults: INFO level, console output only, no log file.
logger = setup_logger()
logger.info("Logger initialized and ready.")

# ✅ Confirm token was entered
# An empty string (user pressed Enter at the prompt) is falsy and aborts the cell here.
if not HUGGINGFACE_TOKEN:
    raise ValueError("❌ Hugging Face token not provided. Please run this cell again and enter it.")


🔐 Enter your Hugging Face token:··········
2025-04-09 21:58:42,962 - INFO - Logger initialized and ready.


INFO:diarization_logger:Logger initialized and ready.


In [None]:
# 🧱 BLOCK 3: Load Diarization Pipelines
# Loads the pyannote speaker-diarization pipeline (required) and the
# overlapped-speech-detection pipeline (optional) using the Hugging Face token.
# Requires `logger` and `HUGGINGFACE_TOKEN` from BLOCK 2.
# NOTE: BLOCK 4.5 performs the same loading again; running either one is enough.

from pyannote.audio import Pipeline

# 🧠 Load diarization and overlap detection pipelines using Hugging Face token
# Make sure HUGGINGFACE_TOKEN is already defined in an earlier block

logger.info("🔐 Loading diarization and overlap detection models...")

try:
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HUGGINGFACE_TOKEN)
    # from_pretrained() can return None instead of raising when the model is
    # gated/private and the token is not accepted; treat that as a failure.
    if diarization_pipeline is None:
        raise RuntimeError(
            "Pipeline.from_pretrained returned None for 'pyannote/speaker-diarization' "
            "(check the token and that the model's user conditions were accepted)."
        )
except Exception:
    logger.error("❌ Failed to load pyannote pipelines", exc_info=True)
    raise  # nothing downstream can run without the diarization pipeline

try:
    osd_pipeline = Pipeline.from_pretrained("pyannote/overlapped-speech-detection", use_auth_token=HUGGINGFACE_TOKEN)
    if osd_pipeline is None:
        raise RuntimeError(
            "Pipeline.from_pretrained returned None for 'pyannote/overlapped-speech-detection'."
        )
    logger.info("✅ Diarization models loaded successfully.")
except Exception:
    # Overlap detection is optional: the main loop checks `if osd_pipeline:`
    # and keeps all diarized segments when it is None.
    osd_pipeline = None
    logger.warning("⚠️ OSD model not loaded. Overlapping speech will NOT be filtered.", exc_info=True)


2025-04-09 21:59:12,978 - INFO - 🔐 Loading diarization and overlap detection models...


INFO:diarization_logger:🔐 Loading diarization and overlap detection models...


config.yaml:   0%|          | 0.00/500 [00:00<?, ?B/s]

DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _speechbrain_save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _speechbrain_load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _recover


pytorch_model.bin:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/318 [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`
INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.


hyperparams.yaml:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

DEBUG:speechbrain.utils.fetching:Fetch: Local file found, creating symlink '/root/.cache/huggingface/hub/models--speechbrain--spkrec-ecapa-voxceleb/snapshots/0f99f2d0ebe89ac095bcc5903c4dd8f72b367286/hyperparams.yaml' -> '/root/.cache/torch/pyannote/speechbrain/hyperparams.yaml'
INFO:speechbrain.utils.fetching:Fetch custom.py: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _load
DEBUG:speechbrain.utils.checkpoints:Registered parameter transfer hook for _load
  wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for load_if_possible
DEBUG:speechbrain.utils.parameter_transfer:Collecting files (or symlinks) for pretraining in /root/.cache/torch/pyann

embedding_model.ckpt:   0%|          | 0.00/83.3M [00:00<?, ?B/s]

DEBUG:speechbrain.utils.fetching:Fetch: Local file found, creating symlink '/root/.cache/huggingface/hub/models--speechbrain--spkrec-ecapa-voxceleb/snapshots/0f99f2d0ebe89ac095bcc5903c4dd8f72b367286/embedding_model.ckpt' -> '/root/.cache/torch/pyannote/speechbrain/embedding_model.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["embedding_model"] = /root/.cache/torch/pyannote/speechbrain/embedding_model.ckpt
INFO:speechbrain.utils.fetching:Fetch mean_var_norm_emb.ckpt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


mean_var_norm_emb.ckpt:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

DEBUG:speechbrain.utils.fetching:Fetch: Local file found, creating symlink '/root/.cache/huggingface/hub/models--speechbrain--spkrec-ecapa-voxceleb/snapshots/0f99f2d0ebe89ac095bcc5903c4dd8f72b367286/mean_var_norm_emb.ckpt' -> '/root/.cache/torch/pyannote/speechbrain/mean_var_norm_emb.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["mean_var_norm_emb"] = /root/.cache/torch/pyannote/speechbrain/mean_var_norm_emb.ckpt
INFO:speechbrain.utils.fetching:Fetch classifier.ckpt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


classifier.ckpt:   0%|          | 0.00/5.53M [00:00<?, ?B/s]

DEBUG:speechbrain.utils.fetching:Fetch: Local file found, creating symlink '/root/.cache/huggingface/hub/models--speechbrain--spkrec-ecapa-voxceleb/snapshots/0f99f2d0ebe89ac095bcc5903c4dd8f72b367286/classifier.ckpt' -> '/root/.cache/torch/pyannote/speechbrain/classifier.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["classifier"] = /root/.cache/torch/pyannote/speechbrain/classifier.ckpt
INFO:speechbrain.utils.fetching:Fetch label_encoder.txt: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


label_encoder.txt:   0%|          | 0.00/129k [00:00<?, ?B/s]

DEBUG:speechbrain.utils.fetching:Fetch: Local file found, creating symlink '/root/.cache/huggingface/hub/models--speechbrain--spkrec-ecapa-voxceleb/snapshots/0f99f2d0ebe89ac095bcc5903c4dd8f72b367286/label_encoder.txt' -> '/root/.cache/torch/pyannote/speechbrain/label_encoder.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["label_encoder"] = /root/.cache/torch/pyannote/speechbrain/label_encoder.ckpt
INFO:speechbrain.utils.parameter_transfer:Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
DEBUG:speechbrain.utils.parameter_transfer:Redirecting (loading from local path): embedding_model -> /root/.cache/torch/pyannote/speechbrain/embedding_model.ckpt
DEBUG:speechbrain.utils.parameter_transfer:Redirecting (loading from local path): mean_var_norm_emb -> /root/.cache/torch/pyannote/speechbrain/mean_var_norm_emb.ckpt
DEBUG:speechbrain.utils.parameter_transfer:Redirecting (loading from local path): classifier -> /root/.cac

config.yaml:   0%|          | 0.00/280 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/1.98k [00:00<?, ?B/s]

/usr/local/lib/python3.11/dist-packages/pytorch_lightning/utilities/migration/migration.py:208: You have multiple `ModelCheckpoint` callback states in this checkpoint, but we found state keys that would end up colliding with each other after an upgrade, which means we can't differentiate which of your checkpoint callbacks needs which states. At least one of your `ModelCheckpoint` callbacks will not be able to reload the state.
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.1.3 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/059e96f964841d40f1a5e755bb7223f76666bba4/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.7.1, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.
2025-04-09 21:59:20,783 - INFO - ✅ Diarization models loaded successfully.


INFO:diarization_logger:✅ Diarization models loaded successfully.


In [None]:
# 🧱 BLOCK 4: Core Utility Functions
# Includes helper functions used throughout the app:
# - download_audio(): Downloads the best audio stream from a YouTube URL (ignores playlists).
# - convert_audio(): Converts .webm to .wav without forcing 48kHz — preserves original sample rate, but enforces mono channel.
# - trim_and_normalize(): Removes leading/trailing silence and normalizes volume.
# - get_youtube_title(): Gets a clean title from the video for use in filenames.
# - chunk_and_export(): Exports one WAV file to Google Drive, tagging the filename with the speaker label and sample rate.


def download_audio(url: str, output_path: str):
    """
    Download the best available audio stream of a single YouTube video with yt-dlp.

    Args:
        url (str): Video URL. Playlists are ignored (--no-playlist).
        output_path (str): Output filename template passed to yt-dlp's -o option.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
            The exception carries yt-dlp's captured stdout/stderr.
    """
    logger.info(f"⬇️ Starting download for: {url}")
    try:
        subprocess.run(
            # "--" ends option parsing so a URL/ID starting with "-" is never
            # interpreted as a yt-dlp option.
            ["yt-dlp", "--no-playlist", "-f", "bestaudio", "-o", output_path, "--", url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"❌ yt-dlp stderr:\n{e.stderr}")
        logger.error(f"❌ yt-dlp failed to download: {url}", exc_info=True)
        raise

    logger.info(f"✅ Audio downloaded to: {output_path}")

def convert_audio(input_path: str, output_path: str):
    """
    Transcode a downloaded audio file to WAV with ffmpeg.

    The source sample rate is left untouched, the output is down-mixed to a
    single channel (-ac 1), and an existing output file is overwritten (-y).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    logger.info(f"🎧 Converting {input_path} → {output_path} (preserving sample rate)")
    ffmpeg_cmd = ["ffmpeg", "-y", "-i", input_path, "-ac", "1", output_path]
    try:
        # capture_output=True is shorthand for piping both stdout and stderr.
        subprocess.run(ffmpeg_cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        logger.error(f"❌ ffmpeg conversion failed for: {input_path}", exc_info=True)
        logger.error(f"🔍 ffmpeg stderr:\n{err.stderr}")
        raise
    else:
        logger.info("✅ Conversion complete")

def trim_and_normalize(wav_path: str, min_silence_len: int = 500, silence_thresh: int = -40):
    """
    Strip leading/trailing silence from a WAV file and peak-normalize it in place.

    Everything before the first and after the last non-silent range is dropped
    (silence in between is kept). Gain is then applied so the loudest peak sits
    at 0 dBFS, and the result overwrites `wav_path`.

    Args:
        wav_path (str): WAV file to process; rewritten in place.
        min_silence_len (int): Minimum silence length in milliseconds, passed to
            pydub's detect_nonsilent().
        silence_thresh (int): Level in dBFS below which audio counts as silence,
            passed to pydub's detect_nonsilent().

    Raises:
        Exception: Any error from loading, analysing or exporting the audio is
            logged and re-raised.
    """
    logger.info(f"✨ Trimming and normalizing audio: {wav_path}")
    try:
        audio = AudioSegment.from_wav(wav_path)
        nonsilent_ranges = detect_nonsilent(
            audio,
            min_silence_len=min_silence_len,
            silence_thresh=silence_thresh,
        )

        if not nonsilent_ranges:
            # Entire file is below the threshold: leave it untouched.
            logger.warning("⚠️ No nonsilent audio found. Skipping trimming.")
            return

        # Ranges are [start_ms, end_ms] pairs in chronological order.
        start_trim = nonsilent_ranges[0][0]
        end_trim = nonsilent_ranges[-1][1]
        trimmed_audio = audio[start_trim:end_trim]

        # max_dBFS is the peak level (<= 0); negating it gives the gain that
        # brings the peak up to exactly 0 dBFS.
        change_in_dBFS = -trimmed_audio.max_dBFS
        normalized_audio = trimmed_audio.apply_gain(change_in_dBFS)
        normalized_audio.export(wav_path, format="wav")
        logger.info(f"✅ Audio trimmed and normalized: {wav_path}")

    except Exception:
        logger.error(f"❌ Error while trimming/normalizing: {wav_path}", exc_info=True)
        raise

def get_youtube_title(url: str) -> "str | None":
    """
    Fetch a video's title with yt-dlp and make it safe to use in a filename.

    Spaces become underscores, and characters that are path separators or are
    commonly rejected by filesystems (/ \\ : * ? " < > |) are replaced with
    underscores as well.

    Args:
        url (str): Video URL. Playlists are ignored (--no-playlist).

    Returns:
        The sanitized title, or None if yt-dlp failed or reported an empty title.
    """
    logger.info(f"🔍 Getting title for: {url}")
    try:
        result = subprocess.run(
            # "--" ends option parsing so a URL/ID starting with "-" is never
            # interpreted as a yt-dlp option.
            ["yt-dlp", "--no-playlist", "--get-title", "--", url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError:
        logger.warning("⚠️ Could not extract title, using fallback.")
        return None

    title = result.stdout.strip().replace(" ", "_")
    # A raw "/" in the title would otherwise be treated as a directory
    # separator when the title is embedded in an output path.
    title = title.translate({ord(ch): "_" for ch in '/\\:*?"<>|'})

    if not title:
        logger.warning("⚠️ Could not extract title, using fallback.")
        return None

    logger.info(f"✅ Title extracted: {title}")
    return title

def chunk_and_export(wav_path: str, speaker_label: str = "unknown",
                     base_output_path: str = "/content/drive/MyDrive/YouTubeAudio"):
    """
    Export one WAV file to the output folder, tagged with speaker and sample rate.

    The destination name is "<stem of wav_path>_<speaker_label>_<rate>Hz.wav".
    The audio is first written to a local temporary file and then moved into
    place, so a partially written file never appears in the output folder.

    Args:
        wav_path (str): Source WAV file.
        speaker_label (str): Label embedded in the output filename.
        base_output_path (str): Destination folder; created if missing.

    Returns:
        str: Path of the exported file.

    Raises:
        Exception: Any error while reading, exporting or moving the audio is
            logged and re-raised.
    """
    logger.info(f"📤 Exporting chunk for speaker: {speaker_label}")
    tmp_path = None
    try:
        os.makedirs(base_output_path, exist_ok=True)

        audio = AudioSegment.from_wav(wav_path)
        sample_rate = audio.frame_rate
        filename = Path(wav_path).stem
        output_path = f"{base_output_path}/{filename}_{speaker_label}_{sample_rate}Hz.wav"

        # Reserve a temp filename, then export after the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
            tmp_path = tmpfile.name
        audio.export(tmp_path, format="wav")

        # Move to final destination (handles cross-device)
        shutil.move(tmp_path, output_path)
        tmp_path = None  # ownership transferred; nothing left to clean up
        logger.info(f"✅ Uploaded chunk to Google Drive: {output_path}")
        return output_path

    except Exception:
        logger.error(f"❌ Failed to export chunk: {wav_path}", exc_info=True)
        raise
    finally:
        # delete=False means the temp file survives a failed export/move unless
        # it is removed explicitly.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)


In [None]:
# 🧱 BLOCK 4.5: Load Diarization Pipelines
# Loads speaker diarization and (optionally) overlapped speech detection models from Hugging Face.
# If the OSD model fails to load, diarization will still run without overlap filtering.

from pyannote.audio import Pipeline

logger.info("🔐 Loading diarization and overlap detection models...")

# --- Required: speaker diarization ---
try:
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=HUGGINGFACE_TOKEN
    )
    # from_pretrained() can return None instead of raising when the model is
    # gated/private and the token is not accepted; treat that as a failure
    # rather than logging success for an unusable pipeline.
    if diarization_pipeline is None:
        raise RuntimeError(
            "Pipeline.from_pretrained returned None for 'pyannote/speaker-diarization' "
            "(check the token and that the model's user conditions were accepted)."
        )
    logger.info("✅ Diarization model loaded.")
except Exception:
    logger.error("❌ Failed to load diarization pipeline — cannot continue.", exc_info=True)
    raise  # hard fail if diarization model doesn't load

# --- Optional: overlapped speech detection ---
try:
    osd_pipeline = Pipeline.from_pretrained(
        "pyannote/overlapped-speech-detection",
        use_auth_token=HUGGINGFACE_TOKEN
    )
    if osd_pipeline is None:
        raise RuntimeError(
            "Pipeline.from_pretrained returned None for 'pyannote/overlapped-speech-detection'."
        )
    logger.info("✅ Overlap detection model loaded.")
except Exception:
    osd_pipeline = None
    # exc_info keeps the reason visible while still letting the notebook continue.
    logger.warning("⚠️ OSD model not loaded. Overlapping speech will NOT be filtered.", exc_info=True)


2025-04-09 21:59:53,593 - INFO - 🔐 Loading diarization and overlap detection models...


INFO:diarization_logger:🔐 Loading diarization and overlap detection models...
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/c4c8ceafcbb3a7a280c2d357aee9fbc9b0be7f9b/pytorch_model.bin`
INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Using symlink found at '/root/.cache/torch/pyannote/speechbrain/hyperparams.yaml'
INFO:speechbrain.utils.fetching:Fetch custom.py: Fetching from HuggingFace Hub 'speechbrain/spkrec-ecapa-voxceleb' if not cached


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.


DEBUG:speechbrain.utils.parameter_transfer:Collecting files (or symlinks) for pretraining in /root/.cache/torch/pyannote/speechbrain.
INFO:speechbrain.utils.fetching:Fetch embedding_model.ckpt: Using symlink found at '/root/.cache/torch/pyannote/speechbrain/embedding_model.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["embedding_model"] = /root/.cache/torch/pyannote/speechbrain/embedding_model.ckpt
INFO:speechbrain.utils.fetching:Fetch mean_var_norm_emb.ckpt: Using symlink found at '/root/.cache/torch/pyannote/speechbrain/mean_var_norm_emb.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["mean_var_norm_emb"] = /root/.cache/torch/pyannote/speechbrain/mean_var_norm_emb.ckpt
INFO:speechbrain.utils.fetching:Fetch classifier.ckpt: Using symlink found at '/root/.cache/torch/pyannote/speechbrain/classifier.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["classifier"] = /root/.cache/torch/pyannote/speech

2025-04-09 21:59:55,414 - INFO - ✅ Diarization model loaded.


INFO:diarization_logger:✅ Diarization model loaded.
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.1.3 to v2.5.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/pyannote/models--pyannote--segmentation/snapshots/059e96f964841d40f1a5e755bb7223f76666bba4/pytorch_model.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.3.2. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.7.1, yours is 2.6.0+cu124. Bad things might happen unless you revert torch to 1.x.
2025-04-09 21:59:56,443 - INFO - ✅ Overlap detection model loaded.


INFO:diarization_logger:✅ Overlap detection model loaded.


In [None]:
# 🧱 BLOCK 5: Main Processing Engine — YouTube to Clean Speaker Segments
# Runs the full pipeline: download → convert → trim → diarize only middle 5min → export
# Requires the helpers from BLOCK 4 and the pipelines from BLOCK 3 / 4.5.

# 🎯 CONFIGURATION
MIDDLE_SEGMENT_DURATION = 300  # ⏱️ Duration (in seconds) to extract from middle of audio
MIN_SEGMENT_DURATION = 3.0     # ⏱️ Shortest overlap-free segment (in seconds) worth exporting
MAX_TITLE_CHARS = 50           # 🔤 Title prefix length used in exported filenames (keeps names short)

# 🎯 URL(s) to process
YOUTUBE_URLS = [
    "https://www.youtube.com/watch?v=eSvVxQQO6_Q",  # 👈👈👈 Add or replace YouTube links here
]

# 🚀 Main Processing Loop
for url_index, url in enumerate(YOUTUBE_URLS):
    logger.info(f"\n🗕️ Processing URL {url_index+1}/{len(YOUTUBE_URLS)}")
    title = get_youtube_title(url)
    if not title:
        logger.warning("⚠️ Could not extract title from YouTube URL.")
        continue

    # The title becomes part of a filename below, so it must not contain path
    # separators and should not be excessively long.
    safe_title = title.replace("/", "_").replace("\\", "_")[:MAX_TITLE_CHARS]

    webm_path = f"audio_{url_index}.webm"
    wav_path = f"audio_{url_index}.wav"

    # Remove leftovers from a previous run: by default yt-dlp does not overwrite
    # an existing media file, so a stale audio_N.webm from a different URL
    # would otherwise be processed instead of this one.
    for stale_path in (webm_path, wav_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    logger.info(f"📅 Downloading best audio from: {url}")
    try:
        download_audio(url, webm_path)
        logger.info(f"✅ Audio downloaded: {webm_path}")
    except Exception:
        logger.error(f"❌ Failed to download audio from: {url}", exc_info=True)
        continue

    # A conversion/trim failure for one URL should not abort the whole batch,
    # mirroring how download failures are handled above.
    try:
        logger.info(f"🎷 Converting {webm_path} to WAV (preserving original sample rate)")
        convert_audio(webm_path, wav_path)

        logger.info("✨ Trimming silence and normalizing volume")
        trim_and_normalize(wav_path)

        logger.info("✂️ Extracting middle 5 minutes of audio for diarization only")
        audio = AudioSegment.from_wav(wav_path)
    except Exception:
        logger.error(f"❌ Failed to prepare audio for: {url}", exc_info=True)
        continue

    # Centre a MIDDLE_SEGMENT_DURATION-second window on the audio; for shorter
    # recordings the window starts at 0 and the slice simply ends at the end.
    total_duration = len(audio) / 1000  # len() of an AudioSegment is in milliseconds
    middle_start = max(0, (total_duration - MIDDLE_SEGMENT_DURATION) / 2)
    middle_end = middle_start + MIDDLE_SEGMENT_DURATION
    middle_audio = audio[int(middle_start * 1000):int(middle_end * 1000)]

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as middle_wav_file:
        middle_audio.export(middle_wav_file.name, format="wav")
        middle_wav_path = middle_wav_file.name

    logger.info("🧬 Running diarization and saving speaker-separated files...")
    try:
        diarization = diarization_pipeline(middle_wav_path)
        diarized_segments = list(diarization.itertracks(yield_label=True))

        if osd_pipeline:
            osd = osd_pipeline(middle_wav_path)
            osd_timeline = osd.get_timeline()
            logger.info("✅ Overlap detection applied.")
        else:
            osd_timeline = Timeline(segments=[])
            logger.info("⚠️ No overlap detection — using all segments.")

        # Cut the overlapped-speech regions out of every speaker turn and keep
        # only the remaining pieces that are long enough to be useful.
        fused_segments = []
        for turn, _, speaker in diarized_segments:
            segment = Segment(turn.start, turn.end)
            clean_parts = Timeline([segment]).extrude(osd_timeline)
            for clean_segment in clean_parts:
                if (clean_segment.end - clean_segment.start) >= MIN_SEGMENT_DURATION:
                    fused_segments.append((clean_segment, speaker))

        logger.info(f"🔄 Usable (non-overlapping) segments: {len(fused_segments)}")

        # chunk_and_export() names its output after the stem of the file it is
        # given, so write each segment under a descriptive name (URL index,
        # title, running segment number) instead of a random temp name.
        # The temporary directory and everything in it is removed on exit,
        # including when an export fails.
        with tempfile.TemporaryDirectory() as segment_dir:
            for seg_index, (segment, speaker) in enumerate(fused_segments):
                # Segment times are seconds relative to the middle excerpt,
                # which is exactly what was diarized.
                start_ms = int(float(segment.start) * 1000)
                end_ms = int(float(segment.end) * 1000)
                segment_audio = middle_audio[start_ms:end_ms]

                segment_path = os.path.join(
                    segment_dir,
                    f"{url_index:02d}_{safe_title}_seg{seg_index:03d}.wav",
                )
                segment_audio.export(segment_path, format="wav")
                chunk_and_export(segment_path, speaker)

    except Exception:
        logger.error("❌ Diarization failed. Proceeding with full audio export.", exc_info=True)
        chunk_and_export(wav_path)
    finally:
        # The excerpt was created with delete=False; remove it on success and
        # on failure alike.
        if os.path.exists(middle_wav_path):
            os.unlink(middle_wav_path)

logger.info("✅ All YouTube URLs have been processed!")


2025-04-09 22:00:33,243 - INFO - 
🗕️ Processing URL 1/1


INFO:diarization_logger:
🗕️ Processing URL 1/1


2025-04-09 22:00:33,244 - INFO - 🔍 Getting title for: https://www.youtube.com/watch?v=eSvVxQQO6_Q


INFO:diarization_logger:🔍 Getting title for: https://www.youtube.com/watch?v=eSvVxQQO6_Q


2025-04-09 22:00:36,505 - INFO - ✅ Title extracted: Interview_de_Thomas_Derichebourg_sur_BSMART_-_28_novembre_2022


INFO:diarization_logger:✅ Title extracted: Interview_de_Thomas_Derichebourg_sur_BSMART_-_28_novembre_2022


2025-04-09 22:00:36,506 - INFO - 📅 Downloading best audio from: https://www.youtube.com/watch?v=eSvVxQQO6_Q


INFO:diarization_logger:📅 Downloading best audio from: https://www.youtube.com/watch?v=eSvVxQQO6_Q


2025-04-09 22:00:36,507 - INFO - ⬇️ Starting download for: https://www.youtube.com/watch?v=eSvVxQQO6_Q


INFO:diarization_logger:⬇️ Starting download for: https://www.youtube.com/watch?v=eSvVxQQO6_Q


2025-04-09 22:00:41,464 - INFO - ✅ Audio downloaded to: audio_0.webm


INFO:diarization_logger:✅ Audio downloaded to: audio_0.webm


2025-04-09 22:00:41,465 - INFO - ✅ Audio downloaded: audio_0.webm


INFO:diarization_logger:✅ Audio downloaded: audio_0.webm


2025-04-09 22:00:41,466 - INFO - 🎷 Converting audio_0.webm to WAV (preserving original sample rate)


INFO:diarization_logger:🎷 Converting audio_0.webm to WAV (preserving original sample rate)


2025-04-09 22:00:41,467 - INFO - 🎧 Converting audio_0.webm → audio_0.wav (preserving sample rate)


INFO:diarization_logger:🎧 Converting audio_0.webm → audio_0.wav (preserving sample rate)


2025-04-09 22:00:43,240 - INFO - ✅ Conversion complete


INFO:diarization_logger:✅ Conversion complete


2025-04-09 22:00:43,241 - INFO - ✨ Trimming silence and normalizing volume


INFO:diarization_logger:✨ Trimming silence and normalizing volume


2025-04-09 22:00:43,242 - INFO - ✨ Trimming and normalizing audio: audio_0.wav


INFO:diarization_logger:✨ Trimming and normalizing audio: audio_0.wav


2025-04-09 22:01:15,597 - INFO - ✅ Audio trimmed and normalized: audio_0.wav


INFO:diarization_logger:✅ Audio trimmed and normalized: audio_0.wav


2025-04-09 22:01:15,606 - INFO - ✂️ Extracting middle 5 minutes of audio for diarization only


INFO:diarization_logger:✂️ Extracting middle 5 minutes of audio for diarization only


2025-04-09 22:01:15,721 - INFO - 🧬 Running diarization and saving speaker-separated files...


INFO:diarization_logger:🧬 Running diarization and saving speaker-separated files...


2025-04-09 22:11:24,530 - INFO - ✅ Overlap detection applied.


INFO:diarization_logger:✅ Overlap detection applied.


2025-04-09 22:11:24,534 - INFO - 🔄 Usable (non-overlapping) segments: 16


INFO:diarization_logger:🔄 Usable (non-overlapping) segments: 16


2025-04-09 22:11:24,537 - INFO - 📤 Exporting chunk for speaker: SPEAKER_01


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_01


2025-04-09 22:11:24,549 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp9fmsbc_8_SPEAKER_01_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp9fmsbc_8_SPEAKER_01_48000Hz.wav


2025-04-09 22:11:24,552 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,565 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpi4mdbn3z_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpi4mdbn3z_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,568 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,580 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp1rom4ydf_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp1rom4ydf_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,584 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,599 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpfqfwja4f_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpfqfwja4f_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,604 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,646 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpokdbn_pe_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpokdbn_pe_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,650 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,664 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpf8wmo659_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpf8wmo659_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,668 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,683 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpqvbkrjbo_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpqvbkrjbo_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,685 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,696 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxxzdrkyk_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxxzdrkyk_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,701 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,721 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpg4fj0loo_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpg4fj0loo_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,725 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,737 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp81x6q1pz_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp81x6q1pz_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,740 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,750 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp6ovpfi67_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmp6ovpfi67_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,754 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,771 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxaw1_l_9_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpxaw1_l_9_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,776 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,794 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpt4piawug_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpt4piawug_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,797 - INFO - 📤 Exporting chunk for speaker: SPEAKER_02


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_02


2025-04-09 22:11:24,809 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpkqsx_ab3_SPEAKER_02_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpkqsx_ab3_SPEAKER_02_48000Hz.wav


2025-04-09 22:11:24,812 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,827 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpyh2bml7u_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmpyh2bml7u_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,830 - INFO - 📤 Exporting chunk for speaker: SPEAKER_00


INFO:diarization_logger:📤 Exporting chunk for speaker: SPEAKER_00


2025-04-09 22:11:24,909 - INFO - ✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmphazt15xx_SPEAKER_00_48000Hz.wav


INFO:diarization_logger:✅ Uploaded chunk to Google Drive: /content/drive/MyDrive/YouTubeAudio/tmphazt15xx_SPEAKER_00_48000Hz.wav


2025-04-09 22:11:24,915 - INFO - ✅ All YouTube URLs have been processed!


INFO:diarization_logger:✅ All YouTube URLs have been processed!
