In [None]:
from pathlib import Path
from typing import Optional, Dict, Tuple
import logging
import re
import yt_dlp
from pytubefix import Search
import whisper
import torch

In [None]:
def setup_logging() -> logging.Logger:
    """Apply the INFO-level logging configuration and return this module's logger.

    Returns:
        The logger registered under this module's ``__name__``.
    """
    # Timestamp, severity, then the message text.
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)

    module_logger = logging.getLogger(__name__)
    return module_logger

In [None]:
def get_safe_filename(title: str) -> str:
    """Turn a video title into a string that is safe to use as a file name.

    Every character that is not a word character, a hyphen or an underscore
    is replaced by a single underscore; the length of the string is unchanged.

    Args:
        title: Raw video title.

    Returns:
        The title with each disallowed character swapped for ``_``.
    """
    disallowed_char = re.compile(r'[^\w\-_]')
    return disallowed_char.sub('_', title)

In [None]:
def setup_yt_dlp_options(output_dir: Path) -> Dict:
    """Build the option dictionary passed to ``yt_dlp.YoutubeDL``.

    Args:
        output_dir: Directory the output template points into.

    Returns:
        A fresh dict selecting the best audio stream, adding an
        ``FFmpegExtractAudio`` post-processor configured for 192 mp3,
        enabling quiet mode and naming the output after the video title.
    """
    # Post-processing step that extracts the audio track as mp3.
    extract_audio_step = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }

    options: Dict = {}
    options['format'] = 'bestaudio/best'
    options['postprocessors'] = [extract_audio_step]
    options['quiet'] = True
    options['outtmpl'] = str(output_dir / '%(title)s.%(ext)s')
    return options

In [None]:
def format_timestamp(seconds: float) -> str:
    """Convert seconds to SRT timestamp format (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond once, and every field is
    then derived from that single integer. This avoids float truncation
    artefacts (2.3 s would otherwise render as ``,299``) and lets rounding
    carry into the seconds, minutes and hours fields (59.9996 s becomes
    ``00:01:00,000``).

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because an SRT timestamp cannot be
            negative.

    Returns:
        The timestamp formatted as ``HH:MM:SS,mmm``.
    """
    # Integer milliseconds from here on: no further floating-point error.
    total_ms = max(0, int(round(seconds * 1000)))

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millisecs = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

In [None]:
def create_srt_content(segments: list) -> str:
    """Render transcript segments as the text of an SRT subtitle file.

    Args:
        segments: Sequence of mappings, each read via its ``"start"``,
            ``"end"`` and ``"text"`` keys.

    Returns:
        One cue per segment, numbered from 1 and joined by a newline;
        an empty string when there are no segments.
    """
    def render_cue(number: int, seg: dict) -> str:
        # One cue: index line, time-range line, then the stripped text.
        begins_at = format_timestamp(seg["start"])
        finishes_at = format_timestamp(seg["end"])
        caption = seg["text"].strip()
        return f"{number}\n{begins_at} --> {finishes_at}\n{caption}\n"

    return "\n".join(
        render_cue(number, seg) for number, seg in enumerate(segments, start=1)
    )

In [None]:
def download_audio(video_url: str, video_title: str, output_dir: Path, logger: logging.Logger) -> Path:
    """Download audio from a YouTube video and return the path of the mp3.

    The output template is overridden so the file is written under the
    sanitized title instead of the raw one. Previously the download used the
    raw title while the lookup searched for the sanitized title, so any title
    containing spaces or punctuation was never found.

    Args:
        video_url: URL handed to yt-dlp.
        video_title: Human-readable title; used for logging and, after
            sanitizing, as the file stem.
        output_dir: Directory the audio file is written to.
        logger: Logger for progress messages.

    Returns:
        Path of the extracted ``.mp3`` file inside ``output_dir``.

    Raises:
        FileNotFoundError: If the expected mp3 is missing after the download.
    """
    logger.info(f"Downloading audio for: {video_title}")

    safe_title = get_safe_filename(video_title)

    ydl_opts = setup_yt_dlp_options(output_dir)
    # Pin the on-disk name to the sanitized title so it is known up front.
    # safe_title holds only word characters and hyphens, so it cannot inject
    # extra '%' template fields.
    ydl_opts['outtmpl'] = str(output_dir / f"{safe_title}.%(ext)s")
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

    # The configured post-processor extracts mp3, hence the fixed suffix.
    audio_file = output_dir / f"{safe_title}.mp3"
    if not audio_file.exists():
        raise FileNotFoundError(
            f"Expected audio file was not created: {audio_file}"
        )
    return audio_file

In [None]:
def get_youtube_captions(video, language: str) -> Optional[str]:
    """Look up captions for ``language`` among the video's caption tracks.

    Args:
        video: Object exposing a ``captions`` collection whose items have a
            ``code`` attribute and a ``generate_srt_captions()`` method.
        language: Caption code to match exactly.

    Returns:
        The SRT text of the first track whose code equals ``language``, or
        ``None`` when the video has no captions or none matches.
    """
    available = video.captions
    if not available:
        return None

    # First track with the requested code, if any.
    matching_track = next(
        (track for track in available if track.code == language),
        None,
    )
    if matching_track is None:
        return None
    return matching_track.generate_srt_captions()

In [None]:
def transcribe_with_whisper(
    audio_path: Path,
    model: whisper.Whisper,
    language: str,
    logger: logging.Logger
) -> str:
    """Transcribe an audio file with Whisper and return the result as SRT text.

    Args:
        audio_path: Audio file to transcribe; passed to the model as a string.
        model: Loaded Whisper model whose ``transcribe`` method is called.
        language: Language code forwarded to the model.
        logger: Logger for the progress message.

    Returns:
        SRT-formatted text built from the ``"segments"`` entry of the
        model's output.
    """
    logger.info(f"Generating transcription with Whisper for: {audio_path.name}")

    # Half precision is requested only when CUDA is available.
    use_half_precision = torch.cuda.is_available()
    whisper_output = model.transcribe(
        str(audio_path),
        language=language,
        task="transcribe",
        fp16=use_half_precision,
    )

    transcript_segments = whisper_output["segments"]
    return create_srt_content(transcript_segments)

In [None]:
def process_video(
    video,
    output_dir: Path,
    whisper_model: whisper.Whisper,
    language: str,
    logger: logging.Logger
) -> Tuple[bool, Optional[str]]:
    """Process a single video: download audio and generate transcription.

    The audio is always downloaded. YouTube's own captions are used when a
    track for ``language`` exists; otherwise the audio is transcribed with
    Whisper. The transcription is saved as ``<safe title>.srt`` in
    ``output_dir``.

    Args:
        video: Search result exposing ``title``, ``watch_url`` and ``captions``.
        output_dir: Directory receiving the audio and ``.srt`` files.
        whisper_model: Loaded Whisper model used for the fallback.
        language: Language code for captions and transcription.
        logger: Logger for progress and error messages.

    Returns:
        ``(True, None)`` on success, or ``(False, error_message)`` when any
        step raised. Failures are logged and never propagate to the caller.
    """
    # Bound before the try block so the error handler can always name the
    # video, even when reading video.title itself is what failed.
    video_title = "<unknown title>"
    try:
        video_title = video.title
        safe_title = get_safe_filename(video_title)

        # Download audio
        audio_path = download_audio(video.watch_url, video_title, output_dir, logger)

        # Try YouTube captions first
        transcription = get_youtube_captions(video, language)
        source = "YouTube"

        # Fall back to Whisper if needed
        if not transcription:
            transcription = transcribe_with_whisper(audio_path, whisper_model, language, logger)
            source = "Whisper"

        # Save transcription. The encoding is explicit because captions often
        # contain non-ASCII text and the platform default may not encode it.
        srt_path = output_dir / f"{safe_title}.srt"
        srt_path.write_text(transcription, encoding="utf-8")
        logger.info(f"Saved {source} transcription to: {srt_path}")

        return True, None

    except Exception as e:
        error_msg = f"Error processing video {video_title}: {str(e)}"
        logger.error(error_msg)
        return False, error_msg

In [None]:
def process_query(
    query: str,
    num_videos: int = 1,
    output_dir: str = "outputs",
    whisper_model_size: str = "base",
    language: str = "en"
) -> None:
    """
    Process YouTube search query and generate transcriptions for matching videos

    The search runs before the Whisper model is loaded, so a query with no
    results returns without paying for the model load. Per-video failures are
    collected and logged in a summary rather than raised.

    Args:
        query: Search term for YouTube videos
        num_videos: Number of videos to process; must be at least 1
        output_dir: Directory to save outputs (created with parents if missing)
        whisper_model_size: Size of Whisper model to use ("base", "medium", "large")
        language: Language code for transcription
    """
    # Setup
    logger = setup_logging()

    # A zero or negative count would be sliced silently ([:-1] means
    # "all but the last"), so reject it explicitly.
    if num_videos < 1:
        logger.error(f"num_videos must be at least 1, got {num_videos}")
        return

    output_path = Path(output_dir)
    # parents=True lets nested directories such as "runs/today" work.
    output_path.mkdir(parents=True, exist_ok=True)

    # Search for videos
    logger.info(f"Searching for: {query}")
    search = Search(query)
    videos = search.videos
    if not videos:
        logger.error("No videos found for query")
        return

    # Initialize Whisper only once there is something to process
    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")
    model = whisper.load_model(whisper_model_size, device=device)

    # Process videos
    successful = 0
    failed = 0
    errors = []

    for video in videos[:num_videos]:
        success, error = process_video(
            video=video,
            output_dir=output_path,
            whisper_model=model,
            language=language,
            logger=logger
        )
        if success:
            successful += 1
        else:
            failed += 1
            errors.append(error)

    # Log summary
    logger.info(f"Processing complete. Successful: {successful}, Failed: {failed}")
    if errors:
        logger.info("Errors encountered:")
        for error in errors:
            logger.error(error)

In [None]:
def main():
    """Run the example: transcribe the top three results for "mac mini"."""
    # Example usage
    example_settings = dict(
        query="mac mini",
        num_videos=3,
        output_dir="youtube_outputs",
        whisper_model_size="base",
        language="en",
    )
    process_query(**example_settings)

In [None]:
# Entry point: call main() only when __name__ is "__main__", so importing
# this code as a module does not start a download/transcription run.
if __name__ == "__main__":
    main()