In [None]:
import re
import sys
import os
import tempfile
from datetime import datetime
from urllib.parse import urlparse, parse_qs
import yt_dlp
import speech_recognition as sr
from pydub import AudioSegment
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
import requests

In [1]:
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL, or ``None``.

    Recognised forms (the scheme is optional, and any subdomain such as
    ``www.`` or ``m.`` is accepted):

    * ``youtube.com/watch?v=<id>`` -- ``v`` may sit anywhere in the query
    * ``youtu.be/<id>``
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    Trailing slashes, extra query parameters and ``#fragments`` are never
    included in the returned ID.

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is empty, is not a
        string, cannot be parsed, or is not a recognised YouTube video URL.
    """
    if not url or not isinstance(url, str):
        return None

    candidate = url.strip()
    # urlparse only populates the host when a scheme is present, so add one
    # for inputs such as "youtu.be/<id>" or "//youtu.be/<id>".
    if not re.match(r'[a-zA-Z][a-zA-Z0-9+.\-]*://', candidate):
        candidate = 'https://' + candidate.lstrip('/')

    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # Raised for malformed netlocs (e.g. an unterminated IPv6 literal).
        return None

    def host_is(domain):
        # Exact domain or any subdomain of it; rejects look-alikes such as
        # "notyoutube.com".
        return host == domain or host.endswith('.' + domain)

    segments = [part for part in parsed.path.split('/') if part]
    video_id = None

    if host_is('youtu.be'):
        if segments:
            video_id = segments[0]
    elif host_is('youtube.com'):
        if segments and segments[0] == 'watch':
            values = parse_qs(parsed.query).get('v')
            if values:
                video_id = values[0]
        elif len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            video_id = segments[1]

    return video_id or None

In [2]:
def download_audio(youtube_url, output_path):
    """Download a video's audio with yt-dlp and convert it to a WAV file.

    The file is written into ``output_path`` using the video title as its
    base name. Only a ``.wav`` file that was created or modified by this call
    is returned, so stale WAV files already present in ``output_path`` are
    never mistaken for the download.

    Args:
        youtube_url: URL handed to yt-dlp.
        output_path: Directory that receives the WAV file; created if missing.

    Returns:
        ``(audio_file, video_title, duration)`` on success, where
        ``audio_file`` is the path of the new WAV file, ``video_title`` falls
        back to ``'Unknown Title'`` and ``duration`` falls back to ``0`` when
        yt-dlp does not report them. ``(None, None, None)`` if the download
        fails or produces no WAV file; the error is printed to stderr.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        # Keep a watch URL that also carries a "list=" parameter from
        # expanding into the whole playlist.
        'noplaylist': True,
        'quiet': True,
        'no_warnings': True,
    }

    def snapshot_wavs():
        # Map each .wav path in output_path to (mtime_ns, size) so files
        # written by this call can be told apart from pre-existing ones.
        found = {}
        for name in os.listdir(output_path):
            if name.endswith('.wav'):
                path = os.path.join(output_path, name)
                info = os.stat(path)
                found[path] = (info.st_mtime_ns, info.st_size)
        return found

    try:
        os.makedirs(output_path, exist_ok=True)
        before = snapshot_wavs()

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True) or {}

        video_title = info.get('title') or 'Unknown Title'
        duration = info.get('duration') or 0

        after = snapshot_wavs()
        fresh = [path for path, mark in after.items() if before.get(path) != mark]
        if fresh:
            # If several files changed, prefer the most recently written one.
            audio_file = max(fresh, key=lambda path: after[path][0])
            return audio_file, video_title, duration
    except Exception as e:
        print(f"Error audio download: {e}", file=sys.stderr)

    return None, None, None

In [None]:
def _recognize_wav(recognizer, wav_path):
    """Return the text recognised in ``wav_path``, or ``None`` on any failure.

    Unintelligible audio is ignored silently; service errors and any other
    exception are reported on stderr.
    """
    try:
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Nothing intelligible in this chunk; skip it without noise.
        return None
    except sr.RequestError as e:
        print(f"Error for Google Speech Recognition service; {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error during the transcription: {e}", file=sys.stderr)
    return None


def transcribe_audio_chunks(audio_file, chunk_duration_ms=30000):
    """Transcribe a WAV file in fixed-length chunks via Google speech recognition.

    The audio is converted to mono at 16 kHz, cut into consecutive chunks of
    ``chunk_duration_ms`` milliseconds, and each chunk is exported to a
    temporary WAV file, recognised, and then deleted. Chunks that yield no
    text are skipped.

    Args:
        audio_file: Path to the WAV file to transcribe.
        chunk_duration_ms: Length of each chunk in milliseconds; must be
            greater than zero.

    Returns:
        A list of ``{'timestamp': 'MM:SS', 'text': str}`` dicts in playback
        order, where the timestamp is the chunk's start offset.

    Raises:
        ValueError: If ``chunk_duration_ms`` is not positive.
    """
    # A non-positive chunk length would never advance the cursor in the loop
    # below, so reject it up front instead of spinning forever.
    if chunk_duration_ms <= 0:
        raise ValueError("chunk_duration_ms must be greater than zero")

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_file)
    audio = audio.set_channels(1).set_frame_rate(16000)

    transcripts = []
    total_duration_ms = len(audio)
    start_time_ms = 0

    while start_time_ms < total_duration_ms:
        end_time_ms = start_time_ms + chunk_duration_ms
        chunk = audio[start_time_ms:end_time_ms]

        # Reserve a path only; the handle is closed before anything writes to it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name

        try:
            # Exporting inside the try guarantees the temp file is removed
            # even when the export itself fails.
            chunk.export(temp_file_path, format="wav")
            text = _recognize_wav(recognizer, temp_file_path)
        finally:
            if os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

        if text:
            minutes = start_time_ms // 60000
            seconds = (start_time_ms % 60000) // 1000
            transcripts.append({
                'timestamp': f"{minutes:02}:{seconds:02}",
                'text': text,
            })

        start_time_ms = end_time_ms

    return transcripts

In [None]:
def create_pdf(title, transcript_data, output_filename):
    """Write a timestamped transcript to a letter-size PDF.

    The document starts with a centred ``Transcript: <title>`` heading,
    followed by one blue timestamp line and one body paragraph per entry.
    The title and all entry values are treated as plain text.

    Args:
        title: Heading text for the document.
        transcript_data: Iterable of dicts with ``'timestamp'`` and ``'text'``
            keys.
        output_filename: Path of the PDF file to create.
    """
    # Paragraph interprets its text as XML-like markup, so raw "&", "<" and
    # ">" coming from video titles or recognised speech must be escaped.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(output_filename, pagesize=letter,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)

    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=16,
        spaceAfter=30,
        alignment=1  # centred
    )

    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['Normal'],
        fontSize=11,
        leading=16,
        spaceAfter=8
    )

    timestamp_style = ParagraphStyle(
        'TimestampStyle',
        parent=styles['Normal'],
        fontSize=9,
        textColor='blue',
        spaceAfter=4
    )

    story = [
        Paragraph(f"Transcript: {escape(str(title))}", title_style),
        Spacer(1, 12),
    ]

    for entry in transcript_data:
        story.append(Paragraph(escape(str(entry['timestamp'])), timestamp_style))
        story.append(Paragraph(escape(str(entry['text'])), body_style))
        story.append(Spacer(1, 8))

    doc.build(story)


def main():
    """Prompt for a YouTube URL, transcribe the video and save a PDF.

    Keeps asking until ``extract_video_id`` recognises the input, then
    downloads the audio into a temporary directory, transcribes it and writes
    ``<sanitised title>_transcript.pdf`` to the current working directory.
    Failures are reported on stdout/stderr; nothing is raised to the caller.
    """
    while True:
        youtube_url = input("\nEnter YouTube video URL: ").strip()

        if not youtube_url:
            print("Please enter a valid YouTube URL.")
            continue

        video_id = extract_video_id(youtube_url)

        if not video_id:
            print("Invalid YouTube URL. Please try again.")
            continue

        break

    print(f"\nProcessing video ID: {video_id}")

    def to_filename_stem(text):
        # Drop everything except word characters, whitespace and hyphens,
        # then collapse whitespace/hyphen runs into single underscores.
        cleaned = re.sub(r'[^\w\s-]', '', text)
        return re.sub(r'[-\s]+', '_', cleaned).strip('_')

    # The temporary directory (and the downloaded WAV) is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            print("Downloading audio from YouTube...")
            audio_file, video_title, duration = download_audio(youtube_url, temp_dir)

            if not audio_file:
                print("Failed to download audio from YouTube.")
                return

            video_title = video_title or 'Unknown Title'

            print("Starting speech recognition...")

            transcript_data = transcribe_audio_chunks(audio_file)

            # Check before announcing the PDF so the output is not misleading.
            if not transcript_data:
                print("No transcript was generated, speech or the audio quality is poor.")
                return

            # A title made only of punctuation/emoji sanitises to an empty
            # string; fall back to the video ID so the file gets a real name.
            safe_title = (to_filename_stem(video_title)
                          or to_filename_stem(video_id)
                          or 'video')
            output_filename = f"{safe_title}_transcript.pdf"

            print(f"Creating PDF: {output_filename}")
            create_pdf(video_title, transcript_data, output_filename)

            print(f"Transcript saved as: {output_filename}")

        except Exception as e:
            print(f"Error: {str(e)}", file=sys.stderr)



# Start the interactive flow when this code runs as the main module. In a
# notebook kernel __name__ is also "__main__", so executing this cell prompts
# for a URL immediately.
if __name__ == "__main__":
    main()

