In [1]:
#from pytube import YouTube
import os
import re
import unicodedata
import whisper
import torch

from pytubefix import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from pytubefix.cli import on_progress
from pathlib import Path

In [2]:
import yt_dlp

### Download audio in bulk from a search query

In [None]:
def download_audio(query, max_videos=3, output_path='Audio'):
    """
    Downloads audio from YouTube based on a search query.

    Parameters:
    query (str): A yt-dlp query: a "ytsearch:<terms>" search, plain search
                 terms, or a video/playlist URL.
    max_videos (int, optional): The maximum number of videos to download. Defaults to 3.
    output_path (str, optional): The path to save the downloaded audio files. Defaults to 'Audio'.

    Returns:
    list[str]: Paths of the MP3 files found on disk after the download.

    Raises:
    ValueError: If max_videos is smaller than 1.
    """
    if max_videos < 1:
        raise ValueError(f"max_videos must be at least 1, got {max_videos}")

    # A bare "ytsearch:" prefix only yields the first hit, so request
    # max_videos results explicitly ("ytsearchN:<terms>").
    search_prefix = 'ytsearch:'
    if query.startswith(search_prefix):
        search_query = f'ytsearch{max_videos}:{query[len(search_prefix):]}'
    else:
        search_query = query

    ydl_opts = {
        'format': 'bestaudio/best',
        'noplaylist': True,
        # Plain search terms (not a URL) are run as a search for max_videos results.
        'default_search': f'ytsearch{max_videos}',
        # Never process more than max_videos entries of a multi-entry result.
        'playlistend': max_videos,
        # "%(title)s" needs the leading "%"; without it every download is
        # written to the same literal "(title)s.mp3" file.
        'outtmpl': f'{output_path}/%(title)s.%(ext)s',
        'quiet': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '50',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        print(f"Downloading {max_videos} audio files: {query}...")
        info_dict = ydl.extract_info(search_query, download=True)

        # Searches and playlists list their videos under 'entries'; a single
        # video URL returns that video's info dict directly.
        if 'entries' in info_dict:
            videos = info_dict['entries']
        else:
            videos = [info_dict]

        downloaded_files = []
        for video in videos:
            if video is None:
                # Entry that yt-dlp could not extract.
                continue
            # Ask yt-dlp for the filename it generated (titles are sanitized,
            # so the raw title may not match), then swap in the .mp3
            # extension produced by the FFmpegExtractAudio postprocessor.
            source_path = ydl.prepare_filename(video)
            file_path = os.path.splitext(source_path)[0] + '.mp3'
            if os.path.exists(file_path):
                downloaded_files.append(file_path)
                print(f"Downloaded :{video['title']}")
            else:
                print(f"Failed to download :{video['title']} with path {file_path}")
        return downloaded_files


In [None]:
# Search YouTube for "programming" and keep the list returned by download_audio.
downloaded_audio = download_audio("ytsearch:programming")

In [None]:
# Language code used for captions and Whisper transcription in the cells below.
lang = "en"
# Video that the following cells download and transcribe.
url = "https://www.youtube.com/watch?v=zBjJUV-lzHo&pp=ygUMMSBtaW4gdmlkZW9z"

# Download the MP3 file

In [None]:
# Open the video and report its title and length before downloading.
yt = YouTube(url, on_progress_callback=on_progress)
print(f"title: {yt.title}")
print(f"length: {yt.length}")

# Pick the audio-only stream and download it, requesting MP3 output.
ys = yt.streams.get_audio_only()
ys.download(mp3=True)

# Transcribe audio to SRT with Whisper

In [None]:
def transcribe_audio_to_srt(audio_path, output_path=None, language=None):
    """
    Transcribe audio file using OpenAI's Whisper model and save as SRT file.

    Parameters:
    audio_path (str): Path to the audio file
    output_path (str, optional): Path to save the SRT file.
                                 If None, uses audio filename with .srt extension
    language (str, optional): Language code passed to Whisper. If None, the
                              notebook-level `lang` variable is used when it is
                              defined; otherwise None is passed on and Whisper
                              detects the language itself.

    Returns:
    str: Path to the generated SRT file
    """
    # Check if CUDA is available for GPU acceleration
    use_gpu = torch.cuda.is_available()
    device = "cuda" if use_gpu else "cpu"
    print(f"Using device: {device}")

    # Determine output path if not provided
    if output_path is None:
        output_path = os.path.splitext(audio_path)[0] + ".srt"

    # Resolve the language explicitly instead of silently depending on a
    # global that may not exist yet (fresh kernel / cells run out of order).
    if language is None:
        language = globals().get("lang")

    # Load the Whisper model (you can choose different sizes: tiny, base, small, medium, large)
    print("Loading Whisper model...")
    model = whisper.load_model("small", device=device)

    # Load and transcribe the audio file
    print("Transcribing audio...")
    result = model.transcribe(
        audio_path,
        language=language,
        task="transcribe",
        fp16=use_gpu  # Use float16 if on GPU
    )

    # Generate SRT content: number, time codes, text, then a blank separator
    srt_content = []
    for i, segment in enumerate(result["segments"], 1):
        # Convert start and end times to SRT time format
        start_time = format_time(segment["start"])
        end_time = format_time(segment["end"])

        # Create SRT entry
        srt_content.append(str(i))  # Subtitle number
        srt_content.append(f"{start_time} --> {end_time}")  # Time codes
        srt_content.append(segment["text"].strip())  # Subtitle text
        srt_content.append("")  # Blank line between entries

    # Write SRT file
    with open(output_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write("\n".join(srt_content))

    print(f"SRT file saved to: {output_path}")
    return output_path

def format_time(seconds):
    """
    Convert seconds to SRT time format (00:00:00,000)

    The value is rounded to the nearest millisecond; negative values are
    clamped to zero.

    Parameters:
    seconds (float): Time in seconds

    Returns:
    str: Formatted time string
    """
    # Round to whole milliseconds once, up front. Truncating the fractional
    # part separately loses a millisecond to float error (1.001 -> ",000").
    total_ms = max(0, int(round(seconds * 1000)))

    # Split the millisecond count into hours, minutes, seconds, milliseconds
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)

    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"


# Try downloading the YouTube captions, falling back to Whisper

In [None]:
# Bind `yt` before the try block so the Whisper fallback below never runs
# against an unbound or stale object left over from another cell.
yt = YouTube(url)

try:
    # Prefer the captions published on YouTube for the configured language.
    caption = yt.captions.get_by_language_code(lang)
    caption.save_captions(yt.title+".srt")

except Exception as caption_error:
    # No usable captions (or saving failed): report why, then transcribe the
    # previously downloaded audio with Whisper instead.
    print(f"Could not save YouTube captions: {caption_error}")
    audio_file = yt.title+".mp3"
    # The transcription is written in SRT format, so use the same extension
    # as the caption branch above.
    output_file = yt.title+".srt"

    try:
        transcript = transcribe_audio_to_srt(audio_file, output_file)
        print("\nTranscription:")
        print(transcript)
    except Exception as e:
        print(f"An error occurred: {str(e)}")

In [None]:
# Print the video's metadata, one "Label: value" line per field.
metadata_fields = (
    ("Title", "title", ""),
    ("Length", "length", " seconds"),
    ("Views", "views", ""),
    ("Author", "author", ""),
    ("Publish Date", "publish_date", ""),
    ("Description", "description", ""),
)
for label, attribute, suffix in metadata_fields:
    # Attributes are read one at a time, in order, right before printing.
    print(f"{label}: {getattr(yt, attribute)}{suffix}")