In [15]:
import yt_dlp
from pytubefix import Search, YouTube
import whisper
import torch
import os
import re

In [16]:
def format_time_to_srt(seconds):
    """Helper function to convert time in seconds to SRT format (HH:MM:SS,MS)"""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = seconds % 60
    milliseconds = int((seconds - int(seconds)) * 1000)
    return f"{hours:02}:{minutes:02}:{int(seconds):02},{milliseconds:03}"

In [17]:
def transcribe_with_whisper(audio_path, model, language="en"):
    # Transcription function with SRT output
    result = model.transcribe(
        audio_path,
        language=language,  # Language specified in global variable
        task="transcribe",
        fp16=torch.cuda.is_available(),  # Use float16 if on GPU
        verbose=False
    )

    # Extract the segments with timestamps and text
    segments = result["segments"]

    # Prepare the SRT formatted transcription
    srt_transcription = ""
    for i, segment in enumerate(segments, 1):
        start_time = segment["start"]
        end_time = segment["end"]
        text = segment["text"]
        
        # Convert times from seconds to SRT format (HH:MM:SS,MS)
        start_time_srt = format_time_to_srt(start_time)
        end_time_srt = format_time_to_srt(end_time)
        
        # Append each segment in SRT format (ensuring correct spacing)
        print(text)
        srt_transcription += f"{i}\n{start_time_srt} --> {end_time_srt}\n{text}\n\n"

    # Remove the trailing newline after the last subtitle (to avoid extra blank line)
    srt_transcription = srt_transcription.strip()

    # Save the SRT transcription to a file
    transcription_path = f"{os.path.splitext(audio_path)[0]}_transcription.srt"
    with open(transcription_path, "w",encoding="utf-8") as f:
        f.write(srt_transcription)  # Write without extra newlines
    
    print(f"SRT transcription generated with Whisper for {audio_path}.")
    return transcription_path

In [18]:
def download_audio_and_transcription(query, num_videos=1, language="en"):
    # Initialize Whisper model and check for CUDA support
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")
    model = whisper.load_model("turbo", device=device)
    
    # Search for the video
    search = Search(query)
    videos = search.videos  # Use .videos instead of .results
    if not videos:
        print("No videos found for query.")
        return
    
    processed_count = 0
    i = 0  # Index for the video list
    
    # Iterate until we process the required number of videos
    while processed_count < num_videos:
        if i >= len(videos):  # If there are not enough videos, restart search or end
            print("Not enough videos found, stopping after processing.")
            break
        
        video = videos[i]
        yt = YouTube(video.watch_url)
        
        # Get the video length in minutes
        video_length = yt.length / 60  # Convert seconds to minutes
        
        # Check if the video length is within the desired range (5 to 15 minutes)
        if video_length < 5 or video_length > 15:
            print(f"Skipping video '{yt.title}' (Length: {int(video_length)} minutes)")
            i += 1  # Move to the next video
            continue

        video_url = video.watch_url
        #video_title = video.title.replace(" ", "_")
        # Remove all invalid characters in a file name using regular expressions
        video_title = re.sub(r"[^\w]", "_", video.title)

        # Download the audio
        audio_path = f"{video_title}.mp3"
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': f"{video_title}.%(ext)s",  # Adjust the template to avoid double extensions
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'quiet': True,
        }
        
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"Downloading audio for {video_title}...")
            ydl.download([video_url])
        
        # Check for YouTube transcription
        captions = video.captions
        if captions:
            for lang in captions:
                if lang.code == language:
                    transcript = lang.generate_srt_captions()
                    transcription_path = f"{video_title}_transcription.srt"
                    with open(transcription_path, "w",encoding="utf-8") as f:
                        f.write(transcript)
                    print(f"Downloaded YouTube transcription for {video_title}.")
                    break
            else:
                print(f"No English transcription available for {video_title}, generating with Whisper...")
                transcription_path = transcribe_with_whisper(audio_path, model, language)
        else:
            print(f"No transcription available for {video_title}, generating with Whisper...")
            transcription_path = transcribe_with_whisper(audio_path, model, language)
        
        print(f"Audio and transcription saved for {video_title}.")
        processed_count += 1
        i += 1

    print(f"Processed {num_videos} videos.")

In [19]:
# Usage
query = "دوس اونلاين"
num_videos = 5  # Modify this number to download and transcribe N videos
language = "ar"  # Language code for YouTube transcription (if available)
download_audio_and_transcription(query, num_videos, language)

Using device: cuda
Skipping video 'هذا الكتاب خسرني مليون جنيه' (Length: 22 minutes)
Skipping video 'ذاكر معي ٣ ساعات بتقنية البومودورو  - Study with me' (Length: 180 minutes)
Downloading audio for مستحيل_تنسى_الفرق_بين_Do_Does__Did___قواعد_اللغة_الإنجليزية...
No transcription available for مستحيل_تنسى_الفرق_بين_Do_Does__Did___قواعد_اللغة_الإنجليزية, generating with Whisper...


100%|██████████| 35484/35484 [02:44<00:00, 215.89frames/s]


 مرحبا بكم أيها الرائعون في قناتكم English State لتعلم اللغة الإنجليزية
 في درس اليوم سوف أشرح الفرق بين Do, Does and Did
 هناك الكثير من المتعلمين للغة الإنجليزية يتساءلون عن الفرق بين هذه الكلمات
 وخاصة المبتدئين في تعلم اللغة الإنجليزية
 إذن لكي تفهموا هذا الدرس بشكل واضح شاهدوه حتى النهاية لكي تستفيدوا منه بشكل جيد
 إذن شرط أن تشاهدوا هذا الدرس حتى النهاية وأعيدكم أنكم سوف تفهمون بشكل واضح
 ولن يبقى لكم أي أسئلة
 إذن بالنسبة لي Do, Does and Did هذه الكلمات جاءت من مصدر To Do وهو أن يفعل أو أن يصنع
 To Do
 إذن Do, Does and Did جاءت من To Do وهو مصدر
 إذن متى نستعمل Do و Does و Did
 نستعمل Do و Does في الحاضر البسيط
 في الزمن الحاضر
 إذن بالنسبة للزمن الحاضر فإننا نقول I Do
 أنا أفعل
 إذن دائما نتحدث هنا في Present Simple في الحاضر
 إذن حينما أريد أن أصرف To Do في الحاضر
 أقول I Do, You Do, He Does, She Does, It Does, We Do, You Do and They Do
 إذن هذا فيما يتعلق بالزمن الحاضر
 نصرف To Do إلى I Do, You Do, He Does, She Does, It Does, We Do, You Do and They Do
 لاحظوا معي جيدا بأننا ن

KeyboardInterrupt: 