In [1]:
pip install yt-dlp openai-whisper transformers torch


Collecting yt-dlp
  Downloading yt_dlp-2025.1.26-py3-none-any.whl.metadata (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.0/172.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.12

In [7]:
import os
import yt_dlp
import whisper
from transformers import pipeline
from transformers import MarianMTModel, MarianTokenizer

def download_audio(youtube_url, output_path='downloads'):
    """Download audio from YouTube video."""
    os.makedirs(output_path, exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=True)
        audio_path = ydl.prepare_filename(info).replace('.webm', '.mp3').replace('.m4a', '.mp3')
    return audio_path

def transcribe_audio(audio_path, model='base', language='en'):
    """Transcribe audio using OpenAI Whisper."""
    whisper_model = whisper.load_model(model)
    result = whisper_model.transcribe(audio_path, language=language)
    return result['text']

def translate_text(text, src_lang='en', tgt_lang='fr'):
    """Translate text to target language using MarianMT, with validation."""
    available_models = [
        'en-fr', 'en-es', 'en-de', 'en-it', 'en-ru', 'en-zh', 'fr-en', 'es-en', 'de-en', 'it-en', 'ru-en', 'zh-en'
    ]

    if src_lang == tgt_lang:
        print("Source and target language are the same. Skipping translation.")
        return text  # Return original text if translation is unnecessary

    model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'

    if f"{src_lang}-{tgt_lang}" not in available_models:
        print(f"Error: Translation from {src_lang} to {tgt_lang} is not supported.")
        return text  # Return original text if translation is not possible

    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)

        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        translated_tokens = model.generate(**inputs)
        translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

        return translated_text
    except Exception as e:
        print(f"Translation failed: {e}")
        return text

def summarize_text(text, model_name='facebook/bart-large-cnn', chunk_size=1000):
    """Summarize transcribed text in chunks to avoid input length issues."""
    summarizer = pipeline('summarization', model=model_name)
    chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

    summaries = []
    for chunk in chunks:
        summary = summarizer(chunk, max_length=200, min_length=50, do_sample=False)
        summaries.append(summary[0]['summary_text'])

    return " ".join(summaries)  # Combine all summary chunks

def analyze_safety(summary):
    """Analyze if the summary is safe for children."""
    classifier = pipeline("text-classification", model="facebook/roberta-hate-speech-dynabench-r4-target")
    result = classifier(summary)
    label = result[0]['label']

    if label in ['not_hate', 'neutral']:
        return "Safe for children"
    else:
        return "Not safe for children"

if __name__ == "__main__":
    youtube_url = input("Enter YouTube video URL: ")
    target_language = input("Enter target language code (e.g., 'fr' for French, 'es' for Spanish, 'de' for German, etc.): ")

    audio_path = download_audio(youtube_url)
    print("Audio downloaded successfully.")

    transcript = transcribe_audio(audio_path)
    print("Transcription completed.")

    summary = summarize_text(transcript)
    print("Summary:")
    print(summary)

    translated_summary = translate_text(summary, src_lang='en', tgt_lang=target_language)
    print("Translated Summary:")
    print(translated_summary)

    safety_status = analyze_safety(summary)
    print(safety_status)


Enter YouTube video URL: https://www.youtube.com/shorts/ymf3C5NlFa0
Enter target language code (e.g., 'fr' for French, 'es' for Spanish, 'de' for German, etc.): en
[youtube] Extracting URL: https://www.youtube.com/shorts/ymf3C5NlFa0
[youtube] ymf3C5NlFa0: Downloading webpage
[youtube] ymf3C5NlFa0: Downloading tv client config
[youtube] ymf3C5NlFa0: Downloading player 19d2ae9d
[youtube] ymf3C5NlFa0: Downloading tv player API JSON
[youtube] ymf3C5NlFa0: Downloading ios player API JSON
[youtube] ymf3C5NlFa0: Downloading m3u8 information
[info] ymf3C5NlFa0: Downloading 1 format(s): 251
[download] Destination: downloads/SATRANGA SONG STATUS 🥺❤️ #animalmovie #arijitsinghsong #songstatus #foryou #aesthetic #fyp #shorts.webm
[download] 100% of  416.50KiB in 00:00:00 at 2.77MiB/s   
[ExtractAudio] Destination: downloads/SATRANGA SONG STATUS 🥺❤️ #animalmovie #arijitsinghsong #songstatus #foryou #aesthetic #fyp #shorts.mp3
Deleting original file downloads/SATRANGA SONG STATUS 🥺❤️ #animalmovie #ar

  checkpoint = torch.load(fp, map_location=device)


Transcription completed.


Device set to use cpu
Your max_length is set to 200, but your input_length is only 4. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=2)


Summary:
So So So So.  So So so. So so so so So. So Soso so soso sososo. Soso Sosososo sooosoo sosoosoo. Soososoo! Sosooo!
Source and target language are the same. Skipping translation.
Translated Summary:
So So So So.  So So so. So so so so So. So Soso so soso sososo. Soso Sosososo sooosoo sosoosoo. Soososoo! Sosooo!


Device set to use cpu


Not safe for children
