<a href="https://colab.research.google.com/github/Eladji/Video_en_subtitles/blob/main/video_subtitles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. run this first part

In [None]:
!pip install git+https://github.com/openai/whisper.git yt-dlp setuptools-rust requests==2.31.0 deep-translator googletrans==4.0.0-rc1 pillow matplotlib
!apt install ffmpeg

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-hdyyklrf
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-hdyyklrf
  Resolved https://github.com/openai/whisper.git to commit ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


# 2. Don't forget to go to the RAM/Disk part
then edit the type of execution and select T4 GPU

# 3. run this

In [None]:
%%writefile subtitles.py
from IPython.display import display, HTML
import ipywidgets as widgets
import whisper
import yt_dlp
import re
from googletrans import Translator
import subprocess
import sys
import matplotlib.pyplot as plt
import requests
from PIL import Image
from io import BytesIO
import os

def download_video_as_mp3(url, output_path):
    """Download the audio of a video with yt-dlp and convert it to MP3.

    Args:
        url: Address of the video to download.
        output_path: yt-dlp output template (``outtmpl``), for example
            ``'downloads/%(title)s.%(ext)s'``.

    Returns:
        The path of the resulting ``.mp3`` file, or ``None`` when yt-dlp
        raises a ``DownloadError`` (the error is printed).
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'postprocessor_args': [
            '-ar', '44100'
        ],
        'prefer_ffmpeg': True,
        'keepvideo': False,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            source_path = ydl.prepare_filename(info)
    except yt_dlp.utils.DownloadError as e:
        print(f"Error downloading video: {e}")
        return None

    # Swap only the file extension. A plain str.replace() on the extension
    # text would also rewrite it wherever it appears in the title or the
    # directory (e.g. a title containing "webm"), yielding a wrong path.
    return os.path.splitext(source_path)[0] + '.mp3'

def get_video_title(video_url):
    """Look up the title of a video without downloading it.

    Args:
        video_url: Address of the video to inspect.

    Returns:
        The ``title`` field of the extracted metadata, ``'Unknown Title'``
        when that field is missing, or ``None`` when yt-dlp raises a
        ``DownloadError`` (the error is printed).
    """
    try:
        with yt_dlp.YoutubeDL({}) as downloader:
            metadata = downloader.extract_info(video_url, download=False)
            return metadata.get('title', 'Unknown Title')
    except yt_dlp.utils.DownloadError as e:
        print(f"Error downloading video: {e}")
    return None

def get_thumbnail(url):
    """Return the thumbnail URL of a video without downloading it.

    Args:
        url: Address of the video to inspect.

    Returns:
        The ``thumbnail`` field of the extracted metadata (``None`` when the
        field is absent), or ``None`` when the extraction fails (the error
        is printed).
    """
    ydl_opts = {
        'format': 'best',
        'quiet': True
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            thumbnail_url = info.get('thumbnail')
        return thumbnail_url
    # extract_info() reports extraction failures as DownloadError, the same
    # exception the other yt-dlp helpers in this file handle; ExtractorError
    # is kept so the previous handling is not lost.
    except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as e:
        print(f"Error extracting video info: {e}")
        return None

def transcribe_audio(audio_path):
    """Transcribe an audio file with Whisper and split it into phrases.

    The "medium" Whisper model is loaded on every call. The text of each
    returned segment is split after '.', '!' or '?' when followed by spaces;
    every resulting phrase carries the start and end time of the segment it
    came from.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A list of ``(phrase, start_time, end_time)`` tuples.
    """
    model = whisper.load_model("medium")
    result = model.transcribe(audio_path, word_timestamps=True)

    timed_phrases = []
    for seg in result['segments']:
        begin, finish = seg['start'], seg['end']
        sentences = re.split(r'(?<=[.!?]) +', seg['text'].strip())
        # All sentences of one segment share that segment's time window.
        timed_phrases.extend((sentence, begin, finish) for sentence in sentences)

    return timed_phrases

def translate_text(text, target_language='en'):
    """Translate a piece of text with googletrans.

    Args:
        text: Text to translate.
        target_language: Destination language code passed as ``dest``.

    Returns:
        The translated text, ``""`` when ``text`` is empty, or ``None`` when
        the translation call raises (the error is printed).
    """
    translator = Translator()
    if text:
        try:
            return translator.translate(text, dest=target_language).text
        except Exception as e:
            print(f"Error during translation: {e}")
            return None
    return ""

def write_srt_file(filename, phrases_with_timestamps, target_language='en'):
    """Translate phrases and write them to a SubRip (.srt) file.

    Each phrase is passed through ``translate_text``; phrases whose
    translation is empty or failed are skipped, and the remaining entries
    are numbered consecutively from 1.

    Args:
        filename: Path of the .srt file to create (overwritten if present).
        phrases_with_timestamps: Iterable of ``(phrase, start, end)`` tuples
            with times in seconds.
        target_language: Language code the phrases are translated into.
    """
    # Explicit UTF-8 so subtitle text with non-ASCII characters is written
    # the same way regardless of the platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as file:
        index = 1
        for phrase, start, end in phrases_with_timestamps:
            translated_phrase = translate_text(phrase, target_language)
            if not translated_phrase:
                continue
            start_time = format_timestamp(start)
            end_time = format_timestamp(end)
            file.write(f"{index}\n")
            file.write(f"{start_time} --> {end_time}\n")
            file.write(f"{translated_phrase}\n\n")
            index += 1

def show_image_from_url(url):
    """Fetch an image over HTTP and display it with matplotlib.

    Args:
        url: Address of the image to show.

    Request failures (connection errors, timeouts, HTTP error statuses) are
    printed and the function returns without displaying anything.
    """
    try:
        # A timeout keeps the notebook from hanging on an unresponsive host;
        # requests.get() waits indefinitely when none is given.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the image: {e}")
        return

    img = Image.open(BytesIO(response.content))

    # Display the image using matplotlib
    plt.imshow(img)
    plt.axis('off')
    plt.show()

def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        A string of the form ``HH:MM:SS,mmm``.
    """
    # Work in whole milliseconds from the start. Splitting the integer part
    # first and then reading the fraction from the already-truncated value
    # always yields ",000".
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

def download_video_with_subtitles(video_url, subtitle_file, output_filename):
    """Download a video and burn a subtitle file into it with ffmpeg.

    The video is saved as ``downloaded_video.<ext>`` in the working
    directory, rendered to ``output_filename`` through ffmpeg's
    ``subtitles`` filter, and the intermediate download is removed
    afterwards.

    Args:
        video_url: Address of the video to download.
        subtitle_file: Path of the subtitle file given to the filter.
        output_filename: Path of the video file to produce (overwritten if
            it already exists).

    Returns:
        None. Download and ffmpeg failures are printed, not raised.
    """
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
        'outtmpl': 'downloaded_video.%(ext)s',
        'merge_output_format': 'mp4',
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # extract_info(download=True) performs the download and returns
            # the metadata, so a separate ydl.download() call beforehand
            # would only repeat the request.
            info_dict = ydl.extract_info(video_url, download=True)
    except yt_dlp.utils.DownloadError as e:
        print(f"Error downloading video: {e}")
        return

    video_ext = info_dict.get('ext', 'mp4')
    downloaded_video = f"downloaded_video.{video_ext}"

    ffmpeg_command = [
        'ffmpeg',
        # Overwrite an existing output instead of stopping at ffmpeg's
        # interactive "overwrite?" question when the cell is re-run.
        '-y',
        '-i', downloaded_video,
        '-vf', f"subtitles={subtitle_file}",
        output_filename
    ]
    try:
        subprocess.run(ffmpeg_command, check=True)
        print(f"Subtitles successfully embedded into {output_filename}")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred during embedding subtitles: {e}")
    finally:
        # Remove the intermediate download whether or not ffmpeg succeeded.
        if os.path.exists(downloaded_video):
            os.remove(downloaded_video)

def main(url):
    """Validate the URL, show the video thumbnail and ask for confirmation.

    Args:
        url: Address of the video to subtitle.

    Exits through ``sys.exit()`` after printing a message when the URL
    cannot be fetched or no thumbnail is available.
    """
    try:
        # requests.get() raises for malformed or unreachable URLs rather
        # than returning a falsy response, so those cases are mapped onto
        # the same "not valid" branch. Response.ok is what the response's
        # truth value reports.
        reachable = requests.get(url, timeout=30).ok
    except requests.exceptions.RequestException:
        reachable = False

    if not reachable:
        print("URL is either not valid or empty")
        sys.exit()

    thumbnail_url = get_thumbnail(url)
    if not thumbnail_url:
        print("No thumbnail available.")
        sys.exit()

    show_image_from_url(thumbnail_url)
    # Proceed to display input prompt
    display_input_prompt(url)

def display_input_prompt(url):
    """Show a y/n confirmation widget and start processing on "y".

    Args:
        url: Address of the video, handed to ``start`` once confirmed.

    Submitting "y" calls ``start(url)``; "n" prints a hint and calls
    ``sys.exit()``; anything else prints a reminder of the accepted answers.
    The answer is compared after stripping whitespace and lower-casing.
    """
    answer_box = widgets.Text(
        description='Confirm:',
        placeholder='Enter y or n'
    )
    submit_button = widgets.Button(description="Submit")

    def handle_submit(_button):
        answer = answer_box.value.strip().lower()
        if answer == "y":
            start(url)
            return
        if answer == "n":
            print("Please check the URL")
            sys.exit()
        print("Please enter either yes (y) or no (n)")

    submit_button.on_click(handle_submit)

    # Question first, then the text box and its submit button.
    display(HTML("<h3>Is the thumbnail matching?</h3>"))
    display(answer_box, submit_button)

def start(video_url):
    """Run the whole pipeline for one video.

    Downloads the audio as MP3, transcribes it, writes the translated
    subtitles to ``output.srt``, prints each phrase with its translation,
    and finally renders ``<title>.mp4`` with the subtitles burned in.
    Nothing further happens when the audio download fails.

    Args:
        video_url: Address of the video to process.
    """
    output_path = 'downloads/%(title)s.%(ext)s'
    mp3_path = download_video_as_mp3(video_url, output_path)
    if not mp3_path:
        return

    phrases_with_timestamps = transcribe_audio(mp3_path)
    srt_filename = 'output.srt'
    write_srt_file(srt_filename, phrases_with_timestamps, target_language='en')

    # Loop names deliberately differ from this function's own name.
    for phrase, begin, finish in phrases_with_timestamps:
        translated = translate_text(phrase, 'en')
        if translated:
            print(f"Original: {phrase} [Start: {begin:.2f}s, End: {finish:.2f}s]")
            print(f"Translated: {translated}\n")

    # The title lookup returns None on failure; fall back to a fixed name
    # instead of producing "None.mp4".
    video_title = get_video_title(video_url) or 'video'
    # Path separators would point the output into a directory that does not
    # exist, and ffmpeg can read text before a ':' as a protocol name, so
    # these characters are replaced before the title is used as a file name.
    safe_title = re.sub(r'[\\/:]', '_', video_title)
    output_filename = f"{safe_title}.mp4"
    download_video_with_subtitles(video_url, srt_filename, output_filename)

if __name__ == "__main__":
    # `url` is not defined anywhere in this script: it is only present when
    # the file is executed inside a namespace that already holds it (as the
    # notebook cell does). When it is missing, take the video URL from the
    # command line instead of failing with a NameError.
    if "url" not in globals():
        if len(sys.argv) != 2:
            sys.exit("Usage: python subtitles.py <video_url>")
        url = sys.argv[1]

    main(url)


Overwriting subtitles.py


# 4. Put the link of the YouTube video here


In [None]:
url = "put the url here"
# execfile() only exists in Python 2. Import the module written by the
# %%writefile cell instead; reloading picks up the new contents when that
# cell has been run again in the same session.
import importlib
import subtitles
importlib.reload(subtitles)
subtitles.main(url)

# 5. Now go to the left sidebar: under the folder icon you should find your video with English subtitles. Have fun!