<a href="https://colab.research.google.com/github/Pushkar0655g/Generative-AI/blob/main/MULTI_40enhanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install dependencies
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q yt-dlp
!pip install -q transformers
!pip install -q pydrive
!apt-get install -y ffmpeg

# Import necessary libraries
from google.colab import files, auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import whisper
import subprocess
import os
from transformers import MarianMTModel, MarianTokenizer

# Define video path
video_path = "/content/video.mp4"

# Function to download video from YouTube
def download_youtube_video(url, output_path):
    try:
        command = f"yt-dlp -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' -o {output_path} {url}"
        subprocess.run(command, shell=True, check=True)
        print(f"Downloaded video to {output_path}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Failed to download video from {url}: {e}")
        return False

# Function to authenticate Google Drive access
def authenticate_drive():
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleDrive(gauth).auth.credentials
    drive = GoogleDrive(gauth)
    return drive

# Function to download video from Google Drive
def download_drive_video(file_id, output_path):
    try:
        drive = authenticate_drive()
        file = drive.CreateFile({'id': file_id})
        file.GetContentFile(output_path)
        print(f"Downloaded video from Google Drive to {output_path}")
        return True
    except Exception as e:
        print(f"Error downloading from Google Drive: {str(e)}")
        return False

# Load Whisper model
model = whisper.load_model("base")

# Function to create an SRT file
def create_srt(segments, filename):
    with open(filename, "w", encoding="utf-8") as f:
        for i, segment in enumerate(segments, 1):
            start = f"{segment['start']:.3f}".replace(".", ",")
            end = f"{segment['end']:.3f}".replace(".", ",")
            text = segment["text"].strip()
            f.write(f"{i}\n00:00:{start} --> 00:00:{end}\n{text}\n\n")
    print(f"Subtitles saved to {filename}")

# Function to process video and generate subtitles
def process_video(video_path, language):
    try:
        print("Transcribing video to English...")
        result = model.transcribe(video_path, language="en")

        # If the language is English, no translation needed
        if language == "english":
            segments = result["segments"]

        elif language == "telugu":
            from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
            model_name = "facebook/nllb-200-distilled-600M"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

            # Correcting the forced language ID approach
            tgt_lang = "tel_Telu"  # Telugu language code for NLLB
            segments = []
            print("Translating to Telugu using NLLB-200 Distilled...")

            for segment in result["segments"]:
                inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                translated_tokens = translation_model.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang))
                translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                segment["text"] = translated_text
                segments.append(segment)

        else:
            if language == "hindi":
                model_name = "Helsinki-NLP/opus-mt-en-hi"
            elif language == "spanish":
                model_name = "Helsinki-NLP/opus-mt-en-es"
            elif language == "french":
                model_name = "Helsinki-NLP/opus-mt-en-fr"
            elif language == "german":
                model_name = "Helsinki-NLP/opus-mt-en-de"
            else:
                print(f"Language '{language}' not supported.")
                return None

            tokenizer = MarianTokenizer.from_pretrained(model_name)
            translation_model = MarianMTModel.from_pretrained(model_name)
            segments = []
            print(f"Translating to {language}...")
            for segment in result["segments"]:
                inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                translated = translation_model.generate(**inputs)
                translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                segment["text"] = translated_text
                segments.append(segment)

        # Generate SRT file
        srt_path = f"/content/subtitles_{language}.srt"
        create_srt(segments, srt_path)
        return srt_path

    except Exception as e:
        print(f"Error processing video: {str(e)}")
        return None

# Main function
def main():
    print("Choose the source of the video:")
    print("1. YouTube")
    print("2. Google Drive")
    choice = input("Enter your choice (1 or 2): ").strip()

    if choice == "1":
        youtube_link = input("Enter the YouTube video URL: ").strip()
        if not youtube_link:
            print("Error: YouTube URL cannot be empty.")
            return
        success = download_youtube_video(youtube_link, video_path)
        if not success:
            print("Failed to download YouTube video. Please check the URL and try again.")
            return
    elif choice == "2":
        drive_link = input("Enter the Google Drive file ID (from the shareable link): ").strip()
        if not drive_link:
            print("Error: Google Drive file ID cannot be empty.")
            return
        success = download_drive_video(drive_link, video_path)
        if not success:
            print("Failed to download Google Drive video.")
            return
    else:
        print("Invalid choice.")
        return

    print("\nChoose the language for subtitles:")
    print("1. English")
    print("2. Hindi")
    print("3. Spanish")
    print("4. French")
    print("5. German")
    print("6. Telugu")
    language_choice = input("Enter your choice (1, 2, 3, 4, 5, or 6): ").strip()

    language_map = {
        "1": "english",
        "2": "hindi",
        "3": "spanish",
        "4": "french",
        "5": "german",
        "6": "telugu"
    }

    if language_choice not in language_map:
        print("Invalid choice.")
        return

    language = language_map[language_choice]
    print(f"\nStarting video processing for {language} subtitles...")
    srt_path = process_video(video_path, language)

    if srt_path:
        print(f"\nSample of {language} subtitles:")
        with open(srt_path, "r", encoding="utf-8") as f:
            print(f.read(500))
        files.download(srt_path)
    else:
        print("Video failed to process. Please try again.")

# Run the main function
if __name__ == "__main__":
    main()


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Choose the source of the video:
1. YouTube
2. Google Drive
Enter your choice (1 or 2): 1
Enter the YouTube video URL: https://youtu.be/RKF8vynN-1c?si=81nEiwr1i1E6lRl4
Downloaded video to /content/video.mp4

Choose the language for subtitles:
1. English
2. Hindi
3. Spanish
4. French
5. German
6. Telugu
Enter your choice (1, 2, 3, 4, 5, or 6): 2

Starting video processing for hindi subtitles...
Transcribing video to English...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Translating to hindi...
Subtitles saved to /content/subtitles_hindi.srt

Sample of hindi subtitles:
1
00:00:0,000 --> 00:00:4,960
ऑडियो के नए संस्करण के बीच एक ड्रैग स्तर में क्या होगा

2
00:00:4,960 --> 00:00:12,400
ETMOMON GT, एस संस्करण, और ऑडियो के नए शीर्ष, RS6 के लाइन संस्करण के लिए,

3
00:00:12,400 --> 00:00:17,160
नए RS6 जीटी. हम पता लगाने के लिए जा रहे हैं क्योंकि मैं उन्हें खत्म करने जा रहा हूँ क्योंकि हम पता लगाने के लिए जा रहे हैं

4
00:00:17,160 --> 00:00:21,760
खड़े चौथाई मील. अब मैं भी ऑडियो के एक और संस्करण की दौड़ करने के लिए जा रहा हूँ

5
00:00:21,760 --> 00:00:27,600
आंतरिक 


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>