<a href="https://colab.research.google.com/github/Pushkar0655g/Generative-AI/blob/main/MULTI_40enhanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q yt-dlp
!pip install -q transformers
!apt-get install -y ffmpeg -qq

# Import necessary libraries
from google.colab import files
import whisper
import subprocess
import os
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM

# Default path for the downloaded YouTube video; main() passes it to
# download_youtube_video() as the output location.
video_path = "/content/video.mp4"

# Function to download video from YouTube
def download_youtube_video(url, output_path):
    """Download a video with yt-dlp, optionally using an uploaded cookies file.

    The Colab upload widget is shown first so the user can supply an optional
    ``cookies.txt`` for videos that require sign-in. If a file with exactly
    that name is uploaded it is written to ``/content/cookies.txt`` and passed
    to yt-dlp via ``--cookies``.

    Args:
        url: Video URL handed to yt-dlp.
        output_path: Value passed to yt-dlp's ``-o`` option.

    Returns:
        True if yt-dlp exited successfully, False if it exited with a
        non-zero status (diagnostics are printed in that case).
    """
    cookies_path = "/content/cookies.txt"
    use_cookies = False

    print("If the video requires sign-in (e.g., age-restricted), upload a cookies file (optional).")
    print("Guide: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp")
    uploaded = files.upload()
    if "cookies.txt" in uploaded:
        with open(cookies_path, "wb") as f:
            f.write(uploaded["cookies.txt"])
        use_cookies = True
        print("Cookies uploaded successfully.")

    command = [
        "yt-dlp",
        "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "-o", output_path,
    ]
    if use_cookies:
        command.extend(["--cookies", cookies_path])
    command.append(url)

    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        stderr = e.stderr or ""
        print(f"Failed to download video from {url}:")
        print(f"Error output: {stderr}")
        if "Sign in to confirm you’re not a bot" in stderr or "age-restricted" in stderr:
            print("\n⚠️ This video requires authentication (e.g., sign-in or age verification).")
            if not use_cookies:
                print("You didn’t upload cookies. Options:")
                print("1. Upload a cookies.txt file (restart and try again).")
                print("   Export cookies: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies")
                print("2. Try a different public video.")
                print("3. Download locally with yt-dlp and upload to Colab.")
                print("   Command: yt-dlp <URL> -o video.mp4")
            else:
                print("Cookies provided but still failed. Ensure they’re valid and from a signed-in browser.")
        else:
            print("Unexpected error. Check the URL or try again.")
        return False
    else:
        print(f"Downloaded video to {output_path}")
        return True
    finally:
        # The cookies file carries session credentials, so remove it on every
        # exit path, including unexpected errors such as yt-dlp being missing.
        if use_cookies and os.path.exists(cookies_path):
            os.remove(cookies_path)

# Load the Whisper "base" model once at module level; process_video()
# transcribes through this shared `model` object.
model = whisper.load_model("base")

# Function to create an SRT file
def create_srt(segments, filename):
    """Write transcript segments to ``filename`` in SubRip (SRT) format.

    Args:
        segments: Iterable of mappings with ``"start"`` and ``"end"`` times in
            seconds and a ``"text"`` string.
        filename: Destination path; the file is overwritten and UTF-8 encoded.
    """
    with open(filename, "w", encoding="utf-8") as f:
        for i, segment in enumerate(segments, 1):
            start = _format_srt_timestamp(segment["start"])
            end = _format_srt_timestamp(segment["end"])
            text = segment["text"].strip()
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
    print(f"Subtitles saved to {filename}")


def _format_srt_timestamp(seconds):
    """Return ``seconds`` as an SRT timecode of the form ``HH:MM:SS,mmm``."""
    # Work in whole milliseconds so rounding cannot produce e.g. "59,1000".
    remaining_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    secs, millis = divmod(remaining_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# Function to process video and generate subtitles
def process_video(video_path, language):
    """Transcribe a video with Whisper and write subtitles in ``language``.

    The audio is transcribed with ``language="en"``. For ``"english"`` the
    transcript is written as-is; for ``"telugu"`` each segment is translated
    with NLLB-200; for ``"hindi"``, ``"spanish"``, ``"french"`` and
    ``"german"`` each segment is translated with the matching MarianMT model.
    Translation overwrites the ``"text"`` of the transcribed segments in place.

    Args:
        video_path: Path to the media file to transcribe.
        language: Lower-case target language name (see above).

    Returns:
        Path of the generated ``/content/subtitles_<language>.srt`` file, or
        None if the file is missing, the language is unsupported, or any
        step raises.
    """
    if not os.path.exists(video_path):
        print(f"Error: Video file {video_path} does not exist.")
        return None

    # Reject unknown languages before the expensive transcription step.
    if language not in _SUPPORTED_LANGUAGES:
        print(f"Language '{language}' not supported.")
        return None

    try:
        print("Transcribing video to English...")
        result = model.transcribe(video_path, language="en")
        segments = result["segments"]

        if language == "telugu":
            segments = _translate_segments_nllb(segments, "tel_Telu")
        elif language != "english":
            segments = _translate_segments_marian(segments, language)

        srt_path = f"/content/subtitles_{language}.srt"
        create_srt(segments, srt_path)
        return srt_path

    except Exception as e:
        # Top-level boundary for the notebook: report the failure and signal
        # it to the caller through the None return value.
        print(f"Error processing video: {str(e)}")
        return None


# MarianMT checkpoints for English -> target translation, keyed by language.
_MARIAN_MODELS = {
    "hindi": "Helsinki-NLP/opus-mt-en-hi",
    "spanish": "Helsinki-NLP/opus-mt-en-es",
    "french": "Helsinki-NLP/opus-mt-en-fr",
    "german": "Helsinki-NLP/opus-mt-en-de",
}

# Every language name process_video() accepts.
_SUPPORTED_LANGUAGES = ("english", "telugu", *_MARIAN_MODELS)


def _translate_segments_nllb(source_segments, tgt_lang):
    """Translate each segment's text with NLLB-200 into ``tgt_lang``."""
    model_name = "facebook/nllb-200-distilled-600M"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # The target-language token id is the same for every segment.
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)

    print("Translating to Telugu using NLLB-200 Distilled...")
    translated_segments = []
    for segment in source_segments:
        inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
        translated_tokens = translation_model.generate(
            **inputs, forced_bos_token_id=forced_bos_token_id
        )
        segment["text"] = tokenizer.batch_decode(
            translated_tokens, skip_special_tokens=True
        )[0]
        translated_segments.append(segment)
    return translated_segments


def _translate_segments_marian(source_segments, language):
    """Translate each segment's text with the MarianMT model for ``language``."""
    model_name = _MARIAN_MODELS[language]
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translation_model = MarianMTModel.from_pretrained(model_name)

    print(f"Translating to {language}...")
    translated_segments = []
    for segment in source_segments:
        inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
        translated = translation_model.generate(**inputs)
        segment["text"] = tokenizer.decode(translated[0], skip_special_tokens=True)
        translated_segments.append(segment)
    return translated_segments

# Main function
def main():
    """Run the interactive subtitle workflow.

    Prompts for a video source (YouTube URL or an existing file path) and a
    subtitle language, runs process_video() on the chosen file, prints the
    first 500 characters of the resulting SRT file and triggers a Colab
    download of it. Returns early, after printing a message, on any invalid
    or empty input or on a failed download.
    """
    print("Choose the source of the video:")
    print("1. YouTube")
    print("2. Local Drive Path (e.g., /content/video.mp4)")
    choice = input("Enter your choice (1 or 2): ").strip()

    if choice == "1":
        youtube_link = input("Enter the YouTube video URL: ").strip()
        if not youtube_link:
            print("Error: YouTube URL cannot be empty.")
            return
        # Clear a stale download only when we are about to download again.
        # Doing this unconditionally would delete a user-supplied file at the
        # default path (the very example shown for option 2).
        if os.path.exists(video_path):
            os.remove(video_path)
        success = download_youtube_video(youtube_link, video_path)
        if not success:
            print("Failed to download YouTube video. Review the error above for next steps.")
            return
        final_video_path = video_path  # Use the downloaded file
    elif choice == "2":
        drive_path = input("Enter the local drive path (e.g., /content/M4 Macbook Air Review_ Too Easy!.mkv): ").strip()
        if not drive_path:
            print("Error: Drive path cannot be empty.")
            return
        if not os.path.exists(drive_path):
            print(f"Error: File not found at {drive_path}. Please upload the file to Colab first.")
            print("To upload: Use the Files tab on the left, or run 'from google.colab import files; files.upload()'.")
            return
        final_video_path = drive_path  # Use the provided path directly
    else:
        print("Invalid choice.")
        return

    language_map = {
        "1": "english",
        "2": "hindi",
        "3": "spanish",
        "4": "french",
        "5": "german",
        "6": "telugu",
    }

    print("\nChoose the language for subtitles:")
    # Build the menu from the map so the two cannot drift apart.
    for key, name in language_map.items():
        print(f"{key}. {name.capitalize()}")
    language_choice = input("Enter your choice (1, 2, 3, 4, 5, or 6): ").strip()

    if language_choice not in language_map:
        print("Invalid choice.")
        return

    language = language_map[language_choice]
    print(f"\nStarting video processing for {language} subtitles...")
    srt_path = process_video(final_video_path, language)

    if srt_path:
        print(f"\nSample of {language} subtitles:")
        with open(srt_path, "r", encoding="utf-8") as f:
            print(f.read(500))
        files.download(srt_path)
    else:
        print("Video failed to process. Please try again.")

# Start the interactive workflow only when this code is executed directly
# (module name is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()