<a href="https://colab.research.google.com/github/Poorna-T/Gen-AI/blob/main/Video_translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install gradio transformers torch gtts moviepy openai-whisper --quiet

# Import necessary libraries
import os
import tempfile
from functools import lru_cache

import gradio as gr
import moviepy.editor as mp
import whisper
from gtts import gTTS
from transformers import pipeline

# Load the Whisper "base" speech-recognition model once at module level so
# that every call to process_video reuses it (loading may take a moment).
whisper_model = whisper.load_model("base")

# Supported target languages. Each entry maps the label shown in the UI to the
# translation checkpoint to load and the language code handed to gTTS.
# NOTE(review): confirm that every checkpoint name below actually exists on the
# Hugging Face Hub before relying on it.
_TARGET_LANGUAGES = (
    ("Tamil", "ta"),
    ("Telugu", "te"),
    ("Kannada", "kn"),
    ("Malayalam", "ml"),
)

translation_models = {
    f"English to {language}": {
        "model": f"Helsinki-NLP/opus-mt-en-{code}",
        "tts_lang": code,
    }
    for language, code in _TARGET_LANGUAGES
}

@lru_cache(maxsize=4)
def _get_translator(model_name):
    """Return the translation pipeline for *model_name*, building it at most once.

    Constructing a pipeline loads the model, so the instance is cached and
    reused by later requests for the same language.
    """
    return pipeline("translation", model=model_name)


def _translate_transcript(transcription_result, translator):
    """Translate a Whisper transcription result and return it as one string.

    The transcript is translated segment by segment rather than as a single
    input, so a long video does not exceed the translation model's input
    length limit. Falls back to the full ``"text"`` field when no segments are
    present. Returns ``""`` when there is nothing to translate.
    """
    segments = transcription_result.get("segments") or []
    pieces = [segment["text"].strip() for segment in segments]
    pieces = [piece for piece in pieces if piece]
    if not pieces:
        whole_text = transcription_result["text"].strip()
        pieces = [whole_text] if whole_text else []
    if not pieces:
        return ""
    return " ".join(item["translation_text"] for item in translator(pieces))


def _resolve_input_path(video_file, workdir):
    """Return a filesystem path for the uploaded video.

    Gradio's Video component passes a file path; a file-like object is still
    accepted and is copied into *workdir* first.
    """
    if hasattr(video_file, "read"):
        input_video_path = os.path.join(workdir, "input_video.mp4")
        with open(input_video_path, "wb") as f:
            f.write(video_file.read())
        return input_video_path
    return os.fspath(video_file)


def _dub_video(input_video_path, language_config, workdir, output_video_path):
    """Transcribe, translate and re-voice one video, writing *output_video_path*.

    Intermediate audio files are written into *workdir*. Both media clips are
    closed before returning, whether or not an error occurred.
    """
    video_clip = mp.VideoFileClip(input_video_path)
    try:
        if video_clip.audio is None:
            raise gr.Error("The uploaded video has no audio track to translate.")

        # Extract the original audio so Whisper can transcribe it.
        audio_path = os.path.join(workdir, "extracted_audio.wav")
        video_clip.audio.write_audiofile(audio_path, logger=None)
        transcription_result = whisper_model.transcribe(audio_path)

        translator = _get_translator(language_config["model"])
        translation = _translate_transcript(transcription_result, translator)
        if not translation.strip():
            # gTTS refuses empty text, so report the real cause instead.
            raise gr.Error("No speech was detected in the uploaded video.")

        # Generate new speech (TTS) for the translated text using gTTS.
        tts_audio_path = os.path.join(workdir, "translated_audio.mp3")
        gTTS(text=translation, lang=language_config["tts_lang"]).save(tts_audio_path)

        new_audio = mp.AudioFileClip(tts_audio_path)
        try:
            # Replace the original audio track with the synthesized speech.
            final_video = video_clip.set_audio(new_audio)
            final_video.write_videofile(
                output_video_path, codec="libx264", audio_codec="aac", logger=None
            )
        finally:
            new_audio.close()
    finally:
        video_clip.close()


def process_video(video_file, target_language):
    """Replace a video's English speech with synthesized translated speech.

    The audio track is transcribed with Whisper, translated with the model
    configured for *target_language*, spoken with gTTS, and muxed back onto
    the original video frames.

    Args:
        video_file: Path to the uploaded video (what ``gr.Video`` passes to
            the function), or a binary file-like object with ``read()``.
        target_language: A key of ``translation_models``.

    Returns:
        Path to the newly written MP4 file. Each call writes to a unique file
        in the system temporary directory.

    Raises:
        gr.Error: If no video or language was supplied, the video has no audio
            track, or no speech could be transcribed.
    """
    if video_file is None:
        raise gr.Error("Please upload a video first.")
    try:
        language_config = translation_models[target_language]
    except KeyError:
        raise gr.Error("Please select a target language.") from None

    # A unique output path per call keeps concurrent requests from
    # overwriting each other's result.
    fd, output_video_path = tempfile.mkstemp(prefix="translated_video_", suffix=".mp4")
    os.close(fd)

    try:
        # Intermediate files live in a private directory that is removed
        # automatically once processing finishes.
        with tempfile.TemporaryDirectory() as workdir:
            input_video_path = _resolve_input_path(video_file, workdir)
            _dub_video(input_video_path, language_config, workdir, output_video_path)
    except BaseException:
        # Do not leave an empty or partial output file behind on failure.
        try:
            os.remove(output_video_path)
        except OSError:
            pass
        raise

    return output_video_path

# Create a Gradio interface
_language_choices = list(translation_models)

iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Input Video (English)"),
        gr.Dropdown(
            choices=_language_choices,
            # Preselect the first language so process_video is never called
            # with None when the user submits without touching the dropdown.
            value=_language_choices[0],
            label="Select Target Language",
        ),
    ],
    outputs=gr.Video(label="Translated Video"),
    title="Video Translator with TTS",
    description=(
        "Upload a video with English speech. The system will transcribe, translate to a South Indian language, "
        "generate new speech for the translation, and output a video with the new audio track."
    ),
)

# Launch the Gradio app (share=True requests a public share link)
iface.launch(share=True)
# Possible next steps: optimize runtime, improve the UI, or add features such as subtitle overlay.