In [1]:
# Install dependencies
!pip install transformers
!pip install pyttsx3
!pip install ffmpeg-python
!pip install googletrans==4.0.0-rc1
!pip install pydub
!apt-get install ffmpeg
!pip install -U openai-whisper
!pip install gTTS
!apt-get update && apt-get install -y espeak-ng


Collecting pyttsx3
  Downloading pyttsx3-2.98-py3-none-any.whl.metadata (3.8 kB)
Downloading pyttsx3-2.98-py3-none-any.whl (34 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.98
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from h

In [4]:
import os
import whisper
# import ffmpeg # Not directly used, moviepy/pydub handle it
# import pyttsx3 # Not used, using gTTS instead
from googletrans import Translator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from gtts import gTTS
from IPython.display import Video, display
import subprocess # Import subprocess module
import glob # To help find the output video file

# --- Constants: every file-system location used by the notebook ---

# Source video to be dubbed.
INPUT_VIDEO = "/content/input_video.mp4"

# Intermediate audio artefacts, in the order they are produced.
REFERENCE_AUDIO = "/content/reference_voice.wav"
CLEANED_AUDIO = "/content/cleaned_audio.wav"
TRANSLATED_AUDIO = "/content/translated_audio.wav"

# SadTalker checkout (change SADTALKER_PATH if it is installed elsewhere)
# and the inference entry point inside it.
SADTALKER_PATH = "/content/SadTalker"
SADTALKER_SCRIPT = os.path.join(SADTALKER_PATH, "inference.py")

# Directory reserved for SadTalker results. The name of the generated video
# is not fixed here; it is expected to be looked up dynamically.
SADTALKER_RESULT_DIR = "/content/sadtalker_results"

# --- Function Definitions (one helper per pipeline stage) ---

def extract_audio_from_video(video_file, audio_output_file):
    """Write the audio track of a video to a separate audio file.

    Args:
        video_file: Path of the video to read.
        audio_output_file: Path the extracted audio is written to.

    Returns:
        ``audio_output_file`` on success, ``None`` on any failure (the error
        is printed, not raised).
    """
    print(f"Extracting audio from: {video_file}")
    video = None
    try:
        video = VideoFileClip(video_file)
        audio = video.audio
        if audio is None:
            # A clip without an audio track exposes `audio` as None; report it
            # explicitly instead of failing on the attribute access below.
            print(f"Error extracting audio: no audio track found in {video_file}")
            return None
        audio.write_audiofile(audio_output_file)
        print(f"Audio extracted successfully to: {audio_output_file}")
        return audio_output_file
    except Exception as e:
        print(f"Error extracting audio: {str(e)}")
        return None
    finally:
        # Release the clip on every path, including failures.
        if video is not None:
            video.close()

def clean_audio(audio_file, output_file):
    """Normalise a WAV file to mono, 16 kHz and save it as WAV.

    Args:
        audio_file: Path of the WAV file to read.
        output_file: Path the normalised WAV is written to.

    Returns:
        ``output_file`` on success, ``None`` if anything fails (the error is
        printed, not raised).
    """
    print(f"Cleaning audio file: {audio_file}")
    try:
        # Down-mix to one channel, then resample to 16 kHz.
        normalized = (
            AudioSegment.from_wav(audio_file)
            .set_channels(1)
            .set_frame_rate(16000)
        )
        normalized.export(output_file, format="wav")
        print(f"Audio cleaned successfully: {output_file}")
        return output_file
    except Exception as err:
        print(f"Error cleaning audio: {str(err)}")
        return None

def transcribe_audio_with_whisper(audio_file_path, language="hi"):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        language: Language code handed to Whisper's ``transcribe``.

    Returns:
        The ``'text'`` entry of the Whisper result, or ``None`` when the file
        does not exist or transcription fails (the error is printed).
    """
    if os.path.exists(audio_file_path):
        print(f"Loading Whisper model and transcribing: {audio_file_path}")
        try:
            # "base" keeps resource use low; a larger model such as "medium"
            # can be substituted for accuracy if resources allow.
            asr_model = whisper.load_model("base")
            # Dropping `language` would let Whisper auto-detect it instead.
            transcription = asr_model.transcribe(
                audio_file_path, fp16=False, language=language
            )
            print("Transcription complete.")
            return transcription['text']
        except Exception as err:
            print(f"Error during Whisper transcription: {str(err)}")
            return None

    print(f"Error: Audio file not found at '{audio_file_path}'")
    return None

def translate_text(text, target_language='en'):
    """Translate ``text`` with googletrans.

    Args:
        text: Text to translate.
        target_language: Destination language code passed as ``dest``.

    Returns:
        The translated string, or ``None`` if the translation call fails
        (the error is printed, not raised).
    """
    print(f"Translating text to: {target_language}")
    try:
        outcome = Translator().translate(text, dest=target_language)
        print("Translation complete.")
        return outcome.text
    except Exception as err:
        print(f"Error during translation: {str(err)}")
        return None


def generate_speech(text, output_audio_file, lang='en'):
    """Synthesize ``text`` to an audio file with gTTS.

    gTTS produces MP3 data. When ``output_audio_file`` has a ``.wav``
    extension the MP3 is transcoded with pydub so the file content matches
    its extension; any other extension receives the gTTS output unchanged.

    Args:
        text: Text to speak.
        output_audio_file: Destination path of the audio file.
        lang: Language code passed to gTTS (defaults to English).

    Returns:
        ``output_audio_file`` on success, ``None`` on failure (the error is
        printed, not raised).
    """
    print(f"Generating speech for text and saving to: {output_audio_file}")
    try:
        tts = gTTS(text=text, lang=lang, slow=False)
        if os.path.splitext(output_audio_file)[1].lower() == ".wav":
            # Save the MP3 next to the target, convert it, then drop it.
            mp3_path = f"{output_audio_file}.tmp.mp3"
            try:
                tts.save(mp3_path)
                AudioSegment.from_mp3(mp3_path).export(output_audio_file, format="wav")
            finally:
                if os.path.exists(mp3_path):
                    os.remove(mp3_path)
        else:
            tts.save(output_audio_file)
        print("Speech generated successfully.")
        return output_audio_file
    except Exception as e:
        print(f"Error generating speech: {str(e)}")
        return None

# Function to replace audio in video with new audio
def replace_audio_in_video(original_video_file, new_audio_file, output_video_file="output_video.mp4"):
    """Write a copy of a video whose audio track is replaced by another file.

    The output is encoded with the ``libx264`` video codec and the ``aac``
    audio codec.

    Args:
        original_video_file: Path of the video that supplies the picture.
        new_audio_file: Path of the audio file to use as the new track.
        output_video_file: Path of the video to write.

    Returns:
        ``output_video_file`` on success, ``None`` on failure (the error is
        printed, not raised).
    """
    video_clip = None
    new_audio_clip = None
    try:
        video_clip = VideoFileClip(original_video_file)
        new_audio_clip = AudioFileClip(new_audio_file)
        video_with_new_audio = video_clip.set_audio(new_audio_clip)
        video_with_new_audio.write_videofile(output_video_file, codec='libx264', audio_codec='aac')
        print(f"Video saved as {output_video_file}")
        return output_video_file
    except Exception as e:
        print(f"Error replacing audio in video: {str(e)}")
        return None
    finally:
        # Close both clips on every path so their readers are released.
        for clip in (new_audio_clip, video_clip):
            if clip is not None:
                clip.close()

# --- Main Processing Function (audio dubbing only, no lip-sync) ---
def process_video_for_audio_dubbing(video_file, target_language='en', source_language='hi'):
    """Dub a video: transcribe its speech, translate it, and swap the audio.

    Pipeline: extract audio -> clean -> transcribe -> translate -> synthesize
    speech -> write a copy of the video with the new audio -> display it.
    Each stage prints an error and aborts the run if it fails. No lip-sync
    is performed.

    Args:
        video_file: Path of the video to dub.
        target_language: Language code the transcript is translated into.
        source_language: Language code given to the transcription step.

    Returns:
        None. The dubbed video is written to ``output_dubbed_video.mp4``.
    """
    # Step 1: Extract and clean audio from video
    audio_file = extract_audio_from_video(video_file, REFERENCE_AUDIO)
    if not audio_file:
        print("Error extracting audio.")
        return

    if not clean_audio(audio_file, CLEANED_AUDIO):
        print("Error cleaning audio.")
        return

    # Step 2: Transcribe audio
    transcribed_text = transcribe_audio_with_whisper(CLEANED_AUDIO, language=source_language)
    if not transcribed_text:
        print("Error transcribing audio.")
        return
    print(f"Transcribed Text: {transcribed_text}")

    # Step 3: Translate text
    translated_text = translate_text(transcribed_text, target_language)
    if not translated_text:
        print("Error translating audio.")
        return
    print(f"Translated Text: {translated_text}")

    # Step 4: Generate speech from translated text.
    # Write to and read from the same absolute path so the step does not
    # depend on the current working directory.
    translated_audio = generate_speech(translated_text, TRANSLATED_AUDIO)
    if not translated_audio:
        print("Error generating speech.")
        return

    # Step 5: Replace audio in video (INSTEAD OF LIP-SYNC)
    output_video_file = "output_dubbed_video.mp4"
    replace_audio_in_video(video_file, translated_audio, output_video_file)
    if not os.path.exists(output_video_file):
        print("Error replacing audio in video.")
        return

    # Step 6: Display the final video with the IPython helpers imported at
    # the top of the notebook; embed=True inlines the file into the output.
    print(f"Displaying video: {output_video_file}")
    display(Video(output_video_file, embed=True))

# Run the audio-dubbing pipeline (no lip-sync) on the configured input video.
process_video_for_audio_dubbing(INPUT_VIDEO, target_language='en')

Extracting audio from: /content/input_video.mp4
MoviePy - Writing audio in /content/reference_voice.wav




MoviePy - Done.
Audio extracted successfully to: /content/reference_voice.wav
Cleaning audio file: /content/reference_voice.wav
Audio cleaned successfully: /content/cleaned_audio.wav
Loading Whisper model and transcribing: /content/cleaned_audio.wav
Transcription complete.
Transcribed Text:  get out get out first you have to go to the bar then you have come to the same problem then you have to use me to run then you have to go to the car yes, yes, yes, get out please get out no, I will go to the car if you don't go to the car, you will leave it then you will become a missus before that you will become a late Rahul
Translating text to: en
Translation complete.
Translated Text: get out get out first you have to go to the bar then you have come to the same problem then you have to use me to run then you have to go to the car yes, yes, yes, get out please get out no, I will go to the car if you don't go to the car, you will leave it then you will become a missus before that you will become



MoviePy - Done.
Moviepy - Writing video output_dubbed_video.mp4






Moviepy - Done !
Moviepy - video ready output_dubbed_video.mp4
Video saved as output_dubbed_video.mp4
Displaying video: output_dubbed_video.mp4
