In [17]:
!pip install transformers

!pip install pyttsx3
!pip install ffmpeg-python
!pip install googletrans==4.0.0-rc1
!pip install pydub
!apt-get install ffmpeg
!apt-get update && apt-get install -y ffmpeg
!pip install -U openai-whisper
!apt-get update && apt-get install -y espeak-ng


import os
import whisper
import ffmpeg

import pyttsx3
from googletrans import Translator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from IPython.display import Video, display

# Function to extract audio from video using moviepy (simplified)
def extract_audio_from_video(video_file):
    try:
        video = VideoFileClip(video_file)
        audio = video.audio
        audio_file = "/content/reference_voice.wav"
        audio.write_audiofile(audio_file)
        return audio_file
    except Exception as e:
        print("Error extracting audio:", str(e))
        return None

# Function to clean up audio by reducing noise and ensuring good quality
def clean_audio(audio_file, output_file):
    try:
        # Load audio file
        audio = AudioSegment.from_wav(audio_file)

        # Optionally: Change to mono, reduce noise, or lower the volume
        audio = audio.set_channels(1)  # Make it mono (1 channel)
        audio = audio.set_frame_rate(16000)  # Resample to 16kHz

        # Save the cleaned audio file
        audio.export(output_file, format="wav")
        return output_file
    except Exception as e:
        print(f"Error cleaning audio: {str(e)}")
        return None

def transcribe_audio_with_whisper(audio_file_path):
    """
    Transcribes the given audio file using OpenAI's Whisper.

    Args:
        audio_file_path (str): Path to the audio file (e.g., WAV, MP3, M4A).

    Returns:
        str: The transcribed text, or None if transcription fails or file doesn't exist.
    """
    # Check if the audio file actually exists
    if not os.path.exists(audio_file_path):
        print(f"Error: Audio file not found at '{audio_file_path}'")
        return None

    print(f"Loading Whisper model and transcribing: {audio_file_path}")
    try:
        # Load the Whisper model.
        # Common options: "tiny", "base", "small", "medium", "large"
        # Using ".en" versions (e.g., "base.en") can be slightly faster/more accurate
        # if you know the audio is definitely English.
        # "base" is a good balance of speed and accuracy to start.
        model = whisper.load_model("base") # Choose model size based on needs & resources

        # Perform the transcription
        # fp16=False is recommended if you are running on CPU or encounter precision issues.
        # If you have a capable GPU and CUDA installed, you might omit fp16=False or set it to True.
        result = model.transcribe(audio_file_path, fp16=False)

        # Extract the transcribed text
        transcribed_text = result["text"]
        print("Whisper transcription successful.")
        return transcribed_text

    except Exception as e:
        print(f"Error during Whisper transcription: {str(e)}")
        # Uncomment the following line for more detailed error information if needed
        # import traceback; traceback.print_exc()
        return None


my_audio_file = "/content/reference_voice.wav" # <-- *** REPLACE THIS WITH YOUR ACTUAL AUDIO FILE PATH ***



# Call the transcription function only if the file path is set
if my_audio_file and os.path.exists(my_audio_file): # Check again before calling
    print(f"\n--- Transcribing {os.path.basename(my_audio_file)} ---")
    transcription = transcribe_audio_with_whisper(my_audio_file)

    # Print the result
    if transcription:
        print("\n--- Transcription Result ---")
        print(transcription)
    else:
        print("\n--- Transcription failed or returned no text. ---")
elif my_audio_file:
     print(f"\n--- Cannot transcribe: File not found at '{my_audio_file}' ---")
else:
     print("\n--- Cannot transcribe: Audio file path was not set. ---")


# Function to translate text

def translate_text(text_to_translate, target_language='en'):
    translator = Translator()
    translated = translator.translate(text_to_translate, dest=target_language)
    return translated.text

# Function to generate voice from text
def generate_voice(text, output_audio_file="output_voice.wav"):
    engine = pyttsx3.init()
    engine.save_to_file(text, output_audio_file)
    engine.runAndWait()

# Function to replace the audio in the video with a new audio file
def replace_audio_in_video(original_video_file, new_audio_file, output_video_file="output_video.mp4"):
    try:
        video_clip = VideoFileClip(original_video_file)
        new_audio_clip = AudioFileClip(new_audio_file)
        video_with_new_audio = video_clip.set_audio(new_audio_clip)
        video_with_new_audio.write_videofile(output_video_file, codec='libx264', audio_codec='aac')
        print(f"Video saved as {output_video_file}")
    except Exception as e:
        print("Error replacing audio in video:", str(e))


def lip_sync_video(original_video_file, audio_file, output_video_file="/content/output_video_with_lipsync.mp4"):
    try:
        # Run Wav2Lip on the video and audio to perform lip syncing
        print(f"Running lip-syncing for {original_video_file} and {audio_file}...")

        # Assuming you have already installed Wav2Lip and set up its model path correctly
        os.system(f"python /content/Wav2Lip/inference.py --checkpoint_path /content/Wav2Lip/checkpoints/wav2lip.pth --face {original_video_file} --audio {audio_file} --outfile {output_video_file}")

        # Check if the output video is generated
        if os.path.exists(output_video_file):
            print(f"Lip-syncing completed. Video saved as {output_video_file}")

            # Call the display_video function to display the output video
            display_video(output_video_file)
        else:
            print(f"Error: {output_video_file} not generated.")

    except Exception as e:
        print("Error with lip-syncing:", str(e))

def display_video(output_video_file):
    try:
        # Display the video in the Colab notebook
        display(Video(output_video_file, embed=True))
    except Exception as e:
        print(f"Error displaying video: {str(e)}")


# Main function to process the video and dub it with translated audio
def process_video_for_dubbing(video_file, target_language='en'):
    # Extract the audio from the video
    audio_file = extract_audio_from_video(video_file)
    if audio_file is None:
        print("Failed to extract audio.")
        return

    # Clean the audio before transcription
    cleaned_audio_file = "/content/cleaned_audio.wav"  # Path to save cleaned audio
    cleaned_audio = clean_audio(audio_file, cleaned_audio_file)
    if cleaned_audio is None:
        print("Failed to clean audio.")
        return

    # Transcribe the cleaned audio to text
    transcribed_text = transcribe_audio_with_whisper(cleaned_audio)
    if transcribed_text is None:
        print("Failed to transcribe audio.")
        return

    print("Transcribed Text: ", transcribed_text)

    # Translate the transcribed text
    translated_text = translate_text(transcribed_text, target_language)
    print("Translated Text: ", translated_text)

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    for voice in voices:
      print(f"Voice: {voice.name}, ID: {voice.id}")


    # Generate voice from the translated text
    generate_voice(translated_text, "translated_audio.wav")
    translated_audio='/content/translated_audio.wav'

    # Perform lip-syncing using Wav2Lip
    lip_sync_video(video_file, translated_audio)




# Run the process for dubbing
process_video_for_dubbing("/content/input_video.mp4", target_language='en')  # Change to your target language


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 44 not upgraded.
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured



MoviePy - Done.
Loading Whisper model and transcribing: /content/cleaned_audio.wav
Whisper transcription successful.
Transcribed Text:   get out get out first you have to go to the bar then you have come to the same problem then you have to use me to run then you have to go to the car yes, yes, yes, get out please get out no, I will go to the car if you don't go to the car, you will leave it then you will become a missus before that you will become a late Rahul
Translated Text:  get out get out first you have to go to the bar then you have come to the same problem then you have to use me to run then you have to go to the car yes, yes, yes, get out please get out no, I will go to the car if you don't go to the car, you will leave it then you will become a missus before that you will become a late Rahul
Voice: Afrikaans, ID: Afrikaans
Voice: Amharic, ID: Amharic
Voice: Aragonese, ID: Aragonese
Voice: Arabic, ID: Arabic
Voice: Assamese, ID: Assamese
Voice: Azerbaijani, ID: Azerbaijani
Voi

In [11]:
!pip install moviepy
!pip install pyttsx3
!pip install transformers
!pip install googletrans==4.0.0-rc1
!pip install pydub
!pip install ffmpeg-python
!apt-get install ffmpeg
!pip install --upgrade pip

!git clone https://github.com/Rudrabha/Wav2Lip.git
%cd Wav2Lip
!pip install -r requirements.txt


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 44 not upgraded.
Cloning into 'Wav2Lip'...
remote: Enumerating objects: 403, done.[K
remote: Total 403 (delta 0), reused 0 (delta 0), pack-reused 403 (from 1)[K
Receiving objects: 100% (403/403), 540.33 KiB | 4.16 MiB/s, done.
Resolving deltas: 100% (225/225), done.
/content/Wav2Lip/Wav2Lip/Wav2Lip
Collecting librosa==0.7.0 (from -r requirements.txt (line 1))
  Using cached librosa-0.7.0.tar.gz (1.6 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting numpy==1.17.1 (from -r requirements.txt (line 2))
  Using cached numpy-1.17.1.zip (6.5 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
[31mERROR: Ignored the following yanked versions: 3.4.11.39, 3.4.17.61, 4.4.0.42, 4.4.0.44, 4.5.4.58, 4.5.5.62, 4.7.0.68[0m[31m
[0m[31mERROR: Ignored the following versi