<a href="https://colab.research.google.com/github/Shivamani162/Generative-AI-2025/blob/main/VIRTUAL_NEWS_ANCHOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Step 1: Install dependencies and FFmpeg
!apt-get update
!apt-get install -y ffmpeg

# Step 2: Clone Wav2Lip repository (skip if already cloned)
import os
if not os.path.exists("/content/Wav2Lip"):
    !git clone https://github.com/Rudrabha/Wav2Lip.git
%cd Wav2Lip

# Step 3: Install compatible versions of requirements
# The original requirements.txt has outdated versions, so we install compatible ones
!pip install librosa==0.8.0  # Compatible with Python 3.11
!pip install numpy==1.23.5  # Compatible with Python 3.11
!pip install opencv-python==4.11.0.86  # Latest compatible version
!pip install gdown  # For reliable downloads

# Step 4: Create necessary directories
!mkdir -p face_detection/detection/sfd
!mkdir -p checkpoints

# Step 5: Download face detection model
print("Downloading face detection model...")
!wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "face_detection/detection/sfd/s3fd.pth"
if not os.path.exists("face_detection/detection/sfd/s3fd.pth"):
    print("Failed to download face detection model. Please download manually from:")
    print("https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth")
    print("Then upload it here and name it 's3fd-619a316812.pth'.")
    from google.colab import files
    uploaded = files.upload()
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        if "s3fd-619a316812.pth" in filename.lower():
            !mv "{filename}" "face_detection/detection/sfd/s3fd.pth"
        else:
            print(f"Uploaded file '{filename}' does not match 's3fd-619a316812.pth'. Please upload the correct file.")
            raise FileNotFoundError("Incorrect file uploaded.")

# Step 6: Attempt to download Wav2Lip checkpoint using wget with retries
checkpoint_path = "checkpoints/wav2lip_gan.pth"
import time

print("Downloading Wav2Lip checkpoint...")
for attempt in range(3):  # Try 3 times
    try:
        !wget "https://huggingface.co/Nekochu/Wav2Lip/resolve/main/wav2lip_gan.pth" -O {checkpoint_path}
        time.sleep(5)  # Wait for file to be written
        if os.path.exists(checkpoint_path):
            break
    except:
        print(f"Attempt {attempt + 1} failed. Retrying...")
        time.sleep(5)

# Step 7: If download fails, prompt for manual upload
if not os.path.exists(checkpoint_path):
    print("Automatic download failed. Please manually download 'wav2lip_gan.pth' from one of these links:")
    print("1. Hugging Face: https://huggingface.co/Nekochu/Wav2Lip/resolve/main/wav2lip_gan.pth")
    print("2. Google Drive: https://drive.google.com/uc?id=1Y7nNhfA-5W9kEyX6cWq30BZz7eA2W5h-")
    print("Steps: Open a link in a browser, download the file (should be ~433 MB), save it as 'wav2lip_gan.pth', then upload it here.")
    from google.colab import files
    uploaded = files.upload()
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        if "wav2lip_gan.pth" in filename.lower():  # Case-insensitive matching
            !mv "{filename}" {checkpoint_path}
            print(f"Moved {filename} to {checkpoint_path}")
        else:
            print(f"Uploaded file '{filename}' does not match 'wav2lip_gan.pth'. Please upload the correct file.")
            raise FileNotFoundError("Incorrect file uploaded.")

# Step 8: Verify the checkpoint file size (should be ~433 MB)
file_size = os.path.getsize(checkpoint_path) / (1024 * 1024)  # Size in MB
print(f"Checkpoint file size: {file_size:.2f} MB")
if file_size < 400:  # If less than 400 MB, it's likely incomplete
    raise FileNotFoundError("Checkpoint file is incomplete. Please rerun this cell or manually upload a valid file.")

print("Wav2Lip setup complete. Please restart the runtime (Runtime > Restart runtime) and then run the next cell.")

Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading

In [7]:
# Step 1: Install gtts to ensure availability
!pip install gtts

# Step 2: Import necessary libraries
from google.colab import files
from IPython.display import Audio, display
from PIL import Image
import os
import random
import glob
import subprocess
import time
from gtts import gTTS
import shutil

# Step 3: Ensure working directory is correct
%cd /content/Wav2Lip

# Step 4: Verify inference.py and checkpoint exist
if not os.path.exists("inference.py"):
    raise FileNotFoundError("inference.py not found in /content/Wav2Lip. Please ensure Step 1 was run correctly.")
if not os.path.exists("checkpoints/wav2lip_gan.pth"):
    raise FileNotFoundError("Checkpoint file 'checkpoints/wav2lip_gan.pth' not found. Please ensure Step 1 completed successfully.")

# Step 5: Ask for the anchor's image and stop if nothing was uploaded
print("Please upload the anchor's image (JPG or PNG, at least 256x256 with a clear face):")
uploaded = files.upload()
if not uploaded:
    print("No image uploaded. Please run the code again and upload an image.")
    raise FileNotFoundError("No image uploaded.")

# Only the first uploaded file is used. It is renamed to a fixed base name plus
# its lower-cased original extension, so later steps never see the original
# file name.
original_image_path = next(iter(uploaded.keys()))
image_extension = os.path.splitext(original_image_path)[1].lower()
# Spaces become underscores and parentheses are dropped from the new name.
cleanup_table = str.maketrans({" ": "_", "(": None, ")": None})
sanitized_image_path = ("uploaded_image" + image_extension).translate(cleanup_table)
# Rename the uploaded file on disk to the sanitized name.
shutil.move(original_image_path, sanitized_image_path)
image_path = sanitized_image_path
print(f"Sanitized image path: {image_path}")

# Step 6: Verify image file is valid
# Only the open/read is wrapped in try/except, so an unreadable file is reported
# as "Invalid image file." while a readable but too-small image is reported with
# its own error below, rather than being mislabelled as an open failure.
try:
    # The context manager closes the underlying file handle once the size is read.
    with Image.open(image_path) as img:
        image_size = img.size
except Exception as e:
    print(f"Error opening image: {e}")
    raise FileNotFoundError("Invalid image file.") from e

print(f"Image dimensions: {image_size} (width, height)")
if image_size[0] < 256 or image_size[1] < 256:
    print("Error: Image is smaller than 256x256. Please upload a larger image.")
    raise ValueError("Image too small.")

# Step 7: Prompt for custom script input
script = input("Please enter the script you want the anchor to say (max 100 words): ").strip()
# Reject an empty script here with a clear message instead of letting the
# text-to-speech step fail on it later.
if not script:
    print("No script entered. Please run the code again and enter a script.")
    raise ValueError("Script is empty.")
words = script.split()
if len(words) > 100:
    # Over-long scripts are allowed; the user is only warned.
    print("Warning: Script exceeds 100 words. This may cause issues with audio or video generation. Proceeding anyway...")
print(f"Entered script: {script}")

# Step 8: Prompt for language selection (optional)
language = input("Enter the language code for the audio (e.g., 'en' for English, 'fr' for French, 'es' for Spanish) [default: en]: ")
# Blank input falls back to English.
language = language.strip() or "en"
print(f"Using language: {language}")

# Step 9: Generate audio from the script using gTTS
# gTTS writes MP3 data whatever the file extension is, so the speech is saved
# as .mp3 first and then transcoded with FFmpeg (installed in the setup cell)
# into a genuine WAV file for the inference step.
tts = gTTS(text=script, lang=language)
mp3_path = "audio.mp3"
audio_path = "audio.wav"
tts.save(mp3_path)

conversion = subprocess.run(
    ["ffmpeg", "-y", "-loglevel", "error", "-i", mp3_path, audio_path],
    capture_output=True,
    text=True,
)
if conversion.returncode != 0:
    print("Audio conversion failed:")
    print(conversion.stderr)
    raise RuntimeError("Could not convert generated speech to WAV.")

# Step 10: Verify audio file was created and is playable
if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
    print("Audio generation failed.")
    raise FileNotFoundError("Audio file was not generated.")
print("Playing generated audio to verify:")
display(Audio(audio_path))

# Step 11: Run Wav2Lip inference with detailed error output and a timeout
print("Running Wav2Lip inference. Check the output below for errors:")
# The subprocess output is captured, so nothing is shown until it finishes.
print("Note: This process may take several minutes. Output is shown once inference finishes.")

output_path = "results/result_voice.mp4"
# Make sure the output directory exists before the script tries to write to it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Use a list of arguments instead of a single string to avoid shell parsing issues
command = [
    "python",
    "/content/Wav2Lip/inference.py",
    "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
    "--face", image_path,
    "--audio", audio_path,
    "--outfile", output_path,
    "--fps", "25"
]

# Print the command for debugging
print("Executing command:", " ".join(command))

start_time = time.time()
try:
    print("Starting inference...")
    # Blocks until the script exits or the 10-minute timeout expires (shell=False).
    process = subprocess.run(command, capture_output=True, text=True, timeout=600)
    execution_time = time.time() - start_time
    print(f"Wav2Lip inference completed in {execution_time:.2f} seconds. (100%)")
    print("Wav2Lip Output:")
    print(process.stdout)
    if process.stderr:
        # stderr also carries warnings and progress bars, not only errors.
        print("Wav2Lip Errors:")
        print(process.stderr)
    if process.returncode != 0:
        print(f"Error: Inference process failed with return code {process.returncode}.")
        raise RuntimeError("Inference script failed.")
except subprocess.TimeoutExpired as e:
    print("Inference timed out after 10 minutes.")
    # On timeout the captured streams may be None (nothing observed), bytes, or
    # str; normalise all three so reporting the timeout cannot itself fail.
    for label, stream in (("Partial Output:", e.stdout), ("Partial Errors:", e.stderr)):
        if isinstance(stream, bytes):
            stream = stream.decode(errors="replace")
        print(label, stream or "")
    raise RuntimeError("Inference took too long and was terminated.") from e

# Step 12: Offer the newest generated video for download
video_files = glob.glob("results/*.mp4")
if not video_files:
    # No MP4 in the results folder: the run produced no output.
    print("No video generated. Detailed errors should be above.")
    raise RuntimeError("Video generation failed.")

# Pick the file with the most recent change time among all results.
latest_video = max(video_files, key=os.path.getctime)
print("Generated video:", latest_video)
files.download(latest_video)

/content/Wav2Lip
Please upload the anchor's image (JPG or PNG, at least 256x256 with a clear face):


Saving Screenshot 2025-03-11 134017.png to Screenshot 2025-03-11 134017.png
Sanitized image path: uploaded_image.png
Image dimensions: (556, 548) (width, height)
Enter the language code for the audio (e.g., 'en' for English, 'fr' for French, 'es' for Spanish) [default: en]: en
Using language: en
Playing generated audio to verify:


Running Wav2Lip inference. Check the output below for errors:
Note: This process may take 1-5 minutes. Progress updates will be provided.
Executing command: python /content/Wav2Lip/inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face uploaded_image.png --audio audio.wav --outfile results/result_voice.mp4 --fps 25
Starting inference... (0%)
Inference in progress... (50%)
Wav2Lip inference completed in 259.28 seconds. (100%)
Wav2Lip Output:
Using cpu for inference.
Number of frames available for inference: 1
(80, 3927)
Length of mel chunks: 1224
Load checkpoint from: checkpoints/wav2lip_gan.pth
Model loaded

Wav2Lip Errors:

  model_weights = torch.load(path_to_detector)


  0%|          | 0/1 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:04<00:00,  4.63s/it][A
100%|██████████| 1/1 [00:04<00:00,  4.63s/it]
  checkpoint = torch.load(checkpoint_path,

 10%|█         | 1/10 [00:33<04:57, 33.08s/it]
 20%|██        | 2/10 [00:57<03:44, 28.10s/it]
 30%|███       | 3/10 [01:22<03:

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>