<a href="https://colab.research.google.com/github/Pushkar0655g/Generative-AI/blob/main/37.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install dependencies and FFmpeg
!apt-get update
!apt-get install -y ffmpeg

# Step 2: Clone Wav2Lip repository (skip if already cloned)
import os
if not os.path.exists("/content/Wav2Lip"):
    !git clone https://github.com/Rudrabha/Wav2Lip.git
%cd Wav2Lip

# Step 3: Install compatible versions of requirements
# The original requirements.txt has outdated versions, so we install compatible ones
!pip install librosa==0.8.0  # Compatible with Python 3.11
!pip install numpy==1.23.5  # Compatible with Python 3.11
!pip install opencv-python==4.11.0.86  # Latest compatible version
!pip install gdown  # For reliable downloads

# Step 4: Create necessary directories
!mkdir -p face_detection/detection/sfd
!mkdir -p checkpoints

# Step 5: Download face detection model
print("Downloading face detection model...")
!wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "face_detection/detection/sfd/s3fd.pth"
if not os.path.exists("face_detection/detection/sfd/s3fd.pth"):
    print("Failed to download face detection model. Please download manually from:")
    print("https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth")
    print("Then upload it here and name it 's3fd-619a316812.pth'.")
    from google.colab import files
    uploaded = files.upload()
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        if "s3fd-619a316812.pth" in filename.lower():
            !mv "{filename}" "face_detection/detection/sfd/s3fd.pth"
        else:
            print(f"Uploaded file '{filename}' does not match 's3fd-619a316812.pth'. Please upload the correct file.")
            raise FileNotFoundError("Incorrect file uploaded.")

# Step 6: Attempt to download Wav2Lip checkpoint using wget with retries
checkpoint_path = "checkpoints/wav2lip_gan.pth"
import time

print("Downloading Wav2Lip checkpoint...")
for attempt in range(3):  # Try 3 times
    try:
        !wget "https://huggingface.co/Nekochu/Wav2Lip/resolve/main/wav2lip_gan.pth" -O {checkpoint_path}
        time.sleep(5)  # Wait for file to be written
        if os.path.exists(checkpoint_path):
            break
    except:
        print(f"Attempt {attempt + 1} failed. Retrying...")
        time.sleep(5)

# Step 7: If download fails, prompt for manual upload
# The checkpoint may be missing, or present but empty/truncated (`wget -O`
# creates the output file before any data arrives). Both cases count as a
# failed download, using the same 400 MB floor as the verification step.
min_checkpoint_bytes = 400 * 1024 * 1024
checkpoint_ok = (
    os.path.exists(checkpoint_path)
    and os.path.getsize(checkpoint_path) >= min_checkpoint_bytes
)
if not checkpoint_ok:
    print("Automatic download failed. Please manually download 'wav2lip_gan.pth' from one of these links:")
    print("1. Hugging Face: https://huggingface.co/Nekochu/Wav2Lip/resolve/main/wav2lip_gan.pth")
    print("2. Google Drive: https://drive.google.com/uc?id=1Y7nNhfA-5W9kEyX6cWq30BZz7eA2W5h-")
    print("Steps: Open a link in a browser, download the file (should be ~433 MB), save it as 'wav2lip_gan.pth', then upload it here.")
    from google.colab import files
    uploaded = files.upload()
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        if "wav2lip_gan.pth" in filename.lower():  # Case-insensitive matching
            # os.replace overwrites any stale partial file and avoids shell
            # quoting problems with unusual file names.
            os.replace(filename, checkpoint_path)
            print(f"Moved {filename} to {checkpoint_path}")
        else:
            print(f"Uploaded file '{filename}' does not match 'wav2lip_gan.pth'. Please upload the correct file.")
            raise FileNotFoundError("Incorrect file uploaded.")

# Step 8: Sanity-check the checkpoint by size (a complete file is ~433 MB)
bytes_per_mb = 1024 * 1024
file_size = os.path.getsize(checkpoint_path) / bytes_per_mb  # megabytes
print(f"Checkpoint file size: {file_size:.2f} MB")
checkpoint_complete = file_size >= 400  # anything smaller is treated as truncated
if not checkpoint_complete:
    raise FileNotFoundError("Checkpoint file is incomplete. Please rerun this cell or manually upload a valid file.")

print("Wav2Lip setup complete. Please restart the runtime (Runtime > Restart runtime) and then run the next cell.")

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Connecting to security.ubuntu.com (185.125.190.82)] [Connecting to cloud.r-project.org (108.157.                                                                                                    Hit:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
0% [Waiting for headers] [Connecting to security.ubuntu.com (185.125.190.82)] [Connected to cloud.r-                                                                                                    Hit:3 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (108.157.173.97)] [Connected to r2u.stat.                                                                                                    Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:6 https://r2u.stat.illinois.edu/ubunt

In [2]:
# Step 1: Install gtts to ensure availability
# The setup cell does not install gtts, and it is imported further down in this
# cell, so it has to be installed before the import block runs.
!pip install gtts

# Step 2: Import necessary libraries
from google.colab import files
from IPython.display import Audio, display
from PIL import Image
import os
import random
import glob
import subprocess
import time
from gtts import gTTS

# Step 3: Ensure working directory is correct
%cd /content/Wav2Lip

# Step 4: Verify inference.py and checkpoint exist
if not os.path.exists("inference.py"):
    raise FileNotFoundError("inference.py not found in /content/Wav2Lip. Please ensure Step 1 was run correctly.")
if not os.path.exists("checkpoints/wav2lip_gan.pth"):
    raise FileNotFoundError("Checkpoint file 'checkpoints/wav2lip_gan.pth' not found. Please ensure Step 1 completed successfully.")

# Step 5: Upload the anchor's image
print("Please upload the anchor's image (JPG or PNG, at least 256x256 with a clear face):")
uploaded = files.upload()
if not uploaded:
    print("No image uploaded. Please run the code again and upload an image.")
    raise FileNotFoundError("No image uploaded.")
# Only the first uploaded file is used as the face image.
image_path = list(uploaded.keys())[0]
print(f"Uploaded image path: {image_path}")

# Step 6: Verify image file is valid
# Opening and size validation are kept separate: only a failure to open/parse
# the file is reported as "Invalid image file.". Previously the size check sat
# inside the same try block, so its ValueError was caught by the broad except
# and misreported as an invalid file.
try:
    # The context manager closes the file handle once the size has been read.
    with Image.open(image_path) as img:
        image_size = img.size
except Exception as e:
    print(f"Error opening image: {e}")
    raise FileNotFoundError("Invalid image file.") from e

print(f"Image dimensions: {image_size} (width, height)")
if image_size[0] < 256 or image_size[1] < 256:
    print("Error: Image is smaller than 256x256. Please upload a larger image.")
    raise ValueError("Image too small.")

# Step 7: Candidate news scripts, one short headline-style passage per topic
scripts = [
    "Breaking news: A major storm is approaching the coast. Residents are advised to stay indoors.",
    "In sports: The local team won the championship game last night in a thrilling finish.",
    "Health update: New studies show that regular exercise can significantly reduce stress levels.",
    "Technology: A new smartphone was released today with advanced features and improved battery life.",
    "Entertainment: The latest blockbuster movie broke box office records over the weekend.",
]

# Step 8: Pick one script at random and echo it so the reader knows what will be spoken
script = random.choice(scripts)
print(f"Selected script: {script}")

# Step 9: Generate audio from the script using gTTS
# gTTS writes MP3 data regardless of the file name it is given, so the speech
# is saved as MP3 first and then converted to a genuine WAV file with FFmpeg
# (installed by the setup cell). Previously MP3 bytes were stored under a
# ".wav" name.
tts = gTTS(script)
mp3_path = "audio.mp3"
audio_path = "audio.wav"  # Wav2Lip prefers WAV format
tts.save(mp3_path)
conversion = subprocess.run(
    # -y overwrites a WAV left over from an earlier run.
    ["ffmpeg", "-y", "-loglevel", "error", "-i", mp3_path, audio_path],
    capture_output=True,
    text=True,
)
if conversion.returncode != 0:
    print("Audio conversion failed:")
    print(conversion.stderr)
    raise RuntimeError("Could not convert the generated speech to WAV.")

# Step 10: Verify audio file was created and is playable
if not os.path.exists(audio_path):
    print("Audio generation failed.")
    raise FileNotFoundError("Audio file was not generated.")
print("Playing generated audio to verify:")
display(Audio(audio_path))

# Step 11: Run Wav2Lip inference with detailed error output and timeout
def _captured_text(captured):
    """Return captured process output as a printable string.

    On timeout, subprocess may expose the captured output as bytes, as str, or
    as None (when nothing was produced), so all three cases are handled.
    """
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        return captured.decode(errors="replace")
    return captured


print("Running Wav2Lip inference. Check the output below for errors:")
print("Note: This process may take 1-5 minutes.")
output_path = "results/result_voice.mp4"
# Make sure the output directory exists before the script tries to write to it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# The command is passed as an argument list (no shell), so an uploaded file
# name containing spaces, parentheses or shell metacharacters is handed to the
# script verbatim instead of being split or interpreted by a shell.
command = [
    "python", "/content/Wav2Lip/inference.py",
    "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
    "--face", image_path,
    "--audio", audio_path,
    "--outfile", output_path,
]
start_time = time.time()
try:
    process = subprocess.run(command, capture_output=True, text=True, timeout=600)  # 10 minutes timeout
    execution_time = time.time() - start_time
    print(f"Wav2Lip inference completed in {execution_time:.2f} seconds.")
    print("Wav2Lip Output:")
    print(process.stdout)
    if process.stderr:
        # stderr also carries warnings and progress bars, not only real errors.
        print("Wav2Lip Errors:")
        print(process.stderr)
    if process.returncode != 0:
        print(f"Error: Inference process failed with return code {process.returncode}.")
        raise RuntimeError("Inference script failed.")
except subprocess.TimeoutExpired as e:
    print("Inference timed out after 10 minutes.")
    print("Partial Output:", _captured_text(e.stdout))
    print("Partial Errors:", _captured_text(e.stderr))
    raise RuntimeError("Inference took too long and was terminated.") from e

# Step 12: Download the generated video
# Only the file this run was asked to produce is offered for download, and only
# if it was written after inference started. Picking the newest "results/*.mp4"
# could otherwise hand back a stale video from an earlier run when the current
# run produced nothing. One second of slack covers coarse timestamp resolution.
video_is_fresh = (
    os.path.exists(output_path)
    and os.path.getmtime(output_path) >= start_time - 1
)
if video_is_fresh:
    latest_video = output_path
    print("Generated video:", latest_video)
    files.download(latest_video)
else:
    print("No video generated. Detailed errors should be above.")
    raise RuntimeError("Video generation failed.")

/content/Wav2Lip
Please upload the anchor's image (JPG or PNG, at least 256x256 with a clear face):


Saving newsanchor.jpg to newsanchor.jpg
Uploaded image path: newsanchor.jpg
Image dimensions: (1920, 1280) (width, height)
Selected script: Technology: A new smartphone was released today with advanced features and improved battery life.
Playing generated audio to verify:


Running Wav2Lip inference. Check the output below for errors:
Note: This process may take 1-5 minutes.
Wav2Lip inference completed in 41.90 seconds.
Wav2Lip Output:
Using cuda for inference.
Number of frames available for inference: 1
(80, 577)
Length of mel chunks: 177
Load checkpoint from: checkpoints/wav2lip_gan.pth
Model loaded

Wav2Lip Errors:

  model_weights = torch.load(path_to_detector)


  0%|          | 0/1 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:07<00:00,  7.04s/it][A
100%|██████████| 1/1 [00:07<00:00,  7.04s/it]
  checkpoint = torch.load(checkpoint_path)

 50%|█████     | 1/2 [00:23<00:23, 23.32s/it]
100%|██████████| 2/2 [00:29<00:00, 13.36s/it]
100%|██████████| 2/2 [00:29<00:00, 14.85s/it]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-l

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>