In [1]:
# Step 1: Install dependency
!pip install numpy
!pip install librosa
!pip install opencv-python
!pip install tqdm
!pip install batch_face
!pip install ffmpeg-python
!pip install pyaudio
!pip install yt-dlp
!pip install torch==2.3.0+cu118 torchvision==0.18.0+cu118 torchaudio==2.3.0 -f https://download.pytorch.org/whl/torch_stable.html

Collecting numpy
  Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
Successfully installed numpy-1.26.4
Collecting librosa
  Using cached librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting scipy>=1.2.0 (from librosa)
  Using cached scipy-1.13.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting scikit-learn>=0.20.0 (from librosa)
  Using cached scikit_learn-1.5.0-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting joblib>=0.14 (from librosa)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Using cached numba-0.59.1-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Using cached soundfile-0.12.1-py2.py3-none-win_amd64.whl.metadata (14 kB)
Collecting

#### Before running the next step, install FFmpeg from its official site.
#### Then add the path to FFmpeg's bin folder to the "Path" system environment variable.
#### Example: Path - C:\Users\User\Documents\Projects\ffmpeg-master-latest-win64-gpl-shared\bin

In [3]:
import os
import subprocess
from urllib import parse as urlparse

# Step 1: Define YouTube URL and Video ID
YOUTUBE_URL = 'https://www.youtube.com/watch?v=quxzCrGk32s'
url_data = urlparse.urlparse(YOUTUBE_URL)
query = urlparse.parse_qs(url_data.query)
YOUTUBE_ID = query["v"][0]

# Remove the previous download and the previous trimmed clip.
# NOTE: yt-dlp does not overwrite an existing output file by default, so a
# leftover 'youtube.mp4' would be reused even after YOUTUBE_URL is changed.
for stale_file in ('youtube.mp4', 'input_vid.mp4'):
    if os.path.isfile(stale_file):
        os.remove(stale_file)

# Trim video (start, end) seconds
start = 0
end = 5
interval = end - start

# Step 2: Download and trim the YouTube video
# check=True raises CalledProcessError on a failed download instead of letting
# the following steps run against a missing file.
subprocess.run(
    ['yt-dlp', '-f', 'bestvideo[ext=mp4]', '--output', "youtube.%(ext)s",
     f'https://www.youtube.com/watch?v={YOUTUBE_ID}'],
    check=True,
)

# Cut the video using FFmpeg.
# '-t' expects a duration, not an end timestamp, so pass the clip length
# (end - start); passing `end` is only correct when start == 0.
subprocess.run(
    ['ffmpeg', '-y', '-i', 'youtube.mp4', '-ss', str(start), '-t', str(interval),
     '-async', '1', 'input_vid.mp4'],
    check=True,
)
# Display video.
from IPython.display import HTML
from base64 import b64encode

def show_video(path):
    """Build an inline HTML5 player for the video file at ``path``.

    The whole file is read into memory and embedded in the page as a
    base64 ``data:video/mp4`` URL, so no separate file needs to be served.

    Args:
        path: Path of the video file to embed.

    Returns:
        The ``HTML`` display object wrapping a ``<video>`` element.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle as soon as the bytes are read,
    # rather than leaving that to garbage collection.
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""<video width=600 controls><source src="{data_url}"></video>""")

# Preview the trimmed video. Keep this call as the last expression of the cell:
# the notebook renders the returned HTML object only for a trailing expression.
show_video('input_vid.mp4')

In [4]:
import os
from IPython.display import Audio
from IPython.display import display

# Source of the driving audio: 'Record' or 'Path' (handled further down in this cell).
upload_method = 'Path'  # Change this to 'Record' or 'Path'

# Start from a clean slate: drop the audio file left over from an earlier run.
_previous_audio = 'input_audio.wav'
if os.path.isfile(_previous_audio):
    os.remove(_previous_audio)

def display_audio():
    """Show an audio player for 'input_audio.wav' in the cell output."""
    player = Audio('input_audio.wav')
    display(player)

# Produce 'input_audio.wav' either by recording from the default input device
# ('Record') or by converting an existing audio file ('Path').
if upload_method == 'Record':
    # Imported only in this branch, so the 'Path' workflow never imports pyaudio.
    import pyaudio
    import wave

    CHUNK = 1024            # frames read from the stream per call
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000            # sampling rate in Hz
    RECORD_SECONDS = 5
    WAVE_OUTPUT_FILENAME = "input_audio.wav"

    p = pyaudio.PyAudio()
    frames = []

    # try/finally guarantees the stream and the PyAudio instance are released
    # even if opening the device or reading from it fails.
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("Recording...")

            for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                data = stream.read(CHUNK)
                frames.append(data)

            print("Finished recording.")
        finally:
            stream.stop_stream()
            stream.close()

        sample_width = p.get_sample_size(FORMAT)
    finally:
        p.terminate()

    # The context manager closes the WAV file even if writing fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    display_audio()

elif upload_method == 'Path':
    # Add the full path to your audio
    PATH_TO_YOUR_AUDIO = 'C:\\Users\\Lenovo\\Documents\\Bootcamp\\Projects\\Final_Project\\Trump_Bootcamp (mp3cut.net).wav'

    # Fail early with a clear message instead of an opaque loader error.
    if not os.path.isfile(PATH_TO_YOUR_AUDIO):
        raise FileNotFoundError(f"Audio file not found: {PATH_TO_YOUR_AUDIO}")

    # Load the audio at its native sampling rate (sr=None disables resampling)
    import librosa
    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)

    # Re-save it as WAV at that same sampling rate
    import soundfile as sf
    sf.write('input_audio.wav', audio, sr, format='wav')

    display_audio()

else:
    # A typo in upload_method would otherwise leave the cell a silent no-op
    # and the failure would only surface later, when 'input_audio.wav' is missing.
    raise ValueError(f"Unknown upload_method {upload_method!r}; expected 'Record' or 'Path'")

In [6]:
import subprocess
import sys

# Define the parameters for the Wav2Lip model
pad_top = 0
pad_bottom = 10
pad_left = 0
pad_right = 0
rescaleFactor = 1
nosmooth = False

# Set the full path to the Wav2Lip model and input files.
# Everything is derived from one project folder so only this line needs
# editing on another machine.
project_dir = "C:\\Users\\Lenovo\\Documents\\Bootcamp\\Projects\\Wav2LipTest"
wav2lip_dir = os.path.join(project_dir, "Wav2Lip")
checkpoint_path = os.path.join(wav2lip_dir, "checkpoints", "wav2lip_gan.pth")
input_face = os.path.join(project_dir, "input_vid.mp4")
input_audio = os.path.join(project_dir, "input_audio.wav")
results_path = os.path.join(wav2lip_dir, "results")
result_video = os.path.join(results_path, "result_voice.mp4")

# Build the command as an argument list: no shell is involved, so paths that
# contain spaces are passed through intact.
inference_cmd = [
    sys.executable, "-u", "inference.py",   # kernel's interpreter, unbuffered output
    "--checkpoint_path", checkpoint_path,
    "--face", input_face,
    "--audio", input_audio,
    "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right),
    "--resize_factor", str(rescaleFactor),
]
if nosmooth:
    inference_cmd.append("--nosmooth")

# Run the Wav2Lip model from inside the Wav2Lip folder.
# cwd= is used instead of os.chdir so the notebook's working directory is left
# untouched: the cell can be re-run, and relative paths in other cells keep working.
with subprocess.Popen(inference_cmd,
                      cwd=wav2lip_dir,
                      stdout=subprocess.PIPE,
                      stderr=subprocess.STDOUT,
                      text=True,
                      errors="replace") as inference_proc:
    # Relay the script's output into the cell as it is produced.
    for log_line in inference_proc.stdout:
        print(log_line, end="")

if inference_proc.returncode != 0:
    raise RuntimeError(f"Wav2Lip inference failed with exit code {inference_proc.returncode}")

# Preview the output video
print("Final Video Preview")
print("Find the output video at", 'Wav2Lip/results/result_voice.mp4')
show_video(result_video)

Using cuda for inference.
Load checkpoint from: C:\Users\Lenovo\Documents\Bootcamp\Projects\Wav2LipTest\Wav2Lip\checkpoints\wav2lip_gan.pth
Models loaded
Reading video frames...
Number of frames available for inference: 120
(80, 410)
Length of mel chunks: 120
face detect time: 12.902933359146118
wav2lip prediction time: 20.203838109970093
Final Video Preview
Find the output video at Wav2Lip/results/result_voice.mp4



  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:20<00:00, 20.20s/it]
100%|██████████| 1/1 [00:20<00:00, 20.20s/it]
ffmpeg version N-115585-g7d46ab9e12-20240607 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 13.2.0 (crosstool-NG 1.26.0.65_ecc5e41)
  configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-w64-mingw32- --arch=x86_64 --target-os=mingw32 --enable-gpl --enable-version3 --disable-debug --disable-w32threads --enable-pthreads --enable-iconv --enable-libxml2 --enable-zlib --enable-libfreetype --enable-libfribidi --enable-gmp --enable-fontconfig --enable-libharfbuzz --enable-libvorbis --enable-opencl --disable-libpulse --enable-libvmaf --disable-libxcb --disable-xlib --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enab