<a href="https://colab.research.google.com/github/anothermartz/Easy-Wav2Lip/blob/Fast/EZWav2Lip_v5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Welcome to my Easy Wav2Lip colab!

My goal is to make lipsyncing with this tool easy, fast and great looking!

Please see the GitHub README for instructions and best practices: https://github.com/anothermartz/Easy-Wav2Lip/tree/Fast#best-practices

In [None]:
#@title <h1>Step 1: Setup "Easy-Wav2Lip"</h1> With one button: it's really that easy!
#@markdown 👈 Click that little circle play button first - it will ask for Google Drive access: <br>
#@markdown > Accept if your files are on Google Drive (recommended).
#@markdown <br> Alternatively, you can click deny and upload files manually, but this is slower.

# Refuse to run the installation twice: the marker file is written at the end
# of this cell, so its presence means setup already completed.
import os
import sys
if os.path.exists('installed.txt'):
  sys.exit('Step 1 has already been run on this instance!')


# Stop early with a clear instruction when the runtime has no CUDA device.
import torch
if not torch.cuda.is_available():
  sys.exit('No GPU in runtime. Please go to the "Runtime" menu, "Change runtime type" and select "GPU".')

# Mounting Google Drive is optional, so a failure here must not abort setup.
# Only ordinary errors are caught: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit and make the cell impossible to interrupt here.
try:
  from google.colab import drive
  drive.mount('/content/drive')
except Exception as drive_error:
  print("google drive not linked")
  # Show why, so a genuine mount problem is distinguishable from a denial.
  print(f"({type(drive_error).__name__}: {drive_error})")
# NOTE(review): `tf` is not referenced anywhere else in the visible cells.
import tensorflow as tf
import os
import re
import time
# Start of the setup timer; the elapsed time is reported at the end of this cell.
start_time = time.time()

# Repository and branch that provide inference.py and the helper modules.
giturl = 'https://github.com/anothermartz/cog-Wav2Lip.git'
gitbranch = 'testing'
#clone git in a way that allows me to change it very easily for when I rename it
!git clone -b {gitbranch} {giturl}
# Derive the cloned folder name from the URL: the last path segment that is
# immediately followed by ".git" ("cog-Wav2Lip" for the URL above).
regex = r'([^\/]+)(?=\.git)'
match = re.search(regex, giturl)
project_dir = match.group(1)
# From here on, relative paths in this cell resolve inside the cloned repository.
%cd '{project_dir}'
!pip install batch_face
#!wget 'https://github.com/anothermartz/Easy-Wav2Lip/releases/download/Prerequesits/Wav2Lip.pth' -O 'checkpoints/Wav2Lip.pth'
# Fetch the GAN weights up front so Step 2 can switch to them without downloading.
!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth' -O 'checkpoints/wav2lip_gan.pth'
# Default (non-GAN) checkpoint location; the file itself is downloaded later in this cell.
checkpoint_path="/content/"+project_dir+"/checkpoints/Wav2Lip.pth"

#imports and stuff
# The wildcard import supplies helpers that are called below and in Step 2 without
# any other visible import (load_model, format_time, get_video_details, show_video)
# - presumably all defined in easy_functions; verify against that module.
from easy_functions import *
from base64 import b64encode
import warnings
import csv
import gdown
import io
import json
import pandas as pd
import re
import requests
import shutil
import subprocess
from numpy.lib import stride_tricks
from IPython.display import HTML, Audio, clear_output
from sklearn.ensemble import RandomForestRegressor
from sklearn.exceptions import DataConversionWarning
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from basicsr.utils.download_util import load_file_from_url
from face_parsing import init_parser
!pip install realesrgan --quiet
#clear_output()

from esrgan.upsample import load_sr
import torch, face_detection

# The constructed detector is discarded; presumably this call only exists to
# trigger a one-time download/initialisation of its weights - TODO confirm.
face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda')

print("Loading segmentation network...")
# seg_net first receives the return value of the download helper and is then
# overwritten with the parser built from the downloaded checkpoint.
seg_net = load_file_from_url(
  url='https://github.com/anothermartz/Easy-Wav2Lip/releases/download/Prerequesits/face_segmentation.pth',
  model_dir='checkpoints', progress=True, file_name=None)
seg_net = init_parser('checkpoints/face_segmentation.pth')

print("Loading upscalers")
# Return values are discarded; presumably these calls pre-fetch the upscaler
# weights so Step 2 does not have to - TODO confirm against esrgan.upsample.
load_sr('gfpgan')
load_sr('RestoreFormer')

# Download the default Wav2Lip weights into checkpoints/Wav2Lip.pth. The model is
# then loaded from checkpoint_path (set earlier in this cell), not from model_path.
model_path = load_file_from_url(
  url='https://github.com/anothermartz/Easy-Wav2Lip/releases/download/Prerequesits/Wav2Lip.pth',
  model_dir='checkpoints', progress=True, file_name='Wav2Lip.pth')
model = load_model(checkpoint_path)
print ("Model loaded")

# Stop the setup timer started earlier in this cell.
end_time = time.time()
elapsed_time = end_time - start_time
formatted_setup_time = format_time(elapsed_time)
# Marker file checked at the top of Step 1 and Step 2. It is written relative to
# the current working directory, which was changed earlier in this cell with %cd.
with open('installed.txt', 'w') as f:
    f.write('Wav2Lip has been installed.')
# Wipe the installation log so only the summary below remains visible.
clear_output()
print()
print("Installation complete, move to Step 2!")
print(f"Execution time: {formatted_setup_time}")

In [2]:
import os
import sys
# Step 1 defines `project_dir` and writes installed.txt into the cloned project.
# On a fresh kernel `project_dir` does not exist at all, so test for the name
# first; otherwise this guard would die with a NameError instead of showing
# the instruction below.
if 'project_dir' not in globals() or not os.path.exists('/content/'+project_dir+'/installed.txt'):
  sys.exit('Step 1 has not been run in this instance! Please run step 1 each time you disconnect from a runtime.')

############################## user inputs #####################################
#@markdown <h1>Step 2: Select inputs:</h1>

#@markdown On desktop: <h1></h1>Click the folder icon ( 📁 ) at the left edge of colab, find your video, right click, copy path, paste it below:
#@markdown<br></br>
#@markdown On mobile: <h1></h1>Tap the hamburger button ( ☰ ) at the top left, click show file browser, long tap (hold) on Easy-Wav2Lip, upload, select your file(s), find them in the file browser, copy path, paste below:
# Path of the face video or still image to lip sync.
video_or_image = "" #@param {type:"string"}
# Path of the speech audio; when left blank the video path is used as the audio source later in this cell.
vocal_track = "" #@param {type:"string"}
#@markdown > Keep vocal_track blank if your video already has the desired speech audio encoded into it.
#@markdown # Quality
# Preset name; translated into the no_sr / better_mask / subtle_upscaling flags below.
quality = "Fast" # @param ["Fast", "Improved", "Enhanced"]
#@markdown * <b><u>Fast</u></b>: Wav2Lip <br>
#@markdown * <b><u>Improved</u></b>: Wav2Lip with a mask to remove the square around the face <br>
#@markdown * <b><u>Enhanced</u></b>: Wav2Lip + mask + GFPGAN upscaling done on the face
#preview_quality = False #@param {type:"boolean"} - coming soon!
# Either one of the two named presets or a custom height typed in as a number of pixels.
output_height = "full resolution" #@param ["half resolution", "full resolution", "480"] {allow-input: true}

# Translate the quality preset into the three switches later passed to
# inference.py. Each entry is (no_sr, better_mask, subtle_upscaling):
#   no_sr            -> adds --no_sr            (skip the upscaler)
#   better_mask      -> omits --no_seg          (use the segmentation mask)
#   subtle_upscaling -> adds --subtle_upscaling
quality_presets = {
  "Fast": (True, False, True),
  "Improved": (True, True, True),
  "Enhanced": (False, True, False),
}
# An unexpected value used to leave no_sr/better_mask undefined (or silently
# reuse the values of a previous run); stop with a clear message instead.
if quality not in quality_presets:
  sys.exit(f'Unknown quality setting: {quality}')
no_sr, better_mask, subtle_upscaling = quality_presets[quality]
#@markdown
#------------------------------*Step 3*----------------------------------------!
#@markdown <h1>👈 Step 3:  Click the little circle play button on this cell! </h1> (Or press ctrl + F10) - Then wait for processing to complete.
# scale padding with resolution
#@markdown <br>

#@markdown ---
#@markdown <br>

#@markdown # [Advanced tweaking](https://github.com/anothermartz/Easy-Wav2Lip/tree/Fast#advanced-tweaking) (optional) </h1>Just ignore all of this if you are new, or click the blue titles for instructions.
wav2lip_version = "Wav2Lip" # @param ["Wav2Lip", "Wav2Lip_GAN"]
# Pick the weights file for the chosen model variant; both files live in the
# project's checkpoints folder.
checkpoint_file = 'wav2lip_gan.pth' if wav2lip_version == "Wav2Lip_GAN" else 'Wav2Lip.pth'
checkpoint_path = f'/content/{project_dir}/checkpoints/{checkpoint_file}'

nosmooth = True #@param {type:"boolean"}
##@markdown ### [Upscaling:](https://github.com/anothermartz/Easy-Wav2Lip#upscaling)
#upscaling = "gfpgan" # @param ["none", "gfpgan", "RestoreFormer"]
#if upscaling=="none":
#  no_sr=True
#else:
#  no_sr=False
#subtle_upscaling = False #@param {type:"boolean"}
#better_mask = False #@param {type:"boolean"}
#@markdown ### [Padding:](https://github.com/anothermartz/Easy-Wav2Lip/tree/Fast#padding)</h1> (Up, Down, Left, Right) <br>
# Padding sliders; they are multiplied by resolution_scale further down before
# being handed to inference.py.
U = 0 #@param {type:"slider", min:-40, max:100, step:5}
D = 0 #@param {type:"slider", min:-40, max:100, step:5}
L = 0 #@param {type:"slider", min:-40, max:100, step:5}
R = 0 #@param {type:"slider", min:-40, max:100, step:5}
#@markdown # [Other options:](https://github.com/anothermartz/Easy-Wav2Lip/tree/Fast#other-options)
# Translate the output_height form value into a scale factor and, for a custom
# value, an explicit pixel height.
resolution_scale = 1
res_custom = False
if output_height == 'half resolution':
  resolution_scale = 2
elif output_height == 'full resolution':
  resolution_scale = 1
else:
  res_custom = True
  resolution_scale = 3
  # A custom height is free text from the form; reject anything that is not a
  # positive whole number with a readable message instead of a raw ValueError.
  try:
    custom_height = int(output_height)
  except ValueError:
    sys.exit(f'output_height must be "half resolution", "full resolution" or a whole number of pixels, not: {output_height}')
  if custom_height <= 0:
    sys.exit(f'output_height must be greater than zero, not: {output_height}')

# Check the input exists BEFORE probing it: a blank or mistyped path would
# otherwise surface as whatever error get_video_details produces for a missing
# file rather than as this message.
if not os.path.exists(video_or_image):
  sys.exit(f'Could not find file: {video_or_image}')

in_width, in_height, in_fps, in_length = get_video_details(video_or_image)
if res_custom:
  out_height = custom_height
else:
  out_height = round(in_height / resolution_scale)
# Frame rate passed to inference.py via --fps.
fps_for_static_image = 30
batch_process = False #@param {type:"boolean"}
output_suffix = "_EZWav2Lip" #@param {type:"string"}
include_settings_in_suffix = False #@param {type:"boolean"}
if include_settings_in_suffix:
  # Collect the individual tags, then join them with underscores:
  # <suffix>[_GAN]_<quality>_<height>_nosmooth<0|1>
  suffix_parts = [output_suffix]
  if wav2lip_version == "Wav2Lip_GAN":
    suffix_parts.append('GAN')
  suffix_parts.append(str(quality))
  suffix_parts.append(str(out_height))
  suffix_parts.append('nosmooth1' if nosmooth else 'nosmooth0')
  output_suffix = '_'.join(suffix_parts)
  # Append only the non-zero paddings, in U, D, L, R order, e.g. "_pads-U10R5".
  pad_tags = ''.join(
    f'{side}{amount}'
    for side, amount in (('U', U), ('D', D), ('L', L), ('R', R))
    if amount != 0
  )
  if pad_tags:
    output_suffix = f'{output_suffix}_pads-{pad_tags}'

preview_input = False #@param {type:"boolean"}

# NOTE(review): integer floor division - this is "1" at full resolution and "0"
# otherwise; the value is not used by the visible cells.
rescaleFactor = str(round(1 // resolution_scale))
# Scale every padding by the resolution factor and keep them as strings for the
# inference.py command line.
pad_up, pad_down, pad_left, pad_right = [
  str(round(padding * resolution_scale)) for padding in (U, D, L, R)
]
################################################################################


######################### reconstruct input paths ##############################
def _split_media_path(path):
  """Split a file path into (folder, filename, file_type).

  folder    - everything up to and including the last "/" ("" when there is none)
  filename  - the base name without its final extension
  file_type - the final extension including the dot ("" when there is none)

  Uses os.path instead of regular expressions so that paths without a directory
  part or without an extension no longer raise AttributeError on a failed match.
  """
  basename = os.path.basename(path)
  stem, extension = os.path.splitext(basename)
  return path[:len(path) - len(basename)], stem, extension

# check video_or_image exists (and is a file: a directory cannot be processed)
if not os.path.isfile(video_or_image):
  sys.exit(f'Could not find file: {video_or_image}')
# extract each part of the path
folder, filename, file_type = _split_media_path(video_or_image)
# a trailing run of digits in the name is the sequence number used for batch processing
filenumber_match = re.search(r"\d+$", filename)
if filenumber_match: # if there is a filenumber - extract it
  filenumber = str(filenumber_match.group())
  filenamenonumber = re.sub(r"\d+$", "", filename)
else: # if there is no filenumber - make it blank
  filenumber = ""
  filenamenonumber = filename

# if vocal_track is blank - use the video as audio
if vocal_track == "":
  vocal_track = video_or_image
# if not, check that the vocal_track file exists
elif not os.path.isfile(vocal_track):
  sys.exit(f'Could not find file: {vocal_track}')
# extract each part of the path:
audio_folder, audio_filename, audio_file_type = _split_media_path(vocal_track)
audio_filenumber_match = re.search(r"\d+$", audio_filename)
if audio_filenumber_match: #if there is a filenumber - extract it
  audio_filenumber = str(audio_filenumber_match.group())
  audio_filenamenonumber = re.sub(r"\d+$", "", audio_filename)
else: # if there is no filenumber - make it blank
  audio_filenumber = ""
  audio_filenamenonumber = audio_filename
################################################################################

# Scratch locations inside the cloned project: inputs are copied into the temp
# folder and inference.py writes its result to temp_output.
temp_folder = f'/content/{project_dir}/temp/'
temp_output = f'{temp_folder}output.mp4'

# Becomes True as soon as any processing run fails to produce an output file.
process_failed = False
# Inputs used by the most recent run; the batch loop compares against these to
# avoid processing the same pair twice.
last_input_video = last_input_audio = None

#if file_type == '.gif':
#  sys.exit("I'm sorry but .gif files aren't supported!")

#if file_type == '.jpg' or '.jpeg' or '.png' or '.bmp' or '.tiff' or '.tif':
#  input_is_image = True
#else:
#  input_is_image = False

start_time = time.time()
#--------------------------Batch processing loop-------------------------------!
while True:

  # construct input_video
  input_video = folder + filenamenonumber + str(filenumber) + file_type
  input_videofile = re.search(r"[^\/]+$", input_video).group()
  # construct input_audio
  input_audio = audio_folder + audio_filenamenonumber + str(audio_filenumber) + audio_file_type
  input_audiofile = re.search(r"[^\/]+$", input_audio).group()
  # see if filenames are different:
  if filenamenonumber + str(filenumber) != audio_filenamenonumber + str(audio_filenumber):
    output_filename = filenamenonumber + str(filenumber) + "_" + audio_filenamenonumber + str(audio_filenumber)
  else:
    output_filename = filenamenonumber + str(filenumber)
  # construct output_video
  output_video = folder + output_filename + output_suffix + '.mp4'
  output_videofile = re.search(r"[^\/]+$", output_video).group()

  # remove last outputs
  directory_path = '/content/'+project_dir+'/temp'
  if os.path.exists(directory_path):
    shutil.rmtree(directory_path)
  os.makedirs(directory_path)

  # preview inputs (if enabled)
  if preview_input:
    print("input video:")
    show_video(input_video)
    if vocal_track != "":
      print("input audio:")
      display(Audio(input_audio))
    else:
      print("using", input_video, "for audio")
    print("You may want to check now that they're the correct files!")

  last_input_video = input_video
  last_input_audio = input_audio
  shutil.copy(input_video, temp_folder)
  shutil.copy(input_audio, temp_folder)
  temp_input_video = temp_folder + input_videofile
  temp_input_audio = temp_folder + input_audiofile

  if os.path.isfile(temp_output):
    os.remove(temp_output)

  #----------------------------Process the inputs!-----------------------------!
  print(f"Processing {input_videofile} using {input_audiofile} for audio")
  #start processing timer
  start_time = time.time()


  #execute Wav2Lip & upscaler
  !python 'inference.py' \
  --face "{temp_input_video}" \
  --audio "{temp_input_audio}" \
  --outfile "{temp_output}" \
  --pads {pad_up} {pad_down} {pad_left} {pad_right} \
  --checkpoint_path {checkpoint_path} \
  --sr_model 'gfpgan' \
  --out_height {out_height} \
  --fps "{fps_for_static_image}" \
  --fullres {resolution_scale} \
  {'--nosmooth ' if nosmooth else ''} {'--no_sr ' if no_sr else ''} {'' if better_mask else '--no_seg '} {'--subtle_upscaling ' if subtle_upscaling else ''}

  #end processing timer and format the time it took
  end_time = time.time()
  elapsed_time = end_time - start_time
  process_time = int(elapsed_time)
  formatted_process_time = format_time(elapsed_time)

  #rename temp file and move to correct directory
  if os.path.isfile(temp_output):
    if os.path.isfile(output_video):
      os.remove(output_video)
    !cp "{temp_output}" "{output_video}"
    if os.path.isfile(output_video):
      #show output video
      clear_output()
      print(f"{output_filename} successfully lip synced! Find it in the same folder as your input file(s).")
      end_time = time.time()
      elapsed_time = end_time - start_time
      formatted_setup_time = format_time(elapsed_time)
      print(f"Execution time: {formatted_setup_time}")
  if os.path.isfile(temp_output):
    print(f"Loading video preview for {output_videofile}...")
    show_video(temp_output)
  else:
    print(f"Processing failed! :( see line above 👆")
    process_failed = True

  if batch_process == False:
    print("Batch Processing disabled")
    if process_failed:
        sys.exit("Processing failed")
    else:
      break
  elif filenumber == "" and audio_filenumber == "":
    print('Files not set for batch processing')
    break

  #Batch processing
  if filenumber != "": # if video has a filenumber
    match = re.search(r'\d+', filenumber)
    # add 1 to video filenumber
    filenumber = f"{filenumber[:match.start()]}{int(match.group())+1:0{len(match.group())}d}"

  if audio_filenumber != "": # if audio has a filenumber
    match = re.search(r'\d+', audio_filenumber)
    # add 1 to audio filenumber
    audio_filenumber = f"{audio_filenumber[:match.start()]}{int(match.group())+1:0{len(match.group())}d}"

  # construct input_video
  input_video = folder + filenamenonumber + str(filenumber) + file_type
  input_videofile = re.search(r"[^\/]+$", input_video).group()
  # construct input_audio
  input_audio = audio_folder + audio_filenamenonumber + str(audio_filenumber) + audio_file_type
  input_audiofile = re.search(r"[^\/]+$", input_audio).group()

  # now check which input files exist and what to do for each scenario

  # both +1 files exist - continue processing
  if os.path.exists(input_video) and os.path.exists(input_audio):
    continue

  # video +1 only - continue with last audio file
  if os.path.exists(input_video) and input_video != last_input_video:
    if audio_filenumber != "": # if audio has a filenumber
        match = re.search(r'\d+', audio_filenumber)
        # take 1 from audio filenumber
        audio_filenumber = f"{audio_filenumber[:match.start()]}{int(match.group())-1:0{len(match.group())}d}"
    continue

  # audio +1 only - continue with last video file
  if os.path.exists(input_audio) and input_audio != last_input_audio:
    if filenumber != "": # if video has a filenumber
      match = re.search(r'\d+', filenumber)
      # take 1 from video filenumber
      filenumber = f"{filenumber[:match.start()]}{int(match.group())-1:0{len(match.group())}d}"
    continue

  # neither +1 files exist or current files already processed - finish processing
  print("Finished all sequentially numbered files")
  if process_failed:
     sys.exit("Processing failed on at least one video")
  else:
    break

AIPresidentsAliensTrumpLaterLosers successfully lip synced! Find it in the same folder as your input file(s).
Execution time: 1m 0s
Loading video preview for AIPresidentsAliensTrumpLaterLosers_EZWav2Lip_Enhanced_720_nosmooth1.mp4...


Batch Processing disabled
