Please follow the procedure in ```./assets/README-talking-head.md``` to create a talking-head video.


In [27]:
## Environment Setup
import os, sys

# Restrict CUDA to device index 0 (the first GPU as enumerated by CUDA).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Absolute path of the project checkout; every relative path used below is resolved against it.
WORKING_DIR = '/home/peizhi/Documents/gaussian-dejavu/'
os.chdir(WORKING_DIR)
print("Current Working Directory: ", os.getcwd())

# Register the project root and its sub-directories on the import path.
# The relative entries only resolve correctly because of the chdir above.
sys.path.extend([
    WORKING_DIR,
    './models',
    './networks',
    './utils',
    './utils/flame_lib/',
    './utils/diff-gaussian-rasterization',
    './utils/gaussian_renderer',
    './utils/scene',
    './utils/arguments',
    './utils/simple-knn',
])

import matplotlib.pyplot as plt
from time import time
from tqdm import tqdm

import numpy as np
import cv2
import torch

from dejavu import GaussianDejavu

def min_max_normalize(image):
    """Linearly rescale ``image`` so its minimum maps to 0 and its maximum to 1.

    Args:
        image: numpy array of numeric values.

    Returns:
        Array of the same shape as ``image``. When every element of ``image``
        has the same value (max == min) the result is all zeros, instead of
        the NaNs a 0/0 division would produce.
    """
    lowest = np.min(image)
    value_range = np.max(image) - lowest
    shifted = image - lowest
    if value_range == 0:
        # Constant image: `shifted` is already all zeros; multiplying by 0.0 only
        # promotes it to a floating dtype, avoiding the division by zero.
        return shifted * 0.0
    return shifted / value_range

def blur_head_boundary(rendered_img, blur_kernel_size=25, erode_kernel_size=20, sigma=5):
    """Blend a Gaussian-blurred copy of the image into the region around the foreground boundary.

    Pixels whose grayscale value is 255 (pure white) are treated as background;
    everything else is foreground. The foreground mask is eroded, and the
    complement of the eroded mask (background plus the eroded-away rim) is
    smoothed into a per-pixel blend weight between the blurred and the
    original image.

    Args:
        rendered_img: RGB numpy array with values in [0, 1] (values outside
            that range are clipped).
        blur_kernel_size: side length of the Gaussian kernel. cv2.GaussianBlur
            requires an odd size, so a positive even value is increased by one.
        erode_kernel_size: side length of the square erosion kernel applied to
            the foreground mask.
        sigma: Gaussian sigma for blurring the image; the blend weight is
            blurred with ``sigma * 2``.

    Returns:
        Floating-point RGB numpy array with values in [0, 1].
    """
    # cv2.GaussianBlur rejects positive even kernel sizes; round up to the next odd size.
    if blur_kernel_size > 0 and blur_kernel_size % 2 == 0:
        blur_kernel_size += 1
    ksize = (blur_kernel_size, blur_kernel_size)

    # Work on an 8-bit copy of the image.
    rendered_img = (np.clip(rendered_img, 0, 1.0) * 255).astype(np.uint8)

    # The input is RGB, so use the RGB conversion code (BGR2GRAY would swap the R/B weights).
    gray = cv2.cvtColor(rendered_img, cv2.COLOR_RGB2GRAY)

    # Foreground mask: 255 where gray <= 254, 0 where the pixel is pure white.
    _, binary_mask = cv2.threshold(gray, 254, 255, cv2.THRESH_BINARY_INV)

    # Shrink the foreground, then invert: 255 on the background and on the eroded rim.
    erode_kernel = np.ones((erode_kernel_size, erode_kernel_size), np.uint8)
    eroded_mask = 255 - cv2.erode(binary_mask, erode_kernel, iterations=1)

    blurred_boundary = cv2.GaussianBlur(rendered_img, ksize, sigmaX=sigma, sigmaY=sigma)

    # Soft blend weight in [0, 1]: 1 -> take the blurred image, 0 -> keep the original pixel.
    alpha = cv2.GaussianBlur(eroded_mask.astype(float) / 255.0, ksize, sigmaX=sigma*2)
    alpha = alpha[..., None]

    blurred_img = alpha * blurred_boundary + (1 - alpha) * rendered_img
    return blurred_img / 255.


# Create the Gaussian DejaVu framework object from the network weights file
# (the path is relative to the current working directory set during setup).
dejavu = GaussianDejavu(network_weights='./models/dejavu_network.pt')
# Keep a handle to the device exposed by the framework object.
device = dejavu.device



Current Working Directory:  /home/peizhi/Documents/gaussian-dejavu
creating the FLAME Decoder


  self.register_buffer('dynamic_lmk_faces_idx', torch.tensor(lmk_embeddings['dynamic_lmk_faces_idx'], dtype=torch.long))
  self.register_buffer('dynamic_lmk_bary_coords', torch.tensor(lmk_embeddings['dynamic_lmk_bary_coords'], dtype=self.dtype))


Framework v1.0 initialized.
Number of Gaussians:  74083
model loaded from:  ./models/dejavu_network.pt
Gaussian DejaVu Framework Created.
Head avatar parameters loaded


In [34]:
## load head avatar
# Only one avatar is loaded; the commented-out calls are alternative avatars to switch to.
# dejavu.load_head_avatar(save_path='./saved_avatars', avatar_name='peizhi-uv320-1.1')
# dejavu.load_head_avatar(save_path='./saved_avatars', avatar_name='peizhi-cartoon-uv320-v1.1')
# dejavu.load_head_avatar(save_path='./saved_avatars', avatar_name='imavatar-subject1-uv320-1.1')
dejavu.load_head_avatar(save_path='./saved_avatars', avatar_name='imavatar-subject2-uv320-1.1')


# the sequence of driving parameters is from Unitalker
# The loaded object is later indexed with the keys 'exp', 'jaw' and 'fps'.
# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling; only load trusted files.
loaded_sequences = np.load('./assets/can_you_feel_the_love_tonight_clip.npy', allow_pickle=True) 

# Destination of the rendered video (absolute path; edit before running on another machine).
video_export_path = '/home/peizhi/Desktop/exported_video.mp4'


Head avatar parameters loaded


### Render Video

In [35]:

## Render frames
# One rendered image is produced per entry of the driving sequence and collected in `frames`.
frames = []
num_frames = len(loaded_sequences['exp'])
for frame_id in tqdm(range(num_frames)):

    # Expression: first 50 coefficients of this frame, amplified by 1.2 and limited to [-1.5, 1.5].
    exp = np.clip(loaded_sequences['exp'][frame_id:frame_id+1, :50] * 1.2, -1.5, 1.5)

    # Pose: columns 0-2 stay zero; columns 3-5 receive the jaw parameters scaled by 3
    # (assumes the jaw slice broadcasts to shape (1, 3) - TODO confirm).
    jaw = loaded_sequences['jaw'][frame_id:frame_id+1, :]
    pose = np.zeros([1,6], dtype=np.float32)
    pose[:,3:] = jaw * 3.0
    # correct the jaw up/down to valid range: offset by 0.2, then clamp to [0.015, 0.3]
    pose[:,3] = np.clip(pose[:,3] + 0.2, 0.015, 0.3)

    # Render via dejavu, take the first item of the result, reorder its axes with
    # permute(1,2,0) (presumably channel-first -> channel-last) and move it to numpy.
    rendered = dejavu.drive_head_avatar(exp=exp, pose=pose, eye_pose=None, cam_pose=None, return_all=False)
    rendered = blur_head_boundary(rendered_img=rendered[0].permute(1,2,0).cpu().numpy())
    frames.append(rendered)


## Save video
# Writes every entry of `frames` (RGB images) to `video_export_path` with OpenCV.
if not frames:
    raise ValueError('No frames were rendered; nothing to export.')

height, width, _ = frames[0].shape
fps = loaded_sequences['fps']  # should match that used in Unitalker
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID' for .avi
out = cv2.VideoWriter(video_export_path, fourcc, fps, (width, height))

# VideoWriter does not raise on a bad path or unavailable codec; without this check
# the loop below would silently write nothing and still report success.
if not out.isOpened():
    raise IOError(f'Could not open video writer for: {video_export_path}')

try:
    for frame in frames:
        # Ensure uint8: non-uint8 frames are taken to be in [0, 1] and scaled to 0-255
        if frame.dtype != np.uint8:
            frame = (np.clip(frame, 0, 1.0) * 255).astype('uint8')
        # Convert RGB to BGR for OpenCV
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalise the file, even if a frame fails to convert or write.
    out.release()
print('Video Exported!')


  world_view_transform = torch.tensor(cam.world_view_transform).float().to(device)
  full_proj_transform = torch.tensor(cam.full_proj_transform).float().to(device)
100%|██████████| 866/866 [00:37<00:00, 23.02it/s]


Video Exported!


### Add Original Audio to the Video

In [36]:
import subprocess

# need to edit the paths before running!!
video_path = '/home/peizhi/Desktop/exported_video.mp4'                     # rendered video file
audio_path = '/home/peizhi/Desktop/can_you_feel_the_love_tonight_clip.wav' # original audio file
output_path = '/home/peizhi/Desktop/video_with_audio.mp4'                  # final video save path

# Mux audio with video: the video stream is copied as-is, the audio is encoded as AAC,
# and '-shortest' ends the output with the shorter of the two inputs.
# check=True raises CalledProcessError when ffmpeg exits with a non-zero status, so the
# success message below is only printed if the mux actually succeeded.
subprocess.run([
    'ffmpeg', '-y', '-i', video_path, '-i', audio_path,
    '-c:v', 'copy', '-c:a', 'aac', '-shortest',
    output_path
], check=True)

print(f"\nOutput saved to: {output_path}")

ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena


Output saved to: /home/peizhi/Desktop/video_with_audio.mp4


[out#0/mp4 @ 0x64e88c4360c0] video:902kB audio:245kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 2.226338%
size=    1172kB time=00:00:28.83 bitrate= 333.0kbits/s speed= 116x    
[aac @ 0x64e88c461bc0] Qavg: 170.770
