# Demo for paper "First Order Motion Model for Image Animation"

### Load imports and setup
**It's important that you set the `USE_CPU` parameter to `True` if you don't have a CUDA-compatible graphics card.**

In [1]:
import os.path
import cv2
import torch
import zipfile
import ffmpeg
import imageio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import gdown
import warnings
warnings.filterwarnings("ignore")

from skimage.transform import resize
from IPython.display import HTML
from animate import normalize_kp
from crop_video import process_video
from demo import load_checkpoints
from demo import make_animation
from skimage import img_as_ubyte

# Crop box applied to every webcam frame in the real-time animation section:
# (x, y) is the top-left corner and (w, h) the size, all in pixels.
x = 143
y = 87
w = 322
h = 322

# Set to True to run the models on the CPU (no CUDA-capable GPU available).
USE_CPU = False

# Scratch directory for downloads and generated videos. exist_ok makes the call
# safe to re-run and avoids the check-then-create race of exists() + mkdir().
os.makedirs('temp', exist_ok=True)

cv2.__version__

'4.2.0'

### Choose source image
**Please choose a source image from the provided ones, or add a new image to the `source_image_inputs` folder. It should be a picture of the head area, preferably with square proportions.**

In [2]:
# List the images available in the source_image_inputs folder.
!ls source_image_inputs

Monalisa.png		 chucky.png    mike_tyson.jpg	  rowan.jpg
Neil_deGrasse_Tyson.png  draco.jpg     orlando_bloom.jpg  the_rock.jpeg
ben_stiller.jpeg	 feynman.jpeg  rowan.jpeg


In [3]:
# Load the picture to animate; change the path to use a different source image.
source_image = imageio.imread('source_image_inputs/chucky.png')

### Create a model and load checkpoints
**You need checkpoints from a trained model. The code below will download them if they are not in the `extract` folder. If you need to download them manually, they can be found under the following links: [google-drive](https://drive.google.com/open?id=1PyQJmkdCsAkOYwUyaj_l-l0as-iLDgeH), [google-drive-2](https://drive.google.com/uc?id=1wCzJP1XJNB04vEORZvPjNz6drkXm5AUK) or [yandex-disk](https://yadi.sk/d/lEw8uRm140L_eQ); they need to be extracted into the `extract` folder.**

In [4]:
# Download and unpack the pretrained checkpoint only when it is not already on disk.
model_checkpoint_exist = os.path.exists('extract/vox-cpk.pth.tar')
if not model_checkpoint_exist:
    url = 'https://drive.google.com/uc?id=1wCzJP1XJNB04vEORZvPjNz6drkXm5AUK'
    output = 'temp/checkpoints.zip'
    downloaded = gdown.download(url, output, quiet=False)
    # Some gdown versions signal a failed download (quota exceeded, permission
    # denied, proxy error) by returning None instead of raising. Fail here with
    # a clear message rather than with a confusing zipfile error below.
    if downloaded is None:
        raise RuntimeError(
            'Could not download the model checkpoints from {}. '
            'Download them manually and extract them into the "extract" folder.'.format(url)
        )
    with zipfile.ZipFile(output, 'r') as zip_ref:
        zip_ref.extractall('extract')


In [14]:
# Build the generator and keypoint detector from the downloaded checkpoint;
# USE_CPU is forwarded as the `cpu` flag of load_checkpoints.
generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml', 
                            checkpoint_path='extract/vox-cpk.pth.tar', cpu=USE_CPU)

### Record your source video
**First you need to prepare a source video, or you can jump directly to real-time image animation in the [Real time image animation section](#Real-time-image-animation).
The quality of the image animation is usually better when using a video recorded beforehand.
You can also jump to the [Resizing source video and image section](#Resizing-source-video-and-image) and use my source video from the repository (`temp/test_video_cropped.avi`).**

In [5]:
def save_video(cap,saving_file_name,fps=33.0):
    """Start an ffmpeg process that encodes raw RGB frames written to its stdin.

    One frame is read from ``cap`` (and discarded) only to learn the frame
    width and height, which ffmpeg needs up front for raw-video input.

    Args:
        cap: capture object exposing ``isOpened()`` and ``read()``
            (e.g. ``cv2.VideoCapture``).
        saving_file_name: path of the video file ffmpeg writes; an existing
            file is overwritten.
        fps: output frame rate passed to ffmpeg.

    Returns:
        The object returned by ``run_async(pipe_stdin=True)``. The caller
        writes ``rgb24`` frame bytes to its ``stdin``, then closes ``stdin``
        and waits for the process.

    Raises:
        RuntimeError: if the capture is not opened, or closes before it
            delivers a frame, so the frame size cannot be determined.
    """
    frame_size = None
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # frame.shape is (height, width, ...); ffmpeg wants "WIDTHxHEIGHT".
            frame_size = (frame.shape[1], frame.shape[0])
            break

    if frame_size is None:
        raise RuntimeError(
            'Could not read a frame from the video capture; '
            'check that the camera is connected and not used by another program.'
        )
    i_width, i_height = frame_size

    process = (
        ffmpeg
        .input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(i_width, i_height))
        .output(saving_file_name, pix_fmt='yuv420p', vcodec='libx264', r=fps, crf=37)
        .overwrite_output()
        .run_async(pipe_stdin=True)
    )

    return process

In [6]:
# Record the webcam to temp/source_video.avi until 'q' is pressed in the
# preview window or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
saved_video_file_name = 'temp/source_video.avi'

process = None
try:
    process = save_video(cap, saved_video_file_name)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Mirror the image so the preview behaves like a mirror.
        frame_flipped = cv2.flip(frame, 1)
        # OpenCV delivers BGR; the ffmpeg pipe was opened with pix_fmt='rgb24'.
        process.stdin.write(cv2.cvtColor(frame_flipped, cv2.COLOR_BGR2RGB)
                            .astype(np.uint8).tobytes())
        cv2.imshow('frame', frame_flipped)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and the preview window, even if an error occurred,
    # then let ffmpeg finish writing the file.
    cap.release()
    cv2.destroyAllWindows()
    if process is not None:
        process.stdin.close()
        process.wait()
        

### Crop and scale video
**Your video needs to be prepared. The code below will search for the best crop options. This is done using the [Face alignment library](https://github.com/1adrianb/face-alignment), and some models will be downloaded automatically.**

In [7]:
class CropVideoArgs:
    """Plain attribute holder passed to ``process_video``.

    The attribute names presumably mirror the command-line options of the
    crop_video script -- TODO confirm against ``crop_video.process_video``.
    Every setting except the input path has a default equal to the value this
    notebook used before, so ``CropVideoArgs(path)`` behaves as it always did.

    Args:
        video_input: path of the video to analyse (stored as ``inp``).
        image_shape: stored as ``image_shape``.
        increase: stored as ``increase``.
        iou_with_initial: stored as ``iou_with_initial``.
        min_frames: stored as ``min_frames``.
        cpu: stored as ``cpu``; ``None`` (default) means "use the notebook-wide
            ``USE_CPU`` setting".
    """
    def __init__(self, video_input, image_shape=(256,256), increase=0.1,
                 iou_with_initial=0.25, min_frames=150, cpu=None):
        self.inp = video_input
        self.image_shape = image_shape
        self.increase = increase
        self.iou_with_initial = iou_with_initial
        self.min_frames = min_frames
        # Resolve the default lazily so an explicit value never needs USE_CPU.
        self.cpu = USE_CPU if cpu is None else cpu
# Analyse the recorded webcam video; `commands` receives whatever
# process_video returns (it is printed in the next cell).
args = CropVideoArgs('temp/source_video.avi')

commands = process_video(args)

235it [17:58,  4.59s/it]


**Now let's print the proposed settings for cropping. Warning - if there are no proposed settings (empty array), you can try to record another video or use hand-selected parameters - provide the coordinates of a box where your face is visible inside the video.**

In [8]:
# Show the crop proposals returned by process_video.
print(commands)

['ffmpeg -i temp/source_video.avi -ss 0.0 -t 7.090909090909091 -filter:v "crop=293:294:147:120, scale=256:256" crop.mp4']


In [9]:
# Apply the crop box and rescale to 256x256, writing the result next to the
# recording. The crop values are hard-coded here; adjust them to your own video.
recorded_stream = ffmpeg.input(saved_video_file_name)
cropped_stream = recorded_stream.filter('crop', out_w='293', out_h='294', x='147', y='120')
scaled_stream = cropped_stream.filter('scale', size='256:256', force_original_aspect_ratio='disable')
crop_job = scaled_stream.output("temp/source_video_cropped.avi").overwrite_output()
crop_job.run()

(None, None)

### Resizing source video and image 
**If you don't record video then `temp/test_video_cropped.avi` will be used...so you will use my ugly face :)**

In [10]:
# Prefer the freshly recorded and cropped video; fall back to the bundled test video.
recorded_video_exist = os.path.exists('temp/source_video_cropped.avi')
driving_video = imageio.mimread(
    'temp/source_video_cropped.avi' if recorded_video_exist else 'temp/test_video_cropped.avi'
)

**Resize both video and image to 256x256 size**

In [11]:
# Resize the source image and every driving frame to 256x256 and keep only the
# first three channels (drops an alpha channel if one is present).
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

def display(source, driving, generated=None):
    """Build a side-by-side animation of the source, driving and generated frames.

    For every driving frame one animation frame is produced by concatenating,
    left to right, the (static) source image, that driving frame and -- when
    ``generated`` is given -- the generated frame with the same index.

    Args:
        source: source image array.
        driving: sequence of driving frames.
        generated: optional sequence of generated frames, indexed like ``driving``.

    Returns:
        A ``matplotlib.animation.ArtistAnimation``. The current pyplot figure
        is closed before returning.
    """
    has_generated = generated is not None
    fig = plt.figure(figsize=(12 if has_generated else 8, 6))

    artists = []
    for idx, driving_frame in enumerate(driving):
        panels = [source, driving_frame]
        if has_generated:
            panels.append(generated[idx])
        shown = plt.imshow(np.concatenate(panels, axis=1), animated=True)
        plt.axis('off')
        artists.append([shown])

    ani = animation.ArtistAnimation(fig, artists, interval=50, repeat_delay=1000)
    plt.close()
    return ani



In [12]:
# Preview the source image next to the driving video as an inline HTML5 video.
HTML(display(source_image, driving_video).to_html5_video())

### Perform image animation
**Let's create the source image animation using the video prepared in the previous steps. There are three cells, each with different settings. The resulting videos will be available in the `temp` directory.**

**Parameters: `relative=True`, `adapt_movement_scale=False`. In this cell we will use relative keypoint displacement to animate the objects.**

In [15]:
# Animate with relative keypoint displacement and save the frames as MP4.
predictions = make_animation(
    source_image, driving_video, generator, kp_detector,
    relative=True, cpu=USE_CPU,
)
frames_res1 = [img_as_ubyte(frame) for frame in predictions]
imageio.mimsave('temp/animation_res1.mp4', frames_res1)

# Keep the animation object: it can be selected for export further below.
animation_res1 = display(source_image, driving_video, predictions)
HTML(animation_res1.to_html5_video())

100%|██████████| 238/238 [20:01<00:00,  5.05s/it]


**Parameters: `relative=False`, `adapt_movement_scale=True`. We can use absolute coordinates instead of relative ones, but this way all the object proportions will be inherited from the driving video. For example, Putin's haircut will be extended to match Trump's haircut.**

In [16]:
# Animate with absolute keypoint coordinates and save the frames as MP4.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=False,
                             adapt_movement_scale=True, cpu=USE_CPU)
imageio.mimsave('temp/animation_res2.mp4', [img_as_ubyte(frame) for frame in predictions])

# Build the side-by-side animation once and reuse the same object for the
# inline preview instead of rendering every frame a second time.
animation_res2 = display(source_image, driving_video, predictions)
HTML(animation_res2.to_html5_video())

100%|██████████| 226/226 [16:35<00:00,  4.41s/it]


**Parameters: `relative=True`, `adapt_movement_scale=True`.**

In [17]:
# Animate with relative keypoint displacement plus movement-scale adaptation
# and save the frames as MP4.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True,
                             adapt_movement_scale=True, cpu=USE_CPU)
imageio.mimsave('temp/animation_res3.mp4', [img_as_ubyte(frame) for frame in predictions])

# Build the side-by-side animation once and reuse the same object for the
# inline preview instead of rendering every frame a second time.
animation_res3 = display(source_image, driving_video, predictions)
HTML(animation_res3.to_html5_video())

100%|██████████| 226/226 [19:52<00:00,  5.28s/it]


### Save animation result with source video and possibly convert to GIF
**Please choose the animation (`animation_res1`, `animation_res2` or `animation_res3`) which in your opinion performs best and assign it to the `im_ani` variable.**

In [18]:
# Animation selected for export; swap in animation_res2 or animation_res3 as preferred.
im_ani = animation_res1
# Text stored as the `artist` metadata of the exported video.
video_footer = 'Michal Kostewicz - Real time image animation'

In [19]:
# Look up the writer class registered under 'ffmpeg' in matplotlib and use it
# to save the chosen animation at 15 fps.
Writer = animation.writers['ffmpeg']
writer = Writer(fps=15, metadata=dict(artist=video_footer), bitrate=3600)
im_ani.save('temp/image_driver_animation.avi', writer=writer)

**Convert to GIF file**

In [20]:
# Convert the saved AVI to a GIF with ffmpeg, overwriting any existing output file.
(ffmpeg
.input('temp/image_driver_animation.avi')
.output('temp/image_driver_animation.gif')
.overwrite_output()
.run()
)

(None, None)

### Real time image animation
Below is part of code where you can perform image animation in real time using webcam as source of video. Video will be recorded in `temp` directory.

In [24]:
# Animate the source image live from the webcam. Each output frame shows, left
# to right: source image | generated frame | cropped webcam frame. The same
# strip is recorded to temp/animation_from_webcam.avi. Press 'q' in the preview
# window to stop.
relative = True
adapt_movement_scale = True

fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out1 = cv2.VideoWriter('temp/animation_from_webcam.avi', fourcc, 12, (256*3, 256), True)
# Channel-swapped float32 copy of the (RGB) source image, so that OpenCV, which
# works in BGR, displays and records it with the right colours.
cv2_source = cv2.cvtColor(source_image.astype('float32'), cv2.COLOR_BGR2RGB)
cap = cv2.VideoCapture(0)

count = 0
kp_driving_initial = None
try:
    with torch.no_grad():
        # The source image never changes, so its tensor and keypoints are
        # computed once instead of on every frame.
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not USE_CPU:
            source = source.cuda()
        kp_source = kp_detector(source)

        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable or stream ended: stop instead of crashing
                # on a missing frame.
                break
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame = cv2.flip(frame, 1)
            # Keep only the configured crop box, then bring it to the model's input size.
            frame = frame[y:y+h, x:x+w]
            frame1 = resize(frame, (256, 256))[..., :3]

            # OpenCV frames are BGR, while the source image and the model output
            # are handled as RGB everywhere else in this notebook, so convert the
            # driving frame before it goes into the keypoint detector.
            driving_rgb = cv2.cvtColor(frame1.astype('float32'), cv2.COLOR_BGR2RGB)
            driving_frame = torch.tensor(driving_rgb[np.newaxis]).permute(0, 3, 1, 2)
            if not USE_CPU:
                driving_frame = driving_frame.cuda()

            if count == 0:
                # Keypoints of the first frame are the reference pose for
                # relative movement; computed on the same device as all others.
                kp_driving_initial = kp_detector(driving_frame)
            kp_driving = kp_detector(driving_frame)

            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            # (1, C, H, W) -> (H, W, C), then RGB -> BGR for OpenCV.
            im = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
            im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            joinedFrame = np.concatenate((cv2_source, im, frame1), axis=1)

            # The video writer needs 8-bit pixels; the preview accepts floats in [0, 1].
            joinedFrameToSave = np.uint8(255 * joinedFrame)
            out1.write(joinedFrameToSave)

            cv2.imshow('Test', joinedFrame)

            count += 1
finally:
    # Release the camera, finalise the recorded file and close the preview
    # window even if an error interrupted the loop.
    cap.release()
    out1.release()
    cv2.destroyAllWindows()

**Convert to GIF file**

In [43]:
# Convert the recorded webcam animation to a GIF with ffmpeg, overwriting any existing output file.
(ffmpeg
.input('temp/animation_from_webcam.avi')
.output('temp/animation_from_webcam.gif')
.overwrite_output()
.run()
)

(None, None)