<a href="https://colab.research.google.com/github/RebortY/yulegeyu/blob/master/AI_Generated_Characters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AI Generated Characters for Learning and Wellbeing

Website: https://www.media.mit.edu/projects/ai-generated-characters/overview/

Paper: https://www.nature.com/articles/s42256-021-00417-9

Github: https://github.com/mitmedialab/AI-generated-characters


![](https://drive.google.com/uc?export=view&id=17arRYqt6QyEjkj4-5eDrqRPcteTsbheO)


*This notebook combines previous work on AI-generated characters into one easy-to-use pipeline. It builds on the work of [Siarohin et al.](https://github.com/AliaksandrSiarohin/first-order-model), [Prajwal et al.](https://github.com/Rudrabha/Wav2Lip), and [Corentin](https://github.com/CorentinJ/Real-Time-Voice-Cloning). Please go check out their amazing work.*

**Licensed under the MIT License**


Copyright (c) 2021 MIT Media Lab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

**HOW TO USE THIS NOTEBOOK:** Press the play button on each cell one at a time from top to bottom.

In [1]:
#@markdown #**Installation of libraries**
# @markdown This cell will take a little while because it has to download several libraries.
%cd "/content"
import requests

print("\nDownloading Packages\n")
# Character Images
!gdown --id "16HzQKA4e3vpLY8Em57WnE8UwIE591aF1" -O "/content/mona_lisa.png" &> /dev/null
!gdown --id "1cgfFgzm4BrqKIkyspGib6u4ty5ReyeM_" -O "/content/einstein.png" &> /dev/null
!gdown --id "10N3e5E0R1aYcLVmE_dmtMCSYVFGQLTeq" -O "/content/lincoln.png" &> /dev/null
!gdown --id "1-BeSNGGjJADs5W-Rn6izAteuVzJcnhW1" -O "/content/nietzsche.png" &> /dev/null
!gdown --id "1zPPUQ7xgbhnpVNl26J1Gl6rXlJ6g0rK7" -O "/content/sokrates.png" &> /dev/null
!gdown --id "1mzzEdXEOohLcpr8L01JzOVbirEMJogni" -O "/content/van_gogh.png" &> /dev/null

# Face Cropping
!wget "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_alt2.xml" -O "/content/haarcascade_frontalface_alt2.xml" &> /dev/null

# Wav2Lip
!git clone "https://github.com/Rudrabha/Wav2Lip.git"
!wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "Wav2Lip/face_detection/detection/sfd/s3fd.pth" &> /dev/null

try:
  !curl -L -o '/content/Wav2Lip/checkpoints/wav2lip_gan.pth' 'https://drive.google.com/u/0/uc?id=1IKhxXy0mplOpGFWLH9_uUhBoIplao8j0&export=download&confirm=t'
except Exception as e: print(e)
#!gdown --id "1IKhxXy0mplOpGFWLH9_uUhBoIplao8j0" -O "/content/Wav2Lip/checkpoints/wav2lip_gan.pth" &> /dev/null

# First-Order-Model
!git clone "https://github.com/AliaksandrSiarohin/first-order-model"
try:
  !curl -L -o '/content/first-order-model/vox-cpk.pth.tar' 'https://drive.google.com/u/0/uc?id=19d9ZJYAMsNNQZd4AzIWCw4sF1EaNYuJ3&export=download&confirm=t'
except Exception as e: print(e)

# Template Data
#!gdown --id "1Qod7I5hiK1nCPsHBqAdK6hoYZgNzQPHi" -O "driving_video_long.mp4"
!gdown --id "1o2zD5xky8F6wZ21PkeG5KhJOlSdkeEpm" -O "driving_video.mp4" &> /dev/null

# Watermark and noise overlays that tracability() composites onto the final video.
# Each entry is (source URL, local file name); the remote noise2.jpg is stored under
# the name noise_2.png because that is the path tracability() opens.
overlay_downloads = [
  ('https://raw.githubusercontent.com/mitmedialab/AI-generated-characters/main/gen.png', 'gen.png'),
  ('https://raw.githubusercontent.com/mitmedialab/AI-generated-characters/main/noise2.jpg', 'noise_2.png'),
]
for url, overlay_name in overlay_downloads:
  r = requests.get(url, allow_redirects=True)
  # Fail here on an HTTP error instead of saving the error page as an image.
  r.raise_for_status()
  # The context manager guarantees the file is flushed and closed.
  with open(overlay_name, 'wb') as overlay_file:
    overlay_file.write(r.content)


print("\nInstalling required libraries\n")
!pip install -r Wav2Lip/requirements.txt -y &> /dev/null
!pip uninstall tensorflow tensorflow-gpu -y &> /dev/null
!pip install ffmpeg -y &> /dev/null
!pip install https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip &> /dev/null


# General Functions
print("\nLoading Libraries and functions\n")
import sys
import numpy as np
import ipywidgets as widgets
from io import StringIO
from IPython import get_ipython
from IPython.display import display, Audio, clear_output
from dl_colab_notebooks.audio import record_audio, upload_audio
from scipy.io import wavfile

class IpyExit(SystemExit):
    """
    Exit Exception for IPython.

    Creating the exception prints the input-selection error message and
    temporarily redirects ``sys.stderr`` to an in-memory buffer. When the
    instance is finalized, the stream that was active before the redirect is
    put back and the buffer is closed.
    """
    def __init__(self):
        super().__init__()
        print("Error: Please only select one input. If you will not use text please leave text field empty.")
        # Remember the stream being replaced so exactly that stream is restored
        # later; it is not necessarily sys.__stderr__.
        self._previous_stderr = sys.stderr
        self._buffer = StringIO()
        sys.stderr = self._buffer

    def __del__(self):
        buffer = getattr(self, "_buffer", None)
        if buffer is None:
            # __init__ never reached the redirect, so there is nothing to undo.
            return
        if sys.stderr is buffer:
            previous = self._previous_stderr
            # Fall back to the interpreter's original stream if the saved one
            # has been closed in the meantime.
            sys.stderr = sys.__stderr__ if getattr(previous, "closed", False) else previous
        # Close only the buffer this instance created, never a stream owned by
        # someone else (closing an already closed StringIO is a no-op).
        buffer.close()

from google.colab import files
def getLocalFiles():
  """Run Colab's upload dialog and return the first key of its result.

  The key is used by callers as the uploaded file's name.
  """
  upload_keys = iter(files.upload())
  return next(upload_keys)


# First-order-model
import imageio
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML
import warnings
# Suppress all Python warnings from here on so library warnings do not clutter cell output.
warnings.filterwarnings("ignore")

def _compute_embedding(audio):
    """Play `audio` and store its speaker embedding in the global `embedding`.

    The global is reset to None first, so it stays None if the encoder fails.
    NOTE(review): `encoder` is not defined or imported anywhere in the visible
    notebook — confirm it is provided before this function is used.
    """
    global embedding
    display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
    embedding = None
    preprocessed_wav = encoder.preprocess_wav(audio, SAMPLE_RATE)
    embedding = encoder.embed_utterance(preprocessed_wav)

def _record_audio(b):
  """Button callback: record from the microphone, play it back, and save it.

  `b` is the clicked widget and is not used. The recording lasts
  `record_seconds` seconds at `SAMPLE_RATE` and is written to
  'driving_audio.wav' as 16-bit integers.
  """
  clear_output()
  recording = record_audio(record_seconds, sample_rate=SAMPLE_RATE)
  playback = Audio(recording, rate=SAMPLE_RATE, autoplay=True)
  display(playback)
  # Scale by the int16 maximum before converting to 16-bit samples.
  pcm_samples = (32767*recording).astype(np.int16)
  wavfile.write('driving_audio.wav', SAMPLE_RATE, pcm_samples)

def _upload_audio(b):
  """Button callback: clear the output, upload an audio file, and embed it.

  `b` is the clicked widget and is not used.
  """
  clear_output()
  _compute_embedding(upload_audio(sample_rate=SAMPLE_RATE))

def trim_img(img_src, padding=40):
  """Crop an image to its largest detected face plus a margin.

  The image is resized to a width of 400 pixels, a frontal-face Haar cascade is
  run on its grayscale version, and the largest detection, extended by
  `padding` pixels on every side and clamped to the image borders, is written
  to '/content/img_trimmed.png'.

  Args:
    img_src: Path of the image to crop.
    padding: Margin in pixels added around the detected face box.

  Returns:
    '/content/img_trimmed.png' when a face was cropped, otherwise `img_src`
    unchanged so the caller still receives a path to an existing image.

  Raises:
    ValueError: If `img_src` cannot be read as an image.
  """
  import imutils

  # Read the input image; cv2.imread signals failure by returning None
  img = cv2.imread(img_src)
  if img is None:
    raise ValueError(f"Could not read image: {img_src}")
  img = imutils.resize(img, width=400)

  # Face detection runs on the grayscale version
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

  face_cascade = cv2.CascadeClassifier('/content/haarcascade_frontalface_alt2.xml')
  faces = face_cascade.detectMultiScale(gray, 1.1, 4)
  if len(faces) == 0:
    print("Error: No face detected. Try a different image, or trim it yourself to 400x400.")
    return img_src

  # With several detections keep the largest box instead of whichever came last
  x, y, w, h = (int(value) for value in max(faces, key=lambda box: box[2] * box[3]))

  # Clamp the padded box to the image: a negative slice start would wrap around
  # to the other side of the array and produce an empty or wrong crop
  img_height, img_width = img.shape[:2]
  top = max(y - padding, 0)
  bottom = min(y + h + padding, img_height)
  left = max(x - padding, 0)
  right = min(x + w + padding, img_width)

  trimmed_path = '/content/img_trimmed.png'
  cv2.imwrite(trimmed_path, img[top:bottom, left:right])
  return trimmed_path


def animate_video(img_filename, vid_filename):
    %cd /content/first-order-model/
    
    from demo import make_animation
    from demo import load_checkpoints
    from skimage import img_as_ubyte

    source_image = imageio.imread(img_filename)
    driving_video = imageio.mimread(vid_filename, fps=30, memtest=False) 

    # Resize image and video to 256x256
    source_image = resize(source_image, (256, 256))[..., :3]
    driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

    # Load Model
    generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml', checkpoint_path='/content/first-order-model/vox-cpk.pth.tar')

    # Make Animation
    predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True,
                                adapt_movement_scale=False)
    #save resulting video
    imageio.mimsave('/content/vidvid.mp4', [img_as_ubyte(frame) for frame in predictions], fps=30)

    %cd /content


def tracability(video_filename):
  """Overlay the noise layer and the watermark on a video.

  Composites '/content/noise_2.png' (centered, 5% opacity) and
  '/content/gen.png' (bottom-left) over `video_filename` for its whole
  duration and writes the result to "/content/marked.mp4".
  """
  import moviepy.editor as mp

  source_clip = mp.VideoFileClip(video_filename)

  # Faint full-frame noise layer
  noise_layer = mp.ImageClip('/content/noise_2.png')
  noise_layer = noise_layer.set_duration(source_clip.duration)
  noise_layer = noise_layer.set_opacity(.05)
  noise_layer = noise_layer.resize(height = 552)
  noise_layer = noise_layer.margin(right = 0, top = 0, opacity = 1.0)
  noise_layer = noise_layer.set_pos(("center", "center"))

  # Visible watermark in the bottom-left corner
  watermark = mp.ImageClip('/content/gen.png')
  watermark = watermark.set_duration(source_clip.duration)
  watermark = watermark.resize(height = 50)
  watermark = watermark.margin(right = 0, top = 0, opacity = 1.0)
  watermark = watermark.set_pos(("left", "bottom"))

  layers = [source_clip, noise_layer, watermark]
  mp.CompositeVideoClip(layers).write_videofile("/content/marked.mp4")

# Last statement of the setup cell: reached only after all statements above have run.
print("\nSuccesfully Finished Installing Libraries\n")

/content

Downloading Packages

Cloning into 'Wav2Lip'...
remote: Enumerating objects: 360, done.[K
remote: Total 360 (delta 0), reused 0 (delta 0), pack-reused 360[K
Receiving objects: 100% (360/360), 522.30 KiB | 4.50 MiB/s, done.
Resolving deltas: 100% (198/198), done.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  415M  100  415M    0     0  38.5M      0  0:00:10  0:00:10 --:--:-- 52.8M
Cloning into 'first-order-model'...
remote: Enumerating objects: 337, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 337 (delta 15), reused 18 (delta 6), pack-reused 306[K
Receiving objects: 100% (337/337), 72.16 MiB | 30.52 MiB/s, done.
Resolving delt

In [2]:
#@markdown #**Choose Character**

# TO DO: Show Images of Characters one can choose.

# @markdown Choose the character which you want to animate. If you have any requests for new characters to animate, please let us know here: patpat@mit.edu
character = 'Einstein' #@param ["Van Gogh", "Mona Lisa", "Einstein", "Lincoln", "Nietzsche", "Sokrates", "Upload Your Own"]
print(f"{character} selected.")

if character == "Upload Your Own":
  character_img = "/content/"+getLocalFiles()
  # Decode the upload once; cv2.imread returns None when the file is not a readable image
  uploaded_img = cv2.imread(character_img)
  if uploaded_img is None:
    raise ValueError(f"Could not read {character_img} as an image. Please upload a valid image file.")
  img_height, img_width = uploaded_img.shape[:2]
  # Only non-square uploads are cropped around the face
  if img_height != img_width:
    print("Cropping uploaded image")
    character_img = trim_img(character_img)

else:
  # Built-in characters are stored as /content/<lowercase_name_with_underscores>.png
  character = character.lower().replace(" ", "_") # make lowercase and remove spacing
  character_img = "/content/"+character+".png"

Einstein selected.


In [3]:
#@markdown #**Choose Inputs**
# @markdown Please select one of the available inputs. Leave the text field empty if you want to animate the character with audio or video.


#Welcome. Today we will learn about the Theory of Relativity. I first came up with this method when...
text = "" #@param {type:"string"}
#@markdown --
audio = True #@param {type:"boolean"}
#@markdown * Either record audio from microphone or upload audio from file (.mp3 or .wav) 
record_or_upload = "Record" #@param ["Record", "Upload (.mp3 or .wav)"]
record_seconds =  5#@param {type:"number", min:1, max:10, step:1}
#@markdown --
video = False #@param {type:"boolean"}

# Exactly one input source may be active: count how many were selected
selected_inputs = sum(bool(choice) for choice in (text != "", audio, video))
if selected_inputs > 1:
  raise IpyExit


if video:
  print("Please upload the video you wish to drive the animation with:\n")
  video_driver = "/content/"+getLocalFiles()

  #to do: make sure only supported video formats can be uploaded

elif audio:

  SAMPLE_RATE = 22050
  embedding = None

  if record_or_upload == "Record":
    print("Please record the audio you wish to drive the animation with. Remember to enable your microphone in Chrome:\n")
    button = widgets.Button(description="Record Your Voice")
    button.on_click(_record_audio) 
    display(button)
    # The file only exists after the button has been clicked and the recording saved
    audio_driver = "/content/driving_audio.wav"
  else:
    print("Please upload the audio you wish to drive the animation with:\n")
    audio_driver = "/content/"+getLocalFiles()
  # Audio input is paired with the bundled template driving video
  video_driver = "/content/driving_video.mp4"

elif text:
  print("Text is currently unsupported but will be soon.. Please use either audio or video inputs for now.")

else:
  # Without this branch the cell would finish silently and the next cell would
  # fail on variables that were never defined
  print("Error: No input selected. Please enable either audio or video before running the next cell.")

Please record the audio you wish to drive the animation with. Remember to enable your microphone in Chrome:



Button(description='Record Your Voice', style=ButtonStyle())

In [4]:
from numpy.core import memmap
import shutil


#@markdown #**Generate Character**
#@markdown This is likely to take a while depending on the length of your driving video. First we generate the movements of the character using the first-order-model approach, and then, if audio or text was given as input, we either synthesize audio from or use the audio provided to make the character lipsymc it using Wav2Lip.

#@markdown **If you have any errors, please make sure that you have run all the preceding cells and uploaded/recorded all the necessary inputs.**

!cd /content/
print("Animating Character with Driving Video: This might take a few minutes..")
animate_video(character_img, video_driver) # variables are only for showing HTML video
final_video_driver = "/content/vidvid.mp4"

if text != "":
  print("Generating speech from text")
  # generate audio
  #audio_driver = _GENERATED AUDIO.wav_
  audio = True

if audio:
  print("Lipsyncing Character with Audio")
  # Using Wav2Lip
  %cd /content/Wav2Lip
  !python inference.py --checkpoint_path "/content/Wav2Lip/checkpoints/wav2lip_gan.pth" --face $final_video_driver --audio $audio_driver &> /dev/null
  %cd /content
  final_video_driver = "/content/Wav2Lip/results/result_voice.mp4"
else:
  audio_driver = "/content/driver.wav"
  !ffmpeg -i $video_driver -q:a 0 -map 0:a "/content/driver.wav" -y &> /dev/null
  !ffmpeg -i $final_video_driver -i $audio_driver -c:v copy -c:a aac merged.mp4 -y &> /dev/null
  final_video_driver = "merged.mp4"

# Traceability
tracability(final_video_driver)
final_video_driver = "marked.mp4"
!ffmpeg -i $final_video_driver -i $audio_driver final_generated.mp4 -y &> /dev/null
!ffmpeg -i $final_video_driver ai_generated_character.mp4 -y &> /dev/null
final_video_driver = "ai_generated_character.mp4"

# display result
from IPython.display import HTML
from base64 import b64encode
mp4 = open("/content/final_generated.mp4",'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

Animating Character with Driving Video: This might take a few minutes..
/content/first-order-model


ModuleNotFoundError: ignored

In [None]:
#@markdown ### **Download the Generated Video**
#@markdown Run this cell to download your generated video. If you wish to change your AI-generated character or the input, please go back to that cell and repeat the same process. You can skip the **Installation of libraries** section.

from google.colab import files
# `final_video_driver` is set by the "Generate Character" cell to a path relative to
# the working directory, so that cell must have completed before this one runs.
files.download(final_video_driver)