# Animation example

In [1]:
#@title Mount Google Drive
import os

# Pick the folder that animations are written to. When running in Colab the
# user's Google Drive is mounted and a folder inside it is used; anywhere else
# (or when mounting fails) the current working directory is used instead.
try:
    from google.colab import drive
    drive.mount('/content/gdrive')
    outputs_path = "/content/gdrive/MyDrive/AI/StabilityAnimations"
    # Pure-Python equivalent of `mkdir -p`: a failure here is reported through
    # the fallback below instead of being silently ignored by the shell.
    os.makedirs(outputs_path, exist_ok=True)
except Exception:
    # Deliberate best-effort fallback. `Exception` (rather than a bare
    # `except`) lets KeyboardInterrupt/SystemExit propagate, so interrupting
    # the Drive authorisation prompt actually stops the cell.
    outputs_path = "."
print(f"Animations will be saved to {outputs_path}")

In [2]:
#@title Connect to the Stability API
import grpc
import os
from pathlib import Path
import shutil
import sys


# Start from a clean slate: if a previous checkout exists in the working
# directory, delete it and uninstall the previously installed package so the
# fresh clone below is the one that gets installed.
path = Path('stability-sdk')
if path.exists():
    shutil.rmtree(path)
    !pip uninstall -y stability-sdk


# Clone the `anima` branch of the SDK together with its git submodules.
!git clone -b anima --recurse-submodules https://github.com/Stability-AI/stability-sdk
# Create an (empty) __init__.py in the interfaces directory.
# NOTE(review): presumably needed so `stability_sdk.interfaces` is importable
# as a package after install -- confirm against the SDK's packaging.
!touch ./stability-sdk/src/stability_sdk/interfaces/__init__.py
# Install the SDK from the local checkout (not from PyPI).
!pip install ./stability-sdk


from stability_sdk import client
from stability_sdk.utils import (
    color_match_from_string,
    sampler_from_string,
    key_frame_inbetweens,
    key_frame_parse,
    guidance_from_string,
    #curve_to_series,
    image_mix,
    image_to_jpg_bytes,
    image_to_png_bytes,
    image_to_prompt,
    image_xform,
    warp2d_op,
    warp3d_op,
)


from stability_sdk.client import generation, generation_grpc # not a huge fan of this but at least it works


# GRPC endpoint and engines
# GRPC_HOST and API_KEY are Colab form fields and are empty by default; both
# must be filled in before running this cell.
# NOTE(review): a key typed into the form is stored in the notebook source --
# clear it before sharing or committing the notebook.
GRPC_HOST = "" #@param {type:"string"}
API_KEY = "" #@param {type:"string"}
# Engine identifiers. Only TRANSFORM_ENGINE_ID is referenced by the code
# visible in this notebook (it is passed to `image_xform`).
GENERATE_ENGINE_ID = 'stable-diffusion-v1-5'
INPAINT_ENGINE_ID = 'stable-diffusion-v1-5'
TRANSFORM_ENGINE_ID = 'transform-server-v1'


# Connect to Stability API
# `stub` is a notebook-level global: the rendering code passes it to
# `image_gen`, `image_inpaint` and `image_xform`.
stub = client.open_channel(GRPC_HOST, api_key=API_KEY)


In [3]:
#@title Code definitions

import bisect
import cv2
import datetime
import json
import logging
import numpy as np
import os
import pandas as pd
import pathlib
import random
import re
import subprocess
import sys

from base64 import b64encode
from collections import OrderedDict
from IPython import display
from PIL import Image
from tqdm import tqdm
from types import SimpleNamespace
from typing import List, Tuple

from stability_sdk.client import (
    image_gen,
    image_inpaint,
)


In [4]:
#@title Settings

def Args() -> dict:
    """Collect every animation setting into a dict.

    Each local variable below is one setting; most are exposed as Colab form
    fields through their trailing `#@param` annotation. The function returns
    `locals()`, so the dict keys are exactly the local variable names --
    adding, removing or renaming a local changes the settings seen by callers.

    Returns:
        dict mapping setting name to its value.
    """

    #@markdown ####**Settings:**
    W = 512 #@param
    H = 512 #@param
    W, H = map(lambda x: x - x % 64, (W, H))  # resize to integer multiple of 64
    sampler = 'K_euler_ancestral' #@param ["DDIM", "PLMS", "K_euler", "K_euler_ancestral", "K_heun", "K_dpm_2", "K_dpm_2_ancestral", "K_lms"]
    # A seed <= 0 is replaced by a random seed in Animator.setup_animation.
    seed = -1 #@param
    cfg_scale = 7 #@param {type:"number"}
    clip_guidance = 'FastBlue' #@param ["None", "Simple", "FastBlue", "FastGreen"]

    #@markdown ####**Animation Settings:**
    animation_mode = '3D' #@param ['2D', '3D', 'Video Input'] {type:'string'}
    max_frames = 60 #@param {type:"number"}
    border = 'replicate' #@param ['reflect', 'replicate', 'wrap', 'zero'] {type:'string'}
    inpaint_border = False #@param {type:"boolean"}
    interpolate_prompts = False #@param {type:"boolean"}
    locked_seed = False #@param {type:"boolean"}

    #@markdown ####**Key framed value curves:**
    # Each curve is a string that Animator.setup_animation expands into one
    # value per frame via key_frame_parse / key_frame_inbetweens.
    # NOTE(review): the strings look like "frame:(value)" key frames -- confirm
    # the exact grammar against stability_sdk.utils.key_frame_parse.
    angle = "0:(1)" #@param {type:"string"}
    zoom = "0:(1.05)" #@param {type:"string"}
    translation_x = "0:(0)" #@param {type:"string"}
    translation_y = "0:(0)" #@param {type:"string"}
    translation_z = "0:(5)" #@param {type:"string"}
    rotation_x = "0:(0)" #@param {type:"string"}
    rotation_y = "0:(0)" #@param {type:"string"}
    rotation_z = "0:(1)" #@param {type:"string"}
    brightness_curve = "0: (1.0)" #@param {type:"string"}
    contrast_curve = "0: (1.0)" #@param {type:"string"}
    noise_curve = "0:(0.0)" # likely to be removed, still hidden here for potential experiments
    noise_scale_curve = "0:(1.02)" #@param {type:"string"}
    steps_curve = "0:(50)" #@param {type:"string"}
    strength_curve = "0:(0.65)" #@param {type:"string"}

    #@markdown ####**Coherence:**
    color_coherence = 'LAB' #@param ['None', 'HSV', 'LAB', 'RGB'] {type:'string'}
    # The renderer clamps the per-frame cadence to at least 1.
    diffusion_cadence_curve = "0:(4)" #@param {type:"string"}

    #@markdown ####**3D Depth Warping:**
    #use_depth_warping = True #@param {type:"boolean"}
    midas_weight = 0.3 #@param {type:"number"}
    # near_plane / far_plane have no form field; they are passed to warp3d_op.
    near_plane = 200
    far_plane = 10000
    fov_curve = "0:(25)" #@param {type:"string"}
    save_depth_maps = False #@param {type:"boolean"}

    #@markdown ####**Video Input:**
    # Only used when animation_mode == 'Video Input'.
    video_init_path = '/content/video_in.mp4' #@param {type:"string"}
    extract_nth_frame = 4 #@param {type:"number"}
    video_mix_in_curve = "0:(0.02)" #@param {type:"string"}
    video_flow_warp = True #@param {type:"boolean"}

    # Every local defined above becomes a key of the returned dict.
    return locals()


### Prompts

In [5]:
# Prompts keyed by the frame index at which they take effect. The renderer
# requires the first key to be 0.
animation_prompts = {
    0: "a painting of a delicious cheeseburger by Tyler Edlin",
    24: "a painting of the answer to life the universe and everything by Tyler Edlin",
}

# Optional negative prompt. It is only added to a frame's prompts when it is a
# non-empty string and the weight is non-zero; the weight is always applied
# as a negative value (-abs(weight)).
negative_prompt = ""
negative_prompt_weight = -1.0

#####################


def display_frame(image: np.ndarray):
    """Show `image` in the cell output, replacing whatever was shown before.

    The array is run through OpenCV's BGR -> RGB conversion and wrapped in a
    PIL image before being displayed.
    """
    # wait=True defers the clear until the new output arrives.
    display.clear_output(wait=True)
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    frame_picture = Image.fromarray(rgb_pixels)
    display.display(frame_picture)

def get_animation_prompts_weights(frame_idx: int, key_frame_values: List[int], interp: bool) -> Tuple[List[str], List[float]]:
    """Return the prompt(s) and blend weight(s) that apply to a frame.

    Prompts are looked up in the notebook-level `animation_prompts` dict.

    Args:
        frame_idx: index of the frame being rendered.
        key_frame_values: sorted list of the frame indices that have a prompt.
        interp: when True, frames between two key frames get both neighbouring
            prompts with linearly interpolated weights; when False the most
            recent key frame's prompt is used on its own.

    Returns:
        (prompts, weights) as two parallel lists. Either a single prompt with
        weight 1.0, or two prompts whose weights sum to 1.0.

    Raises:
        IndexError: if `key_frame_values` is empty.
    """
    # Position of the first key frame strictly after frame_idx; the key frame
    # in effect is the one just before it.
    next_pos = bisect.bisect_right(key_frame_values, frame_idx)
    prev_pos = next_pos - 1

    if prev_pos < 0:
        # frame_idx precedes every key frame: hold the first prompt. Without
        # this guard the -1 index would wrap around to the LAST key frame.
        return [animation_prompts[key_frame_values[0]]], [1.0]

    if not interp or next_pos == len(key_frame_values):
        # No blending requested, or we are at/after the final key frame.
        return [animation_prompts[key_frame_values[prev_pos]]], [1.0]

    prev_key = key_frame_values[prev_pos]
    next_key = key_frame_values[next_pos]
    tween = (frame_idx - prev_key) / (next_key - prev_key)
    return [animation_prompts[prev_key], animation_prompts[next_key]], [1.0 - tween, tween]


In [6]:
#@title Render the animation

class Animator:
    """Renders an animation frame by frame through the Stability API.

    The class relies on names defined at notebook level: `stub`,
    `TRANSFORM_ENGINE_ID`, `animation_prompts`, `negative_prompt`,
    `negative_prompt_weight`, `timestring`, `display_frame` and
    `get_animation_prompts_weights`, plus the helpers imported from
    `stability_sdk`.

    Attributes set by `__init__` / `setup_animation`:
        args: settings namespace (see `Args`).
        out_dir: folder that frames, depth maps and settings are written to.
        frame_args: namespace of per-frame value series, one per curve.
        keyframe_values: sorted prompt key frames.
        prior_frames: initial pair of frames (empty unless in video mode).
        video_reader: cv2.VideoCapture in 'Video Input' mode, else None.
        video_prev_frame: most recently read (resized) input video frame.
    """

    def __init__(self, args, out_dir):
        """Store the settings, prepare the animation state, save the settings.

        Args:
            args: namespace holding every key returned by `Args()`.
            out_dir: existing directory that outputs are written to.
        """
        self.args = args
        self.out_dir = out_dir
        # Setup must run first: it turns a "random seed" request into a
        # concrete seed, and that value has to be in place before the
        # settings are written, otherwise the run cannot be reproduced.
        self.setup_animation()
        self.save_settings()

    def save_settings(self):
        """Write settings and prompts as JSON to `<out_dir>/<timestring>_settings.txt`.

        `timestring` is the notebook-level global created together with the
        output folder.
        """
        settings_filename = os.path.join(self.out_dir, f"{timestring}_settings.txt")
        save_dict = OrderedDict(vars(self.args))
        # Move the motion curves to the end of the settings.
        for k in ['angle', 'zoom', 'translation_x', 'translation_y', 'translation_z', 'rotation_x', 'rotation_y', 'rotation_z']:
            save_dict.move_to_end(k, last=True)
        save_dict['animation_prompts'] = animation_prompts
        save_dict['negative_prompt'] = negative_prompt
        save_dict['negative_prompt_weight'] = negative_prompt_weight
        with open(settings_filename, "w", encoding="utf-8") as f:
            json.dump(save_dict, f, ensure_ascii=False, indent=4)

    def setup_animation(self):
        """Expand the curves, validate the key frames and open the input video.

        Raises:
            ValueError: if there are no prompts or the first key frame is not 0.
            Exception: if the first frame of the input video cannot be read.
        """
        args = self.args

        # change request for random seed into explicit value so it is saved to settings
        if args.seed <= 0:
            args.seed = random.randint(0, 2**32 - 1)

        def curve_to_series(curve: str) -> List[float]:
            return key_frame_inbetweens(key_frame_parse(curve), args.max_frames)

        self.frame_args = SimpleNamespace(
            angle_series=curve_to_series(args.angle),
            zoom_series=curve_to_series(args.zoom),
            translation_x_series=curve_to_series(args.translation_x),
            translation_y_series=curve_to_series(args.translation_y),
            translation_z_series=curve_to_series(args.translation_z),
            rotation_x_series=curve_to_series(args.rotation_x),
            rotation_y_series=curve_to_series(args.rotation_y),
            rotation_z_series=curve_to_series(args.rotation_z),
            brightness_series=curve_to_series(args.brightness_curve),
            contrast_series=curve_to_series(args.contrast_curve),
            noise_series=curve_to_series(args.noise_curve),
            noise_scale_series=curve_to_series(args.noise_scale_curve),
            steps_series=curve_to_series(args.steps_curve),
            strength_series=curve_to_series(args.strength_curve),
            diffusion_cadence_series=curve_to_series(args.diffusion_cadence_curve),
            fov_series=curve_to_series(args.fov_curve),
            video_mix_in_series=curve_to_series(args.video_mix_in_curve),
        )

        # prepare sorted list of key frames (dict keys are unique by
        # construction, so no duplicate check is needed)
        key_frame_values = sorted(animation_prompts.keys())
        if not key_frame_values or key_frame_values[0] != 0:
            raise ValueError("First keyframe must be 0")
        self.keyframe_values = key_frame_values

        # diffusion performed every N frames. two prior diffused frames
        # are transformed and blended between to produce each output frame
        self.prior_frames = []
        self.video_reader = None
        self.video_prev_frame = None

        # load input video
        if args.animation_mode == 'Video Input':
            video_in = args.video_init_path
            self.video_reader = cv2.VideoCapture(video_in)
            success, image = self.video_reader.read()
            if not success:
                raise Exception(f"Failed to read first frame from {video_in}")
            self.video_prev_frame = cv2.resize(image, (args.W, args.H), interpolation=cv2.INTER_LANCZOS4)
            self.prior_frames = [self.video_prev_frame, self.video_prev_frame]

    def build_prior_frame_transforms(self, prior_frames, frame_idx, args, prompts, weights, steps, seed, color_match_image, out_dir=None):
        """Apply this frame's camera / video transform to the prior frames.

        Args:
            prior_frames: the frames to transform.
            frame_idx: index of the frame being rendered.
            args: settings namespace.
            prompts, weights, steps, seed: forwarded to `image_inpaint` when
                border inpainting is enabled (with `steps // 2`).
            color_match_image: current colour-matching target.
            out_dir: folder for exported depth maps; defaults to `self.out_dir`.

        Returns:
            (ops, prior_frames, mask, inpaint_mask, color_match_image).
            `mask` and `inpaint_mask` are None when no transform was applied;
            `inpaint_mask` is also None once inpainting has been performed
            here. In 'Video Input' mode `color_match_image` is replaced by the
            newly read video frame and `self.video_prev_frame` is updated.
        """
        if out_dir is None:
            out_dir = self.out_dir
        frame_args = self.frame_args
        # Defined up front so the return below is valid when no op is built.
        mask = None
        inpaint_mask = None

        ops = []
        if args.save_depth_maps or args.animation_mode == '3D':
            ops.append(generation.TransformOperation(
                depth_calc=generation.TransformDepthCalc(
                    blend_weight=args.midas_weight,
                    export=args.save_depth_maps
                )
            ))
        if args.animation_mode == '2D':
            ops.append(warp2d_op(
                frame_args.translation_x_series[frame_idx],
                frame_args.translation_y_series[frame_idx],
                frame_args.angle_series[frame_idx],
                frame_args.zoom_series[frame_idx],
                args.border
            ))
        elif args.animation_mode == '3D':
            ops.append(warp3d_op(
                frame_args.translation_x_series[frame_idx],
                frame_args.translation_y_series[frame_idx],
                frame_args.translation_z_series[frame_idx],
                frame_args.rotation_x_series[frame_idx],
                frame_args.rotation_y_series[frame_idx],
                frame_args.rotation_z_series[frame_idx],
                args.near_plane, args.far_plane,
                frame_args.fov_series[frame_idx], args.border
            ))
        elif args.animation_mode == 'Video Input':
            # Advance the reader by extract_nth_frame frames; only the last
            # read is used. The defaults cover extract_nth_frame == 0.
            success, video_next_frame = False, None
            for _ in range(args.extract_nth_frame):
                success, video_next_frame = self.video_reader.read()
            if success:
                video_next_frame = cv2.resize(video_next_frame, (args.W, args.H), interpolation=cv2.INTER_LANCZOS4)
                if args.video_flow_warp:
                    ops.append(generation.TransformOperation(
                        warp_flow=generation.TransformWarpFlow(
                            prev_frame=generation.Artifact(type=generation.ARTIFACT_IMAGE, binary=image_to_jpg_bytes(self.video_prev_frame)),
                            next_frame=generation.Artifact(type=generation.ARTIFACT_IMAGE, binary=image_to_jpg_bytes(video_next_frame)),
                        )
                    ))
                # Kept on the instance so later frames (and the video mix-in
                # step of the renderer) see the current frame.
                self.video_prev_frame = video_next_frame
                color_match_image = video_next_frame

        if len(ops):
            prior_frames, mask = image_xform(stub, prior_frames, ops, TRANSFORM_ENGINE_ID)
            inpaint_mask = mask if args.inpaint_border else None

            # A third returned image is treated as the depth map and removed
            # from the frame list.
            depth_map = prior_frames.pop(0) if len(prior_frames) == 3 else None
            if depth_map is not None and args.save_depth_maps:
                cv2.imwrite(os.path.join(out_dir, f"depth_{frame_idx:05d}.png"), depth_map)

            if inpaint_mask is not None:
                for i in range(len(prior_frames)):
                    prior_frames[i] = image_inpaint(stub, prior_frames[i], inpaint_mask, prompts, weights, steps//2, seed, args.cfg_scale)
                inpaint_mask = None
        return ops, prior_frames, mask, inpaint_mask, color_match_image

    def _prepare_init_image(self, init_image, frame_idx, args, seed, color_match_image):
        """Colour-match, mix in video, adjust contrast and add noise to the init image.

        Returns `init_image` unchanged when none of the adjustments apply.
        """
        frame_args = self.frame_args
        noise = frame_args.noise_series[frame_idx]
        brightness = frame_args.brightness_series[frame_idx]
        contrast = frame_args.contrast_series[frame_idx]
        mix_in = frame_args.video_mix_in_series[frame_idx]

        ops = []
        if args.color_coherence != 'None' and color_match_image is not None:
            ops.append(generation.TransformOperation(color_match=generation.TransformColorMatch(
                color_mode=color_match_from_string(args.color_coherence),
                image=generation.Artifact(type=generation.ARTIFACT_IMAGE, binary=image_to_jpg_bytes(color_match_image))
            )))
        if mix_in > 0 and self.video_prev_frame is not None:
            ops.append(generation.TransformOperation(blend=generation.TransformBlend(
                amount=mix_in,
                target=generation.Artifact(type=generation.ARTIFACT_IMAGE, binary=image_to_jpg_bytes(self.video_prev_frame))
            )))
        if brightness != 1.0 or contrast != 1.0:
            ops.append(generation.TransformOperation(contrast=generation.TransformContrast(
                brightness=brightness, contrast=contrast
            )))
        if noise > 0:
            ops.append(generation.TransformOperation(add_noise=generation.TransformAddNoise(amount=noise, seed=seed)))

        if not ops:
            return init_image
        # image_xform returns (images, mask); take the single transformed image.
        return image_xform(stub, [init_image], ops, TRANSFORM_ENGINE_ID)[0][0]

    def render_animation(self, args=None, out_dir=None):
        """Render `args.max_frames` frames, writing and displaying each one.

        Every `diffusion_cadence` frames a new image is generated; the frames
        in between are blends of the two most recent generated images. Each
        frame is written to `<out_dir>/frame_<index>.png`.

        Args:
            args: settings namespace; defaults to `self.args`.
            out_dir: output folder; defaults to `self.out_dir`.
        """
        if not args:
            args = self.args
        if not out_dir:
            out_dir = self.out_dir
        key_frame_values = self.keyframe_values
        seed = args.seed
        color_match_image = None # optional target for color matching
        inpaint_mask = None      # optional mask of revealed areas
        diffusion_cadence_ofs = 0 # frame index of the most recent diffusion
        # Work on a copy so the instance's initial frames are not mutated.
        prior_frames = list(self.prior_frames)

        # These depend only on the settings, so resolve them once.
        sampler = sampler_from_string(args.sampler.lower())
        guidance = guidance_from_string(args.clip_guidance)

        for frame_idx in tqdm(range(args.max_frames)):
            diffusion_cadence = max(1, int(self.frame_args.diffusion_cadence_series[frame_idx]))
            steps = int(self.frame_args.steps_series[frame_idx])

            # fetch set of prompts and weights for this frame
            prompts, weights = get_animation_prompts_weights(frame_idx, key_frame_values, interp=args.interpolate_prompts)
            if len(negative_prompt) and negative_prompt_weight != 0.0:
                prompts.append(negative_prompt)
                weights.append(-abs(negative_prompt_weight))

            # apply transformation to prior frames
            if len(prior_frames):
                _, prior_frames, _, inpaint_mask, color_match_image = self.build_prior_frame_transforms(
                    prior_frames, frame_idx, args, prompts, weights, steps, seed, color_match_image,
                    out_dir=out_dir
                )

            # either run diffusion or emit an inbetween frame
            if (frame_idx-diffusion_cadence_ofs) % diffusion_cadence == 0:
                if inpaint_mask is not None:
                    prior_frames[-1] = image_inpaint(stub, prior_frames[-1], inpaint_mask, prompts, weights, steps//2, seed, args.cfg_scale)
                    inpaint_mask = None
                strength = self.frame_args.strength_series[frame_idx]

                # apply additional noising and color matching to previous frame to use as init
                init_image = prior_frames[-1] if len(prior_frames) and strength > 0 else None
                if init_image is not None:
                    init_image = self._prepare_init_image(init_image, frame_idx, args, seed, color_match_image)

                # generate the next frame
                noise_scale = self.frame_args.noise_scale_series[frame_idx]
                image = image_gen(
                    stub,
                    args.W, args.H,
                    prompts, weights,
                    steps, seed, args.cfg_scale, sampler,
                    init_image, strength,
                    init_noise_scale=noise_scale,
                    guidance_preset=guidance
                )

                if color_match_image is None:
                    color_match_image = image
                if not len(prior_frames):
                    prior_frames = [image, image]

                # The frame emitted now is the previously generated one; the
                # new image becomes the blend target for the next frames.
                cv2.imwrite(os.path.join(out_dir, f'frame_{frame_idx:05}.png'), prior_frames[1])
                display_frame(prior_frames[1])
                prior_frames[0] = prior_frames[1]
                prior_frames[1] = image
                diffusion_cadence_ofs = frame_idx
            else:
                # smoothly blend between prior frames
                tween = ((frame_idx-diffusion_cadence_ofs) % diffusion_cadence) / float(diffusion_cadence)
                t = image_mix(prior_frames[0], prior_frames[1], tween)
                cv2.imwrite(os.path.join(out_dir, f'frame_{frame_idx:05}.png'), t)
                display_frame(t)

            if not args.locked_seed:
                seed += 1

# create folder for frames output
# `timestring` and `out_dir` are notebook-level globals: the settings file
# name and the video cell below are derived from them.
timestring = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
out_dir = os.path.join(outputs_path, timestring)
os.makedirs(out_dir, exist_ok=True)
print(f"Saving animation frames to {out_dir}...")

# Wrap the settings dict in a namespace so they can be read as attributes,
# then build the animator (which also writes the settings file) and render.
args = SimpleNamespace(**Args())
artist = Animator(args, out_dir)
artist.render_animation()

In [57]:
#@title Create video from frames
skip_video_for_run_all = False #@param {type: 'boolean'}
fps = 12 #@param {type:"number"}

# Encode the rendered frames in `out_dir` into an H.264 mp4 with ffmpeg and
# embed the result in the notebook output.
if skip_video_for_run_all:
    print('Skipping video creation, uncheck skip_video_for_run_all if you want to run it')
else:
    image_path = os.path.join(out_dir, "frame_%05d.png")
    mp4_path = os.path.join(out_dir, f"{timestring}.mp4")

    print(f"Compiling animation frames to {mp4_path}...")

    cmd = [
        'ffmpeg',
        '-y',
        '-vcodec', 'png',
        '-r', str(fps),
        '-start_number', str(0),
        '-i', image_path,
        '-c:v', 'libx264',
        '-vf',
        f'fps={fps}',
        '-pix_fmt', 'yuv420p',
        '-crf', '17',
        '-preset', 'veryfast',
        mp4_path
    ]
    # An argument list (no shell) is used, so paths need no quoting.
    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        # Decode so the ffmpeg log is readable instead of a bytes repr.
        error_text = process.stderr.decode("utf-8", errors="replace")
        print(error_text)
        raise RuntimeError(error_text)

    # Read the file through a context manager so the handle is closed.
    with open(mp4_path, 'rb') as mp4_file:
        mp4 = mp4_file.read()
    # The whole video is inlined as a base64 data URL in the cell output.
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display.display( display.HTML(f'<video controls loop><source src="{data_url}" type="video/mp4"></video>') )
