<a href="https://colab.research.google.com/github/PsorTheDoctor/generative-ai/blob/master/text_to_video/fatezero.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FateZero: Fusing Attentions for Zero-shot Text-based Video Editing

In [1]:
# Print the name, total memory and free memory of each GPU as header-less CSV.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

Tesla T4, 15360 MiB, 15101 MiB


## Install requirements

In [None]:
!git clone https://github.com/ChenyangQiQi/FateZero /content/FateZero
%cd /content/FateZero
# %pip install -r requirements.txt
%pip install -q -U --pre triton
%pip install -q diffusers[torch]==0.11.1 transformers==4.26.0 bitsandbytes==0.35.4 \
decord accelerate omegaconf einops ftfy gradio imageio-ffmpeg xformers

## Download pretrained model

In [None]:
MODEL_NAME = "CompVis/stable-diffusion-v1-4"

download_pretrained_model = True 
if download_pretrained_model:
    !git lfs install
    !git clone https://huggingface.co/$MODEL_NAME ckpt/$MODEL_NAME
    MODEL_NAME = f"./ckpt/{MODEL_NAME}"
print(f"[*] MODEL_NAME={MODEL_NAME}")

## Usage

In [25]:
from omegaconf import OmegaConf

# VIDEO_FILE must be a path to a folder that contains the video frames as images.
VIDEO_FILE = 'data/teaser_car-turn'
# Folder name of the clip; a trailing '/' is stripped first so the id is never empty.
VIDEO_ID = VIDEO_FILE.rstrip('/').split('/')[-1]
RESULT_DIR = 'result/' + VIDEO_ID
CONFIG_NAME = "config/" + VIDEO_ID + ".yaml"

source_prompt = 'a silver jeep driving down a curvy road in the countryside'
edit_prompt = 'a red porsche driving down a curvy road in the countryside'
# Word(s) of the edit prompt passed as eq_params "words", with the matching "values" entry.
EMPHYSIS_WORD = 'red porsche'
EMPHYSIS_VALUE = 10
video_length = 8
INVERSION_STEP = 8
# NOTE(review): REPLACE_STRENGTH, width and height are defined here but are not
# referenced by the config below (the replace steps are hard-coded to 0.8).
# Confirm the intended config keys before wiring them in.
REPLACE_STRENGTH = 0.8
STORE_ATTENTION_ON_disk = False
width = 512
height = 512

config = {
  "pretrained_model_path": MODEL_NAME,
  "logdir": RESULT_DIR,
  "train_dataset": {
    "path": VIDEO_FILE,
    "prompt": source_prompt,
    "n_sample_frame": video_length,
    "sampling_rate": 1,
    "stride": 80,
    "offset":
    {
        "left": 0,
        "right": 0,
        "top": 0,
        "bottom": 0,
    }
  },
  "validation_sample_logger_config":{
      "use_train_latents": True,
      "use_inversion_attention": True,
      "guidance_scale": 7.5,
      "prompts":[
          source_prompt,
          edit_prompt,
      ],
      # One p2p_config entry per prompt listed above, in the same order.
      "p2p_config":[
          {
          "cross_replace_steps":{
              "default_":0.8
              },
          "self_replace_steps": 0.8,
          "masked_self_attention": True,
           "bend_th": [2, 2],
          "is_replace_controller": False
          },
          {
          "cross_replace_steps":{
              "default_":0.8
              },
          "self_replace_steps": 0.8,
          "eq_params":{
              "words":[EMPHYSIS_WORD],
              "values": [EMPHYSIS_VALUE]
            },
          "use_inversion_attention": True,
          "is_replace_controller": False
          }]
          ,
    # OmegaConf relative interpolation: ".." climbs to the root of the config.
    "clip_length": "${..train_dataset.n_sample_frame}",
    "sample_seeds": [0],
    "num_inference_steps": INVERSION_STEP,
    "prompt2prompt_edit": True
     },
  "disk_store": STORE_ATTENTION_ON_disk,
  "model_config":{
      "lora": 160,
      "SparseCausalAttention_index": ['mid'],
      "least_sc_channel": 640
  },
  "test_pipeline_config":{
    "target": "video_diffusion.pipelines.p2pDDIMSpatioTemporalPipeline.p2pDDIMSpatioTemporalPipeline",
    # Must name the key that actually exists in this config
    # ("validation_sample_logger_config"); the previous target
    # "validation_sample_logger" is not defined anywhere, so the interpolation
    # would fail as soon as it was resolved.
    "num_inference_steps": "${..validation_sample_logger_config.num_inference_steps}"
  },
  "epsilon": 1e-5,
  "train_steps": 100,
  "seed": 0,
  "learning_rate": 1e-5,
  "train_temporal_conv": False,
  "guidance_scale": "${validation_sample_logger_config.guidance_scale}"
}

# Write the config as YAML; interpolation strings are stored unresolved.
OmegaConf.save(config, CONFIG_NAME)
print('save new configue to ', CONFIG_NAME)

save new configue to  config/teaser_car-turn.yaml


In [26]:
# Run test_fatezero.py through `accelerate launch`; IPython substitutes the
# Python variable CONFIG_NAME into the --config argument before the shell runs.
!accelerate launch test_fatezero.py --config=$CONFIG_NAME

2023-03-29 21:59:22.759621: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-29 21:59:25.678009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-03-29 21:59:25.681557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
[2;36m           [0m         `accelerate launch` and had defaults used      [2m            

## Show the results

In [31]:
from IPython.display import HTML
from base64 import b64encode
import os, sys
import glob

def display_video(filepath):
  """Embed the newest result video called ``filepath`` in the notebook output.

  Searches ``./result/*/sample/`` for files named ``filepath`` and shows the
  most recently modified match as an inline, base64-encoded HTML5 video.

  Args:
    filepath: File name (e.g. ``'step_0.mp4'``) to look for inside each
      ``./result/<run>/sample/`` folder.

  Raises:
    FileNotFoundError: If no result folder contains a file with that name.
  """
  pattern = './result/*/sample/' + filepath
  matches = glob.glob(pattern)
  if not matches:
    raise FileNotFoundError('No video matching {!r} was found'.format(pattern))

  # Choose by modification time, not by path order, so that result folders
  # of other videos cannot shadow the latest run; the path breaks ties.
  mp4_name = max(matches, key=lambda path: (os.path.getmtime(path), path))

  print(mp4_name)
  # Read inside a context manager so the file handle is always closed.
  with open(mp4_name, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

  print('Display animation: {}'.format(mp4_name), file=sys.stderr)
  display(HTML("""
    <video width=512 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_url))
  
# Render the result file named step_0.mp4 found under ./result/*/sample/ inline.
display_video('step_0.mp4')

./result/teaser_car-turn_230329-215937/sample/step_0.mp4


Display animation: ./result/teaser_car-turn_230329-215937/sample/step_0.mp4


In [33]:
# Render the result file named step_0_1_0.mp4 found under ./result/*/sample/ inline.
display_video('step_0_1_0.mp4')

./result/teaser_car-turn_230329-215937/sample/step_0_1_0.mp4


Display animation: ./result/teaser_car-turn_230329-215937/sample/step_0_1_0.mp4
