# 1. Check GPU Status

In [1]:
import subprocess

#!nvidia-smi -i 0 -e 0
nvidiasmi_output = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE).stdout.decode('utf-8')
print(nvidiasmi_output)
nvidiasmi_ecc_note = subprocess.run(['nvidia-smi', '-i', '0', '-e', '0'], stdout=subprocess.PIPE).stdout.decode('utf-8')
print(nvidiasmi_ecc_note)

Thu May 12 16:32:29 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.100      Driver Version: 440.100      CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  On   | 00000000:19:00.0 Off |                  N/A |
| 44%   80C    P0   112W / 250W |      1MiB / 11019MiB |      9%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  On   | 00000000:1A:00.0 Off |                  N/A |
| 29%   43C    P8     3W / 250W |      1MiB / 11019MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce RTX 208...  On   | 00000000:67:00.0 Off |                  N/A |
| 46%   

# 2. Set Up

In [2]:
#@title 1.2 Prepare Folders
import subprocess, os, sys
import pathlib, shutil

# Output locations, all rooted at the directory the notebook is run from
# (the same paths are used by preprocessing.py).
root_path = os.getcwd()
initDirPath = root_path + '/outputs/init_images'
outDirPath = root_path + '/outputs/images_out'
model_path = root_path + '/outputs/models'

# Project directory: make it importable and make sure it is the working directory.
PROJECT_DIR = os.path.abspath(os.getcwd())
sys.path.append(PROJECT_DIR)
os.chdir(PROJECT_DIR)

In [3]:
# import all dependencies
# NOTE(review): `preprocess` is not defined in this notebook; presumably it comes from
# the star import below. It is called with the models directory and the project
# directory before the remaining project imports run - presumably those imports
# depend on whatever it sets up, so confirm before reordering.
from utils.utils_os import *
preprocess(model_path, PROJECT_DIR)

from utils.utils_functions import *
from utils.utils_midas import *
from utils.utils_video import *
from main import do_run


import torch
import torchvision
import gc
import io
import math
import lpips
from glob import glob
from types import SimpleNamespace
from CLIP import clip
from guided_diffusion.script_util import create_model_and_diffusion, model_and_diffusion_defaults
import numpy as np
import random
import time


import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# If running locally, there's a good chance your env will need this in order to not crash upon np.matmul() or similar operations.
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

# AdaBins stuff
MAX_ADABINS_AREA = 500000

# CUDA Device: use the first GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if (torch.cuda.is_available()) else 'cpu')
print('Using device:', DEVICE)
device = DEVICE # At least one of the modules expects this name..


# Only query the compute capability when a CUDA device was actually selected:
# torch.cuda.get_device_capability() raises when handed the CPU fallback.
if DEVICE.type == 'cuda' and torch.cuda.get_device_capability(DEVICE) == (8,0): ## A100 fix thanks to Emad
    print('Disabling CUDNN for A100 gpu', file=sys.stderr)
    torch.backends.cudnn.enabled = False

Using device: cuda:0


# 3. Task Settings

In [4]:
##########################
#####  Text Prompts  #####
##########################
# With `animation_mode: None` only the first prompt set is used. With
# `animation_mode: 2D / Video` the sets are stepped through at their frame
# numbers and the last one is held.

text_prompts = {
    0: [
        "A beautiful painting of a singular lighthouse, shining its light across a dense forest by greg rutkowski and thomas kinkade, Trending on artstation.:2",
        "a girl and a boy sit around a camp fire under the stars.:2",
        "blue color scheme:1",
    ],
    #100: ["This set of prompts start at frame 100","This prompt has weight five:5"],
}

# No image prompts by default; the commented entry shows the expected format.
image_prompts = {
    # 0:['ImagePromptsWorkButArentVeryGood.png:2',],
}

##########################
## Important Parameters ##
##########################

#@markdown ####**Basic Settings:**
batch_name = 'TimeToDisco2'
steps = 250  # [25,50,100,150,250,500,1000]
width_height = [1280, 768]
clip_guidance_scale = 5000
tv_scale = 0
range_scale = 150
sat_scale = 0
cutn_batches = 4
skip_augs = False

# *Display rate and number of generated images*
display_rate = 25
n_batches = 10

# *Init Settings:*
# *Make sure you set skip_steps to ~50% of your steps if you want to use an init image.*
init_image = None
init_scale = 1000
skip_steps = 10


# Round each requested dimension down to a multiple of 64 and report any change.
side_x, side_y = [(dimension // 64) * 64 for dimension in (width_height[0], width_height[1])]
if (side_x, side_y) != (width_height[0], width_height[1]):
  print(f'Changing output size to {side_x}x{side_y}. Dimensions must by multiples of 64.')

# Model settings derived from the step count: below 1000 steps, diffusion_steps is
# the largest multiple of `steps` that does not exceed 1000.
timestep_respacing = 'ddim{}'.format(steps)
diffusion_steps = steps if steps >= 1000 else (1000 // steps) * steps

# Folder that holds everything produced for this batch.
batchFolder = f'{outDirPath}/{batch_name}'
createPath(batchFolder)

# 4. Diffusion and CLIP model settings

In [5]:
#@markdown ####**Models Settings:**

# Options: "256x256_diffusion_uncond" / "512x512_diffusion_uncond_finetune_008100"
diffusion_model = "512x512_diffusion_uncond_finetune_008100"
use_secondary_model = True
diffusion_sampling_mode = 'ddim'  # 'plms' / 'ddim'

use_checkpoint = True

# CLIP models to load: each flag set to True adds one model to `clip_models`.
ViTB32 = True
ViTB16 = True
ViTL14 = False
RN101 = False
RN50 = True
RN50x4 = False
RN50x16 = False
RN50x64 = False

check_model_SHA = False

# download_models / update_diffusion_config are project helpers whose definitions are
# not visible in this notebook; they receive the selections made above.
download_models(model_path, check_model_SHA, diffusion_model, use_secondary_model)

model_config = model_and_diffusion_defaults()
model_config = update_diffusion_config(diffusion_model, model_config, use_checkpoint, timestep_respacing, diffusion_steps)
model_default = model_config['image_size']

# Always bind `secondary_model`, so the run cell can pass it along without a
# NameError when use_secondary_model is False.
secondary_model = None
if use_secondary_model:
    secondary_model = SecondaryDiffusionImageNet2()
    # Load the weights on the CPU first, then freeze the model and move it to the device.
    secondary_model.load_state_dict(torch.load(f'{model_path}/secondary_model_imagenet_2.pth', map_location='cpu'))
    secondary_model.eval().requires_grad_(False).to(device)

# (flag, CLIP architecture name) pairs, listed in the order the models are appended.
_clip_selection = (
    (ViTB32, 'ViT-B/32'),
    (ViTB16, 'ViT-B/16'),
    (ViTL14, 'ViT-L/14'),
    (RN50, 'RN50'),
    (RN50x4, 'RN50x4'),
    (RN50x16, 'RN50x16'),
    (RN50x64, 'RN50x64'),
    (RN101, 'RN101'),
)

# Load every selected model in eval mode with gradients disabled, on the target device.
clip_models = [
    clip.load(architecture, jit=False)[0].eval().requires_grad_(False).to(device)
    for enabled, architecture in _clip_selection
    if enabled is True
]

# LPIPS perceptual-loss model (VGG trunk), passed to do_run later.
lpips_model = lpips.LPIPS(net='vgg').to(device)

512 Model already downloaded, check check_model_SHA if the file is corrupt
Secondary Model already downloaded, check check_model_SHA if the file is corrupt
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /home/kaihua/anaconda3/envs/disco_diffusion/lib/python3.7/site-packages/lpips/weights/v0.1/vgg.pth


# 5. Animation Settings

In [6]:
# Animation Mode:
# For animation, you probably want to set `cutn_batches` to 1 to make it quicker.

animation_mode = 'None' # ['None', '2D', '3D', 'Video Input'] 


# Video Input Settings:
video_init_path = "training.mp4"
extract_nth_frame = 2 
video_init_seed_continuity = True 

if animation_mode == "Video Input":
  videoFramesFolder = f'videoFrames'
  createPath(videoFramesFolder)
  print(f"Exporting Video Frames (1 every {extract_nth_frame})...")
  # Best-effort removal of .jpg frames left over from a previous extraction;
  # a failure is reported instead of being silently swallowed.
  try:
    for f in pathlib.Path(f'{videoFramesFolder}').glob('*.jpg'):
      f.unlink()
  except OSError as err:
    print(f'Could not remove old frames from {videoFramesFolder}: {err}')
  # The backslash is doubled so the filter string still contains a literal `\,`
  # without relying on an invalid escape sequence.
  vf = f'select=not(mod(n\\,{extract_nth_frame}))'
  ffmpeg_result = subprocess.run(['ffmpeg', '-i', f'{video_init_path}', '-vf', f'{vf}', '-vsync', 'vfr', '-q:v', '2', '-loglevel', 'error', '-stats', f'{videoFramesFolder}/%04d.jpg'], stdout=subprocess.PIPE)
  if ffmpeg_result.returncode != 0:
    # Keep going as before, but make a failed extraction visible.
    print(f'WARNING: ffmpeg exited with code {ffmpeg_result.returncode}; frame extraction may be incomplete.')



# 2D Animation Settings:
# `zoom` is a multiplier of dimensions; 1 means no zoom.
# All rotations are given in degrees.

key_frames = True
max_frames = 10000

# In Video Input mode the frame count is the number of extracted .jpg frames.
if animation_mode == "Video Input":
  max_frames = len(glob(f'{videoFramesFolder}/*.jpg'))

interp_spline = 'Linear'  # Do not change, currently will not look good. param ['Linear','Quadratic','Cubic']

# Motion settings as strings (presumably `frame: (value)` keyframe pairs); they are
# handed to update_parameters at the end of this cell.
angle = "0:(0)"
zoom = "0: (1), 10: (1.05)"
translation_x = "0: (0)"
translation_y = "0: (0)"
translation_z = "0: (10.0)"
rotation_3d_x = "0: (0)"
rotation_3d_y = "0: (0)"
rotation_3d_z = "0: (0)"

# Depth model and camera / resampling options, passed through to do_run via `args`.
midas_depth_model = "dpt_large"
midas_weight = 0.3
near_plane = 200
far_plane = 10000
fov = 40
padding_mode = 'border'
sampling_mode = 'bicubic'

#======= TURBO MODE
#@markdown ---
#@markdown ####**Turbo Mode (3D anim only):**
#@markdown (Starts after frame 10,) skips diffusion steps and just uses depth map to warp images for skipped frames.
#@markdown Speeds up rendering by 2x-4x, and may improve image coherence between frames. frame_blend_mode smooths abrupt texture changes across 2 frames.
#@markdown For different settings tuned for Turbo Mode, refer to the original Disco-Turbo Github: https://github.com/zippy731/disco-diffusion-turbo

turbo_mode = False
turbo_steps = "3"  # ["2","3","4","5","6"]
turbo_preroll = 10

# Turbo is only supported for 3D animations: switch it off (with a notice) otherwise.
if animation_mode != '3D' and turbo_mode:
  print('=====', 'Turbo mode only available with 3D animations. Disabling Turbo.', '=====', sep='\n')
  turbo_mode = False

#@markdown ---

#@markdown ####**Coherency Settings:**
#@markdown `frames_scale` tries to guide the new frame to looking like the old one. A good default is 1500.
#@markdown `frames_skip_steps` will blur the previous frame - higher values will flicker less but struggle to add enough new detail to zoom into.

frames_scale = 1500
frames_skip_steps = '60%'  # ['40%', '50%', '60%', '70%', '80%']

#======= VR MODE
#@markdown ---
#@markdown ####**VR Mode (3D anim only):**
#@markdown Enables stereo rendering of left/right eye views (supporting Turbo) which use a different (fish-eye) camera projection matrix.
#@markdown Note the images you're prompting will work better if they have some inherent wide-angle aspect.
#@markdown The generated images will need to be combined into left/right videos. These can then be stitched into the VR180 format.
#@markdown Google made the VR180 Creator tool but subsequently stopped supporting it. It's available for download in a few places including https://www.patrickgrunwald.de/vr180-creator-download
#@markdown The tool is not only good for stitching (videos and photos) but also for adding the correct metadata into existing videos, which is needed for services like YouTube to identify the format correctly.
#@markdown Watching YouTube VR videos isn't necessarily the easiest depending on your headset. For instance Oculus have a dedicated media studio and store which makes the files easier to access on a Quest https://creator.oculus.com/manage/mediastudio/
#@markdown
#@markdown The command to get ffmpeg to concat your frames for each eye is in the form: `ffmpeg -framerate 15 -i frame_%4d_l.png l.mp4` (repeat for r)

#@markdown `vr_eye_angle` is the y-axis rotation of the eyes towards the center
#@markdown `vr_ipd` is the interpupillary distance (between the eyes)

vr_mode = False
vr_eye_angle = 0.5
vr_ipd = 5.0

# VR is only supported for 3D animations: switch it off (with a notice) otherwise.
if animation_mode != '3D' and vr_mode:
  print('=====', 'VR mode only available with 3D animations. Disabling VR.', '=====', sep='\n')
  vr_mode = False
    
# Parse the motion strings defined above. update_parameters returns two tuples:
# per-setting series, and replacement values for the settings themselves.
# NOTE: this rebinds angle, zoom, translation_* and rotation_3d_* to the parsed
# values, so the string settings above must be re-run before running this again.
series_params, float_params = update_parameters(
    key_frames, max_frames, interp_spline,
    angle, zoom,
    translation_x, translation_y, translation_z,
    rotation_3d_x, rotation_3d_y, rotation_3d_z,
)
(
    angle_series, zoom_series,
    translation_x_series, translation_y_series, translation_z_series,
    rotation_3d_x_series, rotation_3d_y_series, rotation_3d_z_series,
) = series_params
(
    angle, zoom,
    translation_x, translation_y, translation_z,
    rotation_3d_x, rotation_3d_y, rotation_3d_z,
) = float_params

# 6. Extra Settings
 Partial Saves, Advanced Settings, Cutn Scheduling

In [7]:
#@markdown ####**Saving:**

#@markdown Intermediate steps will save a copy at your specified intervals. You can either format it as a single integer or a list of specific steps 
#@markdown A value of `2` will save a copy at 33% and 66%. 0 will save none.
#@markdown A value of `[5, 9, 34, 45]` will save at steps 5, 9, 34, and 45. (Make sure to include the brackets)

intermediate_saves = 10 
intermediates_in_subfolder = True  

if type(intermediate_saves) is not list:
  if intermediate_saves:
    # Spread the requested number of saves evenly over the steps that actually run,
    # never dropping below one step per checkpoint.
    steps_per_checkpoint = math.floor((steps - skip_steps - 1) // (intermediate_saves+1))
    steps_per_checkpoint = steps_per_checkpoint if steps_per_checkpoint > 0 else 1
    print(f'Will save every {steps_per_checkpoint} steps')
  else:
    # No intermediate saves: use an interval larger than the total number of steps.
    steps_per_checkpoint = steps+10
else:
  # An explicit list of steps was given, so no interval applies.
  steps_per_checkpoint = None

# Always bind `partialFolder`: the run cell reads it unconditionally, which raised a
# NameError whenever partial saves were disabled or not placed in a subfolder.
partialFolder = None
if intermediate_saves and intermediates_in_subfolder is True:
  partialFolder = f'{batchFolder}/partials'
  createPath(partialFolder)



#@markdown ####**Advanced Settings:**
#@markdown *There are a few extra advanced settings available if you double click this cell.*

#@markdown *Perlin init will replace your init, so uncheck if using one.*

perlin_init = False
perlin_mode = 'mixed'  # ['mixed', 'color', 'gray']
set_seed = 'random_seed'  # 'random_seed' draws a fresh seed in the run cell; anything else is converted with int()
eta = 0.8
clamp_grad = True
clamp_max = 0.05


### EXTRA ADVANCED SETTINGS:
randomize_class = True
clip_denoised = False
fuzzy_prompt = False
rand_mag = 0.05

#@markdown ####**Cutn Scheduling:**
#@markdown Format: `[40]*400+[20]*600` = 40 cuts for the first 400 /1000 steps, then 20 for the last 600/1000

#@markdown cut_overview and cut_innercut are cumulative for total cutn on any given step. Overview cuts see the entire image and are good for early structure, innercuts are your standard cutn.

# These schedules are kept as strings here and evaluated when `args` is built.
cut_overview = "[12]*400+[4]*600"
cut_innercut = "[4]*400+[12]*600"
cut_ic_pow = 1
cut_icgray_p = "[0.2]*400+[0]*600"

Will save every 21 steps


# 7. Run Diffusion


In [None]:
# Recompute the step-dependent model settings (in case `steps` changed since the
# settings cell ran) and write them into model_config.
timestep_respacing = 'ddim{}'.format(steps)
diffusion_steps = steps if steps >= 1000 else (1000 // steps) * steps
model_config.update(
    timestep_respacing=timestep_respacing,
    diffusion_steps=diffusion_steps,
)

batch_size = 1

# Resume options: which run and frame to continue from, and whether frames that
# would be overwritten are moved aside first.
resume_run = False
run_to_resume = 'latest'
resume_from_frame = 'latest'
retain_overwritten_frames = False

# The "retained" folder is only created when overwritten frames are kept.
if retain_overwritten_frames is True:
  retainFolder = f'{batchFolder}/retained'
  createPath(retainFolder)


# Turn the percentage string (e.g. '60%') into a fraction, then into a whole number of steps.
_skip_percent = int(frames_skip_steps.rstrip("%"))
skip_step_ratio = _skip_percent / 100
calc_frames_skip_steps = math.floor(steps * skip_step_ratio)

# Skipping every step would leave nothing to run.
if calc_frames_skip_steps >= steps:
  sys.exit("ERROR: You can't skip more steps than your total steps")

# Work out which batch number and start frame this run uses.
if resume_run:
  if run_to_resume == 'latest':
    # Reuse batchNum if it is already bound in this kernel session; otherwise derive
    # it from the settings files on disk. Only the "name not defined" case is caught.
    try:
      batchNum
    except NameError:
      batchNum = len(glob(f"{batchFolder}/{batch_name}(*)_settings.txt"))-1
  else:
    batchNum = int(run_to_resume)

  if resume_from_frame == 'latest':
    # Continue after the frames already on disk for this batch.
    start_frame = len(glob(batchFolder+f"/{batch_name}({batchNum})_*.png"))
  else:
    start_frame = int(resume_from_frame)+1

  # With turbo, move the start frame back to a multiple of turbo_steps once past the
  # preroll. The original guard was `animation_mode != '3D'`, which could never be
  # true together with turbo_mode, because the animation-settings cell disables
  # turbo for every non-3D mode.
  # NOTE(review): confirm this alignment matches how do_run schedules turbo frames.
  if animation_mode == '3D' and turbo_mode == True and start_frame > turbo_preroll and start_frame % int(turbo_steps) != 0:
    start_frame = start_frame - (start_frame % int(turbo_steps))

  # Frames are only moved aside when resuming from an explicit frame number.
  if resume_from_frame != 'latest' and retain_overwritten_frames is True:
    existing_frames = len(glob(batchFolder+f"/{batch_name}({batchNum})_*.png"))
    frames_to_save = existing_frames - start_frame
    print(f'Moving {frames_to_save} frames to the Retained folder')
    move_files(start_frame, existing_frames, batchFolder, retainFolder, batch_name, batchNum)
else:
  start_frame = 0
  # Start from the number of .txt files in the batch folder, then advance past any
  # batch number that already has a settings file (either naming style).
  batchNum = len(glob(batchFolder+"/*.txt"))
  while os.path.isfile(f"{batchFolder}/{batch_name}({batchNum})_settings.txt") is True or os.path.isfile(f"{batchFolder}/{batch_name}-{batchNum}_settings.txt") is True:
    batchNum += 1

print(f'Starting Run: {batch_name}({batchNum}) at frame {start_frame}')

# Choose the seed: draw a fresh one, or use the fixed value given in set_seed.
if set_seed == 'random_seed':
    random.seed()
    # random.randint includes its upper bound, so cap the draw at 2**32 - 1 to stay
    # within an unsigned 32-bit seed (numpy.random.seed, for example, rejects 2**32).
    seed = random.randint(0, 2**32 - 1)
    # print(f'Using seed: {seed}')
else:
    seed = int(set_seed)

# Collect every setting do_run needs into one namespace.
# NOTE(review): vr_eye_angle and vr_ipd are defined in the animation cell but are not
# passed here - confirm whether do_run needs them when vr_mode is enabled.
args = {
    'batchNum': batchNum,
    'prompts_series':split_prompts(text_prompts, max_frames) if text_prompts else None,
    'image_prompts_series':split_prompts(image_prompts, max_frames) if image_prompts else None,
    'seed': seed,
    'display_rate':display_rate,
    # Multiple batches only apply to still images; animations run a single batch.
    'n_batches':n_batches if animation_mode == 'None' else 1,
    'batch_size':batch_size,
    'batch_name': batch_name,
    'steps': steps,
    'diffusion_sampling_mode': diffusion_sampling_mode,
    'width_height': width_height,
    'clip_guidance_scale': clip_guidance_scale,
    'tv_scale': tv_scale,
    'range_scale': range_scale,
    'sat_scale': sat_scale,
    'cutn_batches': cutn_batches,
    'init_image': init_image,
    'init_scale': init_scale,
    'skip_steps': skip_steps,
    'side_x': side_x,
    'side_y': side_y,
    'timestep_respacing': timestep_respacing,
    'diffusion_steps': diffusion_steps,
    'animation_mode': animation_mode,
    'video_init_path': video_init_path,
    'extract_nth_frame': extract_nth_frame,
    'video_init_seed_continuity': video_init_seed_continuity,
    'key_frames': key_frames,
    # A still-image run is treated as a single frame.
    'max_frames': max_frames if animation_mode != "None" else 1,
    'interp_spline': interp_spline,
    'start_frame': start_frame,
    'angle': angle,
    'zoom': zoom,
    'translation_x': translation_x,
    'translation_y': translation_y,
    'translation_z': translation_z,
    'rotation_3d_x': rotation_3d_x,
    'rotation_3d_y': rotation_3d_y,
    'rotation_3d_z': rotation_3d_z,
    'midas_depth_model': midas_depth_model,
    'midas_weight': midas_weight,
    'near_plane': near_plane,
    'far_plane': far_plane,
    'fov': fov,
    'padding_mode': padding_mode,
    'sampling_mode': sampling_mode,
    'angle_series':angle_series,
    'zoom_series':zoom_series,
    'translation_x_series':translation_x_series,
    'translation_y_series':translation_y_series,
    'translation_z_series':translation_z_series,
    'rotation_3d_x_series':rotation_3d_x_series,
    'rotation_3d_y_series':rotation_3d_y_series,
    'rotation_3d_z_series':rotation_3d_z_series,
    'frames_scale': frames_scale,
    'frames_skip_steps': frames_skip_steps,
    'skip_step_ratio': skip_step_ratio,
    'calc_frames_skip_steps': calc_frames_skip_steps,
    'text_prompts': text_prompts,
    'image_prompts': image_prompts,
    # The cut schedules are Python expressions written in the settings cell, e.g.
    # "[12]*400+[4]*600". eval() executes arbitrary code, so never feed these from
    # untrusted input.
    'cut_overview': eval(cut_overview),
    'cut_innercut': eval(cut_innercut),
    'cut_ic_pow': cut_ic_pow,
    'cut_icgray_p': eval(cut_icgray_p),
    'intermediate_saves': intermediate_saves,
    'intermediates_in_subfolder': intermediates_in_subfolder,
    'steps_per_checkpoint': steps_per_checkpoint,
    'perlin_init': perlin_init,
    'perlin_mode': perlin_mode,
    'set_seed': set_seed,
    'eta': eta,
    'clamp_grad': clamp_grad,
    'clamp_max': clamp_max,
    'skip_augs': skip_augs,
    'randomize_class': randomize_class,
    'clip_denoised': clip_denoised,
    'fuzzy_prompt': fuzzy_prompt,
    'rand_mag': rand_mag,
    'resume_run': resume_run,
    'batchFolder': batchFolder,
    'turbo_mode': turbo_mode,
    'turbo_preroll': turbo_preroll,
    'turbo_steps': turbo_steps,
    'vr_mode': vr_mode,
    # videoFramesFolder only exists in Video Input mode.
    'videoFramesFolder': videoFramesFolder if animation_mode == "Video Input" else None,
    'clip_models': clip_models,
    'use_secondary_model': use_secondary_model,
    # partialFolder is only created when intermediate saves go to a subfolder; use the
    # same condition here so the name is not read when it was never bound.
    'partialFolder': partialFolder if (intermediate_saves and intermediates_in_subfolder is True) else None,
}

args = SimpleNamespace(**args)

print('Prepping model...')
model, diffusion = create_model_and_diffusion(**model_config)
# Load the checkpoint on the CPU, freeze the model, then move it to the target device.
model.load_state_dict(torch.load(f'{model_path}/{diffusion_model}.pt', map_location='cpu'))
model.requires_grad_(False).eval().to(device)
# Re-enable gradients for parameters whose names contain qkv, norm or proj.
for name, param in model.named_parameters():
    if 'qkv' in name or 'norm' in name or 'proj' in name:
        param.requires_grad_()
if model_config['use_fp16']:
    model.convert_to_fp16()

# Release unreferenced objects and cached GPU memory before the run.
gc.collect()
torch.cuda.empty_cache()
try:
    # secondary_model is only bound when use_secondary_model is True; pass None
    # otherwise instead of hitting a NameError.
    # NOTE(review): confirm do_run accepts None when args.use_secondary_model is False.
    do_run(args, diffusion, model, lpips_model, secondary_model if use_secondary_model else None, model_path, gpu_device=device)
except KeyboardInterrupt:
    # Interrupting the cell is the normal way to stop a run early.
    pass
finally:
    print('Seed used:', seed)
    gc.collect()
    torch.cuda.empty_cache()

Batches:   0%|          | 0/10 [00:00<?, ?it/s]




Output()

  0%|          | 0/240 [00:00<?, ?it/s]

# 8. Create the video

In [None]:
# @title ### **Create video**
#@markdown Video file will save in the same folder as your images.

skip_video_for_run_all = False #@param {type: 'boolean'}

if skip_video_for_run_all == True:
  print('Skipping video creation, uncheck skip_video_for_run_all if you want to run it')

else:
  # import subprocess in case this cell is run without the above cells
  # (batchNum, batch_name, batchFolder, outDirPath and glob must still come from earlier cells)
  import subprocess
  from base64 import b64encode

  latest_run = batchNum

  folder = batch_name #@param
  run = latest_run #@param
  final_frame = 'final_frame'


  init_frame = 1#@param {type:"number"} This is the frame where the video will start
  last_frame = final_frame#@param {type:"number"} You can change it to the number of the last frame you want to generate. It will raise an error if that number of frames does not exist.
  fps = 12#@param {type:"number"}
  # view_video_in_cell = True #@param {type: 'boolean'}

  frames = []
  # tqdm.write('Generating video...')

  # 'final_frame' means: use every frame image found for this run.
  if last_frame == 'final_frame':
    last_frame = len(glob(batchFolder+f"/{folder}({run})_*.png"))
    print(f'Total frames: {last_frame}')

  # Numbered-frame input pattern and output file, both inside the batch folder.
  image_path = f"{outDirPath}/{folder}/{folder}({run})_%04d.png"
  filepath = f"{outDirPath}/{folder}/{folder}({run}).mp4"


  cmd = [
      'ffmpeg',
      '-y',
      '-vcodec',
      'png',
      '-r',
      str(fps),
      '-start_number',
      str(init_frame),
      '-i',
      image_path,
      '-frames:v',
      str(last_frame+1),
      '-c:v',
      'libx264',
      '-vf',
      f'fps={fps}',
      '-pix_fmt',
      'yuv420p',
      '-crf',
      '17',
      '-preset',
      'veryslow',
      filepath
  ]

  process = subprocess.Popen(cmd, cwd=f'{batchFolder}', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = process.communicate()
  if process.returncode != 0:
      # Decode ffmpeg's stderr so the message is readable text, not a bytes repr.
      error_text = stderr.decode('utf-8', errors='replace')
      print(error_text)
      raise RuntimeError(error_text)
  else:
      print("The video is ready and saved to the images folder")

  # if view_video_in_cell:
  #     mp4 = open(filepath,'rb').read()
  #     data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  #     display.HTML(f'<video width=400 controls><source src="{data_url}" type="video/mp4"></video>')
  