In [128]:
%load_ext autoreload
%autoreload 2

import torch
from text3d2video.artifacts.anim_artifact import AnimationArtifact
from text3d2video.rendering import render_depth_map, render_texture
import torchvision.transforms.functional as TF
from text3d2video.artifacts.texture_artifact import TextureArtifact
from PIL import Image

torch.set_grad_enabled(False)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<torch.autograd.grad_mode.set_grad_enabled at 0x7ad264640be0>

In [53]:
from text3d2video.pipelines.generative_rendering_pipeline import GenerativeRenderingPipeline
from text3d2video.pipelines.pipeline_utils import load_pipeline

device = torch.device("cuda")
dtype = torch.float16
sd_repo = "runwayml/stable-diffusion-v1-5"
controlnet_repo = "lllyasviel/control_v11f1p_sd15_depth"
pipe = load_pipeline(GenerativeRenderingPipeline, sd_repo, controlnet_repo)

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

In [67]:
dog_image = Image.open("data/dog.png")
prompt = "Photograph of a puppy over grass"

images = [dog_image]
prompts = [prompt]
depths = None

[<PIL.PngImagePlugin.PngImageFile image mode=RGB size=512x512>]

In [121]:
# read texture
texture_art = TextureArtifact.from_wandb_artifact_tag("texture:v21")
texture = texture_art.read_texture()

# read anim
anim = AnimationArtifact.from_wandb_artifact_tag('catwalk_180_20:latest')
anim = AnimationArtifact.from_wandb_artifact_tag('rumba_20:latest')
cams, meshes = anim.load_frames()
verts_uvs, faces_uvs = anim.uv_data()
depths = render_depth_map(meshes, cams)

# render textured frames
renders = render_texture(meshes, cams, texture, verts_uvs, faces_uvs)
renders = [TF.to_pil_image(r) for r in renders]

prompt = "Stormtrooper"
images = renders
prompts = [prompt] * len(images)

In [127]:
from text3d2video.noise_initialization import UVNoiseInitializer
from text3d2video.pipelines.generative_rendering_pipeline import GenerativeRenderingConfig

num_inf_steps = 15
start_noise_level = 0
start_t = pipe.get_partial_timesteps(num_inf_steps, start_noise_level)[0]

print(start_t)

latents = pipe.encode_images(images)
latents = pipe.scheduler.add_noise(latents, torch.randn_like(latents), start_t)

decoded = pipe.decode_latents(latents)

tensor(925)


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.25 GiB. GPU 0 has a total capacity of 15.60 GiB of which 913.88 MiB is free. Including non-PyTorch memory, this process has 14.67 GiB memory in use. Of the allocated memory 11.21 GiB is allocated by PyTorch, and 2.85 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:

module_paths = [
    "mid_block.attentions.0.transformer_blocks.0.attn1",
    "up_blocks.1.attentions.0.transformer_blocks.0.attn1",
    "up_blocks.1.attentions.1.transformer_blocks.0.attn1",
    "up_blocks.1.attentions.2.transformer_blocks.0.attn1",
    "up_blocks.2.attentions.0.transformer_blocks.0.attn1",
    "up_blocks.2.attentions.1.transformer_blocks.0.attn1",
    "up_blocks.2.attentions.2.transformer_blocks.0.attn1",
    "up_blocks.3.attentions.0.transformer_blocks.0.attn1",
    "up_blocks.3.attentions.1.transformer_blocks.0.attn1",
    "up_blocks.3.attentions.2.transformer_blocks.0.attn1",
]

gr_config = GenerativeRenderingConfig(
    do_pre_attn_injection=True,
    do_post_attn_injection=True,
    feature_blend_alpha=0.8,
    attend_to_self_kv=True,
    mean_features_weight=0.5,
    chunk_size=5,
    num_keyframes=1,
    num_inference_steps=num_inf_steps,
    guidance_scale=7.5,
    controlnet_conditioning_scale=1.0,
    module_paths=module_paths,
)

generator = torch.Generator(device="cuda")
generator.manual_seed(0)

kf_generator = torch.Generator(device="cuda")
kf_generator.manual_seed(0)

noise_initializer = UVNoiseInitializer(noise_texture_res=120)

video_frames = pipe(
    prompt,
    meshes,
    cams,
    verts_uvs,
    faces_uvs,
    conf=gr_config,
    noise_initializer=noise_initializer,
    generator=generator,
    kf_generator=kf_generator,
    start_noise_level=start_noise_level,
    start_latents=latents,
)

In [123]:
from text3d2video.utilities.ipython_utils import display_vid
from text3d2video.utilities.video_util import pil_frames_to_clip

display_vid(pil_frames_to_clip(video_frames))