In [6]:
%load_ext autoreload
%autoreload 2

from omegaconf import OmegaConf
import wandb_util.wandb_util as wbu
from scripts.wandb_experiments.benchmark import Scene, BenchmarkConfig

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Artifact tags that are shared by several scenes.
CAT_STATUE_MV = "cat_statue_mv:latest"
HUMAN_MV = "human_mv:latest"
CATWALK_180 = "catwalk_180_20:latest"

# Scene arguments are positional. Judging by the values they look like
# (animation tag, texture tag, prompt, texture prompt) — TODO confirm against
# the Scene definition.

# --- Cat statue scenes -------------------------------------------------------
# NOTE(review): "Metalic" is misspelled ("Metallic"). It is left untouched on
# purpose because the string is handed to Scene as-is and editing it may change
# the resulting runs.
metal_cat = Scene(CAT_STATUE_MV, CAT_STATUE_MV, "Metalic Cat Statue", "Silver Cat Statue")
deadpool_cat = Scene(
    CAT_STATUE_MV, CAT_STATUE_MV, "Deadpool Cat Statue", "Deadpool Cat Statue"
)

# --- Stormtrooper scenes: one per animation ----------------------------------
stormtrooper_ymca = Scene("ymca_20:latest", HUMAN_MV, "Stormtrooper", "Stormtrooper")
stormtrooper_catwalk = Scene(CATWALK_180, HUMAN_MV, "Stormtrooper", "Stormtrooper")
stormtrooper_rumba = Scene("rumba_20:latest", HUMAN_MV, "Stormtrooper", "Stormtrooper")

# --- Deadpool on the catwalk animation ---------------------------------------
deadpool_catwalk = Scene(
    CATWALK_180, HUMAN_MV, "Deadpool, blank background", "Deadpool"
)


# Every scene that takes part in the benchmark, in display order.
scenes = [
    metal_cat,
    deadpool_cat,
    stormtrooper_ymca,
    stormtrooper_catwalk,
    stormtrooper_rumba,
    deadpool_catwalk,
]

In [22]:
from scripts.wandb_experiments.benchmark import Method
from scripts.wandb_runs.render_noise_gr import RenderNoiseGrConfig, render_noise_gr
from scripts.wandb_runs.run_generative_rendering import (
    RunGenerativeRenderingConfig,
    run_generative_rendering,
)
from text3d2video.pipelines.generative_rendering_pipeline import (
    GenerativeRenderingConfig,
)
from text3d2video.pipelines.pipeline_utils import ModelConfig
from text3d2video.util import PrintCols
from text3d2video.utilities.omegaconf_util import get_import_path


# Module paths of the attn1 layers handed to GenerativeRenderingConfig: the one
# in the mid block first, then attentions 0-2 of up blocks 1-3, in that order.
decoder_paths = ["mid_block.attentions.0.transformer_blocks.0.attn1"] + [
    f"up_blocks.{block_idx}.attentions.{attn_idx}.transformer_blocks.0.attn1"
    for block_idx in (1, 2, 3)
    for attn_idx in (0, 1, 2)
]
# Generative Rendering (GR) base config: all decoder attention paths, a single
# keyframe. "prompt" / "anim_tag" look like placeholders that get replaced per
# scene — TODO confirm in `benchmark`.
gr_settings = GenerativeRenderingConfig(decoder_paths, num_keyframes=1)
base_gr = OmegaConf.structured(
    RunGenerativeRenderingConfig("prompt", "anim_tag", gr_settings, ModelConfig())
)

# ControlNet base config: same run function, but with no module paths and both
# attention injections switched off.
controlnet_settings = GenerativeRenderingConfig(
    [], do_pre_attn_injection=False, do_post_attn_injection=False
)
base_controlnet = OmegaConf.structured(
    RunGenerativeRenderingConfig(
        "prompt", "anim_tag", controlnet_settings, ModelConfig()
    )
)

# The two baselines share one entry point and differ only in their base config.
gr_method = Method("GR", get_import_path(run_generative_rendering), base_gr)
controlnet_method = Method(
    "ControlNet", get_import_path(run_generative_rendering), base_controlnet
)
methods = [gr_method, controlnet_method]

# RenderThenGR sweep: one method per start noise level, with the decoder
# attention paths enabled.
for start_noise in (0, 0.25, 0.5, 0.75, 1):
    render_gr_settings = RenderNoiseGrConfig(
        "prompt",
        "anim_tag",
        "texture_tag",
        GenerativeRenderingConfig(decoder_paths),
        ModelConfig(),
        start_noise_level=start_noise,
    )
    base_render_gr = OmegaConf.structured(render_gr_settings)

    render_gr_method = Method(
        f"RenderThenGR-{start_noise}",
        get_import_path(render_noise_gr),
        base_render_gr,
    )
    methods.append(render_gr_method)

# RenderThenCN sweep: same run function with no module paths and both attention
# injections switched off, for two noise levels only.
# NOTE(review): this sweep uses empty strings where the GR sweep uses
# "prompt" / "anim_tag" / "texture_tag" — presumably both are placeholders that
# get overridden later; confirm the difference is intentional.
for start_noise in (0.25, 0.5):
    render_cn_pipeline = GenerativeRenderingConfig(
        [], do_pre_attn_injection=False, do_post_attn_injection=False
    )
    render_cn_settings = RenderNoiseGrConfig(
        "",
        "",
        "",
        render_cn_pipeline,
        ModelConfig(),
        start_noise_level=start_noise,
    )
    base_render_cn = OmegaConf.structured(render_cn_settings)

    render_cn_method = Method(
        f"RenderThenCN-{start_noise}",
        get_import_path(render_noise_gr),
        base_render_cn,
    )
    methods.append(render_cn_method)

# Print one row per method: its name, the bare function name (last component of
# the dotted import path) and the stringified base config.
column_names = ["Method", "Function", "config"]
column_widths = [20, 25, 100]
p = PrintCols(column_names, widths=column_widths)
for m in methods:
    function_name = m.fun_path.rsplit(".", 1)[-1]
    p.print_row(m.name, function_name, str(m.base_config))

GR                   run_generative_rendering  {'prompt': 'prompt', 'animation_tag': 'anim_tag', 'generative_rendering': {'module_paths': ['mid_block.attentions.0.transformer_blocks.0.attn1', 'up_blocks.1.attentions.0.transformer_blocks.0.attn1', 'up_blocks.1.attentions.1.transformer_blocks.0.attn1', 'up_blocks.1.attentions.2.transformer_blocks.0.attn1', 'up_blocks.2.attentions.0.transformer_blocks.0.attn1', 'up_blocks.2.attentions.1.transformer_blocks.0.attn1', 'up_blocks.2.attentions.2.transformer_blocks.0.attn1', 'up_blocks.3.attentions.0.transformer_blocks.0.attn1', 'up_blocks.3.attentions.1.transformer_blocks.0.attn1', 'up_blocks.3.attentions.2.transformer_blocks.0.attn1'], 'do_pre_attn_injection': True, 'do_post_attn_injection': True, 'feature_blend_alpha': 0.8, 'attend_to_self_kv': False, 'mean_features_weight': 0.5, 'chunk_size': 5, 'num_inference_steps': 15, 'guidance_scale': 7.5, 'controlnet_conditioning_scale': 1.0, 'num_keyframes': 1, 'kf_indices': None}, 'model': {'sd_repo

In [15]:
# Combine scenes and methods into the benchmark settings, then wrap them as an
# OmegaConf structured config; `config` only ever refers to the wrapped version.
benchmark_settings = BenchmarkConfig(scenes, methods)
config = OmegaConf.structured(benchmark_settings)

In [20]:
from scripts.wandb_experiments.benchmark import benchmark

# Call `benchmark` directly on the structured config and keep what it returns.
# `spec` is not referenced by any other cell visible in this notebook, so this
# looks like an inspection/debugging step — TODO confirm it is still needed.
spec = benchmark(config)

In [21]:
from scripts.wandb_experiments.benchmark import benchmark

# Sync the benchmark with the "benchmark_small" experiment. The recorded output
# prints the experiment URL and the runs it "Would execute", so this call
# presumably only reports pending runs unless told otherwise — TODO confirm
# against wandb_util. The same experiment name is used again when the logged
# runs are fetched, so keep the two in step.
wbu.sync_experiment(benchmark, config, "benchmark_small")

Experiment: https://wandb.ai/romeu/diffusion-3D-features/groups/benchmark_small/workspace

Would execute 46 new runs:
- human_mvlatest_Deadpool
- human_mvlatest_Stormtrooper
- cat_statue_mvlatest_DeadpoolCatStatue
- cat_statue_mvlatest_SilverCatStatue
- RenderThenGR-0
- RenderThenGR-0.25
- RenderThenGR-0.5
- RenderThenGR-0.75
- RenderThenGR-1
- RenderThenCN-0.25
- RenderThenCN-0.5
- RenderThenGR-0
- RenderThenGR-0
- RenderThenGR-0
- RenderThenGR-0.25
- RenderThenGR-0.25
- RenderThenGR-0.25
- RenderThenGR-0.5
- RenderThenGR-0.5
- RenderThenGR-0.5
- RenderThenGR-0.75
- RenderThenGR-0.75
- RenderThenGR-0.75
- RenderThenGR-1
- RenderThenGR-1
- RenderThenGR-1
- RenderThenCN-0.25
- RenderThenCN-0.25
- RenderThenCN-0.25
- RenderThenCN-0.5
- RenderThenCN-0.5
- RenderThenCN-0.5
- RenderThenGR-0
- RenderThenGR-0.25
- RenderThenGR-0.5
- RenderThenGR-0.75
- RenderThenGR-1
- RenderThenCN-0.25
- RenderThenCN-0.5
- RenderThenGR-0
- RenderThenGR-0.25
- RenderThenGR-0.5
- RenderThenGR-0.75
- RenderThen

In [14]:
from scripts.wandb_experiments.benchmark import split_runs

# Experiment whose logged runs are analysed below.
exp_name = "benchmark_small"

# Fetch the logged runs, echo the experiment URL for quick access, then separate
# the texture runs from the video-generation runs.
runs = wbu.get_logged_runs(exp_name)
exp_url = wbu.get_exp_url(exp_name)
print(exp_url)
texture_runs, video_gen_runs = split_runs(runs)

https://wandb.ai/romeu/diffusion-3D-features/groups/benchmark_small/workspace


In [15]:
from text3d2video.util import group_into_array


def scene_key(run):
    """Return the scene grouping key for a run.

    The run's ``config`` is wrapped with ``OmegaConf.create`` and the key is
    formatted as ``"<animation_tag>-<prompt>"``.
    """
    run_cfg = OmegaConf.create(run.config)
    animation_tag, prompt = run_cfg.animation_tag, run_cfg.prompt
    return f"{animation_tag}-{prompt}"


def method_key(run):
    """Return the method grouping key: the run name up to its first underscore.

    A name without any underscore is returned unchanged.
    """
    method_name, _sep, _rest = run.name.partition("_")
    return method_name


# Arrange the video-generation runs into an array using the two key functions
# (method first, scene second). `labels` holds the labels per key; labels[0] is
# what later cells use for the method rows.
runs_grouped, labels = group_into_array(video_gen_runs, [method_key, scene_key])

# Disabled snippet for picking a subset of methods by label.
# NOTE(review): re-enabling it as written would rebind `methods`, the name an
# earlier cell uses for the list of Method objects — rename before reviving it.
# methods = ["ControlNet", "GR"]
# method_indices = [labels[0].index(m) for m in methods]

# Last expression of the cell: shows the array shape (recorded output: (9, 6)).
runs_grouped.shape

(9, 6)

In [16]:
from text3d2video.experiment_analysis import GrData
from text3d2video.util import map_array


# Apply GrData.from_gr_run to every entry of the grouped-run array, showing a
# progress bar; the recorded output shows wandb file downloads while this runs.
# NOTE(review): later cells process and annotate these entries as `GrRunData`,
# whereas they are built here through `GrData` — confirm the two names refer to
# compatible classes.
run_data = map_array(runs_grouped, GrData.from_gr_run, pbar=True)

  0%|          | 0/54 [00:00<?, ?it/s]

 22%|██▏       | 12/54 [01:00<04:02,  5.78s/it][34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m:   10 of 10 files downloaded.  
[34m[1mwandb[0m:   10 of 10 files downloaded.  4m[1mwandb[0m: \ 1 of 10 files downloaded...
[34m[1mwandb[0m:   20 of 20 files downloaded.  4m[1mwandb[0m: \ 1 of 20 files downloaded...
[34m[1mwandb[0m:   20 of 20 files downloaded.  4m[1mwandb[0m: \ 1 of 20 files downloaded...
[34m[1mwandb[0m:   20 of 20 files downloaded.  4m[1mwandb[0m: \ 1 of 20 files downloaded...
[34m[1mwandb[0m:   20 of 20 files downloaded.  4m[1mwandb[0m: \ 1 of 20 files downloaded...
[34m[1mwandb[0m:   10 of 10 files downloaded.  4m[1mwandb[0m: \ 1 of 10 files downloaded...
[34m[1mwandb[0m:   10 of 10 files downloaded.  4m[1mwandb[0m: \ 1 of 10 files downloaded...
[34m[1mwandb[0m:   20 of 20 files downloaded.  4m[1mwandb[0m: \ 1 of 20 files downloaded...
[3

In [17]:
from scripts.wandb_experiments.benchmark import GrRunData
from text3d2video.clip_metrics import CLIPMetrics
from text3d2video.util import map_array
from functools import partial

# A single CLIPMetrics instance is shared by every per-run call below.
clip = CLIPMetrics()
clip_metrics_fn = partial(GrRunData.compute_clip_metrics, model=clip)

# Two passes over the run data: CLIP-based metrics first, then UV MSE. The
# second call is the cell's last expression, so its returned array is displayed
# (all None in the recorded output — the metrics are presumably stored on the
# run-data objects themselves; TODO confirm).
map_array(run_data, clip_metrics_fn, pbar=True)
map_array(run_data, GrRunData.compute_uv_mse, pbar=True)

100%|██████████| 54/54 [00:15<00:00,  3.49it/s]
100%|██████████| 54/54 [00:15<00:00,  3.43it/s]


array([[None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None]], dtype=object)

In [18]:
from text3d2video.utilities.video_comparison import video_grid
from text3d2video.utilities.video_util import pil_frames_to_clip
from text3d2video.utilities.video_comparison import VideoLabel, add_label_to_clip


def video_with_metrics(d: GrRunData):
    """Build a clip from a run's frames with its metrics overlaid as a label.

    Args:
        d: Run data providing ``frames`` plus the optional metrics
            ``frame_consistency``, ``prompt_fidelity`` and ``uv_mse``.

    Returns:
        The result of ``add_label_to_clip``: the clip built from ``d.frames``
        with a label placed at the bottom left. The label has one line per
        metric that is not ``None`` ("FC", "PF", "UV MSE", four decimals each);
        if every metric is ``None`` the label text is empty.
    """
    clip = pil_frames_to_clip(d.frames)

    # (caption, value) pairs in display order; metrics that were never computed
    # are None and are left out of the label.
    metrics = [
        ("FC", d.frame_consistency),
        ("PF", d.prompt_fidelity),
        ("UV MSE", d.uv_mse),
    ]
    rows = [f"{caption}: {value:.4f}" for caption, value in metrics if value is not None]

    # The rows are already fully rendered, so they are joined as-is; running
    # str.format over the joined text again would be a no-op at best and would
    # raise if the text ever contained braces.
    content = "\n".join(rows)
    label = VideoLabel(content, font_size=30)
    return add_label_to_clip(clip, label, position=("left", "bottom"))


# `[:, :]` keeps every row and column, so nothing is actually cropped here;
# narrow the slices to render or summarise only part of the grid.
cropped_run_data = run_data[:, :]
# Turn each entry into a clip with its metrics label.
vids = map_array(cropped_run_data, video_with_metrics, pbar=True)

# Lay the clips out as one grid video, with the first group of labels (the
# method names) on the y axis, and write it to disk.
# NOTE(review): the relative "outs/" directory presumably has to exist already.
vid = video_grid(vids, y_labels=labels[0])
vid.write_videofile("outs/benchmark_small.mp4")

  0%|          | 0/54 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  2%|▏         | 1/54 [00:00<00:09,  5.34it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  4%|▎         | 2/54 [00:00<00:09,  5.28it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  6%|▌         | 3/54 [00:00<00:09,  5.24it/s]huggingface/t

Moviepy - Building video outs/benchmark_small.mp4.
Moviepy - Writing video outs/benchmark_small.mp4



                                                            

Moviepy - Done !
Moviepy - video ready outs/benchmark_small.mp4


In [56]:
import tabulate


# Summary table: one row per method with each metric averaged over the method's
# row of run data.
table_rows = []

# Numeric-only copy of the rows (no method name); not used in the cells shown
# here.
values = []

for row_i, row in enumerate(cropped_run_data):
    # labels[0] holds the method label of each row.
    method = labels[0][row_i]

    uv_mse = map_array(row, lambda d: d.uv_mse).mean()
    fc = map_array(row, lambda d: d.frame_consistency).mean()
    pf = map_array(row, lambda d: d.prompt_fidelity).mean()

    table_rows.append([method, uv_mse, fc, pf])

    values.append([uv_mse, fc, pf])



headers = ["Method", "UV MSE", "Frame Consistency", "Prompt Fidelity"]
# "markdown" is not a tabulate format name; tabulate silently falls back to the
# "simple" layout for unknown names. "github" is the format that emits a
# Markdown (GitHub-flavoured) table.
print(tabulate.tabulate(table_rows, headers=headers, tablefmt="github"))

Method                 UV MSE    Frame Consistency    Prompt Fidelity
-----------------  ----------  -------------------  -----------------
ControlNet         0.127024               0.899591           0.310699
GR                 0.0202782              0.959487           0.309206
RenderThenCN-0.25  0.0832425              0.92785            0.315243
RenderThenCN-0.5   0.0360604              0.929483           0.310039
RenderThenGR-0     0.0147605              0.953705           0.310851
RenderThenGR-0.25  0.0138913              0.959041           0.301336
RenderThenGR-0.5   0.00714404             0.955827           0.296033
RenderThenGR-0.75  0.00489579             0.956097           0.294004
RenderThenGR-1     0.00148459             0.958688           0.290508
