In [8]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Restart kernel after runnning this cell for the imports to take actions, not needed if using new v3 image
import sys
import torch

pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
version_str="".join([
    f"py3{sys.version_info.minor}_cu",
    torch.version.cuda.replace(".",""),
    f"_pyt{pyt_version_str}"
])

!pip install -r requirements.txt
!pip install iopath
!pip install --no-deps --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html

print(f"pip install --no-deps --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Defaulting to user installation because normal site-packages is not writeable
Looking in links: https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu117_pyt201/download.html
pip install --no-deps --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu117_pyt201/download.html


In [9]:
# Full imports
import torch
from blockgen.configs import VoxelConfig, DiffusionConfig
from blockgen.utils import create_model_and_trainer, create_dataloaders
from blockgen.inference import DiffusionInference3D
from scripts.generate import load_model_for_inference
from blockgen.utils.metrics import compute_metrics
from blockgen.utils.evaluate import evaluate_generation
from scripts.legolize import LegoConverter

In [10]:
# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Report the name of GPU 0 only when we actually ended up on CUDA.
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU: NVIDIA A100-SXM4-40GB


In [11]:
# Experiment configurations.
#
# Every experiment in this notebook uses the same diffusion settings and the same
# voxel loss weights / default colour; only `mode`, `stage` and (for the combined
# models) `use_simple_mse` differ. The two builders below hold the shared values in
# one place so the variants cannot drift apart through copy-paste edits.


def _make_voxel_config(mode, stage, **extra):
    """Return a VoxelConfig with the shared defaults for the given mode/stage.

    Args:
        mode: Value forwarded as ``VoxelConfig(mode=...)`` ('two_stage' or 'combined' here).
        stage: Value forwarded as ``VoxelConfig(stage=...)`` ('shape', 'color' or None here).
        **extra: Additional keyword arguments forwarded unchanged (e.g. ``use_simple_mse``).

    Returns:
        A new VoxelConfig instance.
    """
    return VoxelConfig(
        mode=mode,
        stage=stage,
        default_color=[0.5, 0.5, 0.5],  # fresh list per config so instances never share it
        alpha_weight=1.0,
        rgb_weight=1.0,
        **extra,
    )


def _make_diffusion_config():
    """Return a new DiffusionConfig with the settings shared by all experiments."""
    return DiffusionConfig(
        num_timesteps=1000,
        use_ema=True,  # Enable EMA
        ema_decay=0.9999,
        ema_update_after_step=0,
        ema_device=device,
        use_ddim=False,
        seed=42,
    )


# Configs for two_stage
shape_voxel_config = _make_voxel_config(mode='two_stage', stage='shape')
color_voxel_config = _make_voxel_config(mode='two_stage', stage='color')

shape_diffusion_config = _make_diffusion_config()
color_diffusion_config = _make_diffusion_config()

# Configs for Combined RGBA (Simple)
combined_voxel_config_simple = _make_voxel_config(
    mode='combined', stage=None, use_simple_mse=True
)
combined_diffusion_config_simple = _make_diffusion_config()

# Configs for Combined RGBA (Weighted)
combined_voxel_config_weighted = _make_voxel_config(
    mode='combined', stage=None, use_simple_mse=False
)
combined_diffusion_config_weighted = _make_diffusion_config()

## Ablation Continuation

### EMA vs No EMA

In [12]:
# Load the two-stage checkpoints with ema=True (the loader reports "Loaded EMA model weights").
# The device string comes from the `device` detected earlier rather than a hardcoded 'cuda',
# so this cell agrees with the CPU fallback; on a GPU machine device.type is still 'cuda'.
shape_model = load_model_for_inference(
    model_path="runs/experiment_two_stage/shape/best_model/model",
    voxel_config=shape_voxel_config,
    diffusion_config=shape_diffusion_config,
    device=device.type,
    ema=True,
)

color_model = load_model_for_inference(
    model_path="runs/experiment_two_stage/color/best_model/model",
    voxel_config=color_voxel_config,
    diffusion_config=color_diffusion_config,
    device=device.type,
    ema=True,
)

The number of input channels is: 1 The number of out channels is1


The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 120000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet3DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


Total parameters: 120096449
Loaded EMA model weights
The number of input channels is: 4 The number of out channels is3


The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 110000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet3DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


Total parameters: 120099331
Loaded EMA model weights


In [14]:
# Get baseline for ablation: two-stage generation with use_rotations=False.
test_data_dir = "evaluation_set_voxelized"
annotation_file = "evaluation_set/annotations.json"

# For two-stage model.
# The result is kept under its own name so that a later evaluation cell that rebinds
# `metrics` does not discard the baseline numbers.
baseline_metrics = evaluate_generation(
    model=shape_model,
    color_model=color_model,  # Required for two_stage
    model_type='two_stage',
    test_data_dir=test_data_dir,
    annotation_file=annotation_file,
    guidance_scale=20.0,
    color_guidance_scale=20.0,
    use_rotations=False,
    device=device.type,  # follows the detected device; 'cuda' on a GPU machine
)
metrics = baseline_metrics  # original name kept for any cell that still reads `metrics`

print("\nEvaluation Results:")
print("-" * 50)
for metric_name, value in baseline_metrics.items():
    print(f"{metric_name:15s}: {value:.4f}")

Evaluating 8 samples


Evaluating samples:   0%|                                 | 0/8 [00:00<?, ?it/s]
[A

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  12%|███                     | 1/8 [04:58<34:49, 298.48s/it] 0.537

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  25%|██████                  | 2/8 [09:56<29:50, 298.43s/it] 0.537

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  38%|█████████               | 3/8 [14:55<24:51, 298.34s/it] 0.537

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  50%|████████████            | 4/8 [19:53<19:53, 298.35s/it] 0.476

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  62%|███████████████         | 5/8 [24:52<14:55, 298.47s/it] 0.476

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  75%|██████████████████      | 6/8 [29:50<09:57, 298.54s/it] 0.491

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples:  88%|█████████████████████   | 7/8 [34:49<04:58, 298.56s/it] 0.491

Stage 1: Generating shapes...

Stage 2: Adding colors...



Evaluating samples: 100%|████████████████████████| 8/8 [39:48<00:00, 298.51s/it] 0.491
Current Averages | iou: 0.039 | f1: 0.070 | color_score: 0.836 | combined_score: 0.491


Evaluation Results:
--------------------------------------------------
iou            : 0.0394
f1             : 0.0701
color_score    : 0.8358
combined_score : 0.4909





### Rotation-augmented sampling vs. no rotation-augmented sampling

In [None]:
# Ablation variant: same two-stage evaluation, but with use_rotations=True
# (and use_mean_init=False).
test_data_dir = "evaluation_set_voxelized"
annotation_file = "evaluation_set/annotations.json"

# For two-stage model.
# Stored under a dedicated name so this run can be compared with other evaluations
# instead of only living in the shared `metrics` variable.
rotation_metrics = evaluate_generation(
    model=shape_model,
    color_model=color_model,  # Required for two_stage
    model_type='two_stage',
    test_data_dir=test_data_dir,
    annotation_file=annotation_file,
    guidance_scale=20.0,
    color_guidance_scale=20.0,
    use_rotations=True,
    use_mean_init=False,
    device=device.type,  # follows the detected device; 'cuda' on a GPU machine
)
metrics = rotation_metrics  # original name kept for any cell that still reads `metrics`

print("\nEvaluation Results:")
print("-" * 50)
for metric_name, value in rotation_metrics.items():
    print(f"{metric_name:15s}: {value:.4f}")


Starting evaluation of 8 samples...
Model type: two_stage
Guidance scale: 20.0, Color guidance: 20.0
--------------------------------------------------


Overall Progress:   0%|                                                       | 0/8 [00:00<?, ?it/s]
[Arics | Waiting for first sample...

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (1/8):  12%|█████▌                                      | 1/8 [14:51<1:44:01, 891.60s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (2/8):  25%|███████████                                 | 2/8 [29:42<1:29:08, 891.48s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (3/8):  38%|████████████████▌                           | 3/8 [44:34<1:14:16, 891.29s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (4/8):  50%|███████████████████████                       | 4/8 [59:26<59:26, 891.62s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (5/8):  62%|███████████████████████████▌                | 5/8 [1:14:19<44:36, 892.29s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (6/8):  75%|█████████████████████████████████           | 6/8 [1:29:13<29:45, 892.76s/it]

Stage 1: Generating shapes...

Stage 2: Adding colors...



Processing (7/8):  88%|██████████████████████████████████████▌     | 7/8 [1:44:06<14:52, 892.97s/it]

Stage 1: Generating shapes...
