In [1]:
from pathlib import Path
import open3d as o3d
import os

from pytorch_lightning import seed_everything

from src.dataset_utils import (
    get_singleview_data,
    get_multiview_data,
    get_voxel_data_json,
    get_image_transform_latent_model,
    get_pointcloud_data,
    get_mv_dm_data,
    get_sv_dm_data,
    get_sketch_data
)
from src.model_utils import Model
from src.mvdream_utils import load_mvdream_model
import argparse
from PIL import Image


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
import os
os.environ["XFORMERS_DISABLED"] = "0"

In [3]:

def simplify_mesh(obj_path, target_num_faces=1000):
    mesh = o3d.io.read_triangle_mesh(obj_path)
    simplified_mesh = mesh.simplify_quadric_decimation(target_num_faces)
    o3d.io.write_triangle_mesh(obj_path, simplified_mesh)


def generate_3d_object(
    model,
    data,
    data_idx,
    images,
    low,
    ids,
    scale,
    diffusion_rescale_timestep,
    save_dir="examples",
    output_format="obj",
    target_num_faces=None,
    seed=42,
):
    # Set seed
    seed_everything(seed, workers=True)

    save_dir.mkdir(parents=True, exist_ok=True)
    model.set_inference_fusion_params(scale, diffusion_rescale_timestep)
    out = model.forward(images,low,data_idx)

    return out

   


In [4]:
model_name = 'ADSKAILab/WaLa-SV-1B'
images = ['examples/single_view/table.png']
output_dir = 'examples'
output_format = 'obj'
target_num_faces = None
scale = 1.8
seed = 42
diffusion_rescale_timestep = 5
resolution = 224


In [5]:
print(f"Loading model")

model = Model.from_pretrained(pretrained_model_name_or_path=model_name)
image_transform = get_image_transform_latent_model()

for image_path in images:
    print(f"Processing image: {image_path}")
    data = get_singleview_data(
        image_file=Path(image_path),
        image_transform=image_transform,
        device=model.device,
        image_over_white=False,
    )
    data_idx = data ['img_idx']
    images = data['images']
    low = data['low']
    ids = data['id']
    save_dir = Path(output_dir) / Path(image_path).stem

    model.set_inference_fusion_params(
        scale, diffusion_rescale_timestep
    )
    out = generate_3d_object(
            model,
            data,
            data_idx,
            images,
            low,
            ids,
            scale,
            diffusion_rescale_timestep,
            save_dir,
            output_format,
            target_num_faces,
            seed,
        )



        

Loading model
'DotDict' object has no attribute 'dataset_path'
'DotDict' object has no attribute 'low_avg'
'DotDict' object has no attribute 'low_avg'
Low avg used : None high value: 63


/opt/miniconda/envs/wala/lib/python3.10/site-packages/pytorch_lightning/utilities/migration/utils.py:55: The loaded checkpoint was produced with Lightning v2.3.3, which is newer than your current Lightning version: v2.1.0
Using cache found in /home/ray/.cache/torch/hub/facebookresearch_dinov2_main


cond_emb_dim: 1024
Input resolution: 224
Vocab size: N/A
'DotDict' object has no attribute 'use_multiple_views_inferences'
'DotDict' object has no attribute 'use_multiple_views_inferences'
'DotDict' object has no attribute 'use_multiple_views_inferences'
'DotDict' object has no attribute 'use_multiple_views_inferences'


Seed set to 42


Processing image: examples/single_view/table.png
'DotDict' object has no attribute 'use_multiple_views_inferences'
'DotDict' object has no attribute 'use_multiple_views_inferences'


  0%|          | 0/5 [00:00<?, ?it/s]

In [12]:
low, highs = out
print("low shape:", low.shape)
if isinstance(highs, (list, tuple)):
    for i, h in enumerate(highs):
        print(f"highs[{i}] shape:", h.shape)
else:
    print("highs shape:", highs.shape)

low shape: torch.Size([1, 1, 46, 46, 46])
highs[0] shape: torch.Size([1, 1, 7, 136, 136, 136])
highs[1] shape: torch.Size([1, 1, 7, 76, 76, 76])
highs[2] shape: torch.Size([1, 1, 7, 46, 46, 46])


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from src.diffusion_modules.dwt import DWTInverse3d


# Example parameter values (replace with your actual values). BUGGED NEED FIX AND INVESTIGATION INTO REVERSING DWT
max_depth = 3
wavelet = 'haar'
padding_mode = 'zero'
resolution = 224  # Set your grid resolution

from src.diffusion_modules.dwt import DWTInverse3d

dwt_inverse_3d = DWTInverse3d(
    J=max_depth, wave=wavelet, mode=padding_mode
)

# Order highs so that the first element matches low's shape
highs_ordered = sorted(highs, key=lambda h: h.shape[-1])
sdf_recon = dwt_inverse_3d((low, highs_ordered))


vertices, triangles = mcubes.marching_cubes(
    sdf_recon.cpu().detach().numpy()[0, 0], 0.0
)
vertices = (vertices / resolution) * 2.0 - 1.0
triangles = triangles[:, ::-1]

# Plot the mesh
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
mesh = Poly3DCollection(vertices[triangles], alpha=0.7)
mesh.set_facecolor([0.5, 0.5, 1, 0.7])
ax.add_collection3d(mesh)
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-1, 1)
plt.show()


RuntimeError: The size of tensor a (46) must match the size of tensor b (136) at non-singleton dimension 4

In [None]:
assert False
for image_path in images:
    print(f"Processing image: {image_path}")
    data = get_singleview_data(
        image_file=Path(image_path),
        image_transform=image_transform,
        device=model.device,
        image_over_white=False,
    )
    data_idx = data ['img_idx']
    images = data['images']
    low = data['low']
    ids = data['id']
    save_dir = Path(output_dir) / Path(image_path).stem

    model.set_inference_fusion_params(
        scale, diffusion_rescale_timestep
    )




    generate_3d_object(
        model,
        data,
        data_idx,
        images,
        low,
        ids,
        scale,
        diffusion_rescale_timestep,
        save_dir,
        output_format,
        target_num_faces,
        seed,
    )


    

AssertionError: 

### Forward Mod

In [None]:
file_name = data["id"][data_idx]
with torch.no_grad():
    low_pred, highs_pred = self.inference_sample(
        data, data_idx, return_wavelet_volume=False
    )



### ONNIX

In [2]:
import torchvision
print(torchvision.__version__)

0.16.2+cu121


In [6]:
import onnx
import torch

torch.onnx.export(model, (images, low,data_idx), "model.onnx")


'DotDict' object has no attribute 'use_multiple_views_inferences'
'DotDict' object has no attribute 'use_multiple_views_inferences'


  assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}"
  assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}"
  if npatch == N and w == h:
  M = int(math.sqrt(N))  # Recover the number of patches in each dimension
  assert N == M * M
  assert (w0, h0) == patch_pos_embed.shape[-2:]


  0%|          | 0/5 [00:00<?, ?it/s]

  t = torch.tensor([i] * shape[0], device=device)
  assert t.shape == (B,)
  map_tensor = torch.tensor(self.timestep_map, device=ts.device, dtype=ts.dtype)
  res = torch.from_numpy(arr).to(device=timesteps.device)[timesteps].float()
  assert x_start.shape == x_t.shape
  assert (
  assert (
  unique = len(torch.unique(min_encoding_indices))
  assert x.shape[1] == self.channels
  coordinates = torch.from_numpy(coordinates).long()
  torch.from_numpy(
  assert highs_indices.size(0) == highs_values.size(0)


UnsupportedOperatorError: Exporting the operator 'aten::_upsample_bicubic2d_aa' to ONNX opset version 17 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub: https://github.com/pytorch/pytorch/issues.