In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import torch


# Choose the compute device once: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU. Later cells reuse `device`.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cpu


In [6]:
from diffusers import UNet3DConditionModel
from diffusion import DiffusionModel3D

# Create the denoising backbone: a conditional UNet from diffusers.
# NOTE(review): the printed summary of this model shows `conv_in` as a Conv2d and a
# TransformerTemporalModel stage, which suggests one axis is handled as a
# frame/temporal axis rather than by true 3D convolutions -- confirm this is the
# intended treatment for voxel grids.
model = UNet3DConditionModel(
    sample_size=32,  # the target resolution (assuming 32x32x32 voxels)
    in_channels=1,  # number of input channels, 1 for voxel data
    out_channels=1,  # number of output channels
    layers_per_block=2,  # how many ResNet layers to use per UNet block
    block_out_channels=(64, 128, 256, 512),  # channel numbers for each block
    down_block_types=(
        "CrossAttnDownBlock3D",  # a downsampling block with cross-attention
        "CrossAttnDownBlock3D",
        "CrossAttnDownBlock3D",
        "DownBlock3D",  # a regular downsampling block without attention
    ),
    up_block_types=(
        "UpBlock3D",  # a regular upsampling block without attention
        "CrossAttnUpBlock3D",  # an upsampling block with cross-attention
        "CrossAttnUpBlock3D",
        "CrossAttnUpBlock3D",
    ),
    cross_attention_dim=512,  # dimension of the cross attention features
)
model.to(device)

# Wrap the UNet built above. Pass `model` explicitly: no `unet3d_model` is assigned
# anywhere in the visible notebook, so referring to that name only works with
# leftover kernel state and fails with NameError after Restart & Run All.
diffusion_model = DiffusionModel3D(model, num_timesteps=1000)
diffusion_model.to(device)

# Print model summary
print(diffusion_model)
print(f"Total parameters: {sum(p.numel() for p in diffusion_model.parameters())}")

DiffusionModel3D(
  (model): UNet3DConditionModel(
    (conv_in): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (time_proj): Timesteps()
    (time_embedding): TimestepEmbedding(
      (linear_1): Linear(in_features=64, out_features=256, bias=True)
      (act): SiLU()
      (linear_2): Linear(in_features=256, out_features=256, bias=True)
    )
    (transformer_in): TransformerTemporalModel(
      (norm): GroupNorm(32, 64, eps=1e-06, affine=True)
      (proj_in): Linear(in_features=64, out_features=512, bias=True)
      (transformer_blocks): ModuleList(
        (0): BasicTransformerBlock(
          (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (attn1): Attention(
            (to_q): Linear(in_features=512, out_features=512, bias=False)
            (to_k): Linear(in_features=512, out_features=512, bias=False)
            (to_v): Linear(in_features=512, out_features=512, bias=False)
            (to_out): ModuleList(
              (0): Linea

  self.dummy_encoder = nn.Parameter(torch.randn(1, 1, model.cross_attention_dim))


In [7]:
# Import your data loader (assumed to be defined in a separate file)
from data_loader import create_dataloader

# Dataset location and mini-batch size used to build the loaders.
data_dir = "./objaverse_processed/processed_models"  # Replace with actual data directory
batch_size = 3  # Adjust as necessary for memory

# A single call returns the pair (train loader, test loader).
train_dataloader, test_dataloader = create_dataloader(
    data_dir,
    batch_size=batch_size,
)

# Report how many items each split's underlying dataset holds.
print(f"Train dataset size: {len(train_dataloader.dataset)}")
print(f"Test dataset size: {len(test_dataloader.dataset)}")

Loading data from ./objaverse_processed/processed_models...
Found 1000 files in ./objaverse_processed/processed_models
Train dataset size: 900
Test dataset size: 100


In [None]:
from training import train_diffusion_model

# Train for two epochs and keep both returned loss histories for later cells.
# Expect this to be slow on CPU: the recorded run was at roughly one minute per
# iteration with 300 iterations per epoch.
losses, test_losses = train_diffusion_model(
    diffusion_model,
    train_dataloader,
    test_dataloader,
    epochs=2,
    device=device,
)

Epoch 1/2 - Training:  14%|█▍        | 42/300 [43:25<4:24:04, 61.41s/it]

In [None]:
# Plot the training loss
import matplotlib.pyplot as plt
# Draw the training-loss curve on an explicit figure/axes pair so the plot does not
# depend on pyplot's implicit "current figure" state.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_title("Training Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()


In [14]:
from inference import DiffusionInference3D

# Build the sampler from the diffusion model and the noise scheduler it exposes.
inference = DiffusionInference3D(
    diffusion_model,
    diffusion_model.noise_scheduler,
    device=device,
)

# Draw four samples at the 32x32x32 size requested via `image_size`.
samples = inference.sample(num_samples=4, image_size=(32, 32, 32))

# Visualize the samples, passing 0.5 as the threshold.
inference.visualize_samples(samples, threshold=0.5)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
