In [1]:
%load_ext autoreload
%autoreload 2
%config ZMQInteractiveShell.cache_size = 0

# import all the necessary modules
from pathlib import Path
import random

from nerfstudio.models.nerfacto import NerfactoModelConfig
from nerfstudio.configs.base_config import ViewerConfig
from nerfstudio.configs.experiment_config import ExperimentConfig
from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig
from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManagerConfig
from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
from nerfstudio.engine.trainer import TrainerConfig
import torch
import lovely_tensors as lt
lt.monkey_patch()

from utils import *

In [2]:
# Dice DATA
MODEL_CHECKPOINT_PATH = Path("/data/vision/polina/projects/wmh/dhollidt/documents/nerf/outputs/dice_256/nerfacto/2023-01-16_101826/nerfstudio_models")
MODEL_LOAD_STEP = 24000
DATA_PATH = Path("/data/vision/polina/projects/wmh/dhollidt/documents/nerf/data/dice_rand_v3")

# CLEVR DATA
# MODEL_CHECKPOINT_PATH = Path("/data/vision/polina/scratch/clintonw/datasets/nerfacto/0/nerfacto/2023-01-13_145424/nerfstudio_models")
# MODEL_LOAD_STEP = 29999
# DATA_PATH = Path("/data/vision/polina/scratch/clintonw/datasets/kubric/0/")

OUTPUT_DIR = Path("/data/vision/polina/projects/wmh/dhollidt/documents/nerf/playground")


trainConfig = TrainerConfig(
    method_name="nerfacto",
    experiment_name="/tmp",
    data=DATA_PATH,
    output_dir=OUTPUT_DIR,
    steps_per_eval_batch=500,
    steps_per_save=2000,
    max_num_iterations=30000,
    mixed_precision=True,
    pipeline=VanillaPipelineConfig(
        datamanager=VanillaDataManagerConfig(
            dataparser=NerfstudioDataParserConfig(),
            train_num_rays_per_batch=4096,
            eval_num_rays_per_batch=4096,
            camera_optimizer=CameraOptimizerConfig(
                mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
            ),
        ),
        model=NerfactoModelConfig(eval_num_rays_per_chunk=1 << 15),
    ),
    optimizers={
        "proposal_networks": {
            "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
            "scheduler": None,
        },
        "fields": {
            "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
            "scheduler": None,
        },
    },
    viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
    vis="wandb",
    load_dir=MODEL_CHECKPOINT_PATH,
    load_step=MODEL_LOAD_STEP
)

trainConfig.set_timestamp()
trainConfig.pipeline.datamanager.dataparser.data = trainConfig.data
trainConfig.save_config()

trainer = trainConfig.setup(local_rank=0, world_size=1)
trainer.setup()


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdominik-hollidt[0m ([33mdhollidt[0m). Use [1m`wandb login --relogin`[0m to force relogin


Output()

Output()

In [3]:
pipeline = trainer.pipeline
model = pipeline.model

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"The network has {num_params:,} trainable parameters.")




The network has 13,633,168 trainable parameters.


In [4]:
ray_bundle, batch = pipeline.datamanager.next_train(1)
ray_bundle_eval, batch_eval = pipeline.datamanager.next_eval(1)
print(ray_bundle.shape)
print(ray_bundle_eval.shape)

torch.Size([4096])
torch.Size([4096])


In [5]:
if model.collider is not None:
    ray_bundle = model.collider(ray_bundle)

ray_samples, weights_list, ray_samples_list = model.proposal_sampler(ray_bundle, density_fns=model.density_fns)
field_outputs = model.field(ray_samples, compute_normals=model.config.predict_normals)

print(f"{ray_bundle.origins.shape=}")
print(f"{ray_bundle.directions.shape=}")
print(f"{ray_samples.frustums.starts.shape=}")
print(f"{ray_samples.frustums.ends.shape=}")
print(f"{ray_samples.frustums.get_positions().shape=}")
print(f"{ray_samples.shape=}")

print(field_outputs[FieldHeadNames.RGB].shape)
print(field_outputs[FieldHeadNames.DENSITY].shape)
print(field_outputs.keys())

ray_bundle.origins.shape=torch.Size([4096, 3])
ray_bundle.directions.shape=torch.Size([4096, 3])
ray_samples.frustums.starts.shape=torch.Size([4096, 48, 1])
ray_samples.frustums.ends.shape=torch.Size([4096, 48, 1])
ray_samples.frustums.get_positions().shape=torch.Size([4096, 48, 3])
ray_samples.shape=torch.Size([4096, 48])
torch.Size([4096, 48, 3])
torch.Size([4096, 48, 1])
dict_keys([<FieldHeadNames.RGB: 'rgb'>, <FieldHeadNames.DENSITY: 'density'>])


torch.Size([4096, 3])
torch.Size([4096, 3])
torch.Size([4096, 48, 1])
torch.Size([4096, 48, 1])
torch.Size([4096, 48, 3])
torch.Size([4096, 48, 1])

In [6]:
try:
    import tinycudann as tcnn
except ImportError:
    # tinycudann module doesn't exist
    pass
from torch import Tensor, nn
from nerfstudio.models.base_model import Model, ModelConfig
from nerfstudio.models.nerfacto import NerfactoModel
from typing import cast
from nerfstudio.fields.nerfacto_field import get_normalized_directions
class FeatureGenerator(nn.Module):
    """Takes in a batch of b Ray bundles, samples s points along the ray. Then it outputs n x m x f matrix.
    Each row corresponds to one feature of a sampled point of the ray.

    Args:
        nn (_type_): _description_
    """

    def __init__(self, positional_encoding_dim=128, field_output_encoding=128):
        super().__init__()
        self.direction_encoding = tcnn.Encoding(
            n_input_dims=3,
            encoding_config={
                "otype": "SphericalHarmonics",
                "degree": 4,
            },
        )

        self.position_frustums_encoding = tcnn.Encoding(
            n_input_dims=3,
            encoding_config={"otype": "Frequency", "n_frequencies": 2},
        )

        self.mlp_merged_pos_encoding = tcnn.Network(
            n_input_dims=self.direction_encoding.n_output_dims + self.position_frustums_encoding.n_output_dims,
            n_output_dims=positional_encoding_dim,
            network_config={
                "otype": "FullyFusedMLP",
                "activation": "ReLU",
                "output_activation": "None",
                "n_neurons": 64,
                "n_hidden_layers": 4,
            },
        )

        # Tiny cudnn network for processing the field outputs of the samples b x s x (rgb + density = 4)
        self.mlp_field_output = tcnn.Network(
            n_input_dims=4,
            n_output_dims=field_output_encoding,
            network_config={
                "otype": "FullyFusedMLP",
                "activation": "ReLU",
                "output_activation": "None",
                "n_neurons": 64,
                "n_hidden_layers": 4,
            },
        )

    def forward(self, ray_bundle: RayBundle, model: Model) -> Tensor:

        if isinstance(model, NerfactoModel):
            model = cast(NerfactoModel, model)
            if model.collider is not None:
                ray_bundle = model.collider(ray_bundle)

            ray_samples, _, _ = model.proposal_sampler(ray_bundle, density_fns=model.density_fns)
            field_outputs = model.field(ray_samples, compute_normals=model.config.predict_normals)
        else:
            raise NotImplementedError("Only NerfactoModel is supported for now")


        # normalize field densitities:
        densities = field_outputs[FieldHeadNames.DENSITY]
        mean_vals = torch.mean(densities, dim=1, keepdim=True)
        std_vals = torch.std(densities, dim=1, keepdim=True)

        # Scale the values to have a mean of 0 and a standard deviation of 1
        normalized_densities = (densities - mean_vals) / std_vals

        field_outputs_stacked = torch.cat((field_outputs[FieldHeadNames.RGB], normalized_densities), dim=-1)
        print(f"{torch.isnan(field_outputs[FieldHeadNames.DENSITY]).sum()=}, \n{field_outputs[FieldHeadNames.DENSITY].shape=} ")
        print(f"{normalized_densities.max()=}, {normalized_densities.min()=}, {normalized_densities.mean()=}, {normalized_densities.std()=}")
        print(f"{torch.isnan(field_outputs_stacked).sum()=}")
        field_features = self.mlp_field_output(field_outputs_stacked.view(-1, 4))
        print(f"{torch.isnan(field_features).sum()=}")
        

        # Positional encoding of the Frustums
        positions_frustums = ray_samples.frustums.get_positions()
        positions_frustums_flat = self.position_frustums_encoding(positions_frustums.view(-1, 3))
        print(f"{torch.isnan(positions_frustums_flat).sum()=}")

        # Positional encoding of the ray
        directions = get_normalized_directions(ray_samples.frustums.directions)
        directions_flat = directions.view(-1, 3)
        d = self.direction_encoding(directions_flat)
        print(f"{torch.isnan(d).sum()=}")
        

        pos_encode = torch.cat([d, positions_frustums_flat], dim=1)
        
        pos_features = self.mlp_merged_pos_encoding(pos_encode)
        print(f"{torch.isnan(pos_features).sum()=}")
        
        
        features = torch.cat([pos_features, field_features], dim=1)
        features = features.view(ray_samples.shape[0], ray_samples.shape[1], -1)
        return features

In [6]:
fg = FeatureGenerator()

num_params = sum(p.numel() for p in fg.parameters() if p.requires_grad)
print(f"The network has {num_params:,} trainable parameters.")


NameError: name 'FeatureGenerator' is not defined

In [8]:
features = fg(ray_bundle, model)

torch.isnan(field_outputs[FieldHeadNames.DENSITY]).sum()=tensor(0, device='cuda:0'), 
field_outputs[FieldHeadNames.DENSITY].shape=torch.Size([256, 48, 1]) 
normalized_densities.max()=tensor(6.7839, device='cuda:0', grad_fn=<MaxBackward1>), normalized_densities.min()=tensor(-0.6007, device='cuda:0', grad_fn=<MinBackward1>), normalized_densities.mean()=tensor(-6.8297e-09, device='cuda:0', grad_fn=<MeanBackward0>), normalized_densities.std()=tensor(0.9896, device='cuda:0', grad_fn=<StdBackward0>)
torch.isnan(field_outputs_stacked).sum()=tensor(0, device='cuda:0')
torch.isnan(field_features).sum()=tensor(0, device='cuda:0')
torch.isnan(positions_frustums_flat).sum()=tensor(0, device='cuda:0')
torch.isnan(d).sum()=tensor(0, device='cuda:0')
torch.isnan(pos_features).sum()=tensor(0, device='cuda:0')


In [9]:
# count the nan in features
print(f"{torch.isnan(features).sum()=}")
print(f"{features.shape=}")

torch.isnan(features).sum()=tensor(0, device='cuda:0')
features.shape=torch.Size([256, 48, 256])


In [10]:
from torch import nn

class Block(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU()
        )
    
    def forward(self, x):
        return self.double_conv(x)

In [11]:
class Encoder(nn.Module):
    def __init__(self, chs=(1,64,128,256)):
        super().__init__()
        self.enc_blocks = nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)])
        self.pool       = nn.MaxPool2d(2)
    
    def forward(self, x):
        ftrs = []
        for block in self.enc_blocks:
            x = block(x)
            ftrs.append(x)
            x = self.pool(x)
        return ftrs


In [12]:
import torchvision
class Decoder(nn.Module):
    def __init__(self, chs=(256, 128, 64, 1)):
        super().__init__()
        self.chs         = chs
        self.upconvs    = nn.ModuleList([nn.ConvTranspose2d(chs[i], chs[i+1], 2, 2) for i in range(len(chs)-1)])
        self.dec_blocks = nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)]) 
        
    def forward(self, x, encoder_features):
        for i in range(len(self.chs)-1):
            x        = self.upconvs[i](x)
            enc_ftrs = self.crop(encoder_features[i], x)
            x        = torch.cat([x, enc_ftrs], dim=1)
            x        = self.dec_blocks[i](x)
        return x
    
    def crop(self, enc_ftrs, x):
        _, _, H, W = x.shape
        enc_ftrs   = torchvision.transforms.CenterCrop([H, W])(enc_ftrs)
        return enc_ftrs


In [41]:
import torch.nn.functional as F

class UNet(nn.Module):
    def __init__(self, enc_chs=(1, 16, 32, 64), dec_chs=(64, 32, 16), num_class=4):
        super().__init__()
        self.encoder            = Encoder(enc_chs)
        self.decoder            = Decoder(dec_chs)
        self.head               = nn.Conv2d(dec_chs[-1], num_class, 1)
        self.density_activation = nn.ReLU()

    def forward(self, x):
        x = x.unsqueeze(1)
        enc_ftrs = self.encoder(x)
        out      = self.decoder(enc_ftrs[::-1][0], enc_ftrs[::-1][1:])
        out      = self.head(out)
        
        # reduce the channel dimension
        out      = torch.mean(out, dim=-1)
        
        output = {}
        output[FieldHeadNames.RGB] = torch.sigmoid(out[:, :3, :].squeeze(1).permute(0, 2, 1))
        output[FieldHeadNames.DENSITY] = self.density_activation(out[:, 3:, :].permute(0, 2, 1))
        return output

In [42]:
unet = UNet()
unet = unet.half()
unet = unet.to("cuda")

num_params = sum(p.numel() for p in unet.parameters() if p.requires_grad)
print(f"The network has {num_params:,} trainable parameters.")

The network has 117,444 trainable parameters.


In [43]:
features.shape
f = features.unsqueeze(1)
f.shape

torch.Size([256, 1, 48, 256])

In [44]:
transformed_features = unet(features)
# print(f"{transformed_features.shape=}")
print(f"{transformed_features[FieldHeadNames.RGB].shape=}")
print(f"{transformed_features[FieldHeadNames.DENSITY].shape=}")

torch.Size([256, 16, 48, 256])
torch.Size([256, 4, 48, 256])
torch.Size([256, 4, 48])
out[:, 3:, :].permute(0, 2, 1)=tensor[256, 48, 1] f16 n=12288 x∈[-0.115, 0.072] μ=0.032 σ=0.019 grad PermuteBackward0 cuda:0
transformed_features[FieldHeadNames.RGB].shape=torch.Size([256, 48, 3])
transformed_features[FieldHeadNames.DENSITY].shape=torch.Size([256, 48, 1])


In [39]:
print(f"{transformed_features[FieldHeadNames.RGB]=}")

transformed_features[FieldHeadNames.RGB]=tensor[256, 48, 3] f16 n=36864 x∈[0.485, 0.646] μ=0.554 σ=0.020 grad SigmoidBackward0 cuda:0


In [40]:
print(f"{transformed_features[FieldHeadNames.DENSITY]=}")


transformed_features[FieldHeadNames.DENSITY]=tensor[256, 48, 1] f16 n=12288 x∈[0., 0.121] μ=0.011 σ=0.016 grad ReluBackward0 cuda:0


In [10]:
from nerfstudio.models.nesf import FeatureGeneratorTorch

fg = FeatureGeneratorTorch(model.scene_box.aabb).to("cuda")


In [23]:
features, weights, density_mask = fg(ray_bundle.to("cuda"), model.to("cuda"))
features = features[:1024].unsqueeze(0)
print(features)

tensor[1, 1024, 48] n=49152 x∈[-1.000, 1.000] μ=0.182 σ=0.489 grad UnsqueezeBackward0 cuda:0


In [21]:
from nerfstudio.models.nesf import TransformerModel

feature_transformer = TransformerModel(
            output_size=6,
            num_layers=2,
            d_model=fg.get_out_dim(),
            num_heads=4,
            dff=64,
            dropout_rate=0.1).to("cuda")


In [26]:
transformed_features = feature_transformer(features)
print(transformed_features.shape)

torch.Size([1, 1024, 6])


In [8]:
from torchinfo import summary

# summary(fg, input_data=[ray_bundle, model])
summary(fg.linear, input_size=((1 << 15)*48, 3), mode="train")

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1572864, 3]              --
├─Linear: 1-1                            [1572864, 128]            512
├─ReLU: 1-2                              [1572864, 128]            --
├─Linear: 1-3                            [1572864, 256]            33,024
├─ReLU: 1-4                              [1572864, 256]            --
├─Linear: 1-5                            [1572864, 256]            65,792
├─ReLU: 1-6                              [1572864, 256]            --
├─Linear: 1-7                            [1572864, 128]            32,896
├─ReLU: 1-8                              [1572864, 128]            --
├─Linear: 1-9                            [1572864, 3]              387
├─Sigmoid: 1-10                          [1572864, 3]              --
Total params: 132,611
Trainable params: 132,611
Non-trainable params: 0
Total mult-adds (G): 208.58
Input size (MB): 18.87
Forward/backward p

In [1]:
from nerfstudio.models.nesf import FeatureGeneratorTorch, UNet
from torchinfo import summary

unet = UNet().to("cuda")

summary(unet, input_size=(2048, 48, 32), mode="train")

Layer (type:depth-idx)                        Output Shape              Param #
UNet                                          [2048, 48, 1]             --
├─Encoder: 1-1                                [2048, 16, 48, 32]        --
│    └─ModuleList: 2-3                        --                        (recursive)
│    │    └─Block: 3-1                        [2048, 16, 48, 32]        2,544
│    └─MaxPool2d: 2-2                         [2048, 16, 24, 16]        --
│    └─ModuleList: 2-3                        --                        (recursive)
│    │    └─Block: 3-2                        [2048, 32, 24, 16]        14,016
│    └─MaxPool2d: 2-4                         [2048, 32, 12, 8]         --
├─Decoder: 1-2                                [2048, 16, 48, 32]        --
│    └─ModuleList: 2-5                        --                        --
│    │    └─ConvTranspose2d: 3-3              [2048, 16, 48, 32]        2,064
│    └─ModuleList: 2-6                        --                   