In [None]:
# test PSNR/SSIM/LPIPS
import os
dataset = 'dynerf'
scene_list = ['flame_steak', 'sear_steak-5', 'cut_roasted_beef', 'coffee_martini', 'flame_salmon_1', 'cook_spinach']
for scene in scene_list:
    for frame in range(1, 300):
        ply_path = f'/SSD2/chenzx/Projects/FCGS/output_gt/{dataset}/{scene}/frame{frame:06d}/point_cloud/iteration_150/point_cloud.ply'
        source_path = f'/SDD_D/zwk/data_dynamic/{dataset}/{scene}/frame{frame:06d}'
        save_path = f'/SSD2/chenzx/Projects/FCGS/output_gt/{dataset}/{scene}/frame{frame:06d}'

        script = f'python validate.py --use_first_as_test --ply_path {ply_path} --source_path {source_path} --gpu 1 --save_path {save_path} '
        os.system(script)

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /SSD2/chenzx/miniconda3/envs/fcgs/lib/python3.11/site-packages/lpips/weights/v0.1/vgg.pth


Rendering progress:   0%|          | 0/1 [00:00<?, ?it/s]

SSIM: 0.9302, L1: 0.0210, LPIPS: 0.1405, PSNR: 28.8961
save_path /SSD2/chenzx/Projects/FCGS/output_gt/dynerf/coffee_martini/frame000001


Rendering progress: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]


0

### 01 ply、GS、pth等的读取

In [1]:
import torch
import torch.nn as nn
import MinkowskiEngine as ME
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'D_DPCC'))
from D_DPCC.models.model_utils import *

class get_model(nn.Module):
    def __init__(self, channels=8):
        super(get_model, self).__init__()
        self.enc1 = DownsampleLayer(1, 16, 32, 3)
        self.enc2 = DownsampleLayer(32, 32, 64, 3)
        self.inter_prediction = inter_prediction(64, 64, 48)
        self.enc3 = DownsampleLayer(64, 64, 32, 3)
        self.enc4 = ME.MinkowskiConvolution(in_channels=32, out_channels=channels, kernel_size=3, stride=1, bias=True, dimension=3)

        self.dec1 = UpsampleLayer(channels, 64, 64, 3)
        self.dec2 = UpsampleLayer(64, 32, 32, 3)
        self.dec3 = UpsampleLayer(32, 16, 16, 3)

        self.BitEstimator = BitEstimator(channels, 3)
        self.MotionBitEstimator = BitEstimator(48, 3)
        self.crit = torch.nn.BCEWithLogitsLoss()

    def forward(self, f1, f2, device, epoch=99999):
        num_points = f2.C.size(0)

        ys1, ys2 = [f1, 0, 0, 0, 0], [f2, 0, 0, 0, 0]
        out2, out_cls2, target2, keep2 = [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]

        # feature extraction
        ys1[1] = self.enc1(ys1[0])
        ys1[2] = self.enc2(ys1[1])
        print(ys1[2])

        ys2[1] = self.enc1(ys2[0])
        ys2[2] = self.enc2(ys2[1])
        print(ys2[2])

        # inter prediction
        residual, predicted_point2, quant_motion = self.inter_prediction(ys1[2], ys2[2], stride=4)

        # residual compression
        quant_motion_F = quant_motion.F.unsqueeze(0)
        ys2[3] = self.enc3(residual)
        ys2[4] = self.enc4(ys2[3])
        quant_y = quant(ys2[4].F.unsqueeze(0), training=self.training)

        # bit rate calculation
        p = self.BitEstimator(quant_y+0.5) - self.BitEstimator(quant_y-0.5)
        bits = torch.sum(torch.clamp(-1.0 * torch.log(p + 1e-10) / math.log(2.0), 0, 50))
        motion_p = self.MotionBitEstimator(quant_motion_F+0.5) - self.MotionBitEstimator(quant_motion_F-0.5)
        motion_bits = torch.sum(torch.clamp(-1.0 * torch.log(motion_p + 1e-10) / math.log(2.0), 0, 50))
        factor = 0.95
        if self.training:
            motion_bits = factor * motion_bits
        bpp = (bits + motion_bits) / num_points

        # point cloud reconstruction
        y2_recon = ME.SparseTensor(quant_y.squeeze(0), coordinate_map_key=ys2[4].coordinate_map_key,
                                   coordinate_manager=ys2[4].coordinate_manager, device=ys2[4].device)

        out2[0], out_cls2[0], target2[0], keep2[0] = self.dec1(y2_recon, ys2[2], True, residual=predicted_point2)
        out2[1], out_cls2[1], target2[1], keep2[1] = self.dec2(out2[0], ys2[1], True, 1 if self.training else 1)
        out2[2], out_cls2[2], target2[2], keep2[2] = self.dec3(out2[1], ys2[0], True, 1 if self.training else 1)
        return ys2, out2, out_cls2, target2, keep2, bpp


if __name__ == '__main__':
    from D_DPCC.dataset_lossy import *
    import os

    torch.manual_seed(0)

    d_model = 32
    seq_len = 2000
    batch_size = 1
    num_heads = 4
    k_dim = 8

    tmp_dir = os.getcwd()

    feat1 = torch.randint(low=0, high=2, size=(seq_len, 1), dtype=torch.float32)
    # print(feat1)

    coord1 = [[2 * y + 0.6 for i in range(3)] for y in range(seq_len)]
    coord1 = torch.Tensor(coord1)
    print(coord1)

    coords1, feats1 = ME.utils.sparse_collate(coords=[coord1], feats=[feat1])
    input1 = ME.SparseTensor(coordinates=coords1, features=feats1)
    print(input1.C)


    feat2 = torch.randint(low=0, high=2, size=(seq_len, 1), dtype=torch.float32)

    coord2 = [[2 * y + 1 for i in range(3)] for y in range(seq_len)]
    coord2 = torch.Tensor(coord2)

    coords2, feats2 = ME.utils.sparse_collate(coords=[coord2], feats=[feat2])
    input2 = ME.SparseTensor(coordinates=coords2, features=feats2)

    model_test = get_model(channels=8)
    _, out2, _, _, _, _ = model_test(input1, input2, device='cpu')  # device='cpu' may error in unpooling
    output = out2[-1]
    print(output.C.shape)  # output.C is final output points. 16-channel .F makes no sense



Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
tensor([[6.0000e-01, 6.0000e-01, 6.0000e-01],
        [2.6000e+00, 2.6000e+00, 2.6000e+00],
        [4.6000e+00, 4.6000e+00, 4.6000e+00],
        ...,
        [3.9946e+03, 3.9946e+03, 3.9946e+03],
        [3.9966e+03, 3.9966e+03, 3.9966e+03],
        [3.9986e+03, 3.9986e+03, 3.9986e+03]])
tensor([[   0,    0,    0,    0],
        [   0,    2,    2,    2],
        [   0,    4,    4,    4],
        ...,
        [   0, 3994, 3994, 3994],
        [   0, 3996, 3996, 3996],
        [   0, 3998, 3998, 3998]], dtype=torch.int32)
SparseTensor(
  coordinates=tensor([[   0,  948,  948,  948],
        [   0, 2692, 2692, 2692],
        [   0, 2696, 2696, 2696],
        ...,
        [   0, 1716, 1716, 1716],
        [   0, 1088, 1088, 1088],
        [   0, 3112, 3112, 3112]], dtype=torch.int32)
  features=tensor([[-0.0475, -0.0313,  



torch.Size([4000, 4])


In [2]:
import open3d as o3d  
import numpy as np
import torch
import MinkowskiEngine as ME
import os
import sys

sys.path.append(os.path.join(os.getcwd(), 'D_DPCC'))
from D_DPCC.models.model_utils import *
from D_DPCC.models.DDPCC_geo import get_model

torch.cuda.set_device(3)
torch.cuda.empty_cache()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 读取PLY文件  
pcd = o3d.io.read_point_cloud("/SSD2/chenzx/Projects/FCGS/output_gt/coffee_martini-4/frame000001/point_cloud/iteration_150/point_cloud.ply")
pcd1 = o3d.io.read_point_cloud("/SSD2/chenzx/Projects/FCGS/output_gt/coffee_martini-4/frame000002/point_cloud/iteration_150/point_cloud.ply")

p = np.asarray(pcd.points)
p1 = np.asarray(pcd1.points)

pc = torch.tensor(p[:, :3]).cuda()
xyz, point = pc, torch.ones_like(pc[:, :1])

pc1 = torch.tensor(p1[:, :3]).cuda()
xyz1, point1 = pc1, torch.ones_like(pc1[:, :1])

xyz, point, xyz1, point1 = xyz.to(torch.float32), point.to(torch.float32), xyz1.to(
                    torch.float32), point1.to(torch.float32)

print(xyz)

xyz, point = ME.utils.sparse_collate(coords=[xyz], feats=[point])
xyz1, point1 = ME.utils.sparse_collate(coords=[xyz1], feats=[point1])

print(xyz)

f1 = ME.SparseTensor(features=point, coordinates=xyz, device=device)
f2 = ME.SparseTensor(features=point1, coordinates=xyz1, device=device)


model = get_model(channels=8).to(device)
model.train()

# epoch = 0
# ys2, out2, out_cls2, targets2, keeps2, bpp = model(f1, f2, device, epoch)

# print('ys2:', ys2)
# print('out2:', out2)
# print('out_cls2:', out_cls2)
# print('targets2:', targets2)
# print('keeps2:', keeps2)



tensor([[-11.3219,  -6.9881,  19.2728],
        [  9.5368,  -3.9147,  17.4386],
        [  9.0782,  -3.8933,  17.5616],
        ...,
        [-40.4199,   6.2570,  75.2718],
        [-54.2136,  -4.0934,  93.0983],
        [ -3.6881,  -9.4607,  13.5995]], device='cuda:3')
tensor([[  0, -12,  -7,  19],
        [  0,   9,  -4,  17],
        [  0,   9,  -4,  17],
        ...,
        [  0, -41,   6,  75],
        [  0, -55,  -5,  93],
        [  0,  -4, -10,  13]], device='cuda:3', dtype=torch.int32)


get_model(
  (enc1): DownsampleLayer(
    (conv): MinkowskiConvolution(in=1, out=16, kernel_size=[3, 3, 3], stride=[1, 1, 1], dilation=[1, 1, 1])
    (down): MinkowskiConvolution(in=16, out=32, kernel_size=[2, 2, 2], stride=[2, 2, 2], dilation=[1, 1, 1])
    (block): Sequential(
      (0): InceptionResNet(
        (conv0_0): MinkowskiConvolution(in=32, out=8, kernel_size=[3, 3, 3], stride=[1, 1, 1], dilation=[1, 1, 1])
        (conv0_1): MinkowskiConvolution(in=8, out=16, kernel_size=[3, 3, 3], stride=[1, 1, 1], dilation=[1, 1, 1])
        (conv1_0): MinkowskiConvolution(in=32, out=8, kernel_size=[1, 1, 1], stride=[1, 1, 1], dilation=[1, 1, 1])
        (conv1_1): MinkowskiConvolution(in=8, out=8, kernel_size=[3, 3, 3], stride=[1, 1, 1], dilation=[1, 1, 1])
        (conv1_2): MinkowskiConvolution(in=8, out=16, kernel_size=[1, 1, 1], stride=[1, 1, 1], dilation=[1, 1, 1])
        (relu): MinkowskiReLU()
      )
      (1): InceptionResNet(
        (conv0_0): MinkowskiConvolution(in=32, out

In [7]:
from train_fcgsd import read_gaussian_file
import os

base_dir = '/SSD2/chenzx/Projects/FCGS/output_gt/cmu'
scene_list = os.listdir(base_dir)
for scene in scene_list:
    print(scene)
    for frame in range(1, 4):
        ply_path = os.path.join(base_dir, scene, f'frame{frame:06d}', 'point_cloud', 'iteration_150', 'point_cloud.ply')
        gs = read_gaussian_file(ply_path)
        print(gs._xyz.shape)
        print(frame)




piano1_0
torch.Size([87726, 3])
1
torch.Size([87726, 3])
2
torch.Size([87726, 3])
3
pose3_12
torch.Size([29431, 3])
1
torch.Size([39938, 3])
2
torch.Size([39938, 3])
3
ultimatum2_1
torch.Size([78343, 3])
1
torch.Size([78343, 3])
2
torch.Size([78343, 3])
3
ultimatum2_4
torch.Size([44494, 3])
1
torch.Size([44494, 3])
2
torch.Size([44494, 3])
3
pose4_2
torch.Size([20967, 3])
1
torch.Size([27184, 3])
2
torch.Size([27184, 3])
3
band1_1
torch.Size([84967, 3])
1
torch.Size([84967, 3])
2
torch.Size([84967, 3])
3
pose3_11
torch.Size([72467, 3])
1
torch.Size([72467, 3])
2
torch.Size([72467, 3])
3
pose3_8
torch.Size([36852, 3])
1
torch.Size([52198, 3])
2
torch.Size([52198, 3])
3
ultimatum2_5
torch.Size([26664, 3])
1
torch.Size([36609, 3])
2
torch.Size([36609, 3])
3
pose3_1
torch.Size([46091, 3])
1
torch.Size([66550, 3])
2
torch.Size([66550, 3])
3
haggling1_4
torch.Size([67953, 3])
1
torch.Size([67953, 3])
2
torch.Size([67953, 3])
3
band1_0
torch.Size([84040, 3])
1
torch.Size([84040, 3])
2
torch.S

In [6]:
# pth
import torch
# model_path = 'checkpoints/scanobjectnn-pointnext-s_best.pth'
# model = torch.load(model_path, map_location='cpu')
# print(model.keys())
# # print model and shape of parameters
# for k, v in model['model'].items():
#     print(k, v.shape if isinstance(v, torch.Tensor) else None)

model_path = '/SSD2/chenzx/Projects/FCGS/3DGStream-Res/dynerf/all_scenes-0.01-joint-stepping-0320-tmp/model.pth'

model = torch.load(model_path, map_location='cpu')
print(model)
# print model and shape of parameters
# for k, v in model['model'].items():
#     print(k, v.shape if isinstance(v, torch.Tensor) else None)



OrderedDict([('ResidualGenerator.0.weight', tensor([[ 0.1207, -0.0283,  0.0260,  ...,  0.0206,  0.1209, -0.0225],
        [ 0.0786,  0.0977,  0.1120,  ..., -0.0952, -0.0185,  0.0392],
        [-0.0100, -0.1402,  0.0972,  ..., -0.0906, -0.1300, -0.1351],
        ...,
        [-0.0441,  0.0551, -0.1312,  ...,  0.1016,  0.0305, -0.1122],
        [-0.0499, -0.0225,  0.1419,  ...,  0.0383,  0.0616,  0.1287],
        [-0.1359, -0.0636, -0.0734,  ...,  0.1279, -0.0300, -0.0170]])), ('ResidualGenerator.0.bias', tensor([ 0.0749,  0.1185, -0.0088, -0.1014,  0.0567,  0.1424,  0.1076,  0.0270,
        -0.1267,  0.0944, -0.1388, -0.1296,  0.0614, -0.1408,  0.1360,  0.1057,
        -0.0973,  0.0376,  0.0624, -0.1289, -0.0927,  0.0855, -0.0295,  0.0631,
         0.1203, -0.0535, -0.1427, -0.1302, -0.0697,  0.0504,  0.0295,  0.0583,
        -0.0372,  0.0429,  0.0999, -0.0255, -0.1202, -0.1231,  0.0500, -0.0876,
         0.0655, -0.0856, -0.0925,  0.0790, -0.0071, -0.0210, -0.1399,  0.1176,
         0.

In [7]:
# 读取pkl文件，修改参数
import torch
import torch.nn as nn
import pickle

def modify_and_save_pkl(input_file_path, output_file_path, name_mapping):
    """
    该函数用于加载一个pkl文件，修改其中参数的名称，并将修改后的数据保存为新的pkl文件。

    :param input_file_path: 输入pkl文件的路径
    :param output_file_path: 输出pkl文件的路径
    :param name_mapping: 一个字典，用于指定参数名称的映射关系，键为原名称，值为新名称
    """
    try:
        # 打开输入的pkl文件并加载数据
        with open(input_file_path, 'rb') as f:
            data = pickle.load(f)

        # 如果加载的数据是字典类型
        if isinstance(data, dict):
            new_data = {}
            for key, value in data.items():
                # 根据名称映射关系修改键名
                new_key = name_mapping.get(key, key)
                new_data[new_key] = value
        else:
            # 若数据不是字典类型，直接使用原数据
            new_data = data

        # 打开输出的pkl文件并保存修改后的数据
        with open(output_file_path, 'wb') as f:
            pickle.dump(new_data, f)

        print(f"参数名称修改完成，新文件已保存到 {output_file_path}")
    except FileNotFoundError:
        print(f"未找到输入文件: {input_file_path}")
    except Exception as e:
        print(f"处理文件时出现错误: {e}")

pkl_path = 'checkpoints/checkpoint_0.0001.pkl'

# 读取pkl文件
state_dict = torch.load(pkl_path, map_location='cpu')

# 查看state_dict的key
print(state_dict.keys())

# 示例使用
input_file = pkl_path
output_file = 'output.pkl'
# 定义参数名称的映射关系
name_mapping = {
    'old_name_1': 'new_name_1',
    'old_name_2': 'new_name_2'
}

# modify_and_save_pkl(input_file, output_file, name_mapping)


odict_keys(['ad_fe', 'ad_op', 'ad_sc', 'ad_ro', 'Encoder_mask.0.weight', 'Encoder_mask.0.bias', 'Encoder_mask.2.weight', 'Encoder_mask.2.bias', 'Encoder_mask.4.weight', 'Encoder_mask.4.bias', 'Encoder_fea.0.weight', 'Encoder_fea.0.bias', 'Encoder_fea.2.weight', 'Encoder_fea.2.bias', 'Encoder_fea.4.weight', 'Encoder_fea.4.bias', 'Encoder_fea.6.weight', 'Encoder_fea.6.bias', 'Decoder_fea.0.weight', 'Decoder_fea.0.bias', 'Decoder_fea.2.weight', 'Decoder_fea.2.bias', 'Decoder_fea.4.weight', 'Decoder_fea.4.bias', 'Decoder_fea.6.weight', 'Decoder_fea.6.bias', 'head_f_dc.0.weight', 'head_f_dc.0.bias', 'head_f_dc.2.weight', 'head_f_dc.2.bias', 'head_f_rst.0.weight', 'head_f_rst.0.bias', 'head_f_rst.2.weight', 'head_f_rst.2.bias', 'latdim_2_griddim_fea.0.weight', 'latdim_2_griddim_fea.0.bias', 'context_analyzer_fea.0.weight', 'context_analyzer_fea.0.bias', 'context_analyzer_fea.2.weight', 'context_analyzer_fea.2.bias', 'context_analyzer_fea.4.weight', 'context_analyzer_fea.4.bias', 'context_ana

### 02 基本模型尝试

In [None]:
# GDN
import torch
import torch.nn as nn

class GDN1D(nn.Module):
    def __init__(self, num_features, inverse=False, beta_min=1e-6, gamma_init=0.1):
        super(GDN1D, self).__init__()
        self.inverse = inverse
        self.beta_min = beta_min
        self.gamma_init = gamma_init
        # 可学习的参数
        self.beta = nn.Parameter(torch.ones(num_features))
        self.gamma = nn.Parameter(torch.eye(num_features) * gamma_init)

    def forward(self, x):
        # 确保 beta 不小于 beta_min
        beta = torch.max(self.beta, torch.tensor(self.beta_min, dtype=torch.float32, device=x.device))
        # 计算归一化因子
        norm_pool = torch.einsum('bi,ij->bj', x ** 2, self.gamma) + beta
        norm_pool = torch.sqrt(norm_pool)

        if self.inverse:
            output = x * norm_pool
        else:
            output = x / norm_pool

        return output

# 示例使用
if __name__ == "__main__":
    # 输入数据的特征数量
    num_features = 100
    # 创建 GDN 层
    gdn_layer = GDN1D(num_features)
    # 生成随机 1 维输入数据
    input_data = torch.randn(16, num_features)
    # 前向传播
    print(input_data.mean())
    output = gdn_layer(input_data)
    print("Input shape:", input_data.shape)
    print("Output shape:", output.shape)
    print(output.mean())

In [14]:
# MaskedConv1d

import torch
import torch.nn as nn

class MaskedConv1d(nn.Conv1d):
    r"""Masked 1D convolution implementation, mask future "unseen" pixels.
    Useful for building auto-regressive network components.

    Inherits the same arguments as a `nn.Conv1d`. Use `mask_type='A'` for the
    first layer (which also masks the "current pixel"), `mask_type='B'` for the
    following layers.
    """

    def __init__(self, *args, mask_type="A", **kwargs):
        super().__init__(*args, **kwargs)

        if mask_type not in ("A", "B"):
            raise ValueError(f'Invalid "mask_type" value "{mask_type}"')

        # 初始化掩码
        self.register_buffer("mask", torch.ones_like(self.weight.data))
        _, _, w = self.mask.size()

        # 根据掩码类型设置掩码
        center = w // 2
        if mask_type == "A":
            self.mask[:, :, center:] = 0
        else:
            self.mask[:, :, center + 1:] = 0

    def forward(self, x):
        # 应用掩码到权重上
        self.weight.data *= self.mask
        return super().forward(x)


# 示例使用
if __name__ == "__main__":
    # 输入通道数
    in_channels = 1
    # 输出通道数
    out_channels = 1
    # 卷积核大小
    kernel_size = 3
    # 步长
    stride = 1
    # 填充
    padding = 1
    # 输入序列长度
    sequence_length = 15

    # 创建 MaskedConv1d 层
    masked_conv1d_layer = MaskedConv1d(in_channels, out_channels, kernel_size, stride, padding, mask_type="A")

    # 生成随机输入数据
    input_data = torch.randn(3, in_channels, sequence_length)

    # 前向传播
    output = masked_conv1d_layer(input_data)

    
    print("Input shape:", input_data.shape)
    print("Output shape:", output.shape)

Input shape: torch.Size([3, 1, 15])
Output shape: torch.Size([3, 1, 15])


In [None]:
import torch
import numpy as np
import arithmetic
import os

chunk_size_cuda = 10000

def encoder_gaussian(x, mean, scale, Q, file_name='tmp.b'):
    # should be single dimension
    assert file_name.endswith('.b')
    assert len(x.shape) == 1
    if not isinstance(Q, torch.Tensor):
        Q = torch.tensor([Q], dtype=mean.dtype, device=mean.device).repeat(mean.shape[0])
    x_int_round = torch.round(x / Q)  # [100]
    max_value = x_int_round.max()
    min_value = x_int_round.min()

    lower = arithmetic.calculate_cdf(
        mean,
        scale,
        Q,
        min_value,
        max_value
    )

    print(lower)

    x_int_round_idx = (x_int_round - min_value).to(torch.int16)
    (byte_stream_torch, cnt_torch) = arithmetic.arithmetic_encode(
        x_int_round_idx,
        lower,
        chunk_size_cuda,
        int(lower.shape[0]),
        int(lower.shape[1])
    )
    cnt_bytes = cnt_torch.cpu().numpy().tobytes()
    byte_stream_bytes = byte_stream_torch.cpu().numpy().tobytes()
    len_cnt_bytes = len(cnt_bytes)
    with open(file_name, 'wb') as fout:
        fout.write(min_value.to(torch.float32).cpu().numpy().tobytes())
        fout.write(max_value.to(torch.float32).cpu().numpy().tobytes())
        fout.write(np.array([len_cnt_bytes]).astype(np.int32).tobytes())
        fout.write(cnt_bytes)
        fout.write(byte_stream_bytes)
    bit_len = (len(byte_stream_bytes) + len(cnt_bytes))*8 + 32 * 3
    return bit_len

if __name__ == "__main__":
    
    mean = torch.randn(100).cuda()
    scale = torch.randn(100).cuda()
    x = torch.randn(100).cuda()
    Q = 1
    bit_len = encoder_gaussian(x, mean, scale, Q)
    print(bit_len)



In [7]:
# Conv1d
import torch
import torch.nn as nn

# 定义输入数据
# 输入数据的形状为 (batch_size, in_channels, sequence_length)
batch_size = 16
in_channels = 3
sequence_length = 100
input_data = torch.randn(batch_size, in_channels, sequence_length)

# 定义 Conv1d 层
# 输入通道数为 3，输出通道数为 64，卷积核大小为 3
conv1d_layer = nn.Conv1d(in_channels=in_channels, out_channels=64, kernel_size=3)

# 进行卷积操作
output = conv1d_layer(input_data)

# 输出结果的形状
print(f"输入数据形状: {input_data.shape}")
print(f"输出数据形状: {output.shape}")

输入数据形状: torch.Size([16, 3, 100])
输出数据形状: torch.Size([16, 64, 98])


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from einops import rearrange, repeat


# 定义一个简单的 Conv 层
class Conv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Conv, self).__init__()
        self.Conv = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU()
        )

    def forward(self, x):
        return self.Conv(x)


# 定义 PointTransformer 层
class PointTransformerLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(PointTransformerLayer, self).__init__()
        self.query = Conv(in_channels, out_channels)
        self.key = Conv(in_channels, out_channels)
        self.value = Conv(in_channels, out_channels)
        self.out_proj = Conv(out_channels, out_channels)

    def forward(self, x, y=None):
        # x: [B, C, N]
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        # 计算注意力分数
        attn_scores = torch.einsum('bcn,bcm->bnm', q, k) / math.sqrt(q.shape[1])
        attn_probs = F.softmax(attn_scores, dim=-1)

        # 加权求和
        out = torch.einsum('bnm,bcm->bcn', attn_probs, v)
        out = self.out_proj(out)
        out = out + x if x.shape == out.shape else out
        return out


# 定义简单的 PointTransformer 网络
class PointTransformer(nn.Module):
    def __init__(self, in_channels, hidden_channels, num_layers):
        super(PointTransformer, self).__init__()
        self.initial_conv = Conv(in_channels, hidden_channels)
        self.transformer_layers = nn.ModuleList([
            PointTransformerLayer(hidden_channels, hidden_channels)
            for _ in range(num_layers)
        ])
        self.final_conv = Conv(hidden_channels, 10)  # 这里假设最后输出一个标量

    def forward(self, x):
        x = self.initial_conv(x)
        for layer in self.transformer_layers:
            x = layer(x)
        x = self.final_conv(x)
        # x = torch.mean(x, dim=-1)  # 全局平均池化
        return x

# 测试脚本
if __name__ == "__main__":
    # 初始化模型
    in_channels = 3  # 假设输入点云特征维度为 3
    hidden_channels = 64
    num_layers = 2
    model = PointTransformer(in_channels, hidden_channels, num_layers)

    # 生成测试数据
    batch_size = 1
    num_points = 1000
    x = torch.randn(batch_size, in_channels, num_points)

    # 前向传播
    output = model(x)

    # 打印输出形状
    print(f"输入数据形状: {x.shape}")
    print(f"输出数据形状: {output.shape}")

输入数据形状: torch.Size([1, 3, 1000])
输出数据形状: torch.Size([1, 10, 1000])


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from PointNeXt.openpoints.cpp.pointops.functions import pointops
class PointTransformerLayer(nn.Module):
    def __init__(self, in_planes, out_planes, share_planes=8, nsample=16):
        super().__init__()
        self.mid_planes = mid_planes = out_planes // 1
        self.out_planes = out_planes
        self.share_planes = share_planes
        self.nsample = nsample
        self.linear_q = nn.Linear(in_planes, mid_planes)
        self.linear_k = nn.Linear(in_planes, mid_planes)
        self.linear_v = nn.Linear(in_planes, out_planes)
        self.linear_p = nn.Sequential(nn.Linear(3, 3), 
                                      nn.BatchNorm1d(3), 
                                      nn.ReLU(inplace=True),
                                      nn.Linear(3, out_planes))
        self.linear_w = nn.Sequential(nn.BatchNorm1d(mid_planes), 
                                      nn.ReLU(inplace=True),
                                      nn.Linear(mid_planes, mid_planes // share_planes),
                                      nn.BatchNorm1d(mid_planes // share_planes), 
                                      nn.ReLU(inplace=True),
                                      nn.Linear(out_planes // share_planes, out_planes // share_planes))
        self.softmax = nn.Softmax(dim=1)

    def forward(self, p, x, o = 1) -> torch.Tensor:
        x_q, x_k, x_v = self.linear_q(x), self.linear_k(x), self.linear_v(x)  # (n, c)
        x_k = pointops.queryandgroup(self.nsample, p, p, x_k, None, o, o, use_xyz=True)  # (n, nsample, 3+c)
        x_v = pointops.queryandgroup(self.nsample, p, p, x_v, None, o, o, use_xyz=False)  # (n, nsample, c)
        p_r, x_k = x_k[:, :, 0:3], x_k[:, :, 3:]
        for i, layer in enumerate(self.linear_p): 
            p_r = layer(p_r.transpose(1, 2).contiguous()).transpose(1,2).contiguous() if i == 1 else layer(p_r)  # (n, nsample, c)
        w = x_k - x_q.unsqueeze(1) + p_r.view(p_r.shape[0], p_r.shape[1], self.out_planes // self.mid_planes, self.mid_planes).sum(2)  # (n, nsample, c)
        for i, layer in enumerate(self.linear_w): 
            w = layer(w.transpose(1, 2).contiguous()).transpose(1,2).contiguous() if i % 3 == 0 else layer(w)
        w = self.softmax(w)  # (n, nsample, c)
        n, nsample, c = x_v.shape
        s = self.share_planes
        x = ((x_v + p_r).view(n, nsample, s, c // s) * w.unsqueeze(2)).sum(1).view(n, c)
        return x


batch_size = 1
num_points = 1024
in_channels = 32
out_channels = 64

# 创建随机点云数据
points = torch.randn(batch_size * num_points, 3)  # 点云坐标
features = torch.randn(batch_size * num_points, in_channels)  # 点云特征

# 初始化transformer层
transformer = PointTransformerLayer(
    in_planes=in_channels,
    out_planes=out_channels,
    share_planes=8,
    nsample=16
)

# 前向传播
output = transformer(points, features, torch.tensor(1, dtype=torch.int32) )

# 打印输出形状
print(f"Input points shape: {points.shape}")
print(f"Input features shape: {features.shape}")
print(f"Features device: {features.device}")
print(f"Points device: {points.device}")
print(f"Output features shape: {output.shape}")

# 可视化一些统计信息
print(f"Output mean: {output.mean().item():.4f}")
print(f"Output std: {output.std().item():.4f}")

TypeError: knnquery_cuda(): incompatible function arguments. The following argument types are supported:
    1. (arg0: int, arg1: int, arg2: int, arg3: int, arg4: torch.Tensor, arg5: torch.Tensor, arg6: torch.Tensor, arg7: torch.Tensor) -> None

Invoked with: 1024, 16, tensor([[ 0.7096,  0.6279, -0.2932],
        [-0.3459,  0.0974, -0.2566],
        [-1.5184, -0.1631, -0.8768],
        ...,
        [-1.4346,  0.1771, -0.4979],
        [ 0.1357,  0.7864, -0.2208],
        [ 0.2962,  0.9287, -0.2482]]), tensor([[ 0.7096,  0.6279, -0.2932],
        [-0.3459,  0.0974, -0.2566],
        [-1.5184, -0.1631, -0.8768],
        ...,
        [-1.4346,  0.1771, -0.4979],
        [ 0.1357,  0.7864, -0.2208],
        [ 0.2962,  0.9287, -0.2482]]), tensor(1, dtype=torch.int32), tensor(1, dtype=torch.int32), tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:0', dtype=torch.int32), tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [3]:
# tinycudann
import tinycudann as tcnn
import numpy as np
import torch
from model.grid_utils import normalize_xyz, _grid_creater, _grid_encoder, FreqEncoder

encoding_cofig = {
	"otype": "Frequency", 
	"n_frequencies": 4  
}

encoder = tcnn.Encoding(3, encoding_cofig)
input = torch.rand(6, 3).cuda()
output = encoder(input)
print(input)
print(output.shape)

encoder2 = FreqEncoder(3, 4)
output2 = encoder2(input)
print(output2.shape)


tensor([[0.8700, 0.4158, 0.1203],
        [0.4343, 0.6499, 0.9589],
        [0.2377, 0.6179, 0.2902],
        [0.1266, 0.2256, 0.4255],
        [0.0838, 0.7619, 0.6959],
        [0.1637, 0.5286, 0.9121]], device='cuda:0')
torch.Size([6, 24])
torch.Size([6, 27])


In [2]:
from model.subnet import DownsampleLayer
import torch

# test
model = DownsampleLayer(3, 16, 32, 3)
# print(model)
print(model(torch.randn(1, 3, 200000)).shape)

torch.Size([1, 32, 200000])


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch3d.ops import knn_points

torch.cuda.set_device(1)

class PointNetSetAbstraction(nn.Module):
    def __init__(self, npoint, k, in_channel, mlp):
        """
        npoint: 采样点的数量，如果设置为 None 则保留所有点
        k: 每个采样点的邻域内点的个数
        in_channel: 输入特征通道数
        mlp: 列表，定义 MLP 每层的输出通道数 (注意：输入会拼接点坐标的差值)
        """
        super(PointNetSetAbstraction, self).__init__()
        self.npoint = npoint
        self.k = k
        layers = []
        last_channel = in_channel + 3  # 点坐标差分信息
        for out_channel in mlp:
            layers.append(nn.Conv2d(last_channel, out_channel, kernel_size=1))
            layers.append(nn.BatchNorm2d(out_channel))
            layers.append(nn.ReLU())
            last_channel = out_channel
        self.mlp = nn.Sequential(*layers)

    def forward(self, xyz, points):
        # xyz: (B, N, 3)
        # points: (B, N, D) 或者 None
        B, N, _ = xyz.size()
        if self.npoint is None or self.npoint >= N:
            new_xyz = xyz
        else:
            # 简单随机采样中心点
            indices = torch.randperm(N)[:self.npoint]
            new_xyz = xyz[:, indices, :]

        # 使用 PyTorch3D 的 knn_points 进行 k 近邻搜索
        # new_xyz: (B, npoint, 3)， xyz: (B, N, 3)
        knn_result = knn_points(new_xyz, xyz, K=self.k)
        idx = knn_result.idx  # (B, npoint, k)

        # 采集邻域点坐标，并计算相对中心点的差值
        grouped_xyz = torch.gather(
            xyz.unsqueeze(1).expand(-1, self.npoint, -1, -1),
            2,
            idx.unsqueeze(-1).expand(-1, -1, -1, 3)
        )  # (B, npoint, k, 3)
        grouped_xyz_diff = grouped_xyz - new_xyz.unsqueeze(2)

        if points is not None:
            # 采集邻域内的其他特征
            grouped_points = torch.gather(
                points.unsqueeze(1).expand(-1, self.npoint, -1, -1),
                2,
                idx.unsqueeze(-1).expand(-1, -1, -1, points.size(-1))
            )  # (B, npoint, k, D)
            new_points = torch.cat([grouped_xyz_diff, grouped_points], dim=-1)  # (B, npoint, k, 3+D)
        else:
            new_points = grouped_xyz_diff  # (B, npoint, k, 3)

        # 转换维度以适配 2D 卷积：(B, D, npoint, k)
        new_points = new_points.permute(0, 3, 1, 2)
        new_points = self.mlp(new_points)  # (B, mlp[-1], npoint, k)
        # 对邻域内的特征进行 max pooling
        new_points = torch.max(new_points, dim=3)[0]  # (B, mlp[-1], npoint)
        new_points = new_points.permute(0, 2, 1)  # (B, npoint, mlp[-1])
        return new_xyz, new_points

class PointNetPP(nn.Module):
    def __init__(self, num_classes=40):
        super(PointNetPP, self).__init__()
        # 第一层 SA，无初始特征
        self.sa1 = PointNetSetAbstraction(npoint=512, k=32, in_channel=0, mlp=[64, 64, 128])
        # 第二层 SA，接收上一层的特征
        self.sa2 = PointNetSetAbstraction(npoint=128, k=32, in_channel=128, mlp=[128, 128, 256])
        # 最后的全连接层用于分类
        self.fc1 = nn.Linear(256, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.4)
        self.fc2 = nn.Linear(256, num_classes)
    
    def forward(self, xyz):
        # xyz: (B, N, 3)
        B, N, _ = xyz.size()
        # 第一层 SA
        l1_xyz, l1_points = self.sa1(xyz, None)
        # 第二层 SA
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
        # 全局特征提取 (对所有点取最大值)
        # global_feat = torch.max(l2_points, dim=1)[0]
        # x = F.relu(self.bn1(self.fc1(global_feat)))
        # x = self.dropout1(x)
        # x = self.fc2(x)
        x = l2_points
        return x

# 测试代码
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = PointNetPP(num_classes=40).to(device)
    # 生成随机点云数据, B=16, N=1024, 3维坐标
    points = torch.rand(16, 100000, 3).to(device)
    outputs = model(points)
    print("输出 shape:", outputs.shape)  # 预期为 (16, 40)

输出 shape: torch.Size([16, 128, 256])


In [17]:
import torch
from pytorch3d.ops import sample_farthest_points
from pytorch3d.ops import knn_points, ball_query


# 生成一个随机的点云，形状为 (batch_size, num_points, 3)
batch_size = 1
num_points = 10
points = torch.randn(batch_size, num_points, 3)

# 定义要采样的点数
num_samples = 5

# 执行最远点采样
sampled_points, _ = sample_farthest_points(points, K=num_samples)

print("原始点云形状:", points)
print("采样后的点云形状:", sampled_points)

# feature = torch.randn(num_points, 7)
# feature = feature.to(points.device)
# # 计算每个采样点的特征
# sampled_features = feature[_, :]
# print("采样后的特征形状:", sampled_features.shape)

# use knn to find the nearest point

# 由采样点构建查询点
query_points = sampled_points
# 计算 k 最近邻
dis = knn_points(query_points, points, K=2)
# dis = ball_query(query_points, points, K=2)
print("最近邻点索引形状:", dis)
# # knn_result.idx 是最近邻点的索引
# nearest_points = points[knn_result.idx]  # (batch_size, num_samples, 3)
# print("最近邻点形状:", nearest_points.shape)
# # 计算每个采样点的特征
# sampled_features = feature[knn_result.idx.squeeze(1), :]
# print("采样后的特征形状:", sampled_features.shape)



原始点云形状: tensor([[[-1.1960,  1.1026,  0.8442],
         [ 1.8299, -0.9503, -1.0362],
         [ 0.8276,  0.1713,  0.1024],
         [ 1.2512, -1.5140, -1.0679],
         [ 0.5480, -1.3171,  1.3830],
         [ 0.1500,  1.4661, -0.9078],
         [-0.9683, -0.1326,  0.6626],
         [ 0.7243, -1.3832,  0.2012],
         [-0.5615,  0.0430,  0.1323],
         [ 0.1477,  0.9632,  0.2935]]])
采样后的点云形状: tensor([[[-1.1960,  1.1026,  0.8442],
         [ 1.8299, -0.9503, -1.0362],
         [ 0.5480, -1.3171,  1.3830],
         [ 0.1500,  1.4661, -0.9078],
         [ 0.8276,  0.1713,  0.1024]]])
最近邻点索引形状: KNN(dists=tensor([[[0.0000, 1.6104],
         [0.0000, 0.6536],
         [0.0000, 1.4320],
         [0.0000, 1.6960],
         [0.0000, 1.1260]]]), idx=tensor([[[0, 6],
         [1, 3],
         [4, 7],
         [5, 9],
         [2, 9]]]), knn=None)


In [4]:
import torch
import torch.nn.functional as F

# 创建一个 N*m 的张量
tensor = torch.tensor([[1.0, 2.0, 3.0],
                       [4.0, 5.0, 14.0],
                       [7.0, 8.0, 9.0]], dtype=torch.float32)

# 对张量每列应用 softmax 函数
softmax_tensor = F.softmax(tensor, dim=1)

print("原始张量:")
print(tensor)
print("应用 softmax 后的张量:")
print(softmax_tensor)

# normalize
import torch
import torch.nn.functional as F

# 创建一个 N*m 的张量
tensor = torch.tensor([[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0],
                       [7.0, 8.0, 9.0]], dtype=torch.float32)

# 对张量每行应用 L2 归一化
normalized_tensor = F.normalize(tensor, p=1, dim=1)

print("原始张量:")
print(tensor)
print("应用 L2 归一化后的张量:")
print(normalized_tensor)

    

原始张量:
tensor([[ 1.,  2.,  3.],
        [ 4.,  5., 14.],
        [ 7.,  8.,  9.]])
应用 softmax 后的张量:
tensor([[9.0031e-02, 2.4473e-01, 6.6524e-01],
        [4.5392e-05, 1.2339e-04, 9.9983e-01],
        [9.0031e-02, 2.4473e-01, 6.6524e-01]])
原始张量:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
应用 L2 归一化后的张量:
tensor([[0.1667, 0.3333, 0.5000],
        [0.2667, 0.3333, 0.4000],
        [0.2917, 0.3333, 0.3750]])


In [8]:
import torch

# 假设 N 为 5，这里可以根据实际情况修改
N = 1
a = torch.randn(N, 2)
b = torch.randn(N, 3)

# 对 a 进行维度扩展
a_expanded = a.unsqueeze(1)  # 形状变为 (N, 1, 7)
b_expanded = b.unsqueeze(-1)  # 形状变为 (N, 10, 1)

# 执行逐元素相乘
result = a_expanded * b_expanded

print("结果张量的形状:", result.shape)
print(a, b, result)

结果张量的形状: torch.Size([1, 3, 2])
tensor([[-0.4584,  1.8688]]) tensor([[ 0.2211,  0.7348, -0.1831]]) tensor([[[-0.1014,  0.4132],
         [-0.3368,  1.3732],
         [ 0.0839, -0.3421]]])


In [1]:
import torch

# 假设这是你的原始数据
g_xyz = torch.randn(10, 3)
g_fea = torch.randn(10, 56)

# 打乱数据
shuffled_indices = torch.randperm(g_xyz.size(0))
shuffled_g_xyz = g_xyz[shuffled_indices]
shuffled_g_fea = g_fea[shuffled_indices]

# 还原数据
# 创建一个与 shuffled_indices 大小相同的索引数组，用于记录原始位置
original_indices = torch.zeros_like(shuffled_indices)
original_indices[shuffled_indices] = torch.arange(shuffled_indices.size(0))

# 使用 original_indices 还原数据
restored_g_xyz = shuffled_g_xyz[original_indices]
restored_g_fea = shuffled_g_fea[original_indices]

# 验证还原后的数据是否与原始数据相同
print(torch.allclose(g_xyz, restored_g_xyz))
print(torch.allclose(g_fea, restored_g_fea))

True
True
