In [None]:
import torch
import torch.optim as optim
from model.FCGS_model import FCGS

def fidelity_loss(outputs, gt):
    # 定义fidelity loss
    return torch.nn.functional.mse_loss(outputs, gt)

def rate_loss():
    # 定义rate loss
    return torch.tensor(0.0)

def loss_fun(outputs, gt):
    return fidelity_loss(outputs, gt) + rate_loss()

model = FCGS()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

iterations = 1000
for i in range(iterations):
    model.train()
    inputs = None  # 这里需要加载输入数据
    gt = None  # 这里需要加载ground truth数据
    outputs = model(inputs)
    loss = loss_fun(outputs, gt)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 100 == 0:
        print(f'Iteration {i}, Loss: {loss.item()}')

# 保存模型
torch.save(model.state_dict(), 'fcgs_model.pth')

In [1]:
# GDN
import torch
import torch.nn as nn

class GDN1D(nn.Module):
    def __init__(self, num_features, inverse=False, beta_min=1e-6, gamma_init=0.1):
        super(GDN1D, self).__init__()
        self.inverse = inverse
        self.beta_min = beta_min
        self.gamma_init = gamma_init
        # 可学习的参数
        self.beta = nn.Parameter(torch.ones(num_features))
        self.gamma = nn.Parameter(torch.eye(num_features) * gamma_init)

    def forward(self, x):
        # 确保 beta 不小于 beta_min
        beta = torch.max(self.beta, torch.tensor(self.beta_min, dtype=torch.float32, device=x.device))
        # 计算归一化因子
        norm_pool = torch.einsum('bi,ij->bj', x ** 2, self.gamma) + beta
        norm_pool = torch.sqrt(norm_pool)

        if self.inverse:
            output = x * norm_pool
        else:
            output = x / norm_pool

        return output

# 示例使用
if __name__ == "__main__":
    # 输入数据的特征数量
    num_features = 100
    # 创建 GDN 层
    gdn_layer = GDN1D(num_features)
    # 生成随机 1 维输入数据
    input_data = torch.randn(16, num_features)
    # 前向传播
    print(input_data.mean())
    output = gdn_layer(input_data)
    print("Input shape:", input_data.shape)
    print("Output shape:", output.shape)
    print(output.mean())

tensor(0.0196)
Input shape: torch.Size([16, 100])
Output shape: torch.Size([16, 100])
tensor(0.0167, grad_fn=<MeanBackward0>)


In [2]:
import torch
import torch.nn as nn

class MaskedConv1d(nn.Conv1d):
    r"""Masked 1D convolution implementation, mask future "unseen" pixels.
    Useful for building auto-regressive network components.

    Inherits the same arguments as a `nn.Conv1d`. Use `mask_type='A'` for the
    first layer (which also masks the "current pixel"), `mask_type='B'` for the
    following layers.
    """

    def __init__(self, *args, mask_type="A", **kwargs):
        super().__init__(*args, **kwargs)

        if mask_type not in ("A", "B"):
            raise ValueError(f'Invalid "mask_type" value "{mask_type}"')

        # 初始化掩码
        self.register_buffer("mask", torch.ones_like(self.weight.data))
        _, _, w = self.mask.size()

        # 根据掩码类型设置掩码
        center = w // 2
        if mask_type == "A":
            self.mask[:, :, center:] = 0
        else:
            self.mask[:, :, center + 1:] = 0

    def forward(self, x):
        # 应用掩码到权重上
        self.weight.data *= self.mask
        return super().forward(x)


# 示例使用
if __name__ == "__main__":
    # 输入通道数
    in_channels = 1
    # 输出通道数
    out_channels = 1
    # 卷积核大小
    kernel_size = 3
    # 步长
    stride = 1
    # 填充
    padding = 1
    # 输入序列长度
    sequence_length = 15

    # 创建 MaskedConv1d 层
    masked_conv1d_layer = MaskedConv1d(in_channels, out_channels, kernel_size, stride, padding, mask_type="A")

    # 生成随机输入数据
    input_data = torch.randn(in_channels, sequence_length)

    # 前向传播
    output = masked_conv1d_layer(input_data)

    
    print("Input shape:", input_data.shape)
    print("Output shape:", output.shape)

Input shape: torch.Size([1, 15])
Output shape: torch.Size([1, 15])


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import tinycudann as tcnn
from gaussian_renderer import GaussianModel
from model.encodings_cuda import STE_multistep, encoder_gaussian, decoder_gaussian, encoder_gaussian_chunk, decoder_gaussian_chunk, \
    encoder_gaussian_mixed, decoder_gaussian_mixed, encoder_gaussian_mixed_chunk, decoder_gaussian_mixed_chunk,\
    encoder_factorized, decoder_factorized, encoder_factorized_chunk, decoder_factorized_chunk
from model.entropy_models import Entropy_factorized


class FCGS_D(nn.Module):
    def __init__(self, args):
        super(FCGS_D, self).__init__()
        self.args = args
        self.Q = 1 # args.Q
        self.gof_size = 10 # args.gof_size
        self.gaussian_position_dim = 3
        self.gaussian_feature_dim = 56 # args.gaussian_feature_dim
        self.motion_dim = 256  # args.motion_dim # 256
        self.hidden_dim = 256 # args.hidden_dim # 256
        self.lat_dim = 256 # args.lat_dim # 256
        self.GDN = GDN1D
        self.init_test_gaussians()

        self.FeatureExtractor = nn.Sequential(
            nn.Linear(self.gaussian_feature_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.lat_dim)
        )

        self.MotionEstimator = None

        self.MotionEncoder = nn.Sequential(
            nn.Linear(self.motion_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.lat_dim),
        )

        self.MotionDecoder = nn.Sequential(
            nn.Linear(self.lat_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.motion_dim),
        )

        self.MotionPriorEncoder = nn.Sequential(
            nn.Linear(self.lat_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.lat_dim),
        )

        self.MotionPriorDecoder = nn.Sequential(
            nn.Linear(self.lat_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.GDN(self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.lat_dim),
        )

        self.AutoRegressiveMotion = nn.Sequential(
            MaskedConv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, mask_type="A"),
            nn.LeakyReLU(inplace=True),
            MaskedConv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, mask_type="A"),
            nn.LeakyReLU(inplace=True),
            MaskedConv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, mask_type="A"),
            nn.LeakyReLU(inplace=True)
        )

        self.EntropyParametersMotion = nn.Sequential(
            nn.Linear(self.lat_dim * 2, self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            nn.LeakyReLU(inplace=True),
            nn.Linear(self.hidden_dim, 2)
        )

        self.EntropyFactorizedMotion = Entropy_factorized(self.lat_dim, self.Q)


    def quantize(self, x, Q=1, test_flag=False):
        if test_flag:
            x_q = STE_multistep.apply(x, Q)
        else:
            # add uniform noise to simulate quantization while training
            x_q = x + torch.empty_like(x).uniform_(-0.5, 0.5) * Q  
        return self.clamp(x_q, Q)
    
    def clamp(self, x, Q):
        x[torch.isnan(x)] = 0 
        x_mean = x.mean().detach()
        x_min = x_mean - 15_000 * Q
        x_max = x_mean + 15_000 * Q
        x = torch.clamp(x, min=x_min.detach(), max=x_max.detach())
        return x

    def MotionCompensation(self, lat_motion, pre_gaussians):
        ...

    def refresh_gaussians(self, pre_gaussian_path, cur_gaussian_path, sh_degree = 3):
        self.cur_gaussians = self.read_gaussian_file(cur_gaussian_path, sh_degree)
        self.pre_gaussians = self.read_gaussian_file(pre_gaussian_path, sh_degree)

    def read_gaussian_file(self, file_path, sh_degree = 3):
        with torch.no_grad():
            gaussians = GaussianModel(sh_degree)
            gaussians.load_ply(file_path)
        return gaussians
    
    def init_test_gaussians(self, sh_degree = 3):
        self.cur_gaussians = self.read_gaussian_file('/SDD_D/zwk/output/cook_spinach-3-ori/init_3dgs.ply')

    def compress(self):
        ...

    def decompress(self):
        ...
        
    def forward(self):
        est_motion = self.MotionEstimator() # I-NGP in 3DGStream / MLP in Deformable-GS / Hexplane in 4D-GS
        y_motion = self.MotionEncoder(est_motion) # N, latent_dim
        y_hat_motion = self.quantize(y_motion, Q=1, test_flag=False)
        ctx_params_motion = self.AutoRegressiveMotion(y_hat_motion) # N, latent_dim

        z_motion = self.MotionPriorEncoder(y_motion) # N, latent_dim
        z_hat_motion = self.quantize(z_motion, Q=1, test_flag=False)
        params_motion = self.MotionPriorDecoder(z_hat_motion) # N, latent_dim

        distribution_motion = self.EntropyParametersMotion(torch.cat((y_motion, z_motion), dim=1)) # N, 2
        mean_motion, std_motion = torch.chunk(distribution_motion, 2, dim=1)
        std_motion = F.softplus(std_motion) + 1e-6

        # TODO 精细化
        bits_motion = encoder_gaussian_chunk(y_hat_motion, mean_motion, std_motion, self.Q, 'motion.b')
        bits_prior_motion = encoder_factorized_chunk(z_hat_motion, self.EntropyFactorizedMotion._logits_cumulative, self.Q, 'motion_prior.b')

        return ctx_params_motion, params_motion
        

if __name__ == "__main__":
    args = None

    init_3dgs = '/SDD_D/zwk/output/cook_spinach-3-ori/init_3dgs.ply'
    ntc = '/SDD_D/zwk/output/cook_spinach-3-ori/NTCs/NTC_000000.pth'
    model = FCGS_D(args)
    # 查看模型结构
    # print(model)

    # ctx_params_motion, params_motion = model()
    # print(ctx_params_motion.shape, params_motion.shape)
    # print(ctx_params_motion.mean(), params_motion.mean())

FCGS_D(
  (cur_gaussians): GaussianModel()
  (FeatureExtractor): Sequential(
    (0): Linear(in_features=56, out_features=256, bias=True)
    (1): GDN1D()
    (2): LeakyReLU(negative_slope=0.01, inplace=True)
    (3): Linear(in_features=256, out_features=256, bias=True)
    (4): GDN1D()
    (5): LeakyReLU(negative_slope=0.01, inplace=True)
    (6): Linear(in_features=256, out_features=256, bias=True)
  )
  (MotionEncoder): Sequential(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): GDN1D()
    (2): LeakyReLU(negative_slope=0.01, inplace=True)
    (3): Linear(in_features=256, out_features=256, bias=True)
    (4): GDN1D()
    (5): LeakyReLU(negative_slope=0.01, inplace=True)
    (6): Linear(in_features=256, out_features=256, bias=True)
  )
  (MotionDecoder): Sequential(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): GDN1D()
    (2): LeakyReLU(negative_slope=0.01, inplace=True)
    (3): Linear(in_features=256, out_features=256, bias=True)
   

In [6]:
import torch
import numpy as np
import arithmetic
import os

chunk_size_cuda = 10000

def encoder_gaussian(x, mean, scale, Q, file_name='tmp.b'):
    # should be single dimension
    assert file_name.endswith('.b')
    assert len(x.shape) == 1
    if not isinstance(Q, torch.Tensor):
        Q = torch.tensor([Q], dtype=mean.dtype, device=mean.device).repeat(mean.shape[0])
    x_int_round = torch.round(x / Q)  # [100]
    max_value = x_int_round.max()
    min_value = x_int_round.min()

    lower = arithmetic.calculate_cdf(
        mean,
        scale,
        Q,
        min_value,
        max_value
    )

    print(lower)

    x_int_round_idx = (x_int_round - min_value).to(torch.int16)
    (byte_stream_torch, cnt_torch) = arithmetic.arithmetic_encode(
        x_int_round_idx,
        lower,
        chunk_size_cuda,
        int(lower.shape[0]),
        int(lower.shape[1])
    )
    cnt_bytes = cnt_torch.cpu().numpy().tobytes()
    byte_stream_bytes = byte_stream_torch.cpu().numpy().tobytes()
    len_cnt_bytes = len(cnt_bytes)
    with open(file_name, 'wb') as fout:
        fout.write(min_value.to(torch.float32).cpu().numpy().tobytes())
        fout.write(max_value.to(torch.float32).cpu().numpy().tobytes())
        fout.write(np.array([len_cnt_bytes]).astype(np.int32).tobytes())
        fout.write(cnt_bytes)
        fout.write(byte_stream_bytes)
    bit_len = (len(byte_stream_bytes) + len(cnt_bytes))*8 + 32 * 3
    return bit_len

if __name__ == "__main__":
    
    mean = torch.randn(100).cuda()
    scale = torch.randn(100).cuda()
    x = torch.randn(100).cuda()
    Q = 1
    bit_len = encoder_gaussian(x, mean, scale, Q)
    print(bit_len)



tensor([[7.5018e-03, 3.6340e-02, 1.2356e-01, 3.0157e-01, 5.4679e-01, 7.7488e-01,
         9.1811e-01, 9.7882e-01],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 1.0000e+00],
        [2.3675e-24, 1.3898e-14, 1.6869e-07, 4.7385e-03, 4.6516e-01, 9.9223e-01,
         1.0000e+00, 1.0000e+00],
        [2.9773e-36, 2.1199e-21, 1.1863e-10, 5.9014e-04, 4.3953e-01, 9.9836e-01,
         1.0000e+00, 1.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         1.0000e+00, 1.0000e+00],
        [6.0210e-39, 1.0779e-21, 1.0287e-09, 6.4013e-03, 8.4490e-01, 1.0000e+00,
         1.0000e+00, 1.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 1.0000e+00],
        [0.0000e+00, 0.0000e+00, 2.2384e-41, 1.2593e-21, 1.5149e-08, 5.4975e-02,

In [9]:
# 读取pkl文件，修改参数
import torch
import torch.nn as nn
import pickle

def modify_and_save_pkl(input_file_path, output_file_path, name_mapping):
    """
    该函数用于加载一个pkl文件，修改其中参数的名称，并将修改后的数据保存为新的pkl文件。

    :param input_file_path: 输入pkl文件的路径
    :param output_file_path: 输出pkl文件的路径
    :param name_mapping: 一个字典，用于指定参数名称的映射关系，键为原名称，值为新名称
    """
    try:
        # 打开输入的pkl文件并加载数据
        with open(input_file_path, 'rb') as f:
            data = pickle.load(f)

        # 如果加载的数据是字典类型
        if isinstance(data, dict):
            new_data = {}
            for key, value in data.items():
                # 根据名称映射关系修改键名
                new_key = name_mapping.get(key, key)
                new_data[new_key] = value
        else:
            # 若数据不是字典类型，直接使用原数据
            new_data = data

        # 打开输出的pkl文件并保存修改后的数据
        with open(output_file_path, 'wb') as f:
            pickle.dump(new_data, f)

        print(f"参数名称修改完成，新文件已保存到 {output_file_path}")
    except FileNotFoundError:
        print(f"未找到输入文件: {input_file_path}")
    except Exception as e:
        print(f"处理文件时出现错误: {e}")

pkl_path = 'checkpoints/checkpoint_0.0001.pkl'

# 读取pkl文件
state_dict = torch.load(pkl_path, map_location='cpu')

# 查看state_dict的key
print(state_dict.keys())

# 示例使用
input_file = pkl_path
output_file = 'output.pkl'
# 定义参数名称的映射关系
name_mapping = {
    'old_name_1': 'new_name_1',
    'old_name_2': 'new_name_2'
}

# modify_and_save_pkl(input_file, output_file, name_mapping)


odict_keys(['ad_fe', 'ad_op', 'ad_sc', 'ad_ro', 'Encoder_mask.0.weight', 'Encoder_mask.0.bias', 'Encoder_mask.2.weight', 'Encoder_mask.2.bias', 'Encoder_mask.4.weight', 'Encoder_mask.4.bias', 'Encoder_fea.0.weight', 'Encoder_fea.0.bias', 'Encoder_fea.2.weight', 'Encoder_fea.2.bias', 'Encoder_fea.4.weight', 'Encoder_fea.4.bias', 'Encoder_fea.6.weight', 'Encoder_fea.6.bias', 'Decoder_fea.0.weight', 'Decoder_fea.0.bias', 'Decoder_fea.2.weight', 'Decoder_fea.2.bias', 'Decoder_fea.4.weight', 'Decoder_fea.4.bias', 'Decoder_fea.6.weight', 'Decoder_fea.6.bias', 'head_f_dc.0.weight', 'head_f_dc.0.bias', 'head_f_dc.2.weight', 'head_f_dc.2.bias', 'head_f_rst.0.weight', 'head_f_rst.0.bias', 'head_f_rst.2.weight', 'head_f_rst.2.bias', 'latdim_2_griddim_fea.0.weight', 'latdim_2_griddim_fea.0.bias', 'context_analyzer_fea.0.weight', 'context_analyzer_fea.0.bias', 'context_analyzer_fea.2.weight', 'context_analyzer_fea.2.bias', 'context_analyzer_fea.4.weight', 'context_analyzer_fea.4.bias', 'context_ana

tensor([-0.2199,  0.0228, -0.2152, -0.1741, -0.0614, -0.0755, -0.1351, -0.0658,
        -0.1244, -0.4855, -0.1434, -0.1962, -0.0422, -0.4207, -0.0133, -0.1403,
        -0.2316, -0.2603, -0.1911, -0.0319, -0.0466, -0.0643,  0.0294, -0.0397,
        -0.2160,  0.0190, -0.1236,  0.0410,  0.1044, -0.0074, -0.1154, -0.0603,
        -0.0627, -0.0903, -0.1829, -0.4057, -0.2135,  0.0430,  0.0401, -0.3041,
         0.0220, -0.2577, -0.1143, -0.3101, -0.1599, -0.3278, -0.0194, -0.2106,
        -0.1497, -0.1500,  0.1130, -0.0709, -0.2816, -0.1030,  0.0338, -0.1919,
        -0.5007, -0.0663, -0.0362, -0.2168, -0.1294,  0.0091, -0.0842, -0.0747,
        -0.1157, -0.1401,  0.0516, -0.1424, -0.0477, -0.0292,  0.0224, -0.1151,
         0.0307, -0.0398, -0.3047,  0.0506, -0.0277, -0.3021, -0.0702, -0.5035,
        -0.0821,  0.0174, -0.3068,  0.0021, -0.2357, -0.1727, -0.1807, -0.1134,
         0.0648, -0.1752,  0.0254, -0.1737, -0.0982, -0.1247, -0.1661, -0.0044,
         0.1510, -0.1707,  0.1030, -0.08