In [2]:
import numpy as np

def load_vertices(filepath):
    """
    Load vertex data from a whitespace-delimited text file.

    Each line of the file is expected to hold floating-point values separated
    by whitespace (for example: ``x y z w``).

    Args:
        filepath (str): Path of the file to read.

    Returns:
        numpy.ndarray | None: A 2-D float64 array with one row per vertex,
        e.g. shape (N, 4) for N vertices.  ``None`` is returned (after a
        message has been printed) when the file does not exist or cannot be
        parsed.
    """
    try:
        # ndmin=2 keeps the result two-dimensional even when the file holds a
        # single vertex.  Without it np.loadtxt collapses that case to a 1-D
        # array of shape (4,), so a caller reading shape[0] would report four
        # vertices instead of one.
        vertices = np.loadtxt(filepath, dtype=np.float64, ndmin=2)

        # Sanity check only: an unexpected layout is reported, not rejected.
        if vertices.ndim != 2 or vertices.shape[1] != 4:
            print(f"警告: 读取的数据形状为 {vertices.shape}，可能不是预期的 (N, 4) 格式。")

        return vertices
    except FileNotFoundError:
        print(f"错误: 文件未找到 - {filepath}")
        return None
    except Exception as e:
        # Deliberately broad: this notebook helper reports the problem and
        # signals failure through the None return value instead of raising.
        print(f"读取文件时发生错误: {e}")
        return None

# Location of the vertex file.
# NOTE(review): machine-specific absolute Windows path; the cell only works
# where this exact file exists.
file_path = "C:\\Users\\31878\\Desktop\\standardVertex.txt"

# Load the vertex data with the helper defined above.
vertex_data = load_vertices(file_path)

# Report what was loaded (skipped when loading failed and None came back).
if vertex_data is not None:
    print(f"成功加载 {vertex_data.shape[0]} 个顶点。")
    print(f"数据形状: {vertex_data.shape}")
    print("\n前几个顶点的数据:")
    print(vertex_data[:5]) # Show the first 5 rows as a quick check

# On success vertex_data is a NumPy array holding every vertex from the file;
# it is None when loading failed.

成功加载 14218 个顶点。
数据形状: (14218, 4)

前几个顶点的数据:
[[0.025637 1.416699 0.11155  1.      ]
 [0.021846 1.418134 0.112966 1.      ]
 [0.023548 1.422539 0.112968 1.      ]
 [0.026068 1.425363 0.112705 1.      ]
 [0.029768 1.41646  0.110247 1.      ]]


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Default BatchNorm momentum. TensorFlow usually specifies a decay rate while
# PyTorch takes a momentum; the two are related by momentum = 1 - decay.
# Per the original author's note, the original PointNet implementation
# typically uses a decay of 0.9 or 0.99, hence 0.1 (= 1 - 0.9) here.
DEFAULT_BN_MOMENTUM = 0.1

class TokenGenerator(nn.Module):
    """Expand one feature vector per sample into a sequence of token embeddings.

    Every token position owns an independent two-layer MLP
    (``in_dim -> 512 -> token_dim``); the same input vector is pushed through
    each of them and the results are stacked along a new token axis.

    Args:
        in_dim (int): Width of the input feature vector.
        num_tokens (int): Number of tokens (and therefore MLPs) to create.
        token_dim (int): Width of every generated token.
    """

    def __init__(self, in_dim=1024, num_tokens=77, token_dim=768):
        super().__init__()
        heads = []
        for _ in range(num_tokens):
            heads.append(
                nn.Sequential(
                    nn.Linear(in_dim, 512),
                    nn.ReLU(),
                    nn.Linear(512, token_dim),
                )
            )
        self.token_embeddings = nn.ModuleList(heads)

    def forward(self, x):
        """Map ``x`` of shape (B, in_dim) to tokens of shape (B, num_tokens, token_dim)."""
        per_token = []
        for head in self.token_embeddings:
            per_token.append(head(x))  # each entry: (B, token_dim)
        return torch.stack(per_token, dim=1)


class TNet(nn.Module):
    """
    Transform network (T-Net) for PointNet.

    Predicts one K x K matrix per sample.  It serves both as the input
    transform (K=3) and as the feature transform (K=64).

    Args:
        K (int): Size of the per-point feature vector and of the predicted
            square matrix.
        bn_decay (float | None): Handed to every BatchNorm layer as its
            ``momentum`` unchanged (no ``1 - decay`` conversion is applied).
            ``None`` selects ``DEFAULT_BN_MOMENTUM``.
    """

    def __init__(self, K=3, bn_decay=None):
        super(TNet, self).__init__()
        self.K = K
        momentum = DEFAULT_BN_MOMENTUM if bn_decay is None else bn_decay

        def conv_stage(c_in, c_out, kernel):
            # Conv2d -> BatchNorm2d -> ReLU, stride 1 and no padding.
            return nn.Sequential(
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=kernel,
                          stride=(1, 1), padding=(0, 0), bias=False),
                nn.BatchNorm2d(c_out, momentum=momentum),
                nn.ReLU(inplace=True),
            )

        def dense_stage(f_in, f_out):
            # Linear -> BatchNorm1d -> ReLU.
            return nn.Sequential(
                nn.Linear(f_in, f_out, bias=False),
                nn.BatchNorm1d(f_out, momentum=momentum),
                nn.ReLU(inplace=True),
            )

        # The input is treated as a one-channel image of shape (B, 1, N, K);
        # a (1, K) kernel spans the whole feature axis of a single point.
        self.conv1 = conv_stage(1, 64, (1, K))
        # 1x1 convolutions act on every point independently.
        self.conv2 = conv_stage(64, 128, (1, 1))
        self.conv3 = conv_stage(128, 1024, (1, 1))

        # Fully connected stages applied to the max-pooled feature.
        self.fc1 = dense_stage(1024, 512)
        self.fc2 = dense_stage(512, 256)

        # Regression head: 256 -> K*K.
        self.transform_fc = nn.Linear(256, K * K)

        # Zero weights plus an identity bias make the freshly built network
        # predict exactly the identity matrix.
        with torch.no_grad():
            self.transform_fc.weight.zero_()
            self.transform_fc.bias.copy_(torch.eye(K).reshape(-1))

    def forward(self, x):
        """Map ``x`` of shape (B, N, K) to transform matrices of shape (B, K, K)."""
        n_batch, n_points = x.size(0), x.size(1)

        # (B, N, K) -> (B, 1, N, K): N is the image height, K the width.
        feat = x.unsqueeze(1)
        for stage in (self.conv1, self.conv2, self.conv3):
            feat = stage(feat)  # ends at (B, 1024, N, 1)

        # Max over all points, then flatten to (B, 1024).
        pooled = F.max_pool2d(feat, kernel_size=(n_points, 1))
        hidden = self.fc2(self.fc1(pooled.view(n_batch, -1)))  # (B, 256)

        return self.transform_fc(hidden).view(n_batch, self.K, self.K)

# 假设 PointNetCls 是主模型
class PointNetCls(nn.Module):
    """
    PointNet backbone that emits a token sequence instead of class logits.

    The input is a point cloud of shape (B, N, 3).  The classification head
    (``fc1``/``fc2``/``fc3`` with dropout) is still constructed, but
    ``forward`` bypasses it and returns the output of ``tokenGenerator``:
    a tensor of shape (B, num_tokens, token_dim), i.e. (B, 77, 768) with the
    default arguments.

    Args:
        num_classes (int): Output width of the (currently unused) ``fc3``.
        bn_decay (float | None): Used directly as the BatchNorm ``momentum``;
            ``None`` selects ``DEFAULT_BN_MOMENTUM``.
        in_dim (int): Input width of the token generator.  ``forward`` feeds
            it the 1024-wide global feature produced by ``conv5``.
        num_tokens (int): Number of tokens to generate.
        token_dim (int): Width of every generated token.
    """

    @staticmethod
    def _conv2d_stage(c_in, c_out, kernel, momentum):
        # Conv2d -> BatchNorm2d -> ReLU, stride 1 and no padding.
        return nn.Sequential(
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=kernel,
                      stride=(1, 1), padding=(0, 0), bias=False),
            nn.BatchNorm2d(c_out, momentum=momentum),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _conv1d_stage(c_in, c_out, momentum):
        # Pointwise Conv1d -> BatchNorm1d -> ReLU on (B, C, N) tensors.
        return nn.Sequential(
            nn.Conv1d(in_channels=c_in, out_channels=c_out, kernel_size=1, bias=False),
            nn.BatchNorm1d(c_out, momentum=momentum),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _dense_stage(f_in, f_out, momentum):
        # Linear -> BatchNorm1d -> ReLU.
        return nn.Sequential(
            nn.Linear(f_in, f_out, bias=False),
            nn.BatchNorm1d(f_out, momentum=momentum),
            nn.ReLU(inplace=True),
        )

    def __init__(self, num_classes=40, bn_decay=None,in_dim=1024, num_tokens=77, token_dim=768):
        super(PointNetCls, self).__init__()
        self.num_classes = num_classes
        momentum = DEFAULT_BN_MOMENTUM if bn_decay is None else bn_decay

        # T-Nets: 3x3 for the raw coordinates, 64x64 for the feature space.
        self.input_transform_net = TNet(K=3, bn_decay=momentum)
        self.feature_transform_net = TNet(K=64, bn_decay=momentum)

        # Shared MLP, first half: works on the (B, 1, N, 3) "image".
        self.conv1 = self._conv2d_stage(1, 64, (1, 3), momentum)
        self.conv2 = self._conv2d_stage(64, 64, (1, 1), momentum)

        # Shared MLP, second half (after the feature T-Net): pointwise Conv1d
        # on (B, 64, N) tensors, ending in the 1024-wide per-point feature.
        self.conv3 = self._conv1d_stage(64, 64, momentum)
        self.conv4 = self._conv1d_stage(64, 128, momentum)
        self.conv5 = self._conv1d_stage(128, 1024, momentum)

        # Classification head.  It is built, but forward() does not use it.
        self.fc1 = self._dense_stage(1024, 512, momentum)
        self.dropout1 = nn.Dropout(p=0.3)  # keep_prob 0.7 -> drop prob 0.3
        self.fc2 = self._dense_stage(512, 256, momentum)
        self.dropout2 = nn.Dropout(p=0.3)  # keep_prob 0.7 -> drop prob 0.3
        self.fc3 = nn.Linear(256, num_classes)  # no activation

        self.tokenGenerator=TokenGenerator(in_dim=in_dim, num_tokens=num_tokens, token_dim=token_dim)

    def forward(self, point_cloud):
        """Map ``point_cloud`` of shape (B, N, 3) to tokens (B, num_tokens, token_dim)."""
        n_batch, n_points = point_cloud.size(0), point_cloud.size(1)

        # ---- Input T-Net: predict a 3x3 matrix and apply it to the points.
        input_transform = self.input_transform_net(point_cloud)      # (B, 3, 3)
        aligned_points = torch.bmm(point_cloud, input_transform)     # (B, N, 3)

        # ---- Shared MLP (Conv2d part): (B, 1, N, 3) -> (B, 64, N, 1).
        feat = self.conv2(self.conv1(aligned_points.unsqueeze(1)))

        # ---- Feature T-Net: needs (B, N, 64) for the batched matmul.
        point_feat = feat.squeeze(-1).transpose(2, 1)                # (B, N, 64)
        feature_transform = self.feature_transform_net(point_feat)   # (B, 64, 64)
        aligned_feat = torch.bmm(point_feat, feature_transform)      # (B, N, 64)

        # ---- Shared MLP (Conv1d part): (B, 64, N) -> (B, 1024, N).
        feat = aligned_feat.transpose(2, 1)
        for stage in (self.conv3, self.conv4, self.conv5):
            feat = stage(feat)

        # ---- Symmetric function: max over all points -> (B, 1024).
        global_feature = F.max_pool1d(feat, kernel_size=n_points).view(n_batch, -1)

        # The classification head (fc1/dropout1/fc2/dropout2/fc3) and the
        # end_points dictionary carrying the feature transform are disabled;
        # the global feature is expanded into a token sequence instead.
        return self.tokenGenerator(global_feature)

# Example call
point_cloud_data = torch.randn(32, 1024, 3) # (B, N, 3)
model = PointNetCls(num_classes=40)
output = model(point_cloud_data)
print(output.shape) # torch.Size([32, 77, 768]): the token sequence, not (32, 40) logits

torch.Size([32, 77, 768])


In [6]:
# ====================== 依赖 ======================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# ====================== Hyperparameters ======================
BATCH_SIZE      = 2
SEQ_LEN         = 77
FEAT_DIM        = 768
NUM_EXPERTS     = 2
TOP_K           = 2
HIDDEN_DIM      = 512
OUT_DIM         = 768
EPOCHS          = 5
LR              = 1e-3
LAMBDA_BALANCE  = 0.01
DEVICE          = 'cuda' if torch.cuda.is_available() else 'cpu'

# ==================== Random data =====================
# Build 1000 synthetic sequences of shape (SEQ_LEN, FEAT_DIM), for demonstration only.
# NOTE(review): no random seed is set, so the data differs on every run.
X = np.random.randn(1000, SEQ_LEN, FEAT_DIM).astype(np.float32)
train_ds = TensorDataset(torch.from_numpy(X))
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

# ==================== 模型 =========================
class MoEEncoder(nn.Module):
    """
    Mixture-of-experts encoder whose first expert stream is fused with PointNet tokens.

    A linear gate scores every token, the ``top_k`` highest-probability
    experts are selected per token, and every selected expert's output is
    scaled by its gate probability.  Only experts 0 and 1 feed the returned
    features: the PointNet token sequence is added to expert 0's stream, and
    the result is concatenated with expert 1's stream along the feature axis.
    Experts with index >= 2 (if any) only influence the ``importance``
    statistics.

    Args:
        input_dim (int): Feature width D of the input tokens.
        hidden_dim (int): Hidden width of every expert MLP.
        output_dim (int): Output width of every expert MLP.  It must match
            the token width produced by the point network (768 by default).
        num_experts (int): Number of expert MLPs.
        top_k (int): Number of experts selected per token
            (``1 <= top_k <= num_experts``).
        point_cloud_data (torch.Tensor): Point cloud of shape (P, N, 3) that
            is passed through ``PointNetCls`` on every forward call.  P must
            be 1 or equal to the batch size of the input to ``forward``.
            NOTE: the default tensor is created once, when the class is
            defined, and is shared by all instances that rely on it.
    """

    def __init__(self,
                 input_dim=FEAT_DIM,
                 hidden_dim=HIDDEN_DIM,
                 output_dim=OUT_DIM,
                 num_experts=NUM_EXPERTS,
                 top_k=TOP_K,
                 point_cloud_data = torch.randn(BATCH_SIZE, 1024, 3)):
        super().__init__()
        if not 1 <= top_k <= num_experts:
            raise ValueError(
                f"top_k must be between 1 and num_experts ({num_experts}), got {top_k}"
            )
        self.num_experts = num_experts
        self.top_k = top_k
        self.output_dim = output_dim

        # Gating network: one logit per expert for every token.
        self.gate = nn.Linear(input_dim, num_experts)

        # Expert networks: independent two-layer MLPs.
        self.experts = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, output_dim)
            ) for _ in range(num_experts)
        ])

        self.point_net = PointNetCls(num_classes=40)

        # Non-persistent buffer: follows the module across .to()/.cuda()
        # calls while staying out of state_dict(), so checkpoints keep the
        # same keys as before.
        self.register_buffer('point_cloud_data', point_cloud_data, persistent=False)

    def _expert_stream(self, flat, top_val, top_idx, expert_id):
        """Gate-weighted output of one expert for all tokens (zero rows where it was not selected)."""
        stream = flat.new_zeros(flat.size(0), self.output_dim)
        if expert_id >= self.num_experts:
            return stream

        # topk never repeats an index within a row, so every token has at
        # most one slot that points at this expert.
        slot_mask = top_idx == expert_id                              # [B*T, top_k]
        token_idx = slot_mask.any(dim=-1).nonzero(as_tuple=True)[0]   # tokens routed here
        if token_idx.numel() == 0:
            return stream

        weight = (top_val * slot_mask).sum(dim=-1)[token_idx]         # [n_routed]
        # One batched forward pass instead of one call per token.
        expert_out = self.experts[expert_id](flat[token_idx])         # [n_routed, output_dim]
        stream.index_add_(0, token_idx, (weight.unsqueeze(-1) * expert_out).to(stream.dtype))
        return stream

    def forward(self, x):
        """
        Route tokens through the experts and fuse the point-cloud tokens.

        Args:
            x (torch.Tensor): Input of shape [B, T, D].

        Returns:
            tuple: ``(importance, final_out)`` where ``importance`` has shape
            [num_experts] and holds the summed gate probability every expert
            received (load-balancing statistic), and ``final_out`` has shape
            [B, T, 2 * output_dim].

        Raises:
            ValueError: If the point-network output cannot be broadcast
                against the expert features of shape [B, T, output_dim].
        """
        B, T, _ = x.shape
        flat = x.reshape(B * T, -1)                                   # [B*T, D]

        gate_probs = F.softmax(self.gate(flat), dim=-1)               # [B*T, num_experts]
        # Use the instance's top_k, not the module-level TOP_K constant.
        top_val, top_idx = torch.topk(gate_probs, self.top_k, dim=-1) # [B*T, top_k]

        # Sum of the selected gate weights per expert, for the balance loss.
        importance = torch.zeros(self.num_experts, device=x.device, dtype=top_val.dtype)
        importance.index_add_(0, top_idx.reshape(-1), top_val.reshape(-1))

        point_tokens = self.point_net(self.point_cloud_data.to(x.device))

        # Fail early and clearly instead of hitting an opaque broadcasting
        # error after all the expert work has been done.
        target_shape = (B, T, self.output_dim)
        if point_tokens.dim() != 3 or any(
            have not in (1, want) for have, want in zip(point_tokens.shape, target_shape)
        ):
            raise ValueError(
                f"point tokens of shape {tuple(point_tokens.shape)} cannot be combined "
                f"with expert features of shape {target_shape}"
            )

        first_stream = self._expert_stream(flat, top_val, top_idx, 0).view(B, T, -1)
        second_stream = self._expert_stream(flat, top_val, top_idx, 1).view(B, T, -1)

        final_point = first_stream + point_tokens
        final_out = torch.cat([final_point, second_stream], dim=2)

        return importance, final_out


# ==================== 损失 =========================
def loss_fn(x, recon, importance, lambda_bal=LAMBDA_BALANCE):
    """
    Reconstruction loss plus a load-balancing penalty.

    Args:
        x (torch.Tensor): Target tensor.
        recon (torch.Tensor): Reconstruction, compared with ``x`` by MSE.
        importance (torch.Tensor): Per-expert importance vector.
        lambda_bal (float): Weight of the balance term.

    Returns:
        tuple: ``(total_loss, recon_loss, balance_loss)``.
    """
    recon_loss = F.mse_loss(recon, x)                   # reconstruction
    # torch.std of fewer than two values is NaN and would poison the total
    # loss; a single expert is trivially balanced, so the penalty is zero.
    if importance.numel() > 1:
        balance_loss = torch.std(importance)            # spread of expert usage
    else:
        balance_loss = importance.new_zeros(())
    return recon_loss + lambda_bal * balance_loss, recon_loss, balance_loss


# ==================== Training ====================
model = MoEEncoder().to(DEVICE)
opt   = torch.optim.Adam(model.parameters(), lr=LR)

for epoch in range(1, EPOCHS + 1):
    model.train()
    total_loss, total_recon, total_bal = 0., 0., 0.
    for (x,) in train_loader:
        x = x.to(DEVICE)
        opt.zero_grad()
        # forward() returns (importance, features) in that order.
        importance, out= model(x)
        # Debug output: one line per batch.
        print(out.shape)
    # NOTE(review): the optimisation step below is disabled, so this loop only
    # runs forward passes and the running totals stay at zero.  The recorded
    # output shows `out` as [2, 77, 1536] while `x` is [2, 77, 768], so
    # loss_fn(x, out, ...) cannot be re-enabled as written.
    #     loss, recon_loss, bal_loss = loss_fn(x, out, importance)
    #     loss.backward()
    #     opt.step()

    #     total_loss  += loss.item()
    #     total_recon += recon_loss.item()
    #     total_bal   += bal_loss.item()

    # print(f"Epoch {epoch:02d} | total {total_loss/len(train_loader):.4f} | "
    #       f"recon {total_recon/len(train_loader):.4f} | balance {total_bal/len(train_loader):.4f}")

# ==================== Inference example ====================
model.eval()
with torch.no_grad():
    # The demo batch must match the batch of the point cloud stored in the
    # model (BATCH_SIZE with the default constructor arguments); the point
    # tokens are added element-wise to the first expert stream.
    demo = torch.randn(BATCH_SIZE, SEQ_LEN, FEAT_DIM).to(DEVICE)
    # The model returns (importance, features) in that order.
    demo_importance, feat = model(demo)   # feat: [BATCH_SIZE, SEQ_LEN, 2 * OUT_DIM]
    print("输出形状:", feat.shape)          # expected: torch.Size([2, 77, 1536])

torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])
torch.Size([2, 77, 1536])


KeyboardInterrupt: 