# In [1]:
# build_catalog.py (全新版本，基于八叉树架构)

import pandas as pd
import numpy as np
import struct
from astropy.coordinates import SkyCoord
from astropy import units as u

# --- 八叉树核心实现 ---
# 我们在Python中实现一个功能对等的八叉树，用于构建和序列化
class OctreeNode:
    """One node of an octree over 3-D points, used to build and serialize a star index.

    A node is either a leaf holding star indices (together with their
    positions) or an internal node with exactly eight children, one per octant.

    Attributes:
        center: Centre of the node's bounding cube, (x, y, z).
        size: Half of the cube's edge length.
        depth: Distance from the root (the root is 0).
        children: The eight child nodes; all ``None`` while the node is a leaf.
        is_leaf: True until the node has been subdivided.
        star_indices: For leaves, indices of the stars stored here.
        star_positions: For leaves, the position belonging to each star index.
        flatten_result: ``(node_index, child_start, star_start, star_count)``
            recorded by the most recent ``flatten`` call, ``None`` before it.
    """

    # Leaves at this depth never split. Without the cap, more coincident
    # points than the leaf capacity would keep subdividing forever.
    MAX_DEPTH = 16

    def __init__(self, center, size, depth=0):
        self.center = center  # centre of the bounding cube (x, y, z)
        self.size = size      # half edge length of the bounding cube
        self.depth = depth    # 0 for the root
        self.children = [None] * 8
        self.is_leaf = True
        self.star_indices = []    # leaf only: indices into the main star list
        self.star_positions = []  # leaf only: position of each stored star
        self.flatten_result = None

    def insert(self, star_pos, star_index, max_stars_per_leaf):
        """Insert one star, splitting the receiving leaf when it overflows.

        Args:
            star_pos: Indexable (x, y, z) position of the star.
            star_index: Index of the star in the caller's main star list.
            max_stars_per_leaf: A leaf holding more stars than this is split,
                unless it already sits at ``MAX_DEPTH``.
        """
        if not self.is_leaf:
            octant_index = self._get_octant_containing_point(star_pos)
            self.children[octant_index].insert(
                star_pos, star_index, max_stars_per_leaf
            )
            return

        self.star_indices.append(star_index)
        self.star_positions.append(star_pos)
        overloaded = len(self.star_indices) > max_stars_per_leaf
        if overloaded and self.depth < self.MAX_DEPTH:
            self.subdivide(max_stars_per_leaf)

    def subdivide(self, max_stars_per_leaf=None):
        """Turn this leaf into an internal node and hand its stars to 8 children.

        Args:
            max_stars_per_leaf: Capacity applied while redistributing, so an
                overfull child splits again. When ``None`` the children simply
                receive the stars and are not split further by this call.
        """
        if not self.is_leaf:
            # Already subdivided; rebuilding the children would drop the subtree.
            return

        self.is_leaf = False
        child_size = self.size / 2.0
        center = np.asarray(self.center, dtype=float)

        # Bit 4 of the octant number selects +x, bit 2 +y, bit 1 +z; this must
        # match _get_octant_containing_point.
        for i in range(8):
            direction = np.array([
                1.0 if i & 4 else -1.0,
                1.0 if i & 2 else -1.0,
                1.0 if i & 1 else -1.0,
            ])
            self.children[i] = OctreeNode(
                center + direction * child_size, child_size, self.depth + 1
            )

        # Move the stars down using the positions stored next to the indices,
        # so no module-level position table is needed.
        pending = list(zip(self.star_positions, self.star_indices))
        self.star_indices = []
        self.star_positions = []
        if max_stars_per_leaf is None:
            # No child can hold more than len(pending) stars, so none splits.
            capacity = len(pending)
        else:
            capacity = max_stars_per_leaf
        for star_pos, star_index in pending:
            octant_index = self._get_octant_containing_point(star_pos)
            self.children[octant_index].insert(star_pos, star_index, capacity)

    def _get_octant_containing_point(self, point):
        """Return the octant number (0-7) of ``point`` relative to the centre."""
        octant = 0
        if point[0] >= self.center[0]:
            octant |= 4
        if point[1] >= self.center[1]:
            octant |= 2
        if point[2] >= self.center[2]:
            octant |= 1
        return octant

    def flatten(self, node_list, star_data_list):
        """Append this subtree to two flat lists for serialization.

        The eight children of an internal node are appended to ``node_list``
        as one contiguous run, so ``child_start`` alone locates all of them.
        Star indices of each leaf are appended to ``star_data_list`` as one
        contiguous run as well.

        Args:
            node_list: List that receives the nodes of this subtree.
            star_data_list: List that receives the star indices of all leaves.

        Returns:
            ``(node_index, child_start, star_start, star_count)`` for this
            node. Leaves report ``child_start == -1``; internal nodes report
            ``star_start == -1`` and ``star_count == 0``. The same tuple is
            stored on every visited node as ``flatten_result``.
        """
        node_index = len(node_list)
        node_list.append(self)
        return self._flatten_into(node_index, node_list, star_data_list)

    def _flatten_into(self, node_index, node_list, star_data_list):
        """Flatten a node that already occupies ``node_list[node_index]``."""
        if self.is_leaf:
            star_start_index = len(star_data_list)
            star_data_list.extend(self.star_indices)
            result = (node_index, -1, star_start_index, len(self.star_indices))
        else:
            # Reserve the eight sibling slots first, then descend; recursing
            # before reserving would interleave grandchildren with siblings.
            child_start_index = len(node_list)
            node_list.extend(self.children)
            for offset, child in enumerate(self.children):
                child._flatten_into(
                    child_start_index + offset, node_list, star_data_list
                )
            result = (node_index, child_start_index, -1, 0)

        self.flatten_result = result
        return result

# Module-level list holding the Cartesian position of every star; entry i is
# the position of the star whose index is i.
all_stars_cartesian = []

# --- 主程序 ---
def _layout_octree(root):
    """Lay an octree out as flat records with sibling nodes stored contiguously.

    Returns:
        ``(records, star_order)``. Each record is
        ``[node, child_start, star_start, star_count]``: internal nodes carry
        the index of their first child (the other seven follow directly) with
        ``star_start == -1``; leaves carry ``child_start == -1`` and the slice
        of ``star_order`` that holds their star indices.
    """
    records = [[root, -1, -1, 0]]
    star_order = []
    pending = [0]  # record slots still to be resolved; iterative to avoid deep recursion
    while pending:
        slot = pending.pop()
        node = records[slot][0]
        if node.is_leaf:
            records[slot][2] = len(star_order)
            records[slot][3] = len(node.star_indices)
            star_order.extend(node.star_indices)
        else:
            first_child = len(records)
            records[slot][1] = first_child
            records.extend([child, -1, -1, 0] for child in node.children)
            # Push in reverse so children are resolved in octant order 0..7.
            pending.extend(range(first_child + 7, first_child - 1, -1))
    return records, star_order


def process_catalog(csv_filename='2zhao_basic_catalog.csv',
                    binary_filename='hipparcos_catalog.starcatalog',
                    max_stars_per_leaf=32):
    """Convert a CSV star catalogue into a binary octree catalogue file.

    File layout (little-endian, no padding):
        header  ``<IIQI``   magic, version, star count, node count
        nodes   ``<6f3i5f`` box min xyz, box max xyz, child start, star start,
                            star count, aggregate magnitude, RGBA colour
        stars   ``<5f``     position xyz, V magnitude, B-V colour index
    Stars are written in leaf order, so a leaf's ``star start`` and
    ``star count`` address a contiguous run of star records.

    Args:
        csv_filename: Catalogue with ``ra``, ``dec``, ``Vmag`` and optionally
            ``B-V`` columns; lines starting with ``#`` are ignored.
        binary_filename: Path of the file to write.
        max_stars_per_leaf: Leaf capacity passed to ``OctreeNode.insert``.

    Returns:
        None. A missing CSV file is reported on stdout and nothing is written.
    """
    global all_stars_cartesian

    print(f"正在从 {csv_filename} 加载星表...")
    try:
        df = pd.read_csv(csv_filename, comment='#', skipinitialspace=True)
    except FileNotFoundError:
        print(f"错误：找不到CSV文件 '{csv_filename}'。")
        return

    df.columns = df.columns.str.strip()
    print("CSV列名:", df.columns.tolist())

    # --- 1. Data preparation and coordinate conversion ---
    print("正在转换坐标 (RA/Dec -> 笛卡尔单位向量)...")
    star_data_for_packing = []
    # Start from an empty table so star indices line up with
    # star_data_for_packing even when this function is called more than once.
    all_stars_cartesian = []

    for index, row in df.iterrows():
        try:
            ra_deg = float(row['ra'])
            dec_deg = float(row['dec'])
            vmag = float(row['Vmag'])
            bv = float(row.get('B-V', 0))

            coord = SkyCoord(ra=ra_deg*u.degree, dec=dec_deg*u.degree, frame='icrs')
            cartesian = coord.cartesian.get_xyz().value
        except (ValueError, KeyError) as e:
            print(f"警告: 第 {index + 2} 行数据有问题，已跳过。错误: {e}")
            continue

        # Same ICRS -> UE conversion as simple_process_catalog: (x, y, z) ->
        # (x, -y, z). Applied before the tree is built so node boxes and star
        # positions share one frame.
        # NOTE(review): assumes the .starcatalog reader expects UE-frame
        # positions like the .stardata reader does - confirm.
        position = np.array([cartesian[0], -cartesian[1], cartesian[2]])

        all_stars_cartesian.append(position)
        star_data_for_packing.append({'pos': position, 'mag': vmag, 'bv': bv})

    # --- 2. Build the octree ---
    print("正在构建八叉树...")
    # Root cube slightly larger than the unit sphere so every unit vector fits.
    root = OctreeNode(center=np.array([0.0, 0.0, 0.0]), size=1.01)
    for i, position in enumerate(all_stars_cartesian):
        root.insert(position, i, max_stars_per_leaf)

    # --- 3. Flatten the octree ---
    print("正在展平八叉树用于序列化...")
    node_records, flat_star_indices = _layout_octree(root)

    # --- 4. Serialize ---
    print(f"正在写入二进制文件 {binary_filename}...")

    # "<" pins byte order and disables padding so the file does not depend on
    # the machine that builds it.
    header_packer = struct.Struct('<IIQI')
    # 6f box, 3i indices, 1f magnitude, 4f colour.
    node_packer = struct.Struct('<6f3i5f')
    star_packer = struct.Struct('<5f')

    magic_number = 0x43415453  # "STAC"
    version = 1
    star_count = len(star_data_for_packing)
    node_count = len(node_records)

    # Placeholder aggregates (simplified): every node gets the same values.
    agg_mag = 10.0
    agg_color = (1.0, 1.0, 1.0, 1.0)  # r, g, b, a

    with open(binary_filename, 'wb') as f:
        f.write(header_packer.pack(magic_number, version, star_count, node_count))

        f.writelines(
            node_packer.pack(
                *(node.center - node.size),
                *(node.center + node.size),
                child_start, star_start, num_stars,
                agg_mag, *agg_color,
            )
            for node, child_start, star_start, num_stars in node_records
        )

        f.writelines(
            star_packer.pack(*star['pos'], star['mag'], star['bv'])
            for star in (star_data_for_packing[i] for i in flat_star_indices)
        )

    print("\n--- 处理完成 ---")
    print(f"成功将 {star_count} 颗星的数据和 {node_count} 个八叉树节点信息写入 {binary_filename}")

if __name__ == '__main__':
    # For now the octree stage is left out and only the star records are
    # written, so the C++ loading and rendering side can be finished first.
    def simple_process_catalog():
        """Write every catalogue star as a packed 5-float record, without an octree.

        Reads the CSV, converts RA/Dec to a unit vector, flips Y for the UE
        frame and appends (x, y, z, Vmag, B-V) to the output file. Rows that
        raise ValueError or KeyError are reported and skipped.
        """
        source_csv = '2zhao_basic_catalog.csv'
        output_path = 'hipparcos.stardata'  # star records only

        print(f"正在从 {source_csv} 加载星表 (简化流程)...")
        try:
            catalog = pd.read_csv(source_csv, comment='#', skipinitialspace=True)
        except FileNotFoundError:
            print(f"错误：找不到CSV文件 '{source_csv}'。")
            return
        catalog.columns = catalog.columns.str.strip()

        # C++ FStarDataPacked: FVector3f(x, y, z), float Magnitude, float ColorIndex
        record_format = struct.Struct('fffff')

        written = 0
        with open(output_path, 'wb') as out_file:
            for row_label, record in catalog.iterrows():
                try:
                    right_ascension = float(record['ra'])
                    declination = float(record['dec'])
                    magnitude = float(record['Vmag'])
                    color_index = float(record.get('B-V', 0.5))

                    sky_position = SkyCoord(
                        ra=right_ascension * u.degree,
                        dec=declination * u.degree,
                        frame='icrs',
                    )
                    # UE frame: X forward, Z up. ICRS: X towards the vernal
                    # equinox, Z towards the north celestial pole. The
                    # conversion used here is ICRS (x, y, z) -> UE (x, -y, z).
                    icrs_x, icrs_y, icrs_z = sky_position.cartesian.get_xyz().value

                    out_file.write(
                        record_format.pack(icrs_x, -icrs_y, icrs_z, magnitude, color_index)
                    )
                except (ValueError, KeyError) as e:
                    print(f"警告: 第 {row_label + 2} 行数据有问题，已跳过。错误: {e}")
                else:
                    written += 1

        print("\n--- 简化流程处理完成 ---")
        print(f"成功将 {written} 颗星的 FStarDataPacked 数据写入到 {output_path}")

    # Run the simplified pipeline.
    simple_process_catalog()

# Recorded notebook output:
# 正在从 2zhao_basic_catalog.csv 加载星表 (简化流程)...
#
# --- 简化流程处理完成 ---
# 成功将 7356417 颗星的 FStarDataPacked 数据写入到 hipparcos.stardata
