In [1]:
import os
import numpy as np

# 设置目标目录路径
raw_dir = "/home/jovyan/Desktop/Cu-DeePMD/00.data/Cu_raw/Cu_dataset/set.000"

# 定义每帧的形状规则（单位：原子数、原子数*3、1、3x3、…）
# 假设体系中原子数是 N（以 coord.raw 推算）
def infer_num_atoms(coord_file):
    coords = np.loadtxt(coord_file)
    total = coords.size
    try:
        return total // 3
    except ZeroDivisionError:
        return 0

# 加载 coord.raw 推断原子数和总帧数
coord_path = os.path.join(raw_dir, "coord.raw")
coord = np.loadtxt(coord_path)
natoms = coord.shape[1] // 3 if coord.ndim == 2 else coord.size // 3
nframes = coord.shape[0] if coord.ndim == 2 else coord.size // (3 * natoms)

print(f"📦 推断原子数: {natoms}")
print(f"📦 总帧数（以 coord.raw 为参考）: {nframes}")
print("-" * 50)

# 定义每个文件应有的元素数/行数规则
expected_shapes = {
    "coord.raw": (nframes, natoms * 3),
    "force.raw": (nframes, natoms * 3),
    "energy.raw": (nframes, 1),
    "virial.raw": (nframes, 9),
    "type.raw": (nframes, natoms),
    "box.raw": (nframes, 9),
    "type_map.raw": "text"
}

# 检查所有文件的形状
for fname, expected in expected_shapes.items():
    fpath = os.path.join(raw_dir, fname)
    if not os.path.exists(fpath):
        print(f"❌ 缺失文件: {fname}")
        continue

    if expected == "text":
        with open(fpath) as f:
            lines = f.readlines()
        print(f"📄 {fname} 包含 {len(lines)} 行 (type_map)")
        continue

    try:
        data = np.loadtxt(fpath)
        if data.ndim == 1:
            data = data.reshape(1, -1)
        actual_shape = data.shape
        if actual_shape != expected:
            print(f"⚠️ {fname} 实际形状: {actual_shape}，预期: {expected}")
        else:
            print(f"✅ {fname} OK，帧数: {actual_shape[0]}")
    except Exception as e:
        print(f"❌ {fname} 加载失败: {e}")


📦 推断原子数: 4
📦 总帧数（以 coord.raw 为参考）: 500
--------------------------------------------------
✅ coord.raw OK，帧数: 500
✅ force.raw OK，帧数: 500
⚠️ energy.raw 实际形状: (1, 500)，预期: (500, 1)
✅ virial.raw OK，帧数: 500
✅ type.raw OK，帧数: 500
✅ box.raw OK，帧数: 500
📄 type_map.raw 包含 1 行 (type_map)
