In [None]:
import pickle  
import numpy as np  
from pathlib import Path  
# Note: intended for use with my own script.
def read_and_print_pkl(pkl_file_path, max_items=3):
    """Load a pickle file and print a summary of the complexes it holds.

    The loaded object is iterated and every entry is read through ``.get``,
    so each entry is expected to behave like a dictionary.

    Args:
        pkl_file_path: Path of the pickle file to open.
        max_items: Maximum number of complexes printed in detail (default 3).
            When more entries exist, one trailer line reports how many were
            left out. Negative values are treated as 0.

    Returns:
        None. Everything is written to stdout.

    Exceptions raised by ``open`` or ``pickle.load`` propagate to the caller.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point this at files from a trusted source.
    with open(pkl_file_path, 'rb') as f:
        data = pickle.load(f)

    print(f"PKL文件路径: {pkl_file_path}")
    print(f"数据类型: {type(data)}")
    total = len(data)
    print(f"包含 {total} 个复合物\n")

    max_items = max(0, max_items)
    # Stand-in for absent array fields so that ``.shape`` reads ``(0,)``.
    empty = np.array([])

    for i, complex_dict in enumerate(data):
        # Stop before printing entry number max_items + 1. The trailer is only
        # reached when entries really are left out, so it never reports
        # "0 remaining" for a file holding exactly max_items complexes.
        if i >= max_items:
            print(f"... 还有 {total - max_items} 个复合物")
            break

        print(f"=== 复合物 {i+1}: {complex_dict.get('pdbid', 'Unknown')} ===")

        # Scalar metadata
        print(f"PDB ID: {complex_dict.get('pdbid', 'N/A')}")
        print(f"结合亲和力 (pK): {complex_dict.get('pk', 'N/A')}")
        print(f"RMSD: {complex_dict.get('rmsd', 'N/A')}")
        print(f"SMILES: {complex_dict.get('smiles', 'N/A')}")

        # Shapes of the graph arrays
        print("\n图结构信息:")
        print(f"  节点特征形状: {complex_dict.get('node_feat', empty).shape}")
        print(f"  边索引形状: {complex_dict.get('edge_index', empty).shape}")
        print(f"  边特征形状: {complex_dict.get('edge_feat', empty).shape}")
        print(f"  坐标形状: {complex_dict.get('coords', empty).shape}")

        # Node / edge counts; printed only when enough entries are present
        num_node = complex_dict.get('num_node', [])
        num_edge = complex_dict.get('num_edge', [])
        if len(num_node) >= 2:
            print(f"  配体节点数: {num_node[0]}")
            print(f"  蛋白质节点数: {num_node[1]}")
        if len(num_edge) >= 3:
            print(f"  配体边数: {num_edge[0]}")
            print(f"  蛋白质边数: {num_edge[1]}")
            print(f"  相互作用边数: {num_edge[2]}")

        # Protein atom / residue names
        pro_names = complex_dict.get('pro_name', [])
        aa_names = complex_dict.get('AA_name', [])
        if len(pro_names) > 0:
            print("\n蛋白质原子信息:")
            print(f"  原子名称数量: {len(pro_names)}")
            # Slicing already copes with sequences shorter than five items.
            print(f"  前5个原子名称: {pro_names[:5]}")
            print(f"  氨基酸名称数量: {len(aa_names)}")
            print(f"  前5个氨基酸: {aa_names[:5]}")

        # Interaction feature vectors
        rfscore = complex_dict.get('rfscore', empty)
        gbscore = complex_dict.get('gbscore', empty)
        ecif = complex_dict.get('ecif', empty)
        print("\n交互特征:")
        print(f"  RF-Score形状: {rfscore.shape}")
        print(f"  GB-Score形状: {gbscore.shape}")
        print(f"  ECIF形状: {ecif.shape}")

        print("-" * 50)
  
def print_detailed_complex(complex_dict, complex_name=""):
    """Print every field of a single complex dictionary.

    Args:
        complex_dict: Mapping of field name to value.
        complex_name: Label shown in the header line.

    NumPy arrays are reported with shape and dtype, lists and tuples with
    their length. Containers holding more than 10 elements are cut down to
    their first five; any other value is printed as-is.
    """
    print(f"=== 详细信息: {complex_name} ===")

    for field, payload in complex_dict.items():
        print(f"{field}:")

        if isinstance(payload, np.ndarray):
            print("  类型: numpy.ndarray")
            print(f"  形状: {payload.shape}")
            print(f"  数据类型: {payload.dtype}")
            truncated = payload.size > 10
            # ``flat`` takes the first five elements regardless of array rank.
            preview = payload.flat[:5] if truncated else payload
        elif isinstance(payload, (list, tuple)):
            print(f"  类型: {type(payload).__name__}")
            print(f"  长度: {len(payload)}")
            truncated = len(payload) > 10
            preview = payload[:5] if truncated else payload
        else:
            # Scalars and other objects: type name and value, nothing more.
            print(f"  类型: {type(payload).__name__}")
            print(f"  值: {payload}")
            print()
            continue

        # Shared tail for both container kinds.
        label = "前5个元素" if truncated else "内容"
        print(f"  {label}: {preview}")
        print()
  
if __name__ == "__main__":
    # Example usage: replace this with the path of your own pkl file.
    pkl_file_path = "/xcfhome/zncao02/AffinSculptor/preprocess/test.pkl"

    if not Path(pkl_file_path).exists():
        print(f"文件不存在: {pkl_file_path}")
    else:
        # Print the summary of the complexes stored in the file.
        read_and_print_pkl(pkl_file_path)

        # To inspect a single complex in full detail, uncomment the lines below.
        # with open(pkl_file_path, 'rb') as f:
        #     data = pickle.load(f)
        #     if len(data) > 0:
        #         print_detailed_complex(data[0], data[0].get('pdbid', 'Complex_0'))

PKL文件路径: /xcfhome/zncao02/AffinSculptor/preprocess/test.pkl
数据类型: <class 'list'>
包含 1 个复合物

=== 复合物 1: 6uux-QHM ===
PDB ID: 6uux-QHM
结合亲和力 (pK): 6.63
RMSD: 1.25
SMILES: CC[NH+](CC)CCNC1=C/C=C(CO)\C2=C\1C(=O)C1=CC=CC=C1S2

图结构信息:
  节点特征形状: (163, 9)
  边索引形状: (2, 2436)
  边特征形状: (2436, 3)
  坐标形状: (163, 3)
  配体节点数: 25
  蛋白质节点数: 138
  配体边数: 54
  蛋白质边数: 220
  相互作用边数: 494

蛋白质原子信息:
  原子名称数量: 815
  前5个原子名称: ['N' 'CA' 'C' 'O' 'N']
  氨基酸名称数量: 815
  前5个氨基酸: ['GLY' 'GLY' 'GLY' 'GLY' 'LEU']

交互特征:
  RF-Score形状: (100,)
  GB-Score形状: (400,)
  ECIF形状: (0,)
--------------------------------------------------


In [3]:
import pickle  
import torch  
from pathlib import Path  
  
def print_graph_info(graph, index):
    """Print a readable summary of one graph object.

    Args:
        graph: Object exposing the attributes ``pdbid``, ``x``, ``edge_index``,
            ``edge_attr``, ``y``, ``pos``, ``rfscore``, ``gbscore`` and
            ``ecif``. ``num_node`` and ``num_edge`` are optional; the
            composition section is printed only when both are present.
        index: Zero-based position of the graph; shown one-based in the header.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f"复合物 {index + 1}: {graph.pdbid}")
    print(rule)

    # Headline numbers
    print(f"节点数量: {graph.x.shape[0]}")
    print(f"边数量: {graph.edge_index.shape[1]}")
    print(f"结合亲和力 (pK): {graph.y.item():.2f}")

    # Node features: shape, dtype and the first five rows
    print("\n节点特征 (x):")
    print(f"  形状: {graph.x.shape}")
    print(f"  数据类型: {graph.x.dtype}")
    print("  前5个节点的特征:")
    for node_no, feat_row in enumerate(graph.x[:5]):
        print(f"    节点 {node_no}: {feat_row.tolist()}")

    # Edge index: one column per edge, shown as "source -> target"
    print("\n边索引 (edge_index):")
    print(f"  形状: {graph.edge_index.shape}")
    print("  前10条边:")
    for edge_no, (src, dst) in enumerate(zip(*graph.edge_index[:, :10])):
        print(f"    边 {edge_no}: {src.item()} -> {dst.item()}")

    # Edge features: shape, dtype and the first five rows
    print("\n边特征 (edge_attr):")
    print(f"  形状: {graph.edge_attr.shape}")
    print(f"  数据类型: {graph.edge_attr.dtype}")
    print("  前5条边的特征:")
    for edge_no, attr_row in enumerate(graph.edge_attr[:5]):
        print(f"    边 {edge_no}: {attr_row.tolist()}")

    # 3D coordinates of the first five atoms
    print("\n3D坐标 (pos):")
    print(f"  形状: {graph.pos.shape}")
    print("  前5个原子的坐标:")
    for atom_no, (cx, cy, cz) in enumerate(graph.pos[:5]):
        print(f"    原子 {atom_no}: ({cx:.3f}, {cy:.3f}, {cz:.3f})")

    # Interaction score vectors: all lengths first, then the leading values
    print("\n分子相互作用评分:")
    score_fields = (("RF-Score", "rfscore"), ("GB-Score", "gbscore"), ("ECIF", "ecif"))
    for label, attr in score_fields:
        print(f"  {label} 维度: {getattr(graph, attr).shape[0]}")
    for label, attr in score_fields:
        print(f"  {label} 前10个值: {getattr(graph, attr)[:10].tolist()}")

    # Node / edge count breakdown, only when the graph carries both fields
    if not hasattr(graph, 'num_node') or not hasattr(graph, 'num_edge'):
        return
    print("\n分子组成统计:")
    for slot, label in enumerate(("配体节点数", "蛋白质节点数")):
        print(f"  {label}: {graph.num_node[slot].item()}")
    for slot, label in enumerate(("配体边数", "蛋白质边数", "相互作用边数")):
        print(f"  {label}: {graph.num_edge[slot].item()}")
  
def main(pkl_file_path="/xcfhome/zncao02/model_bap/Dynafomer/preprocess/test.pkl", max_graphs=2):
    """Load a pickled collection of graphs and print the first few of them.

    Args:
        pkl_file_path: Path of the pickle file to inspect. The default is the
            path this cell was originally written for; pass another path
            instead of copying the cell.
        max_graphs: Maximum number of graphs to print (default 2).

    Returns:
        None. Results and error messages are written to stdout.
    """
    # Loading is kept in its own try block so that only genuine load problems
    # are reported as load errors.
    try:
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising; only open files from a trusted source.
        with open(pkl_file_path, 'rb') as f:
            graphs = pickle.load(f)
        total = len(graphs)
    except FileNotFoundError:
        print(f"错误: 找不到文件 {pkl_file_path}")
        print("请确保文件路径正确")
        return
    except Exception as e:
        print(f"加载文件时出错: {e}")
        return

    print(f"成功加载 {total} 个复合物")

    # Print the leading graphs. A failure while printing one graph is reported
    # as a printing problem (not a load error) and the remaining graphs are
    # still attempted.
    for i in range(min(max_graphs, total)):
        try:
            print_graph_info(graphs[i], i)
        except Exception as e:
            print(f"打印复合物 {i + 1} 时出错: {e}")


if __name__ == "__main__":
    main()

成功加载 256 个复合物

复合物 1: 5xkr-BZE
节点数量: 72
边数量: 1120
结合亲和力 (pK): 5.15

节点特征 (x):
  形状: torch.Size([72, 9])
  数据类型: torch.int64
  前5个节点的特征:
    节点 0: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 1: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 2: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 3: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 4: [6, 0, 3, 6, 0, 0, 2, 0, 1]

边索引 (edge_index):
  形状: torch.Size([2, 1120])
  前10条边:
    边 0: 0 -> 1
    边 1: 1 -> 0
    边 2: 0 -> 4
    边 3: 4 -> 0
    边 4: 1 -> 2
    边 5: 2 -> 1
    边 6: 2 -> 3
    边 7: 3 -> 2
    边 8: 3 -> 4
    边 9: 4 -> 3

边特征 (edge_attr):
  形状: torch.Size([1120, 3])
  数据类型: torch.int64
  前5条边的特征:
    边 0: [0, 0, 0]
    边 1: [0, 0, 0]
    边 2: [0, 0, 0]
    边 3: [0, 0, 0]
    边 4: [0, 0, 0]

3D坐标 (pos):
  形状: torch.Size([72, 3])
  前5个原子的坐标:
    原子 0: (10.279, 16.884, 16.923)
    原子 1: (10.550, 18.375, 17.086)
    原子 2: (10.640, 18.860, 15.645)
    原子 3: (9.693, 17.937, 14.867)
    原子 4: (9.387, 16.778, 15.747)

分子相互作用评分:
  RF-Score 维度: 100
  GB-Score 维度: 400
  ECIF 维度: 0


In [6]:
import pickle  
import torch  
from pathlib import Path  
  
def print_graph_info(graph, index):
    """Print a readable summary of one graph object.

    Args:
        graph: Object exposing the attributes ``pdbid``, ``x``, ``edge_index``,
            ``edge_attr``, ``y``, ``pos``, ``rfscore``, ``gbscore`` and
            ``ecif``. ``num_node`` and ``num_edge`` are optional; the
            composition section is printed only when both are present.
        index: Zero-based position of the graph; shown one-based in the header.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f"复合物 {index + 1}: {graph.pdbid}")
    print(rule)

    # Headline numbers
    print(f"节点数量: {graph.x.shape[0]}")
    print(f"边数量: {graph.edge_index.shape[1]}")
    print(f"结合亲和力 (pK): {graph.y.item():.2f}")

    # Node features: shape, dtype and the first five rows
    print("\n节点特征 (x):")
    print(f"  形状: {graph.x.shape}")
    print(f"  数据类型: {graph.x.dtype}")
    print("  前5个节点的特征:")
    for node_no, feat_row in enumerate(graph.x[:5]):
        print(f"    节点 {node_no}: {feat_row.tolist()}")

    # Edge index: one column per edge, shown as "source -> target"
    print("\n边索引 (edge_index):")
    print(f"  形状: {graph.edge_index.shape}")
    print("  前10条边:")
    for edge_no, (src, dst) in enumerate(zip(*graph.edge_index[:, :10])):
        print(f"    边 {edge_no}: {src.item()} -> {dst.item()}")

    # Edge features: shape, dtype and the first five rows
    print("\n边特征 (edge_attr):")
    print(f"  形状: {graph.edge_attr.shape}")
    print(f"  数据类型: {graph.edge_attr.dtype}")
    print("  前5条边的特征:")
    for edge_no, attr_row in enumerate(graph.edge_attr[:5]):
        print(f"    边 {edge_no}: {attr_row.tolist()}")

    # 3D coordinates of the first five atoms
    print("\n3D坐标 (pos):")
    print(f"  形状: {graph.pos.shape}")
    print("  前5个原子的坐标:")
    for atom_no, (cx, cy, cz) in enumerate(graph.pos[:5]):
        print(f"    原子 {atom_no}: ({cx:.3f}, {cy:.3f}, {cz:.3f})")

    # Interaction score vectors: all lengths first, then the leading values
    print("\n分子相互作用评分:")
    score_fields = (("RF-Score", "rfscore"), ("GB-Score", "gbscore"), ("ECIF", "ecif"))
    for label, attr in score_fields:
        print(f"  {label} 维度: {getattr(graph, attr).shape[0]}")
    for label, attr in score_fields:
        print(f"  {label} 前10个值: {getattr(graph, attr)[:10].tolist()}")

    # Node / edge count breakdown, only when the graph carries both fields
    if not hasattr(graph, 'num_node') or not hasattr(graph, 'num_edge'):
        return
    print("\n分子组成统计:")
    for slot, label in enumerate(("配体节点数", "蛋白质节点数")):
        print(f"  {label}: {graph.num_node[slot].item()}")
    for slot, label in enumerate(("配体边数", "蛋白质边数", "相互作用边数")):
        print(f"  {label}: {graph.num_edge[slot].item()}")
  
def main(pkl_file_path="/xcfhome/zncao02/Dynaformer/preprocess/test.pkl", max_graphs=2):
    """Load a pickled collection of graphs and print the first few of them.

    Args:
        pkl_file_path: Path of the pickle file to inspect. The default is the
            path this cell was originally written for; pass another path
            instead of copying the cell.
        max_graphs: Maximum number of graphs to print (default 2).

    Returns:
        None. Results and error messages are written to stdout.
    """
    # Loading is kept in its own try block so that only genuine load problems
    # are reported as load errors.
    try:
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising; only open files from a trusted source.
        with open(pkl_file_path, 'rb') as f:
            graphs = pickle.load(f)
        total = len(graphs)
    except FileNotFoundError:
        print(f"错误: 找不到文件 {pkl_file_path}")
        print("请确保文件路径正确")
        return
    except Exception as e:
        print(f"加载文件时出错: {e}")
        return

    print(f"成功加载 {total} 个复合物")

    # Print the leading graphs. A failure while printing one graph is reported
    # as a printing problem (not a load error) and the remaining graphs are
    # still attempted.
    for i in range(min(max_graphs, total)):
        try:
            print_graph_info(graphs[i], i)
        except Exception as e:
            print(f"打印复合物 {i + 1} 时出错: {e}")


if __name__ == "__main__":
    main()

成功加载 1 个复合物

复合物 1: 5xkr-BZE
节点数量: 30
边数量: 670
结合亲和力 (pK): 5.15

节点特征 (x):
  形状: torch.Size([30, 9])
  数据类型: torch.int64
  前5个节点的特征:
    节点 0: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 1: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 2: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 3: [5, 0, 2, 5, 0, 0, 2, 0, 1]
    节点 4: [6, 0, 3, 6, 0, 0, 2, 0, 1]

边索引 (edge_index):
  形状: torch.Size([2, 670])
  前10条边:
    边 0: 0 -> 1
    边 1: 1 -> 0
    边 2: 0 -> 4
    边 3: 4 -> 0
    边 4: 1 -> 2
    边 5: 2 -> 1
    边 6: 2 -> 3
    边 7: 3 -> 2
    边 8: 3 -> 4
    边 9: 4 -> 3

边特征 (edge_attr):
  形状: torch.Size([670, 3])
  数据类型: torch.int64
  前5条边的特征:
    边 0: [0, 0, 0]
    边 1: [0, 0, 0]
    边 2: [0, 0, 0]
    边 3: [0, 0, 0]
    边 4: [0, 0, 0]

3D坐标 (pos):
  形状: torch.Size([30, 3])
  前5个原子的坐标:
    原子 0: (10.279, 16.884, 16.923)
    原子 1: (10.550, 18.375, 17.086)
    原子 2: (10.640, 18.860, 15.645)
    原子 3: (9.693, 17.937, 14.867)
    原子 4: (9.387, 16.778, 15.747)

分子相互作用评分:
  RF-Score 维度: 100
  GB-Score 维度: 400
  ECIF 维度: 1540
  