In [None]:
#!/usr/bin/env python3
"""
print_pkl_keys.py - 打印指定PKL文件中所有键值对及其形状
"""

import pickle
import numpy as np
import torch
from pathlib import Path

# Hard-coded path of the PKL file this script inspects.
PKL_FILE = Path("/xcfhome/jzlu/For/czn/misato/part4/3JQG_frame95/output/3jqg_frame95_features_with_masif.pkl")

def _report_key_consistency(complexes):
    """Compare the key set of every later entry against the first entry.

    Prints a report line for each entry that is not a dict or whose keys
    differ from those of ``complexes[0]``, and a confirmation line when every
    entry agrees. ``complexes[0]`` must be a dict.
    """
    first_keys = set(complexes[0])
    all_consistent = True

    # start=2 so the printed number is the entry's 1-based position in the list.
    for position, item in enumerate(complexes[1:], start=2):
        if not isinstance(item, dict):
            # A non-dict entry has no keys to compare; report it instead of
            # silently treating it as consistent.
            print(f"  复合物 {position} 不是字典 (类型: {type(item).__name__})")
            all_consistent = False
            continue

        current_keys = set(item)
        if current_keys != first_keys:
            print(f"  复合物 {position} 的键与第一个不一致")
            print(f"    缺少的键: {first_keys - current_keys}")
            print(f"    额外的键: {current_keys - first_keys}")
            all_consistent = False

    if all_consistent:
        print("  ✓ 所有复合物的键结构一致")


def print_pkl_structure(pkl_file_path):
    """Print the structure of a PKL file: its keys, value types and shapes.

    The unpickled object is handled according to its type:

    * list: prints the number of entries; if the first entry is a dict, its
      structure is printed via ``print_dict_structure`` and, when there is
      more than one entry, the key sets of the remaining entries are compared
      against the first. If the first entry is not a dict, only its type is
      reported.
    * dict: the structure is printed via ``print_dict_structure``.
    * anything else: only the type is reported.

    Args:
        pkl_file_path: Path of the pickle file to open.

    Returns:
        None. All results are written to stdout. Any exception raised while
        loading or printing is caught and reported on stdout as well.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only use this on files from a trusted source.
        with open(pkl_file_path, 'rb') as f:
            data = pickle.load(f)

        print(f"PKL文件路径: {pkl_file_path}")
        print(f"数据类型: {type(data)}")
        print("=" * 80)

        if isinstance(data, list):
            print(f"包含 {len(data)} 个复合物")

            if data:
                first = data[0]
                if isinstance(first, dict):
                    print("\n第一个复合物的键值对结构:")
                    print_dict_structure(first, indent=0)

                    if len(data) > 1:
                        print("\n检查所有复合物的键结构一致性...")
                        _report_key_consistency(data)
                else:
                    # Without this guard a non-dict entry would raise inside
                    # print_dict_structure and be misreported below as a
                    # file read error.
                    print(f"\n第一个复合物不是字典，类型: {type(first).__name__}")

        elif isinstance(data, dict):
            print("数据是单个字典结构")
            print_dict_structure(data, indent=0)

        else:
            print(f"未知的数据结构类型: {type(data)}")

    except Exception as e:
        # Deliberate best-effort boundary: this is an inspection tool, so any
        # failure is reported rather than propagated.
        print(f"读取PKL文件时出错: {e}")

def print_dict_structure(data_dict, indent=0):
    """Recursively print every key of a dict with a summary of its value.

    For each key the key name is printed, followed by indented detail lines
    that depend on the value's type:

    * ``numpy.ndarray`` / ``torch.Tensor``: type, shape, dtype, and either the
      full content (at most 10 elements) or the first 5 flattened elements.
    * ``list`` / ``tuple``: type name, length, and either the full content
      (at most 10 items) or the first 5 items.
    * ``dict``: type and key count; nested dicts are expanded recursively
      while ``indent`` is below 2, otherwise only their keys are listed.
    * anything else: type name and the value itself.

    A blank line is printed after each key.

    Args:
        data_dict: The dict to describe.
        indent: Current nesting level; each level adds two spaces of padding.
    """
    key_pad = "  " * indent
    detail_pad = key_pad + "  "

    for name, item in data_dict.items():
        print(key_pad + f"{name}:")
        nested = None  # sub-dict to expand once the summary lines are printed

        if isinstance(item, np.ndarray):
            details = [
                "类型: numpy.ndarray",
                f"形状: {item.shape}",
                f"数据类型: {item.dtype}",
                f"前5个元素: {item.flat[:5]}" if item.size > 10 else f"内容: {item}",
            ]
        elif isinstance(item, torch.Tensor):
            details = [
                "类型: torch.Tensor",
                f"形状: {item.shape}",
                f"数据类型: {item.dtype}",
                f"前5个元素: {item.flatten()[:5]}" if item.numel() > 10 else f"内容: {item}",
            ]
        elif isinstance(item, (list, tuple)):
            details = [
                f"类型: {type(item).__name__}",
                f"长度: {len(item)}",
                f"前5个元素: {item[:5]}" if len(item) > 10 else f"内容: {item}",
            ]
        elif isinstance(item, dict):
            details = ["类型: dict", f"键数量: {len(item)}"]
            if indent >= 2:
                # Depth limit reached: list the keys instead of descending.
                details.append(f"键: {list(item.keys())}")
            else:
                nested = item
        else:
            details = [f"类型: {type(item).__name__}", f"值: {item}"]

        for text in details:
            print(detail_pad + text)
        if nested is not None:
            print_dict_structure(nested, indent + 1)

        print()

def print_masif_features_summary(pkl_file_path):
    """Print a summary of the MaSIF-related entries in a PKL file.

    The file is unpickled and, if the result is a non-empty list, its first
    element is used as the sample; otherwise the object itself is the sample.
    When the sample is a dict, every string key containing ``masif``
    (case-insensitive) is listed in sorted order together with the value's
    ``shape`` (if it has one) and type name. Non-string keys are ignored.

    Args:
        pkl_file_path: Path of the pickle file to open.

    Returns:
        None. All results are written to stdout. Any exception raised while
        loading or printing is caught and reported on stdout as well.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only use this on files from a trusted source.
        with open(pkl_file_path, 'rb') as f:
            data = pickle.load(f)

        print("\n" + "=" * 80)
        print("MaSIF特征摘要:")
        print("=" * 80)

        sample_data = data[0] if isinstance(data, list) and len(data) > 0 else data

        masif_keys = []
        if isinstance(sample_data, dict):
            # Only string keys can be matched by name; skipping the others
            # keeps a single int/tuple key from aborting the whole summary.
            masif_keys = sorted(
                key for key in sample_data
                if isinstance(key, str) and 'masif' in key.lower()
            )

        if masif_keys:
            print(f"发现 {len(masif_keys)} 个MaSIF相关特征:")
            for key in masif_keys:
                value = sample_data[key]
                if hasattr(value, 'shape'):
                    print(f"  {key}: {value.shape} ({type(value).__name__})")
                else:
                    print(f"  {key}: {type(value).__name__}")
        else:
            print("未发现MaSIF相关特征")

    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not raise.
        print(f"分析MaSIF特征时出错: {e}")

def main(pkl_file=None):
    """Print the structure and the MaSIF summary of a PKL file.

    Args:
        pkl_file: Optional path (str or ``Path``) of the file to inspect.
            When omitted, the module-level ``PKL_FILE`` is used, so calling
            ``main()`` with no arguments behaves as before.

    Returns:
        1 if the file does not exist (an error line is printed), otherwise 0
        after both reports have been printed.
    """
    target = PKL_FILE if pkl_file is None else Path(pkl_file)

    if not target.exists():
        print(f"错误: 文件不存在: {target}")
        return 1

    print_pkl_structure(target)
    print_masif_features_summary(target)
    return 0

if __name__ == "__main__":
    # Raise SystemExit directly instead of calling ``exit``: that name is a
    # helper injected by the ``site`` module for interactive sessions and is
    # not guaranteed to exist in every run mode (e.g. ``python -S``).
    raise SystemExit(main())

PKL文件路径: /xcfhome/jzlu/For/czn/misato/part4/3JQG_frame95/output/3jqg_frame95_features_with_masif.pkl
数据类型: <class 'list'>
包含 1 个复合物

第一个复合物的键值对结构:
edge_index:
  类型: numpy.ndarray
  形状: (2, 960)
  数据类型: int64
  前5个元素: [0 4 1 0 1]

edge_feat:
  类型: numpy.ndarray
  形状: (960, 4)
  数据类型: float32
  前5个元素: [1.       0.       0.       1.339217 1.      ]

node_feat:
  类型: numpy.ndarray
  形状: (93, 9)
  数据类型: int64
  前5个元素: [5 0 3 5 0]

coords:
  类型: numpy.ndarray
  形状: (93, 3)
  数据类型: float64
  前5个元素: [73.07  54.306 51.865 71.724 54.561]

pro_name:
  类型: numpy.ndarray
  形状: (544,)
  数据类型: <U3
  前5个元素: ['N' 'CA' 'C' 'O' 'CB']

AA_name:
  类型: numpy.ndarray
  形状: (544,)
  数据类型: <U3
  前5个元素: ['THR' 'THR' 'THR' 'THR' 'THR']

smiles:
  类型: str
  值: COC1=CC=C(CSC2=NC(N)=NC(N)=C2)C=C1

rmsd:
  类型: float
  值: 3.348

gbscore:
  类型: numpy.ndarray
  形状: (400,)
  数据类型: float64
  前5个元素: [ 55.74593417  60.98127269  52.50335771 193.64677401  27.23606257]

pk:
  类型: float
  值: 4.74

pdbid:
  类型: str
  值: 3jqg_fr

: 