In [1]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2

# Later cells derive the project root from the working directory, so pin it here.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
%cd /home/huayuchen/Neurl-voxel/funcmol/notebooks

/home/huayuchen/Neurl-voxel/funcmol/notebooks


In [2]:
import os
import sys
os.environ['CUDA_VISIBLE_DEVICES'] = "3"

import torch
import hydra
import numpy as np
import random
from pathlib import Path
from omegaconf import OmegaConf

# torch.compile compatibility: ask TorchDynamo to suppress its own errors.
try:
    import torch._dynamo
except ImportError:
    # PyTorch < 2.0 ships without torch._dynamo.
    print("Warning: torch._dynamo not available in this PyTorch version")
else:
    torch._dynamo.config.suppress_errors = True

## set up environment
# The working directory is funcmol/notebooks. The directory that contains the
# funcmol package sits two levels up; it is placed first on sys.path so that
# funcmol can be imported as a package.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent
sys.path.insert(0, str(project_root))
print(f"Project root: {project_root}\nPython path: {sys.path[0]}")

from funcmol.dataset.dataset_field import create_gnf_converter, prepare_data_with_sample_idx
from funcmol.utils.utils_nf import load_neural_field
from funcmol.utils.utils_fm import load_checkpoint_fm
from funcmol.utils.constants import PADDING_INDEX
from funcmol.utils.gnf_visualizer import (
    visualize_1d_gradient_field_comparison, 
    GNFVisualizer,
    visualize_generated_molecule,
    create_visualization_callback,
    create_gif_from_frames
)
from funcmol.utils.misc import load_nf_config, load_funcmol_config, create_field_function
from funcmol.models.funcmol import FuncMol
    
# Seed the Python, PyTorch and NumPy RNGs (in that order), plus every CUDA
# device when one is available.
seed = 1234
for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    seed_fn(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Root directory of the neural-field experiments. Resolved from the project
# root detected above instead of repeating a machine-specific absolute path.
model_root = str(project_root / "exps" / "neural_field")
# Neural-field training configuration used by every mode of this notebook.
config = load_nf_config("train_nf_qm9")

Project root: /home/huayuchen/Neurl-voxel
Python path: /home/huayuchen/Neurl-voxel
Dataset directory: /home/huayuchen/Neurl-voxel/funcmol/dataset/data
Config loaded successfully: train_nf_qm9
n_iter from converter config: 600


In [3]:
##### SETTINGS #####
# TODO: choose the run mode by hand.
#   'gt_only'       - work with the ground-truth field only
#   'gt_pred'       - ground-truth field plus the field decoded from encoder codes
#   'denoiser_only' - field decoded from FuncMol codes (loaded from disk or sampled)
VALID_OPTIONS = ('gt_only', 'gt_pred', 'denoiser_only')
option = 'gt_pred'
if option not in VALID_OPTIONS:
    # Fail here instead of silently running the wrong branches in later cells.
    raise ValueError(f"Unknown option {option!r}; expected one of {VALID_OPTIONS}")

# TODO: set the checkpoint paths by hand; exp_name is extracted from them later.
nf_ckpt_path = '/home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/lightning_logs/version_1/checkpoints/model-epoch=999.ckpt'
fm_ckpt_path = '/home/huayuchen/Neurl-voxel/exps/funcmol/fm_qm9/20251108/lightning_logs/version_1/checkpoints/model-epoch=364.ckpt'
# Alternative checkpoints kept for reference:
# nf_ckpt_path = '/home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20250911/lightning_logs/version_1/checkpoints/model-epoch=39.ckpt'
# fm_ckpt_path = '/home/huayuchen/Neurl-voxel/exps/funcmol/fm_qm9/20250917/lightning_logs/version_22/checkpoints/model-epoch=144.ckpt'

# TODO: set sample_idx by hand (only used by the gt_only and gt_pred modes).
sample_idx = 587  # 2,7,74,83,108,158,186,375,404,433
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# TODO: choose where the codes come from (only used by the denoiser_only mode).
VALID_CODES_SOURCES = ('load', 'sample')
codes_source = 'load'  # 'load' or 'sample'
codes_idx = 501  # e.g. 0 selects code_0000_tanh.pt
if codes_source not in VALID_CODES_SOURCES:
    raise ValueError(
        f"Unknown codes_source {codes_source!r}; expected one of {VALID_CODES_SOURCES}"
    )

In [4]:
# Pick the checkpoint that names the experiment and the tree results go into.
if option == 'denoiser_only':
    # denoiser_only results are stored under the FuncMol experiment tree.
    naming_ckpt_path = fm_ckpt_path
    marker_dir = 'funcmol'
    experiments_root = "/home/huayuchen/Neurl-voxel/exps/funcmol"
else:
    # gt_only and gt_pred results are stored under the neural-field experiment tree.
    naming_ckpt_path = nf_ckpt_path
    marker_dir = 'neural_field'
    experiments_root = model_root

# The experiment name is the two path components that follow the marker
# directory, so the path must look like .../<marker>/<experiment>/<date>/...
ckpt_parts = Path(naming_ckpt_path).parts
if marker_dir not in ckpt_parts or len(ckpt_parts) <= ckpt_parts.index(marker_dir) + 2:
    raise ValueError(
        f"Cannot derive the experiment name from {naming_ckpt_path!r}: expected a path "
        f"containing '{marker_dir}/<experiment>/<date>/...'"
    )
marker_idx = ckpt_parts.index(marker_dir)
exp_name = f"{ckpt_parts[marker_idx + 1]}/{ckpt_parts[marker_idx + 2]}"  # e.g. nf_qm9/20251121
ckpt_name = Path(naming_ckpt_path).stem  # e.g. model-epoch=999
model_dir = os.path.join(experiments_root, exp_name)
output_dir = os.path.join(model_dir, ckpt_name)
os.makedirs(output_dir, exist_ok=True)

if option == 'denoiser_only':
    print(f"Option: {option}")
    print(f"FuncMol model directory: {model_dir}")
    print(f"FuncMol checkpoint: {ckpt_name}")
    print(f"Neural Field checkpoint: {nf_ckpt_path}")
    print(f"Output directory: {output_dir}")
else:
    # Only the ground-truth based modes need a dataset sample.
    batch, gt_coords, gt_types = prepare_data_with_sample_idx(config, device, sample_idx)
    print(f"Data loaded for sample {sample_idx}: {gt_coords.shape}, {gt_types.shape}")
    print(f"Option: {option}")
    print(f"Model directory: {model_dir}")
    print(f"Checkpoint: {ckpt_name}")
    print(f"Output directory: {output_dir}")

>> val set size: 20042
Data loaded for sample 587: torch.Size([1, 12, 3]), torch.Size([1, 12])
Option: gt_pred
Model directory: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121
Checkpoint: model-epoch=999
Output directory: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999


In [5]:
## Load model, generate or load codes
def _require_checkpoint(ckpt_path):
    """Raise FileNotFoundError when `ckpt_path` does not exist."""
    if not os.path.exists(ckpt_path):
        raise FileNotFoundError(f"Checkpoint file not found: {ckpt_path}")


def _load_neural_field_for_inference(ckpt_path):
    """Load the neural-field encoder and decoder for inference.

    Reads the notebook-level `config` and `device`. Both modules are moved to
    `device` and switched to eval mode.

    Returns:
        (encoder, decoder)
    """
    encoder, decoder = load_neural_field(ckpt_path, config)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    encoder.eval()
    decoder.eval()
    return encoder, decoder


if option == 'denoiser_only':
    # Check both checkpoints up front, as the gt_pred branch does for its own.
    _require_checkpoint(nf_ckpt_path)
    _require_checkpoint(fm_ckpt_path)

    print(f"Loading Neural Field model from: {nf_ckpt_path}")
    encoder, decoder = _load_neural_field_for_inference(nf_ckpt_path)

    # Load the FuncMol configuration from its YAML file.
    funcmol_config = load_funcmol_config("train_fm_qm9", config)

    # Build the FuncMol model.
    funcmol = FuncMol(funcmol_config)
    funcmol = funcmol.to(device)

    # Load the checkpoint; it also provides the code statistics.
    funcmol, code_stats = load_checkpoint_fm(funcmol, fm_ckpt_path)
    funcmol.eval()

    # Hand the code statistics to the decoder.
    decoder.set_code_stats(code_stats)

    print(">> FuncMol model loaded successfully!")
    configs_dir = project_root / "funcmol" / "configs"
    with hydra.initialize_config_dir(config_dir=str(configs_dir), version_base=None):
        sample_fm_config = hydra.compose(config_name="sample_fm")

    # Convert to a plain dict (mirrors sample_fm.py).
    config_dict = OmegaConf.to_container(sample_fm_config, resolve=True)

    # Build the converter from the sampling config (mirrors sample_fm.py).
    converter = create_gnf_converter(config_dict)

    # Code dimensions, with the same fallbacks as sample_fm.py.
    grid_size = config_dict.get('dset', {}).get('grid_size', 9)
    code_dim = config_dict.get('encoder', {}).get('code_dim', 128)

    if codes_source == 'load':
        # Load previously saved codes; the file name is derived from codes_idx.
        mol_save_dir = Path(model_dir) / "molecule"
        code_path = mol_save_dir / f"code_{codes_idx:04d}_tanh.pt"
        print("根据索引自动构建路径:")
        print(f"  mol_save_dir: {mol_save_dir}")
        print(f"  文件名格式: code_{codes_idx:04d}_tanh.pt")
        print(f"\n最终使用的codes路径: {code_path}")

        if not code_path.exists():
            raise FileNotFoundError(
                f"Codes file not found: {code_path}\n"
                f"Please check if the file exists or run sample_fm.py first to generate codes."
            )

        print(f"Loading codes from: {code_path}")
        # NOTE(security): torch.load unpickles the file. Only load code files
        # produced by your own runs; consider weights_only=True if they hold plain tensors.
        codes = torch.load(code_path, map_location=device)
        # A 2-D tensor is missing the batch dimension; add it.
        if codes.dim() == 2:
            codes = codes.unsqueeze(0)
        print(f"Loaded codes shape: {codes.shape}")

    elif codes_source == 'sample':
        # Draw fresh codes from the FuncMol DDPM sampler.
        print("Sampling codes using DDPM...")
        with torch.no_grad():
            codes = funcmol.sample_ddpm(shape=(1, grid_size**3, code_dim), progress=False)
        print(f"Sampled codes shape: {codes.shape}")

    else:
        raise ValueError(
            f"Unknown codes_source {codes_source!r}; expected 'load' or 'sample'"
        )

    # Shared field function in ddpm mode, driven by the loaded/sampled codes.
    field_func = create_field_function(
        mode='ddpm',
        decoder=decoder,
        codes=codes
    )
    print("Codes loaded/sampled and field_func set.")


elif option == 'gt_pred':
    # Use the manually specified neural-field checkpoint.
    _require_checkpoint(nf_ckpt_path)

    print(f"Loading model from: {nf_ckpt_path}")
    encoder, decoder = _load_neural_field_for_inference(nf_ckpt_path)

    # Encode the selected sample into codes.
    print(f"Batch device: {batch.pos.device}")
    print(f"Encoder device: {next(encoder.parameters()).device}")
    print(f"Batch size (number of graphs): {batch.num_graphs}")
    with torch.no_grad():
        codes = encoder(batch)
    print(f"Codes shape: {codes.shape}")

    # Shared field function in predicted mode.
    field_func = create_field_function(
        mode='predicted',
        decoder=decoder,
        codes=codes
    )

elif option == 'gt_only':
    # No model is needed when only the ground-truth field is used.
    encoder, decoder = None, None
    codes = None

else:
    # Previously any unknown value was silently treated as gt_only and failed
    # later with a NameError on field_func.
    raise ValueError(
        f"Unknown option {option!r}; expected 'gt_only', 'gt_pred' or 'denoiser_only'"
    )

# denoiser_only has already built its converter from the sample_fm config in
# the branch above; rebuilding it here from the neural-field config would
# discard that one. Only the ground-truth / prediction modes need this converter.
if option != 'denoiser_only':
    converter = create_gnf_converter(config)

# Print the key converter parameters.
print("\n=== Converter 参数 ===")
print(f"step_size: {converter.step_size}")
print(f"eps: {converter.eps}")
print(f"min_samples: {converter.min_samples}")
# Field parameters.
print(f"field_variance_k_neighbors: {converter.field_variance_k_neighbors}")
print(f"field_variance_weight: {converter.field_variance_weight}")

# The ground-truth field function needs the converter, so it is created last.
if option == 'gt_only':
    field_func = create_field_function(
        mode='gt',
        converter=converter,
        gt_coords=gt_coords,
        gt_types=gt_types
    )
print("Model loaded successfully!")

Loading model from: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/lightning_logs/version_1/checkpoints/model-epoch=999.ckpt
Loading Lightning checkpoint from: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/lightning_logs/version_1/checkpoints/model-epoch=999.ckpt
>> loaded dec
>> loaded enc
Model loaded successfully!
Batch device: cuda:0
Encoder device: cuda:0
Batch size (number of graphs): 1
Codes shape: torch.Size([1, 729, 128])

=== Converter 参数 ===
step_size: 0.04
eps: 0.05
min_samples: 20
field_variance_k_neighbors: 10
field_variance_weight: 0.01
Model loaded successfully!


In [6]:
def _format_stat(value):
    """Format a statistic with six decimals, or return it as text when it is not numeric.

    The generation-mode statistics are read with a string fallback ('N/A');
    applying the `.6f` format spec to that fallback raises ValueError, so
    non-numeric values are passed through unchanged.
    """
    try:
        return f"{value:.6f}"
    except (TypeError, ValueError):
        return str(value)


# Every atom-type channel is plotted in both modes.
atom_types = [0, 1, 2, 3, 4]  # C, H, O, N, F

if option != 'denoiser_only':
    # Compare the 1D gradient field against ground truth for all atom types.
    save_path = os.path.join(output_dir, f"field1d_sample_{sample_idx}")

    gradient_results = visualize_1d_gradient_field_comparison(
        gt_coords=gt_coords,
        gt_types=gt_types,
        converter=converter,
        field_func=field_func,
        sample_idx=0,  # the loaded data holds a single sample, so index 0
        atom_types=atom_types,  # the whole list is passed; no loop needed
        x_range=None,
        y_coord=0.0,
        z_coord=0.0,
        save_path=save_path,
        display_sample_idx=sample_idx,  # original dataset index, for file names and titles
    )

    if gradient_results:
        print(f"Gradient field comparison (model: {model_dir}):")
        print(f"  Available atom types: {gradient_results['available_atom_types']}")

        # Per-atom-type error statistics.
        for atom_name, stats in gradient_results['all_results'].items():
            print(f"  {atom_name}: MSE={stats['mse']:.6f}, MAE={stats['mae']:.6f}")
            print(f"    Saved to: {stats['save_path']}")

else:
    # denoiser_only: plot the 1D gradient field decoded from the codes alone.
    print("\n=== 可视化1D梯度场（仅预测） ===")

    save_path = os.path.join(output_dir, "field1d_gen_sample_0")

    # No ground truth is passed, so only the predicted field is drawn.
    gradient_results = visualize_1d_gradient_field_comparison(
        gt_coords=None,  # no ground truth
        gt_types=None,   # no ground truth
        converter=None,  # not passed when there is no ground truth
        field_func=field_func,
        sample_idx=0,
        atom_types=atom_types,
        x_range=None,  # let the function pick its default range
        y_coord=0.0,
        z_coord=0.0,
        save_path=save_path,
        display_sample_idx=0,
    )

    if gradient_results:
        print("Gradient field visualization (generation mode):")
        print(f"  Available atom types: {gradient_results['available_atom_types']}")

        # Per-atom-type magnitude statistics; missing entries print as N/A.
        for atom_name, stats in gradient_results['all_results'].items():
            magnitude_mean = _format_stat(stats.get('magnitude_mean', 'N/A'))
            magnitude_std = _format_stat(stats.get('magnitude_std', 'N/A'))
            print(f"  {atom_name}:")
            print(f"    Magnitude: Mean={magnitude_mean}, Std={magnitude_std}")
            print(f"    Saved to: {stats['save_path']}")

警告：样本 587 中没有类型为 O 的原子
警告：样本 587 中没有类型为 F 的原子
自动计算 x 轴范围: (-3.0959600448608398, 3.0959600448608398)
Field 1D comparison (atom_type=C) saved to: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/field_1d_sample_587_atom_C.png
Field 1D comparison (atom_type=H) saved to: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/field_1d_sample_587_atom_H.png
Field 1D comparison (atom_type=N) saved to: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/field_1d_sample_587_atom_N.png
Gradient field comparison (model: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121):
  Available atom types: [0, 1, 3]
  C: MSE=0.000262, MAE=0.011883
    Saved to: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/field_1d_sample_587_atom_C.png
  H: MSE=0.000196, MAE=0.008832
    Saved to: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/field_1d_sample_587_atom_H.png

In [7]:
if option == 'denoiser_only':
    # Reconstruct a molecule from the codes prepared earlier and record the
    # process as an animation.
    print("\n=== 执行 DDPM 分子生成 ===")

    # Callback that saves intermediate frames while gnf2mol runs.
    visualization_callback, frame_paths, fixed_axis_limits_dict = create_visualization_callback(
        output_dir=output_dir,
        frame_prefix="frame_gen_sample_0",
        codes_device=codes.device,
        n_atom_types=5
    )

    # Rebuild the molecule from the codes of the previous cell, with visualization.
    print("Generating molecular field and reconstructing molecule (loaded codes)...")
    save_interval = 100
    recon_coords, recon_types = converter.gnf2mol(
        decoder,
        codes,
        save_interval=save_interval,
        visualization_callback=visualization_callback
    )

    # NOTE(review): these counts are taken before the padding filter below —
    # confirm whether gnf2mol pads its output for a single sample.
    print(f"Generated molecule: {recon_coords[0].shape[0]} atoms")
    print(f"Atom types: {recon_types[0].unique().tolist()}")

    # Assemble the saved frames into a GIF.
    print("Creating generation process animation from saved frames...")
    gif_path = os.path.join(output_dir, "funcmol_gen_sample_0.gif")
    create_gif_from_frames(
        frame_paths=frame_paths,
        gif_path=gif_path,
        duration=0.1,
        fps=15,
        loop=1,
        cleanup_frames=True
    )

    # Save an image of the final molecule, dropping padded atoms (type -1).
    final_path = os.path.join(output_dir, "funcmol_gen_sample_0_final.png")
    valid_mask = recon_types[0] != -1
    if valid_mask.any():
        final_coords_valid = recon_coords[0][valid_mask]
        final_types_valid = recon_types[0][valid_mask]
        visualize_generated_molecule(
            final_coords_valid, final_types_valid, save_path=final_path
        )
    else:
        print("Warning: No valid atoms generated")

    # Convert to plain Python ints so the summary prints as {type: count}
    # instead of a dict of 0-d tensors.
    type_values, type_counts = torch.unique(recon_types[0], return_counts=True)
    type_distribution = dict(zip(type_values.tolist(), type_counts.tolist()))

    print("\n=== DDPM Field 生成结果 ===")
    print(f"Generated atoms: {recon_coords[0].shape[0]}")
    print(f"Atom type distribution: {type_distribution}")
    print(f"最终分子图: {final_path}")
    print(f"生成过程动画: {gif_path}")

else:
    class DummyDecoder:
        """Decoder stand-in that returns the ground-truth field.

        It is called like a decoder with (query_points, codes), ignores
        `codes`, and evaluates converter.mol2gnf on the stored ground-truth
        atoms at `query_points`.
        """

        def __init__(self, converter, gt_coords, gt_types):
            self.converter = converter
            self.gt_coords = gt_coords
            self.gt_types = gt_types

        def __call__(self, query_points, codes):
            # The stored tensors have no batch dimension; add one for mol2gnf.
            return self.converter.mol2gnf(
                self.gt_coords.unsqueeze(0),
                self.gt_types.unsqueeze(0),
                query_points
            )

    # Which reconstructions to run depends on the mode.
    if option == 'gt_only':
        rec_list = ['gt_field']
    else:
        rec_list = ['predicted_field', 'gt_field']

    # Visualizer that writes into the output directory.
    visualizer = GNFVisualizer(output_dir)

    # Run the reconstruction visualization once per reconstruction type.
    for rec_type in rec_list:
        print(f"\n=== 执行 {rec_type} 重建 ===")

        # Choose the decoder and codes handed to the reconstruction.
        if rec_type == 'gt_field':
            # Ground-truth mode: placeholder codes plus a decoder that
            # returns the ground-truth field (mirrors field_recon.py).
            grid_size = config.dset.grid_size
            code_dim = config.encoder.code_dim
            dummy_codes = torch.randn(1, grid_size**3, code_dim, device=gt_coords.device)

            # Drop padded atoms before handing them to the dummy decoder.
            gt_mask = (gt_types[0] != PADDING_INDEX)
            gt_valid_coords_for_decoder = gt_coords[0][gt_mask]
            gt_valid_types_for_decoder = gt_types[0][gt_mask]

            dummy_decoder = DummyDecoder(converter, gt_valid_coords_for_decoder, gt_valid_types_for_decoder)
            rec_decoder = dummy_decoder
            rec_codes = dummy_codes
        else:  # predicted_field
            # Predicted mode: use the trained decoder and the encoded codes directly.
            rec_decoder = decoder
            rec_codes = codes

        # Run the reconstruction and build the animation (mirrors field_recon.py).
        results = visualizer.create_reconstruction_animation(
            gt_coords=gt_coords,
            gt_types=gt_types,
            converter=converter,
            decoder=rec_decoder,
            codes=rec_codes,
            save_interval=100,
            animation_name=f"recon_sample_{sample_idx}_{rec_type}",
            sample_idx=0
        )

        print(f"\n=== {rec_type} 重建结果 ===")
        print(f"RMSD: {results['final_rmsd']:.4f}")
        print(f"Reconstruction Loss: {results['final_loss']:.4f}")
        print(f"KL Divergence (orig->recon): {results['final_kl_1to2']:.4f}")
        print(f"KL Divergence (recon->orig): {results['final_kl_2to1']:.4f}")
        print(f"GIF动画: {results['gif_path']}")
        print(f"对比图: {results['comparison_path']}")


=== 执行 predicted_field 重建 ===

Starting reconstruction for molecule 0
Ground truth atoms: 12
[DBSCAN] Total points: 6000, Clusters found: 2, Noise points: 123
[DBSCAN] Total points: 1000, Clusters found: 5, Noise points: 3
[DBSCAN] Total points: 200, Clusters found: 0, Noise points: 200
[DBSCAN] Total points: 200, Clusters found: 4, Noise points: 7
[DBSCAN] Total points: 100, Clusters found: 0, Noise points: 100

=== predicted_field 重建结果 ===
RMSD: 0.2527
Reconstruction Loss: 0.6198
KL Divergence (orig->recon): 8.9214
KL Divergence (recon->orig): -2.4932
GIF动画: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/recon/recon_sample_587_predicted_field.gif
对比图: /home/huayuchen/Neurl-voxel/exps/neural_field/nf_qm9/20251121/model-epoch=999/recon/recon_sample_587_predicted_field_final.png

=== 执行 gt_field 重建 ===

Starting reconstruction for molecule 0
Ground truth atoms: 12
[DBSCAN] Total points: 6000, Clusters found: 2, Noise points: 0
[DBSCAN] Total points: 1000,