In [12]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
import os
import sys
import torch
from pathlib import Path
from lightning import Fabric

# torch.compile compatibility: ask TorchDynamo to suppress its own errors
# (per the PyTorch docs this makes it fall back to eager execution).
try:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True
except ImportError:
    # PyTorch versions < 2.0 do not provide torch._dynamo
    print("Warning: torch._dynamo not available in this PyTorch version")

## Set up environment
# Put the parent of the current working directory at the front of sys.path;
# this has to happen before the project-local imports that follow.
project_root = Path(os.getcwd()).parent
sys.path.insert(0, str(project_root))

from dataset.dataset_field import create_gnf_converter, prepare_data_with_sample_idx
from funcmol.utils.utils_nf import load_neural_field
from funcmol.utils.utils_fm import load_checkpoint_fm
from funcmol.utils.constants import PADDING_INDEX
from funcmol.utils.gnf_visualizer import (
    visualize_1d_gradient_field_comparison,
    GNFVisualizer
)
from funcmol.utils.misc import load_nf_config, load_funcmol_config

# Model root directory (the neural_field experiments folder)
model_root = "/datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/neural_field"

In [14]:
# TODO: choose the run mode manually: 'gt_only', 'gt_pred' or 'denoiser_only'
option = 'denoiser_only'  # 'gt_only', 'gt_pred', 'denoiser_only'

# A mistyped mode would otherwise silently fall through to the
# neural-field / ground-truth code paths, so reject it up front.
valid_options = ('gt_only', 'gt_pred', 'denoiser_only')
if option not in valid_options:
    raise ValueError(f"Unknown option {option!r}; expected one of {valid_options}")

# TODO: set the checkpoint paths manually; exp_name is derived from the path
nf_ckpt_path = '/datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/neural_field/nf_qm9/20250911/lightning_logs/version_1/checkpoints/model-epoch=39.ckpt'
fm_ckpt_path = '/datapool/data2/home/pxg/data/hyc/funcmol-main-neuralfield/exps/funcmol/fm_qm9/20250929/lightning_logs/version_2/checkpoints/model-epoch=16004.ckpt'

# TODO: set sample_idx manually (only used by the gt_only and gt_pred modes)
sample_idx = 4000
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _experiment_paths(ckpt_path, marker, exps_root):
    """Derive experiment names and directories from a checkpoint path.

    Args:
        ckpt_path: Path of the checkpoint file.
        marker: Path component (e.g. 'funcmol' or 'neural_field') whose two
            following components form the experiment name.
        exps_root: Directory under which the experiment folder is placed.

    Returns:
        Tuple ``(exp_name, ckpt_name, model_dir, output_dir)`` where
        ``exp_name`` is ``"<part after marker>/<next part>"``, ``ckpt_name``
        is the checkpoint file name without its extension, ``model_dir`` is
        ``exps_root/exp_name`` and ``output_dir`` is ``model_dir/ckpt_name``.

    Raises:
        ValueError: If ``marker`` is not a component of ``ckpt_path`` or is
            not followed by at least two further components.
    """
    parts = Path(ckpt_path).parts
    if marker not in parts:
        raise ValueError(f"Cannot derive exp_name: {marker!r} is not a component of {ckpt_path}")
    marker_idx = parts.index(marker)
    if len(parts) < marker_idx + 3:
        raise ValueError(f"Cannot derive exp_name: too few components after {marker!r} in {ckpt_path}")
    exp = f"{parts[marker_idx + 1]}/{parts[marker_idx + 2]}"
    stem = Path(ckpt_path).stem
    exp_dir = os.path.join(exps_root, exp)
    return exp, stem, exp_dir, os.path.join(exp_dir, stem)


if option == 'denoiser_only':
    # denoiser_only uses the FuncMol checkpoint; its experiments folder is
    # the `funcmol` sibling of the neural-field root.
    funcmol_root = str(Path(model_root).parent / "funcmol")
    exp_name, ckpt_name, model_dir, output_dir = _experiment_paths(fm_ckpt_path, 'funcmol', funcmol_root)
else:
    # gt_only and gt_pred use the Neural Field checkpoint
    exp_name, ckpt_name, model_dir, output_dir = _experiment_paths(nf_ckpt_path, 'neural_field', model_root)

os.makedirs(output_dir, exist_ok=True)

print(f"Option: {option}")
if option == 'denoiser_only':
    print(f"FuncMol model directory: {model_dir}")
    print(f"FuncMol checkpoint: {ckpt_name}")
    print(f"Neural Field checkpoint: {nf_ckpt_path}")
else:
    print(f"Model directory: {model_dir}")
    print(f"Checkpoint: {ckpt_name}")
print(f"Output directory: {output_dir}")

Option: denoiser_only
FuncMol model directory: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/funcmol/fm_qm9/20250929
FuncMol checkpoint: model-epoch=16004
Neural Field checkpoint: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/neural_field/nf_qm9/20250911/lightning_logs/version_1/checkpoints/model-epoch=39.ckpt
Output directory: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/funcmol/fm_qm9/20250929/model-epoch=16004


In [15]:
## Load data
# Single-device Fabric runtime in "32-true" precision
fabric = Fabric(accelerator="auto", devices=1, precision="32-true", strategy="auto")
fabric.launch()

# Read the neural-field configuration from the configs directory via load_nf_config
config = load_nf_config("train_nf_qm9")

needs_dataset = option != 'denoiser_only'
if needs_dataset:
    # Prepare a batch that contains the requested sample
    batch, gt_coords, gt_types = prepare_data_with_sample_idx(fabric, config, device, sample_idx)
    print(f"Data loaded for sample {sample_idx}: {gt_coords.shape}, {gt_types.shape}")
else:
    # denoiser_only mode never touches the dataset
    batch = gt_coords = gt_types = None
    print("Denoiser-only mode: No dataset loading required")

Dataset directory: /datapool/data2/home/pxg/data/hyc/funcmol-main-neuralfield/funcmol/dataset/data
Config loaded successfully: train_nf_qm9
n_iter from converter config: 1000
Denoiser-only mode: No dataset loading required


In [None]:
# Model loading: builds `field_func` (and `codes`) for the selected `option`,
# with model parameters taken from the YAML configuration files.
print(f"\nProcessing model from: {model_dir}")


def _as_batched_points(points):
    """Return `points` with a leading batch dimension.

    A 2-D tensor (noted in the notebook as [n_points, 3]) is unsqueezed to
    3-D; a 3-D tensor ([batch, n_points, 3]) is returned unchanged.

    Raises:
        ValueError: If `points` is neither 2-D nor 3-D.
    """
    if points.dim() == 2:
        return points.unsqueeze(0)
    if points.dim() == 3:
        return points
    raise ValueError(f"Unexpected points shape: {points.shape}")


def _drop_batch_dim(result):
    """Return the first batch entry of a 4-D result; other ranks pass through."""
    return result[0] if result.dim() == 4 else result


def _require_checkpoint(path):
    """Raise FileNotFoundError if the checkpoint file `path` does not exist."""
    if not os.path.exists(path):
        raise FileNotFoundError(f"Checkpoint file not found: {path}")


## Load model
if option == 'denoiser_only':
    # Load both the Neural Field model and the FuncMol model
    from funcmol.models.funcmol import FuncMol

    # Fail early with a clear message, as the gt_pred branch does
    _require_checkpoint(nf_ckpt_path)
    _require_checkpoint(fm_ckpt_path)

    print(f"Loading Neural Field model from: {nf_ckpt_path}")
    encoder, decoder = load_neural_field(nf_ckpt_path, fabric, config)
    encoder.eval()
    decoder.eval()

    # FuncMol configuration comes from the YAML config
    funcmol_config = load_funcmol_config("train_fm_qm9", config)

    funcmol = FuncMol(funcmol_config, fabric)
    # Use the `device` selected earlier instead of an unconditional .cuda(),
    # which raises on hosts without CUDA.
    funcmol = funcmol.to(device)

    funcmol, _ = load_checkpoint_fm(funcmol, fm_ckpt_path, fabric=fabric)
    funcmol.eval()

    print(">> FuncMol model loaded successfully!")

    # Denoised codes are sampled lazily on the first query and then reused,
    # so every call to the field function describes the same field.
    _denoised_cache = {}

    def denoiser_field_func(points):
        """Decode the field at `points` from one fixed set of denoised codes.

        On the first call, random noise of shape
        (1, grid_size**3, code_dim) is drawn on `points.device` and passed
        through `funcmol` under `torch.no_grad()`; the result is cached and
        reused by later calls.

        Raises:
            ValueError: If `points` is neither 2-D nor 3-D.
        """
        points = _as_batched_points(points)
        if "codes" not in _denoised_cache:
            grid_size = config.dset.grid_size
            code_dim = config.encoder.code_dim
            noise_codes = torch.randn(1, grid_size**3, code_dim, device=points.device)
            with torch.no_grad():
                _denoised_cache["codes"] = funcmol(noise_codes)
        return _drop_batch_dim(decoder(points, _denoised_cache["codes"][0:1]))

    field_func = denoiser_field_func
    codes = None  # denoiser mode has no pre-computed codes at this point

elif option == 'gt_pred':
    # Use the manually specified checkpoint path
    _require_checkpoint(nf_ckpt_path)

    print(f"Loading model from: {nf_ckpt_path}")
    encoder, decoder = load_neural_field(nf_ckpt_path, fabric, config)

    encoder.eval()
    decoder.eval()

    # Encode the prepared batch into codes
    print(f"Batch device: {batch.pos.device}")
    print(f"Encoder device: {next(encoder.parameters()).device}")
    with torch.no_grad():
        codes = encoder(batch)

    def predicted_field_func(points):
        """Decode the field at `points` from the first entry of `codes`.

        Raises:
            ValueError: If `points` is neither 2-D nor 3-D.
        """
        points = _as_batched_points(points)
        # Only one sample was encoded, hence the 0:1 slice
        return _drop_batch_dim(decoder(points, codes[0:1]))

    field_func = predicted_field_func

else:  # gt_only
    encoder, decoder = None, None

    def gt_field_func(points):
        """Compute the ground-truth field at `points` via `converter.mol2gnf`.

        Padding atoms (type == PADDING_INDEX) of the single loaded sample
        are removed before the conversion.

        Raises:
            ValueError: If `points` is neither 2-D nor 3-D.
        """
        gt_mask = (gt_types[0] != PADDING_INDEX)  # single sample, index 0
        gt_valid_coords = gt_coords[0][gt_mask]
        gt_valid_types = gt_types[0][gt_mask]

        points = _as_batched_points(points)

        result = converter.mol2gnf(
            gt_valid_coords.unsqueeze(0),
            gt_valid_types.unsqueeze(0),
            points
        )
        return _drop_batch_dim(result)

    field_func = gt_field_func
    codes = None

# `gt_field_func` looks `converter` up at call time, so creating it after the
# function definition is fine.
converter = create_gnf_converter(config)
print("Model loaded successfully!")



Processing model from: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/funcmol/fm_qm9/20250929
Loading Neural Field model from: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/neural_field/nf_qm9/20250911/lightning_logs/version_1/checkpoints/model-epoch=39.ckpt
Loading Lightning checkpoint from: /datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/exps/neural_field/nf_qm9/20250911/lightning_logs/version_1/checkpoints/model-epoch=39.ckpt
>> loaded dec
>> loaded enc
Model loaded successfully!
Loading configuration from YAML: train_fm_qm9
>> Using diffusion_method: new
>> DDPM config: {'hidden_dim': 128, 'num_layers': 4, 'time_emb_dim': 64, 'dropout': 0.1, 'beta_start': 0.0001, 'beta_end': 0.02, 'num_timesteps': 1000, 'schedule': 'linear', 's1': 0.008, 'sT': 0.008, 'w': 1.0}
>> loaded denoiser
>> loaded model trained for 16004 epochs
>> FuncMol model loaded successfully!
GNF Converter created with n_iter: 1000, gradient_

In [None]:
# Pre-sample one fixed set of codes with the DDPM ("new") sampler and build
# the field function on top of them.
if option == 'denoiser_only':
    from funcmol.models.funcmol import FuncMol
    from omegaconf import OmegaConf

    # Keep the FuncMol config aligned with the current NF config
    # (decoder / encoder / dset sections).
    funcmol_config["encoder"] = OmegaConf.to_container(config.encoder, resolve=True)
    funcmol_config["decoder"] = OmegaConf.to_container(config.decoder, resolve=True)
    funcmol_config["dset"] = OmegaConf.to_container(config.dset, resolve=True)

    funcmol_ddpm = FuncMol(funcmol_config, fabric)
    # Use the `device` selected earlier instead of an unconditional .cuda(),
    # which raises on hosts without CUDA.
    funcmol_ddpm = funcmol_ddpm.to(device)
    funcmol_ddpm, code_stats = load_checkpoint_fm(funcmol_ddpm, fm_ckpt_path, fabric=fabric)
    funcmol_ddpm.eval()

    # Sync code_stats to the decoder so both use the same numeric scale.
    # This stays best-effort, but a failure is reported instead of hidden.
    try:
        decoder.set_code_stats(code_stats)
    except Exception as exc:
        print(f"Warning: could not sync code_stats to decoder ({type(exc).__name__}: {exc})")

    # Build the converter configuration the same way sample_fm.py does.
    # sample_fm.yaml relies on Hydra defaults, so the full configuration is
    # assembled by hand here, starting from the converter YAML.
    converter_cfg = OmegaConf.load(str(project_root / 'configs' / 'converter' / 'gnf_converter_qm9.yaml'))
    converter_config = OmegaConf.to_container(converter_cfg, resolve=True)

    # Set gradient_field_method explicitly, matching field_methods = ['tanh']
    # in sample_fm.py.
    field_method = 'tanh'
    converter_config['gradient_field_method'] = field_method

    # Same configuration layout as sample_fm.py
    method_config = {
        'converter': converter_config,
        'dset': OmegaConf.to_container(config.dset, resolve=True),
        'encoder': OmegaConf.to_container(config.encoder, resolve=True),
        'decoder': OmegaConf.to_container(config.decoder, resolve=True)
    }

    converter = create_gnf_converter(method_config)

    # Sample the codes; grid_size / code_dim (and their fallbacks) follow sample_fm.py
    grid_size = method_config.get('dset', {}).get('grid_size', 9)
    code_dim = method_config.get('encoder', {}).get('code_dim', 128)
    with torch.no_grad():
        codes = funcmol_ddpm.sample_ddpm(shape=(1, grid_size**3, code_dim), progress=False)

    def ddpm_field_func(points):
        """Decode the field at `points` from the fixed DDPM-sampled `codes`.

        2-D `points` get a leading batch dimension; a 4-D decoder result is
        reduced to its first batch entry.

        Raises:
            ValueError: If `points` is neither 2-D nor 3-D.
        """
        if points.dim() == 2:
            points = points.unsqueeze(0)
        elif points.dim() != 3:
            raise ValueError(f"Unexpected points shape: {points.shape}")
        result = decoder(points, codes[0:1])
        return result[0] if result.dim() == 4 else result

    field_func = ddpm_field_func
    print("DDPM fixed codes sampled and field_func set.")

FileNotFoundError: [Errno 2] No such file or directory: '/datapool/data3/storage/pengxingang/pxg/hyc/funcmol-main-neuralfield/funcmol/configs/ddpm_config.yaml'

In [None]:
if option == 'denoiser_only':
    print("Denoiser-only mode: Skipping gradient field comparison")
else:
    # 1-D gradient-field comparison covering every atom type
    atom_types = list(range(5))  # C, H, O, N, F
    save_path = os.path.join(output_dir, f"field1d_sample_{sample_idx}")

    gradient_results = visualize_1d_gradient_field_comparison(
        gt_coords=gt_coords,
        gt_types=gt_types,
        converter=converter,
        field_func=field_func,
        # The loaded data holds a single sample, so it is addressed as index 0
        sample_idx=0,
        # The whole list is passed at once; no per-type loop is needed here
        atom_types=atom_types,
        x_range=None,
        y_coord=0.0,
        z_coord=0.0,
        # save_path already embeds the original sample index
        save_path=save_path,
        # Original dataset index, used for file names and display
        display_sample_idx=sample_idx,
    )

    if gradient_results:
        print(f"Gradient field comparison (model: {model_dir}):")
        print(f"  Available atom types: {gradient_results['available_atom_types']}")

        # Per-atom-type error statistics and output location
        for element, metrics in gradient_results['all_results'].items():
            print(f"  {element}: MSE={metrics['mse']:.6f}, MAE={metrics['mae']:.6f}")
            print(f"    Saved to: {metrics['save_path']}")

Denoiser-only mode: Skipping gradient field comparison


In [None]:
def _decode_field_from_codes(points):
    """Decode the field at `points` from the first entry of the global `codes`.

    2-D `points` get a leading batch dimension; a 4-D decoder result is
    reduced to its first batch entry. `decoder` and `codes` are looked up at
    call time.

    Raises:
        ValueError: If `points` is neither 2-D nor 3-D.
    """
    if points.dim() == 2:
        points = points.unsqueeze(0)
    elif points.dim() != 3:
        raise ValueError(f"Unexpected points shape: {points.shape}")
    result = decoder(points, codes[0:1])
    return result[0] if result.dim() == 4 else result


if option == 'denoiser_only':
    # denoiser_only mode: visualise the molecule generated from the fixed
    # DDPM-sampled codes.
    print("\n=== 执行 DDPM(new) 分子生成 ===")

    # Reuse the fixed codes sampled in the previous cell
    print("Generating molecular field and reconstructing molecule (fixed DDPM codes)...")
    recon_coords, recon_types = converter.gnf2mol(
        decoder,
        codes,
        fabric=fabric
    )

    print(f"Generated molecule: {recon_coords[0].shape[0]} atoms")
    print(f"Atom types: {recon_types[0].unique().tolist()}")

    # Animate the generation process with fixed codes so frames do not change
    print("Creating generation process animation with fixed DDPM codes...")
    visualizer = GNFVisualizer(output_dir)

    fixed_generation_field_func = _decode_field_from_codes

    results = visualizer.create_generation_animation(
        converter=converter,
        field_func=fixed_generation_field_func,
        sample_idx=0,
        save_interval=100,
        create_1d_plots=False,
        use_recon_dir=False,
        fixed_axis=True,
        use_intelligent_sampling=True,
        decoder=decoder,
        codes=codes,
        fabric=fabric
    )

    # Convert to plain Python numbers so the dict prints as {type: count}
    # rather than with tensor(...) keys and values.
    type_values, type_counts = torch.unique(recon_types[0], return_counts=True)
    atom_type_distribution = dict(zip(type_values.tolist(), type_counts.tolist()))

    print("\n=== DDPM Field 生成结果 ===")
    print(f"Generated atoms: {recon_coords[0].shape[0]}")
    print(f"Atom type distribution: {atom_type_distribution}")
    print(f"最终分子图: {results['final_path']}")
    print(f"生成过程动画: {results['gif_path']}")

else:
    # Pick which reconstructions to run for this option
    if option == 'gt_only':
        rec_list = ['gt_field']
    else:
        rec_list = ['predicted_field', 'gt_field']

    visualizer = GNFVisualizer(output_dir)

    def gt_field_func(points):
        """Return `converter.mol2gnf` evaluated at `points` for the GT molecule.

        Padding atoms (type == PADDING_INDEX) of the single loaded sample are
        removed first. `points` and the result are passed through unchanged.
        """
        gt_mask = (gt_types[0] != PADDING_INDEX)  # single sample, index 0
        gt_valid_coords = gt_coords[0][gt_mask]
        gt_valid_types = gt_types[0][gt_mask]
        return converter.mol2gnf(
            gt_valid_coords.unsqueeze(0),
            gt_valid_types.unsqueeze(0),
            points
        )

    predicted_field_func = _decode_field_from_codes

    # Field function per reconstruction type, defined once instead of on
    # every loop iteration.
    field_funcs = {
        'gt_field': gt_field_func,
        'predicted_field': predicted_field_func,
    }

    # Run the reconstruction visualisation for each type
    for rec_type in rec_list:
        print(f"\n=== 执行 {rec_type} 重建 ===")

        field_func = field_funcs[rec_type]

        results = visualizer.create_reconstruction_animation(
            gt_coords=gt_coords,
            gt_types=gt_types,
            converter=converter,
            field_func=field_func,
            save_interval=100,
            animation_name=f"recon_sample_{sample_idx}_{rec_type}",
            sample_idx=sample_idx
        )

        print(f"\n=== {rec_type} 重建结果 ===")
        print(f"RMSD: {results['final_rmsd']:.4f}")
        print(f"Reconstruction Loss: {results['final_loss']:.4f}")
        print(f"KL Divergence (orig->recon): {results['final_kl_1to2']:.4f}")
        print(f"KL Divergence (recon->orig): {results['final_kl_2to1']:.4f}")
        print(f"GIF动画: {results['gif_path']}")
        print(f"对比图: {results['comparison_path']}")


=== 执行 DDPM(new) 分子生成 ===
Generating molecular field and reconstructing molecule (fixed DDPM codes)...
>>     Memory status at iteration 0: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 50: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 100: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 150: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 200: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 250: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 300: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 350: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 400: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 450: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 500: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at iteration 550: Allocated=0.02GB, Reserved=0.39GB
>>     Memory status at