In [1]:
import pandas as pd
from Bio.PDB import PDBParser
import numpy as np
from rfdiff.chemical import num2aa, aa_321
import torch

def are_collinear(p1, p2, p3, tol=1e-3):
    """Check whether three points lie (numerically) on one straight line.

    The test is the norm of the cross product of the edge vectors
    ``p2 - p1`` and ``p3 - p1``; a norm below ``tol`` counts as collinear.
    The threshold is absolute, so it is expressed in squared coordinate units.

    Args:
        p1, p2, p3: Point coordinates. NumPy arrays are used as-is; other
            array-likes (lists, tuples) are converted with ``np.asarray``.
            ``None`` marks a missing point.
        tol: Upper bound (exclusive) on the cross-product norm for the points
            to be reported as collinear.

    Returns:
        A ``(is_collinear, message)`` tuple. If any point is ``None`` the
        result is ``(False, "One or more points are missing")``.
    """
    if p1 is None or p2 is None or p3 is None:
        return False, "One or more points are missing"
    # Accept plain sequences as well as arrays: list - list would raise TypeError.
    # np.asarray returns ndarray inputs unchanged, so existing callers see no difference.
    p1, p2, p3 = np.asarray(p1), np.asarray(p2), np.asarray(p3)
    v1 = p2 - p1
    v2 = p3 - p1
    cross_product = np.cross(v1, v2)
    norm = np.linalg.norm(cross_product)
    if norm < tol:
        return True, f"Collinear, cross product norm: {norm:.4f}"
    return False, f"Not collinear, cross product norm: {norm:.4f}"

# --- Configuration ---
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable base directory so the notebook runs elsewhere.
pdb_file = "/home/fit/lulei/WORK/xjt/Protein_design/BondFlow/BondFlow/tests/output_new.pdb"
link_csv_path = "/home/fit/lulei/WORK/xjt/Protein_design/BondFlow/BondFlow/config/link.csv"
# Bond records to analyse; filled from `batch_data` further down when it is empty.
bonds_in_batch = []

# --- Main script ---
print(f"Analyzing {pdb_file} for collinear atoms...")
link_df = pd.read_csv(link_csv_path)
parser = PDBParser(QUIET=True)
try:
    structure = parser.get_structure("generated", pdb_file)
    # Only the first model of the parsed structure is inspected.
    model = structure[0]
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a Jupyter kernel exit() requests a
    # kernel shutdown, which would throw away every variable already in memory.
    # An exception stops this cell just the same and keeps the kernel alive.
    raise FileNotFoundError(
        f"PDB file not found at {pdb_file}. Please ensure model_forward() has been run."
    ) from err


# Fill `bonds_in_batch` from `batch_data` when that variable has been defined
# and no bond records have been collected yet.
if not bonds_in_batch and 'batch_data' in locals():
    print("Found 'batch_data', extracting bond information...")
    seq_target = batch_data['full_seq'].cpu()
    bond_matrix = batch_data['full_bond_matrix'].cpu()
    B, L = seq_target.shape

    # One identity matrix per batch element; its zeros mark the off-diagonal
    # entries, so a residue is never paired with itself.
    eye = torch.eye(L).repeat(B, 1, 1)
    is_bonded = bond_matrix == 1
    off_diagonal = eye == 0
    bond_indices = torch.where(is_bonded & off_diagonal)

    # Each hit is a (batch, residue i, residue j) index triple.
    for b, i, j in zip(*bond_indices):
        code_i = seq_target[b, i].item()
        code_j = seq_target[b, j].item()
        # NOTE(review): the exact label format produced by aa_321 is defined in
        # rfdiff.chemical and must match the res1/res2 columns of link.csv — confirm.
        res_name_i = aa_321.get(num2aa[code_i], 'UNK')
        res_name_j = aa_321.get(num2aa[code_j], 'UNK')
        bond_record = (b.item(), i.item(), j.item(), res_name_i, res_name_j)
        bonds_in_batch.append(bond_record)

# Nothing to analyse: tell the user which cell has to be run first.
if len(bonds_in_batch) == 0:
    print("Warning: `bonds_in_batch` is empty. No bonds to analyze.")
    print("Please run the cell that defines `batch_data` and calls `model_forward`.")

def get_coords(res_idx, atom_name):
    """Return the coordinates of ``atom_name`` in residue ``res_idx`` of chain 'A'.

    ``res_idx`` is 0-based. A warning is printed and ``None`` is returned when
    the lookup raises ``KeyError``.
    """
    try:
        # PDB residue numbering is 1-based, our indices are 0-based
        return model['A'][res_idx + 1][atom_name].get_coord()
    except KeyError:
        print(f"Warning: Atom '{atom_name}' not found in residue {res_idx+1}")
        return None


# NOTE(review): the batch index `b` is only printed; coordinates always come from
# the single PDB model loaded above — confirm the file matches the batch element.
for bond_info in bonds_in_batch:
    b, resi_i, resj_j, resn_i, resn_j = bond_info
    print(f"\n--- Analyzing bond: Batch {b}, {resn_i}{resi_i+1} <-> {resn_j}{resj_j+1} ---")

    # Find rule in link.csv, including fallbacks for 'ALL'
    rule = link_df[((link_df['res1'] == resn_i) & (link_df['res2'] == resn_j)) |
                   ((link_df['res1'] == resn_j) & (link_df['res2'] == resn_i))]
    if rule.empty:
        rule = link_df[((link_df['res1'] == resn_i) & (link_df['res2'] == 'ALL')) |
                       ((link_df['res1'] == 'ALL') & (link_df['res2'] == resn_j))]
    if rule.empty:
        print(f"No rule found for {resn_i}-{resn_j}")
        continue

    rule = rule.iloc[0]

    # The exact-match lookup accepts the rule in either residue order, but the
    # rule's atom1 / *_anchor_i columns describe its res1 and atom2 / *_anchor_j
    # its res2. When the row matched in reverse, read the res1-side atoms from
    # residue j and the res2-side atoms from residue i; otherwise the atom names
    # would be looked up in the wrong residues.
    rule_is_reversed = (
        resn_i != resn_j
        and rule['res1'] == resn_j
        and rule['res2'] == resn_i
    )
    if rule_is_reversed:
        res_a, res_b = resj_j, resi_i
    else:
        res_a, res_b = resi_i, resj_j

    # Both dihedral definitions share the bonded atoms and differ only in anchors.
    for dihedral_no in (1, 2):
        p0_name = rule.get(f'dihedral_{dihedral_no}_anchor_i')
        p3_name = rule.get(f'dihedral_{dihedral_no}_anchor_j')
        if not (pd.notna(p0_name) and pd.notna(p3_name)):
            # This dihedral is not defined for the rule.
            continue
        p1_name, p2_name = rule['atom1'], rule['atom2']

        p0, p1 = get_coords(res_a, p0_name), get_coords(res_a, p1_name)
        p2, p3 = get_coords(res_b, p2_name), get_coords(res_b, p3_name)

        print(f"Dihedral {dihedral_no} points: {p0_name}({res_a+1}), {p1_name}({res_a+1}), {p2_name}({res_b+1}), {p3_name}({res_b+1})")
        # A dihedral is ill-defined when either consecutive atom triple is collinear.
        is_col, msg = are_collinear(p0, p1, p2)
        if is_col:
            print(f"  COLLINEARITY DETECTED in ({p0_name}, {p1_name}, {p2_name}): {msg}")
        is_col, msg = are_collinear(p1, p2, p3)
        if is_col:
            print(f"  COLLINEARITY DETECTED in ({p1_name}, {p2_name}, {p3_name}): {msg}")



Analyzing /home/fit/lulei/WORK/xjt/Protein_design/BondFlow/BondFlow/tests/output_new.pdb for collinear atoms...
Please run the cell that defines `batch_data` and calls `model_forward`.


In [2]:
# Build a LinkInfo object from the link.csv rule file.
# NOTE(review): `LinkInfo` is not imported in any cell visible here, so this cell
# relies on hidden kernel state — add the import before running on a fresh kernel.
link = LinkInfo("/home/fit/lulei/WORK/xjt/Protein_design/BondFlow/BondFlow/config/link.csv")

In [3]:
# Display the instance attributes of `link` as a dict for inspection.
vars(link)

{'bond_spec': {(3, 15): [{'atom1': 'CG', 'atom2': 'OG', 'dist': 1.36},
   {'atom1': 'C', 'atom2': 'N', 'dist': 1.33},
   {'atom1': 'N', 'atom2': 'C', 'dist': 1.33},
   {'atom1': 'CG', 'atom2': 'N', 'dist': 1.33}],
  (15, 3): [{'atom1': 'OG', 'atom2': 'CG', 'dist': 1.36},
   {'atom1': 'N', 'atom2': 'C', 'dist': 1.33},
   {'atom1': 'C', 'atom2': 'N', 'dist': 1.33},
   {'atom1': 'N', 'atom2': 'CG', 'dist': 1.33}],
  (3, 16): [{'atom1': 'CG', 'atom2': 'OG1', 'dist': 1.36},
   {'atom1': 'C', 'atom2': 'N', 'dist': 1.33},
   {'atom1': 'N', 'atom2': 'C', 'dist': 1.33},
   {'atom1': 'CG', 'atom2': 'N', 'dist': 1.33}],
  (16, 3): [{'atom1': 'OG1', 'atom2': 'CG', 'dist': 1.36},
   {'atom1': 'N', 'atom2': 'C', 'dist': 1.33},
   {'atom1': 'C', 'atom2': 'N', 'dist': 1.33},
   {'atom1': 'N', 'atom2': 'CG', 'dist': 1.33}],
  (3, 18): [{'atom1': 'CG', 'atom2': 'OH', 'dist': 1.36},
   {'atom1': 'C', 'atom2': 'N', 'dist': 1.33},
   {'atom1': 'N', 'atom2': 'C', 'dist': 1.33},
   {'atom1': 'CG', 'atom2': '

In [None]:
# Display what `_get_bond_info` returns for the same link.csv file.
# NOTE(review): `_get_bond_info` is not imported in any cell visible here, so this
# cell relies on hidden kernel state — add the import before running on a fresh kernel.
_get_bond_info("/home/fit/lulei/WORK/xjt/Protein_design/BondFlow/BondFlow/config/link.csv")

({(3, 15): (' CG ', ' OG ', 1.36),
  (15, 3): (' OG ', ' CG ', 1.36),
  (3, 16): (' CG ', ' OG1', 1.36),
  (16, 3): (' OG1', ' CG ', 1.36),
  (3, 18): (' CG ', ' OH ', 1.36),
  (18, 3): (' OH ', ' CG ', 1.36),
  (6, 15): (' CD ', ' OG ', 1.36),
  (15, 6): (' OG ', ' CD ', 1.36),
  (6, 16): (' CD ', ' OG1', 1.36),
  (16, 6): (' OG1', ' CD ', 1.36),
  (6, 18): (' CD ', ' OH ', 1.36),
  (18, 6): (' OH ', ' CD ', 1.36),
  (3, 11): (' CG ', ' NZ ', 1.33),
  (11, 3): (' NZ ', ' CG ', 1.33),
  (6, 11): (' CD ', ' NZ ', 1.33),
  (11, 6): (' NZ ', ' CD ', 1.33),
  (2, 11): (' CG ', ' NZ ', 1.33),
  (11, 2): (' NZ ', ' CG ', 1.33),
  (5, 11): (' CD ', ' NZ ', 1.33),
  (11, 5): (' NZ ', ' CD ', 1.33),
  (4, 4): (' SG ', ' SG ', 2.05),
  (0, 0): (' C  ', ' N  ', 1.33),
  (0, 1): (' C  ', ' N  ', 1.33),
  (1, 0): (' N  ', ' C  ', 1.33),
  (0, 2): (' C  ', ' N  ', 1.33),
  (2, 0): (' N  ', ' C  ', 1.33),
  (0, 3): (' C  ', ' N  ', 1.33),
  (3, 0): (' N  ', ' C  ', 1.33),
  (0, 4): (' C  ', ' N  ', 1