In [18]:
import os
from pathlib import Path

from typing import Literal
from tempfile import gettempdir, NamedTemporaryFile

import datamol as dm
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdDepictor

from scrubber import Scrub
from prody import parsePDB, writePDB, calcCenter

In [19]:
def cdpk_fixer(input_sdf: Path, mode: Literal["Dock", "Minimize"]) -> Path:
    """
    Fix ligand structures using CDPK utilities.

    The input is passed to ``CDPK_Runner.prepare_ligands`` with
    standardization and protonation enabled. 3D generation is requested only
    in "Dock" mode. The result is written to a freshly created temporary
    ``.sdf`` file which is NOT removed automatically; the caller owns it.

    Args:
        input_sdf: Path to the input SDF file
        mode: Mode of operation, either "Dock" or "Minimize"

    Returns:
        Path to the fixed SDF file

    Raises:
        ValueError: If mode is not "Dock" or "Minimize"
        FileNotFoundError: If input SDF file doesn't exist
        RuntimeError: If creating the temporary file or preparing the ligands
            fails; the temporary output file is removed before re-raising
    """
    if mode not in ("Dock", "Minimize"):
        raise ValueError("Mode must be either 'Dock' or 'Minimize'")

    if not input_sdf.exists():
        raise FileNotFoundError(f"Input SDF file not found: {input_sdf}")

    # Bound before the try block so the cleanup handler can always inspect it,
    # even when creating the temporary file is the step that failed.
    fixed_sdf_name = None
    try:
        # Only the reserved path is needed: close the handle immediately so no
        # file descriptor leaks and the runner writes to a file nobody holds open.
        with NamedTemporaryFile(suffix=".sdf", delete=False) as tmp_handle:
            fixed_sdf_name = tmp_handle.name

        # The two modes differ only in whether 3D coordinates are generated.
        # NOTE(review): CDPK_Runner and logger are not imported in the visible
        # import cell; presumably they are defined earlier in the notebook.
        cdpk_runner = CDPK_Runner(
            standardize=True,
            protonate=True,
            gen3d=(mode == "Dock"),
        )
        cdpk_runner.prepare_ligands(input_sdf, fixed_sdf_name)

        logger.info(f"CDPK fixed ligands saved to: {fixed_sdf_name}")
        return Path(fixed_sdf_name)

    except Exception as e:
        # Do not leave a half-written temporary file behind on failure.
        if fixed_sdf_name is not None and os.path.exists(fixed_sdf_name):
            os.unlink(fixed_sdf_name)
        raise RuntimeError(f"Error in CDPK fixer: {e}") from e

In [20]:
def meeko_fixer(input_sdf: Path, mode: Literal["Dock", "Minimize"]) -> Path:
    """
    Fix ligand structures using Meeko/Scrub for protonation.

    Reads the first molecule of the input SDF, runs it through ``Scrub`` with
    a pH window of 7.2-7.4, and writes the first state Scrub yields to a
    freshly created temporary ``.sdf`` file. That file is NOT removed
    automatically; the caller owns it.

    Args:
        input_sdf: Path to the input SDF file
        mode: Mode of operation, either "Dock" or "Minimize" (validated, but
            otherwise unused; parameter maintained for compatibility)

    Returns:
        Path to the fixed SDF file

    Raises:
        ValueError: If mode is not "Dock" or "Minimize"
        FileNotFoundError: If input SDF file doesn't exist
        RuntimeError: If the molecule cannot be read, Scrub yields no state,
            or any other step fails; the temporary output file is removed
            before re-raising
    """
    if mode not in ("Dock", "Minimize"):
        raise ValueError("Mode must be either 'Dock' or 'Minimize'")

    if not input_sdf.exists():
        raise FileNotFoundError(f"Input SDF file not found: {input_sdf}")

    # Bound before the try block so the cleanup handler can always inspect it,
    # even when creating the temporary file is the step that failed.
    fixed_sdf_name = None
    try:
        # Only the reserved path is needed: close the handle immediately so no
        # file descriptor leaks and the writer targets a file nobody holds open.
        with NamedTemporaryFile(suffix=".sdf", delete=False) as tmp_handle:
            fixed_sdf_name = tmp_handle.name

        # Protonation pH window handed to Scrub.
        scrub = Scrub(
            ph_low=7.2,
            ph_high=7.4,
        )

        # Only the first record of the SDF is processed.
        supplier = Chem.SDMolSupplier(str(input_sdf))
        if len(supplier) == 0:
            raise ValueError(f"No molecules found in {input_sdf}")

        mol = supplier[0]
        if mol is None:
            raise ValueError(f"Could not read molecule from {input_sdf}")

        mols_states = list(scrub(mol))
        if not mols_states:
            raise ValueError(f"Scrub produced no states for {input_sdf}")

        # NOTE(review): the first yielded state is treated as the best one;
        # confirm against Scrub's ordering guarantees.
        best_mol_state = mols_states[0]
        dm.to_sdf(best_mol_state, fixed_sdf_name)

        return Path(fixed_sdf_name)

    except Exception as e:
        # Do not leave a half-written temporary file behind on failure.
        if fixed_sdf_name is not None and os.path.exists(fixed_sdf_name):
            os.unlink(fixed_sdf_name)
        raise RuntimeError(f"Error in Meeko fixer: {e}") from e

In [21]:
# SMILES of the probe ligand used to exercise the fixers; kept under its own
# name so `probe` always refers to the molecule object, never to the string.
probe_smiles = "C1C(C(O)=O)=CN2C=CN=C2C=1C(C)C(CC)C"
probe = dm.to_mol(probe_smiles)
assert probe is not None, f"Could not parse probe SMILES: {probe_smiles}"

# Reserve a temporary .sdf path; delete=False keeps the file on disk after the
# handle is closed. The SDF is written only once the handle is closed, because
# re-opening a NamedTemporaryFile by name while it is still open is
# platform-dependent.
with NamedTemporaryFile(suffix=".sdf", delete=False) as tmp_sdf:
    pass
dm.to_sdf(probe, tmp_sdf.name)

In [22]:
# Run the Meeko fixer on the probe SDF written above; the cell output is the
# path of the temporary file returned by the fixer.
meeko_fixer(Path(tmp_sdf.name), "Dock")

PosixPath('/tmp/tmpjp0ftbia.sdf')