In [92]:
import os
from pathlib import Path

import datamol as dm
from rdkit import Chem

from openeye import oechem
from openeye import oequacpac

In [93]:
# Relative path (resolved against the notebook's working directory) to the
# example SDF file named Ligands_To_Dock.sdf.
ligands_path = Path("../examples/Ligands_To_Dock.sdf")

In [116]:
def oe_protonate(smile: str, kekule: bool = False) -> str:
    """
    Protonate a SMILES string using the OpenEye toolkit.

    The input is parsed into an OpenEye molecule, reduced to its largest
    component (salt stripping), given explicit hydrogens, and passed through
    ``oequacpac.OEGetReasonableProtomer``. The result is written back out as a
    canonical, isomeric SMILES string, optionally in Kekule form.

    Parameters:
    -----------
    smile : str
        Input SMILES string to be protonated
    kekule : bool, optional
        If True, write the result in Kekule form instead of aromatic form
        (default: False)

    Returns:
    --------
    str
        Protonated SMILES string

    Raises:
    -------
    ValueError
        If OpenEye cannot parse the input SMILES string

    Example:
    --------
    >>> smiles = "C1=CC=C(C=C1)C(=O)O"
    >>> protonated = oe_protonate(smiles)
    """
    from openeye import oechem
    from openeye import oequacpac

    # Convert SMILES to an OpenEye molecule; OESmilesToMol reports failure
    # through its return value, so check it instead of continuing with a
    # partially built molecule.
    oemol = oechem.OEMol()
    if not oechem.OESmilesToMol(oemol, smile):
        raise ValueError(f"Failed to parse SMILES: {smile}")

    # Strip salts / counter-ions by keeping only the largest component
    oechem.OEDeleteEverythingExceptTheFirstLargestComponent(oemol)

    # Perceive rings and aromaticity (OpenEye aromaticity model)
    oechem.OEFindRingAtomsAndBonds(oemol)
    oechem.OEAssignAromaticFlags(oemol, oechem.OEAroModel_OpenEye)

    # Explicit hydrogens are added before selecting the protomer
    oechem.OEAddExplicitHydrogens(oemol)
    oequacpac.OEGetReasonableProtomer(oemol)

    if kekule:
        # Mark aromatic bonds with integer type 5, canonically order atoms and
        # bonds, then drop the aromatic flags and kekulize, so the SMILES
        # writer emits alternating single/double bonds.
        for bond in oemol.GetBonds(oechem.OEIsAromaticBond()):
            bond.SetIntType(5)
        oechem.OECanonicalOrderAtoms(oemol)
        oechem.OECanonicalOrderBonds(oemol)
        oechem.OEClearAromaticFlags(oemol)
        oechem.OEKekulize(oemol)

    # Canonical + isomeric output flags
    smiflag = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC
    return oechem.OECreateSmiString(oemol, smiflag)

In [121]:
def cdp_protonate(smiles: str) -> str:
    """
    Takes a SMILES string and returns a protonated SMILES at physiological pH.

    The molecule is parsed with CDPKit (``CDPL.Chem``), the properties needed
    by the protonation algorithm are perceived, and
    ``ProtonationStateStandardizer`` is applied with the
    ``PHYSIOLOGICAL_CONDITION_STATE`` flavor. The result is returned as a
    canonical SMILES string.

    Parameters:
    -----------
    smiles : str
        Input SMILES string

    Returns:
    --------
    str
        Canonical protonated SMILES string at physiological pH

    Raises:
    -------
    ValueError
        If the parser returns no molecule for the given SMILES
    Exception
        Any error raised by CDPKit during parsing or protonation is propagated
    """
    # Aliased as CDPLChem so it does not shadow the notebook-level rdkit `Chem`.
    import CDPL.Chem as CDPLChem

    # Parse the SMILES string to create a molecule object
    mol = CDPLChem.parseSMILES(smiles)

    if mol is None:
        raise ValueError(f"Failed to parse SMILES: {smiles}")

    # Work on a copy so the parsed molecule itself is left untouched
    out_mol = CDPLChem.BasicMolecule(mol)

    # Prepare the molecule - calculate required properties.
    # These are necessary for the protonation algorithm to work correctly.
    # The second argument is False on every call, as in the original code.
    # NOTE(review): presumably False means "do not overwrite existing values" - confirm
    CDPLChem.calcImplicitHydrogenCounts(out_mol, False)
    CDPLChem.perceiveHybridizationStates(out_mol, False)
    CDPLChem.perceiveSSSR(out_mol, False)  # Smallest Set of Smallest Rings
    CDPLChem.setRingFlags(out_mol, False)
    CDPLChem.setAromaticityFlags(out_mol, False)
    CDPLChem.perceiveComponents(out_mol, False)

    # Create and apply the protonation state standardizer
    prot_state_gen = CDPLChem.ProtonationStateStandardizer()
    prot_state_gen.standardize(
        out_mol,
        CDPLChem.ProtonationStateStandardizer.PHYSIOLOGICAL_CONDITION_STATE,
    )

    # Update component perception as the structure might have changed
    CDPLChem.perceiveComponents(out_mol, True)

    # Generate and return the SMILES string.
    # Positional arguments are (molgraph, canonical, ord_h_deplete); the
    # second one must be True to actually get canonical output.
    return CDPLChem.generateSMILES(out_mol, True, True)

In [122]:
# Example input molecule as a neutral SMILES string; it contains a carboxylic
# acid group (C(O)=O) and a primary amine (CN), i.e. sites whose charge state
# a protonation routine can change.
smiles = "C1C=C2C(=NS(=O)(=O)C2=CC=1CN)NC(C)C1C=C(C(O)=O)C=CC=1"

In [123]:
# Protonate the example SMILES with the OpenEye-based helper (kekule is left
# at its default, False) and print the resulting SMILES string.
oe_smiles_prot = oe_protonate(smiles)
print(oe_smiles_prot)

CC(c1cccc(c1)C(=O)[O-])N=C2c3ccc(cc3S(=O)(=O)N2)C[NH3+]


In [124]:
# Protonate the same example SMILES with the CDPKit-based helper and print the
# resulting SMILES string for comparison with the OpenEye result.
cdp_smiles_prot = cdp_protonate(smiles)
print(cdp_smiles_prot)

c1cc2C(=NS(=O)(=O)c2cc1C[NH3+])NC(C)c1cc(C([O-])=O)ccc1
