diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 896cd215..c5a12b9d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,14 +17,12 @@ jobs: strategy: fail-fast: false matrix: - os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] + os: ['ubuntu-latest', 'macos-13', 'windows-latest'] python-version: ['3.9', '3.10'] steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: ${{ runner.os == 'macOS' && 'latest' || null }} - auto-update-conda: true channels: anaconda,ibmdecisionoptimization auto-activate-base: false python-version: ${{ matrix.python-version }} diff --git a/README.md b/README.md index 84e32d32..7036acaa 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ qFit is a collection of programs for modeling multi-conformer protein structures Electron density maps obtained from high-resolution X-ray diffraction data are a spatial and temporal average of all conformations within the crystal. qFit evaluates an extremely large number of combinations of sidechain conformers, backbone fragments and small-molecule ligands to locally explain the electron density. + ## Installation 1) Download qFit @@ -96,6 +97,25 @@ After *multiconformer_model2.pdb* has been generated, refine this model using: More advanced features of qFit (modeling single residue, more advanced options, and further explainations) are explained in [TUTORIAL](example/TUTORIAL.md). +To model alternate conformations of ligands using qFit, execute the following command: + +`qfit_ligand [COMPOSITE_OMIT_MAP_FILE] [PDB_FILE] -l [LABELS] [SELECTION] -sm [SMILES]` + +This command facilitates the incorporation of alternate ligand conformations into your protein model. The results are outputted to two files: *multiconformer_ligand_bound_with_protein.pdb*, which is the multiconformer model of the protein-ligand complex, and *multiconformer_ligand_only.pdb*, which is the multiconformer model of the ligand alone. + +After running qFit-ligand, it is recommended to perform a final refinement using the script found in [scripts](scripts/post). Run this in the same directory as your models. + +If you wish to specify the number of ligand conformers for qFit to sample, use the flag `-nc [NUM_CONFS]`. The default number is set to 10,000. + +Using the example 4MS6: + +`qfit_ligand example/qfit_ligand_example/4ms6_composit_map.mtz example/qfit_ligand_example/4ms6.pdb -l 2FOFCWT,PH2FOFCWT A,702 -sm 'C1C[C@H](NC1)C(=O)CCC(=O)N2CCC[C@H]2C(=O)O' -nc 10000` + +To refine *multiconformer_ligand_bound_with_protein.pdb*, use the following command + +`qfit_final_refine_ligand.sh 4ms6.mtz` + + ## Citations If you use this software, please cite: - [Wankowicz SA, Ravikumar A, Sharma S, Riley BT, Raju A, Hogan DW, van den Bedem H, Keedy DA, & Fraser JS. Uncovering Protein Ensembles: Automated Multiconformer Model Building for X-ray Crystallography and Cryo-EM. bioRxiv. (2023).](https://www.biorxiv.org/content/10.1101/2023.06.28.546963v2.abstract) diff --git a/environment.yml b/environment.yml index 98fe0ed1..8454e5c2 100644 --- a/environment.yml +++ b/environment.yml @@ -13,3 +13,4 @@ dependencies: - pyparsing>=2.2.0 - cvxopt - tqdm + - rdkit diff --git a/example/README.md b/example/README.md index 4369dda2..85fa0f2d 100644 --- a/example/README.md +++ b/example/README.md @@ -86,11 +86,19 @@ Using the example 3K0N, spawning 30 parallel processes: To model alternate conformers of ligands, the command line tool *qfit_ligand* should be used: -`qfit_ligand [COMPOSITE_OMIT_MAP_FILE] -l [LABEL] [PDB_FILE] [CHAIN,LIGAND]` +`qfit_ligand [COMPOSITE_OMIT_MAP_FILE] -l [LABEL] [PDB_FILE] [CHAIN,LIGAND] -sm [SMILES]` Where *LIGAND* corresponds to the numeric identifier of the ligand on the PDB -(aka res. number). The main output file is named *multiconformer_model_[CHAIN]_[LIGAND].pdb* +(aka res. number). The main output file is named *multiconformer_ligand_bound_with_protein.pdb* + + +If you wish to specify the number of ligand conformers for qFit to sample, use the flag `-nc [NUM_CONFS]`. The default number is set to 10,000. Using the example 4MS6: -`qfit_ligand qfit_ligand_example/4ms6_composite_map.mtz -l 2FOFCWT,PH2FOFCWT qfit_ligand_example/4ms6.pdb A,702` +`qfit_ligand qfit_ligand_example/4ms6_composite_map.mtz -l 2FOFCWT,PH2FOFCWT qfit_ligand_example/4ms6.pdb A,702 -sm 'C1C[C@H](NC1)C(=O)CCC(=O)N2CCC[C@H]2C(=O)O' -nc 10000` + + +To refine *multiconformer_ligand_bound_with_protein.pdb*, use the following command + +`qfit_final_refine_ligand.sh 4ms6.mtz` diff --git a/requirements.txt b/requirements.txt index c940bc59..0ed9928a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ cvxpy pyscipopt scipy scikit-learn +rdkit diff --git a/scripts/post/qfit_final_refine_ligand.sh b/scripts/post/qfit_final_refine_ligand.sh new file mode 100644 index 00000000..9e5b5a89 --- /dev/null +++ b/scripts/post/qfit_final_refine_ligand.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# This script works with Phenix version 1.20. + +qfit_usage() { + echo >&2 "Usage:"; + echo >&2 " $0 mapfile.mtz [multiconformer_ligand_bound_with_protein.pdb] [multiconformer_ligand_only.pdb]"; + echo >&2 ""; + echo >&2 "mapfile.mtz, multiconformer_ligand_bound_with_protein.pdb, and multiconformer_ligand_only.pdb MUST exist in this directory."; + echo >&2 "Outputs will be written to mapfile_qFit.{pdb|mtz|log}."; + exit 1; +} + +#___________________________SOURCE__________________________________ +# Check that Phenix is loaded +if [ -z "${PHENIX}" ]; then + echo >&2 "I require PHENIX but it's not loaded."; + echo >&2 "Please \`source phenix_env.sh\` from where it is installed."; + exit 1; +else + export PHENIX_OVERWRITE_ALL=true; +fi + +# Check that qFit is loaded. +command -v remove_duplicates >/dev/null 2>&1 || { + echo >&2 "I require qFit (remove_duplicates) but it's not loaded."; + echo >&2 "Please activate the environment where qFit is installed."; + echo >&2 " e.g. conda activate qfit" + exit 1; +} + +# Assert required files exist +mapfile=$1 +multiconf=${2:-multiconformer_ligand_bound_with_protein.pdb} +multiconf_lig=${2:-multiconformer_ligand_only.pdb} +echo "mapfile : ${mapfile} $([[ -f ${mapfile} ]] || echo '[NOT FOUND]')"; +echo "qfit unrefined model : ${multiconf} $([[ -f ${multiconf} ]] || echo '[NOT FOUND]')"; +echo "qfit unrefined ligand model : ${multiconf_lig} $([[ -f ${multiconf_lig} ]] || echo '[NOT FOUND]')"; + + +echo ""; +if [[ ! -f "${mapfile}" ]] || [[ ! -f "${multiconf}" ]]; then + qfit_usage; +fi +pdb_name="${mapfile%.mtz}" +echo >&2 "PDB name is "${pdb_name}". "; +echo "${pdb_name}" + + +#__________________________________DETERMINE RESOLUTION AND (AN)ISOTROPIC REFINEMENT__________________________________ +mtzmetadata=`phenix.mtz.dump "${pdb_name}.mtz"` +resrange=`grep "Resolution range:" <<< "${mtzmetadata}"` + +echo "${resrange}" + +res=`echo "${resrange}" | cut -d " " -f 4 | cut -c 1-5` +res1000=`echo $res | awk '{tot = $1*1000}{print tot }'` + +if (( $res1000 < 1550 )); then + adp='adp.individual.anisotropic="not (water or element H)"' +else + adp='adp.individual.isotropic=all' +fi + +#__________________________________DETERMINE FOBS v IOBS v FP__________________________________ +# List of Fo types we will check for +obstypes=("FP" "FOBS" "F-obs" "I" "IOBS" "I-obs" "F(+)" "I(+)" "FSIM") + +# Get amplitude fields +ampfields=`grep -E "amplitude|intensity|F\(\+\)|I\(\+\)" <<< "${mtzmetadata}"` +ampfields=`echo "${ampfields}" | awk '{$1=$1};1' | cut -d " " -f 1` + +# Clear xray_data_labels variable +xray_data_labels="" + +# Is amplitude an Fo? +for field in ${ampfields}; do + # Check field in obstypes + if [[ " ${obstypes[*]} " =~ " ${field} " ]]; then + # Check SIGFo is in the mtz too! + if grep -F -q -w "SIG$field" <<< "${mtzmetadata}"; then + xray_data_labels="${field},SIG${field}"; + break + fi + fi +done +if [ -z "${xray_data_labels}" ]; then + echo >&2 "Could not determine Fo field name with corresponding SIGFo in .mtz."; + echo >&2 "Was not among "${obstypes[*]}". Please check .mtz file\!"; + exit 1; +else + echo "data labels: ${xray_data_labels}" + # Start writing refinement parameters into a parameter file + echo "refinement.input.xray_data.labels=$xray_data_labels" > ${pdb_name}_refine.params +fi + +#_____________________________DETERMINE R FREE FLAGS______________________________ +gen_Rfree=True +rfreetypes="FREE R-free-flags" +for field in ${rfreetypes}; do + if grep -F -q -w $field <<< "${mtzmetadata}"; then + gen_Rfree=False; + echo "Rfree column: ${field}"; + echo "refinement.input.xray_data.r_free_flags.label=${field}" >> ${pdb_name}_refine.params + break + fi +done +echo "refinement.input.xray_data.r_free_flags.generate=${gen_Rfree}" >> ${pdb_name}_refine.params + +#__________________________________REMOVE DUPLICATE HET ATOMS__________________________________ +remove_duplicates "${multiconf}" + +#__________________________________NORMALIZE OCCUPANCIES________________________________________ +echo "Normalizing occupancies, before refinement " + +redistribute_cull_low_occupancies -occ 0.09 "${multiconf}.fixed" +mv -v "${multiconf}.f_norm.pdb" "${multiconf}.fixed" + + +#________________________________REMOVE TRAILING HYDROGENS___________________________________ +phenix.pdbtools remove="element H" "${multiconf}.fixed" + +#__________________________________GET CIF FILE__________________________________ +echo "Getting the cif file with ready_set/elbow" +phenix.ready_set hydrogens=false \ + trust_residue_code_is_chemical_components_code=true \ + pdb_file_name="${multiconf}.f_modified.pdb" + +# If ready_set doesn't generate a ligand cif file, use elbow +if [ ! -f "${multiconf}.f_modified.ligands.cif" ]; then + phenix.elbow multiconformer_ligand_only.pdb --output ${multiconf}.f_modified.ligands +fi + +if [ ! -f "${multiconf}.f_modified.ligands.cif" ]; then + echo "Ligand CIF generation failed" +fi + +#__________________________________FINAL REFINEMENT__________________________________ + +# Write refinement parameters into parameters file +echo "refinement.refine.strategy=*individual_sites *individual_adp *occupancies" >> ${pdb_name}_refine.params +echo "refinement.output.prefix=${pdb_name}" >> ${pdb_name}_refine.params +echo "refinement.output.serial=2" >> ${pdb_name}_refine.params +echo "refinement.main.number_of_macro_cycles=5" >> ${pdb_name}_refine.params +echo "refinement.main.nqh_flips=True" >> ${pdb_name}_refine.params +echo "refinement.refine.${adp}" >> ${pdb_name}_refine.params +echo "refinement.output.write_maps=False" >> ${pdb_name}_refine.params +echo "refinement.hydrogens.refine=riding" >> ${pdb_name}_refine.params +echo "refinement.main.ordered_solvent=True" >> ${pdb_name}_refine.params +echo "refinement.target_weights.optimize_xyz_weight=true" >> ${pdb_name}_refine.params +echo "refinement.target_weights.optimize_adp_weight=true" >> ${pdb_name}_refine.params + +echo "refinement.input.monomers.file_name='${multiconf}.f_modified.ligands.cif'" >> ${pdb_name}_refine.params + +phenix.refine "${multiconf}.f_modified.pdb" \ + "${pdb_name}.mtz" \ + "${pdb_name}_refine.params" \ + --overwrite + +#__________________________________NAME FINAL FILES__________________________________ +cp -v "${pdb_name}_002.pdb" "${pdb_name}_qFit_ligand.pdb" +cp -v "${pdb_name}_002.mtz" "${pdb_name}_qFit_ligand.mtz" +cp -v "${pdb_name}_002.log" "${pdb_name}_qFit_ligand.log" + +#__________________________COMMENTARY FOR USER_______________________________________ +if [ -f "${pdb_name}_002.pdb" ]; then + echo "" + echo "[qfit_final_refine_ligand] Refinement is complete." + echo " Please be sure to INSPECT your refined structure, especially all new altconfs." + echo " The output can be found at ${pdb_name}_qFit_ligand.(pdb|mtz|log) ." +else + echo "Refinement did not complete." + echo "Please check for failure reports by examining log files." +fi diff --git a/setup.py b/setup.py index 4db5fadf..3e72494d 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ def main(): "pandas>=1.2", "pyparsing>=2.2.0", "tqdm>=4.0.0", + "rdkit", ] setup( @@ -70,6 +71,7 @@ def main(): scripts=[ "scripts/post/qfit_final_refine_xray.sh", "scripts/post/qfit_final_refine_cryoem.sh", + "scripts/post/qfit_final_refine_ligand.sh", "scripts/post/find_largest_ligand.py", "scripts/post/find_altlocs_near_ligand.py", "scripts/post/get_lig_chain_res.py", diff --git a/src/qfit/qfit.py b/src/qfit/qfit.py index f2dbb3a1..ac885c0d 100644 --- a/src/qfit/qfit.py +++ b/src/qfit/qfit.py @@ -26,6 +26,13 @@ from .relabel import RelabellerOptions, Relabeller from .structure.rotamers import ROTAMERS +from rdkit import Chem +from rdkit.Chem import AllChem +import random +from scipy.spatial.transform import Rotation as R +import math + + logger = logging.getLogger(__name__) @@ -112,12 +119,15 @@ def __init__(self): self.exclude_atoms = None ### From QFitLigandOptions - # Ligand sampling - self.local_search = True - self.sample_ligand = True # From QFitCovalentLigandOptions - self.sample_ligand_stepsize = 10 # Was 8 in QFitCovalentLigandOptions self.selection = None self.cif_file = None + # RDKit options + self.numConf = None + self.smiles = None + self.ligand_bic = None + self.rot_range = None + self.trans_range = None + self.rotation_step = None ### From QFitSegmentOptions self.fragment_length = None @@ -343,6 +353,8 @@ def _solve_miqp( ) # hyperparameter 1.5 determined to be the best cut off between too many conformations and improving Rfree if segment is not None: k = nconfs # for segment, we only care about the number of conformations come out of MIQP. Considering atoms penalizes this too much + if self.options.ligand_bic: + k = nconfs * natoms BIC = n * np.log(rss / n) + k * np.log(n) solution = MIQPSolutionStats( threshold=threshold, @@ -489,6 +501,72 @@ def write_maps(self): self._transformer.xmap.tofile(fname) self._transformer.reset(full=True) + def identify_core_and_sidechain(self, mol): + """ + Identify branched sections of ligand + """ + # Get the ring info of the molecule + ri = mol.GetRingInfo() + ring_atoms = ri.AtomRings() + + if len(ring_atoms) == 0: # No rings in the molecule + # Use the largest connected component as the core + components = Chem.rdmolops.GetMolFrags(mol, asMols=False) + core_atoms = max(components, key=len) + else: + # Use the largest ring system as the core + core_atoms = max(ring_atoms, key=len) + + # Identify terminal atoms, atoms bound to no more than one atom & not in the core + terminal_atoms = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetDegree() == 1 and atom.GetIdx() not in core_atoms] + + all_side_chain_atoms = [] + # loop through terminal atoms + for t_atom in terminal_atoms: + side_chain_atoms = [] + atom = mol.GetAtomWithIdx(t_atom) + while atom.GetIdx() not in core_atoms and atom.GetIdx() not in side_chain_atoms: + # Ensure the atom is not part of a ring + if atom.IsInRing(): + break + side_chain_atoms.append(atom.GetIdx()) + neighbors = [x.GetIdx() for x in atom.GetNeighbors() if x.GetIdx() not in core_atoms and x.GetIdx() not in side_chain_atoms] + if not neighbors: # No more atoms to explore + break + atom = mol.GetAtomWithIdx(neighbors[0]) # Move to the next atom in the chain + + # Check if the side chain is at least 4 atoms long + if len(side_chain_atoms) >= 4: + all_side_chain_atoms.extend(side_chain_atoms) + length_side_chain = len(all_side_chain_atoms) + return all_side_chain_atoms, length_side_chain + + def apply_translations(self, conformation, translation_range): + translation_range = int(translation_range) + translated_conformations = [] + # translate conformers in x, y, z directions based on input range + for dx in np.linspace(-translation_range, translation_range, num=3): + for dy in np.linspace(-translation_range, translation_range, num=3): + for dz in np.linspace(-translation_range, translation_range, num=3): + translation_vector = np.array([dx, dy, dz]) + translated_conformation = conformation + translation_vector + translated_conformations.append(translated_conformation) + return translated_conformations + + def apply_rotations(self, conformation, rotation_range, step): + rotation_range = int(rotation_range) + step = int(step) + rotated_conformations = [conformation] # Include the original conformation + center = conformation.mean(axis=0) # Compute the center of the conformation + for angle in range(-rotation_range, rotation_range + step, step): + for axis in ['x', 'y', 'z']: + r = R.from_euler(axis, np.radians(angle), degrees=False) + rotation_matrix = r.as_matrix() + # Apply rotation around the center + rotated_conformation = np.dot(conformation - center, rotation_matrix.T) + center + rotated_conformations.append(rotated_conformation) + return rotated_conformations + class QFitRotamericResidue(_BaseQFit): def __init__(self, residue, structure, xmap, options): @@ -1496,18 +1574,12 @@ def __init__(self, ligand, receptor, xmap, options): # Initialize using the base qfit class super().__init__(ligand, receptor, xmap, options) - # These lists will be used to combine coor_sets output for - # each of the clusters that we sample: - self._all_coor_set = [] - self._all_bs = [] - # Populate useful attributes: self.ligand = ligand self.receptor = receptor self.xmap = xmap self.options = options csf = self.options.clash_scaling_factor - self._trans_box = [(-0.2, 0.21, 0.1)] * 3 self._bs = [self.ligand.b] # External clash detection: @@ -1515,17 +1587,6 @@ def __init__(self, ligand, receptor, xmap, options): ligand, receptor, scaling_factor=self.options.clash_scaling_factor ) - # Determine which roots to start building from - self._rigid_clusters = ligand.rigid_clusters() - self.roots = None - if self.roots is None: - self._clusters_to_sample = [] - for cluster in self._rigid_clusters: - nhydrogen = (self.ligand.e[cluster] == "H").sum() - if len(cluster) - nhydrogen > 1: - self._clusters_to_sample.append(cluster) - logger.debug(f"Number of clusters to sample: {len(self._clusters_to_sample)}") - # Initialize the transformer if options.subtract: self._subtract_transformer(self.ligand, self.receptor) @@ -1533,303 +1594,683 @@ def __init__(self, ligand, receptor, xmap, options): self._starting_coor_set = [ligand.coor.copy()] self._starting_bs = [ligand.b.copy()] + # Read in ligand pdb file + self.ligand_pdb_file = "ligand.pdb" + + def run(self): - for self._cluster_index, self._cluster in enumerate(self._clusters_to_sample): - self._coor_set = list(self._starting_coor_set) - if self.options.local_search: - logger.info("Starting local search") - self._local_search() - self._coor_set.append(self._starting_coor_set) - self.ligand._active[self.ligand._selection] = True - logger.info("Starting sample internal dofs") - self._sample_internal_dofs() - self._all_coor_set += self._coor_set - self._all_bs += self._bs - prefix_tmp = "run_" + str(self._cluster) - self._write_intermediate_conformers(prefix=prefix_tmp) - logger.info(f"Number of conformers: {len(self._coor_set)}") - logger.info(f"Number of final conformers: {len(self._all_coor_set)}") - - # Find consensus across roots: - self.conformer = self.ligand - self.ligand._q[self.ligand._selection] = 1.0 - self.ligand._active[self.ligand._selection] = True - self._coor_set = self._all_coor_set - self._bs = self._all_bs + ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) + # total number of conformers to generate + num_gen_conformers = self.options.numConf + + # check if ligand has long branch/side chain + logger.debug("Testing branching status of ligand") + branching_test = self.identify_core_and_sidechain(ligand) + branching_atoms = branching_test[0] + length_branching = branching_test[1] + + # run rdkit conformer generator + logger.info("Starting RDKit conformer generation") + + if not branching_atoms: + # if there are no branches, qFit will not run branching search or long chain search. Therefore, 3 methods of sampling remain + num_conf_for_method = round(num_gen_conformers / 3) + if branching_atoms: + if length_branching > 30: + logger.debug("Ligand has long branches, run long chain search") + num_conf_for_method = round(num_gen_conformers / 5) + elif length_branching <= 30: + num_conf_for_method = round(num_gen_conformers / 4) + + + self.num_conf_for_method = num_conf_for_method + self.random_unconstrained() + self.terminal_atom_const() + self.spherical_search() + if branching_atoms: + if length_branching > 30: + self.branching_search() + self.long_chain_search() + elif length_branching <= 30: + self.branching_search() + + logger.info(f"Number of generated conformers, before scoring: {len(self._coor_set)}") + if len(self._coor_set) < 1: logger.error("qFit-ligand failed to produce a valid conformer.") return + # QP score conformer occupancy logger.debug("Converting densities within run.") self._convert() logger.info("Solving QP within run.") self._solve_qp() + # self._write_intermediate_conformers(prefix="pre_qp") logger.debug("Updating conformers within run.") self._update_conformers() - if len(self._coor_set) < 1: - print( - f"{self.ligand.resn[0]}: QP {self._cluster_index}: {len(self._coor_set)} conformers" - ) - return + + # Only conformeres that pass QP scoring will be rotated and translated for additional sampling + self.rot_trans() # MIQP score conformer occupancy logger.info("Solving MIQP within run.") + self.sample_b() self._convert() - self._solve_miqp( - threshold=self.options.threshold, - cardinality=self.options.cardinality, - ) + if self.options.ligand_bic: + self._solve_miqp( + threshold=self.options.threshold, + cardinality=self.options.cardinality, + do_BIC_selection=True + ) + if not self.options.ligand_bic: + self._solve_miqp( + threshold=self.options.threshold, + cardinality=self.options.cardinality, + ) self._update_conformers() if self.options.write_intermediate_conformers: self._write_intermediate_conformers(prefix="miqp_solution") + # self._write_intermediate_conformers(prefix="miqp_solution") - def _local_search(self): - """Perform a local rigid body search on the cluster.""" - - # Set occupancies of rigid cluster and its direct neighboring atoms to - # 1 for clash detection and MIQP - selection = self.ligand._selection - self.ligand._active[selection] = True - center = self.ligand.coor[self._cluster].mean(axis=0) - new_coor_set = [] - new_bs = [] - for coor, b in zip(self._coor_set, self._bs): - self.ligand._coor[selection] = coor - self.ligand._b[selection] = b - rotator = GlobalRotator(self.ligand, center=center) - for rotmat in RotationSets.get_local_set(): - rotator(rotmat) - translator = Translator(self.ligand) - iterator = itertools.product( - *[np.arange(*trans) for trans in self._trans_box] - ) - for translation in iterator: - translator(translation) - new_coor = self.ligand.coor - if self.options.remove_conformers_below_cutoff: - values = self.xmap.interpolate(new_coor) - mask = self.ligand.e != "H" - if np.min(values[mask]) < self.options.density_cutoff: - continue - if self.options.external_clash: - if not self._cd() and not self.ligand.clashes(): - if new_coor_set: - delta = np.array(new_coor_set) - np.array(new_coor) - if ( - np.sqrt( - min(np.square((delta)).sum(axis=2).sum(axis=1)) - ) - >= self.options.rmsd_cutoff - ): - new_coor_set.append(new_coor) - new_bs.append(b) - else: - new_coor_set.append(new_coor) - new_bs.append(b) - elif not self.ligand.clashes(): - if new_coor_set: - delta = np.array(new_coor_set) - np.array(new_coor) - if ( - np.sqrt(min(np.square((delta)).sum(axis=2).sum(axis=1))) - >= self.options.rmsd_cutoff - ): - new_coor_set.append(new_coor) - new_bs.append(b) - else: - new_coor_set.append(new_coor) - new_bs.append(b) - self.ligand._active[self.ligand._selection] = False - selection = self.ligand._selection[self._cluster] - self.ligand._active[selection] = True - for atom in self._cluster: - atom_sel = self.ligand._selection[self.ligand.connectivity[atom]] - self.ligand._active[atom_sel] = True - self.conformer = self.ligand - self._coor_set = new_coor_set - self._bs = new_bs - if len(self._coor_set) < 1: - logger.warning( - f"{self.ligand.resn[0]}: " - f"Local search {self._cluster_index}: {len(self._coor_set)} conformers" - ) - return + logger.info(f"Number of final conformers: {len(self._coor_set)}") - # QP score conformer occupancy - logger.debug("Converting densities.") - self._convert() - self._solve_qp() - logger.debug("Updating conformers") - self._update_conformers() - if self.options.write_intermediate_conformers: - self._write_intermediate_conformers(prefix="localsearch_ligand_qp") - if len(self._coor_set) < 1: - logger.warning( - f"{self.ligand.resn[0]}: " - f"Local search QP {self._cluster_index}: {len(self._coor_set)} conformers" - ) - return - # MIQP score conformer occupancy - self._convert() - self._solve_miqp( - threshold=self.options.threshold, cardinality=self.options.cardinality - ) - self._update_conformers() - if self.options.write_intermediate_conformers: - self._write_intermediate_conformers(prefix="localsearch_ligand_miqp") + def random_unconstrained(self): + """ + Run RDKit with the minimum constraints -- only from the bounds matrix + """ + ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) - def _sample_internal_dofs(self): - opt = self.options - sampling_range = np.deg2rad( - np.arange(0, 360, self.options.sample_ligand_stepsize) - ) - bond_order = BondOrder(self.ligand, self._cluster[0]) - bonds = bond_order.order - depths = bond_order.depth - nbonds = len(bonds) + # RDKit is bad at finding the corect bond types from pdb files, but good at doing so from SMILES string. Use SMILES string as templete for corecting bond orders + ref_mol = Chem.MolFromSmiles(self.options.smiles) + + # Assign bond orders from the template + ligand = Chem.AllChem.AssignBondOrdersFromTemplate(ref_mol, ligand) + ligand = Chem.AddHs(ligand) + num_conformers = self.num_conf_for_method # Number of conformers you want to generate + + # Create a copy of the 'ligand' object to generate conformers off of. They will later be aligned to 'ligand' object + mol = Chem.Mol(ligand) + + logger.info(f"Generating {num_conformers} conformers with no constraints") + Chem.rdDistGeom.EmbedMultipleConfs(mol, numConfs=num_conformers, useBasicKnowledge=True, pruneRmsThresh=self.options.rmsd_cutoff) + + # Minimize the energy of each conformer to find most stable structure + logger.info("Minimizing energy of each conformer") + mp = AllChem.MMFFGetMoleculeProperties(mol) + for conf_id in mol.GetConformers(): + ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=conf_id.GetId()) + ff.Minimize() + + logger.info("Aligning molecules") + # Align the conformers in "mol" to "ligand" to ensure all structures are properly sitting in the binding site + ligand_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(ligand) + mol_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(mol) + + for conf_id in mol.GetConformers(): + o3a = Chem.rdMolAlign.GetCrippenO3A(mol, ligand, prbCrippenContribs=mol_crippen_contribs, refCrippenContribs=ligand_crippen_contribs, prbCid=conf_id.GetId()) + o3a.Align() + + mol = Chem.RemoveHs(mol) + ligand = Chem.RemoveHs(ligand) + + # Check for internal/external clashes + if mol.GetNumConformers() == 0: + logger.error(f"Unconstrained search generated no conformers. Moving onto next sampling function.") + if mol.GetNumConformers() != 0: + # Check for internal/external clashes + logger.info("Checking for clashes") + # Store the coordinates of each conformer into numpy array + new_conformer = mol.GetConformers() + new_coors = [] + for i, conformer in enumerate(new_conformer): + coords = conformer.GetPositions() + new_coors.append(coords) + + new_idx_set = [] + new_coor_set = [] + new_bs = [] + # loop through each rdkit generated conformer + for idx, conf in enumerate(new_coors): + b = self._bs + self.ligand.coor = conf + self.ligand.b = [b[0]] + if self.options.external_clash: + if not self._cd(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + elif not self.ligand.clashes(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + + # Save new conformers to self + merged_arr = np.concatenate((self._coor_set, new_coor_set), axis=0) + merged_bs = np.concatenate((self._bs, new_bs), axis=0) + + self._coor_set = merged_arr + self._bs = merged_bs + + logger.info(f"Random search generated: {len(self._coor_set)} plausible conformers") + + if len(self._coor_set) < 1: + logger.warning( + f"RDKit conformers not sufficiently diverse. Generated: {len(self._coor_set)} conformers" + ) + return + + return - starting_bond_index = 0 + def terminal_atom_const(self): + """ + Identify the terminal atoms of the ligand and learn distances between those atoms and the rest of the molecule. This results in the generated conformers + having a more reasonable shape, similar to the deposited model. + """ + ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) - sel_str = f"chain {self.ligand.chain[0]} and resi {self.ligand.resi[0]}" - if self.ligand.icode[0]: - sel_str = f"{sel_str} and icode {self.ligand.icode[0]}" - selection = self.ligand._selection - iteration = 1 - while True: - if ( - iteration == 1 - and self.options.local_search - and self.options.dofs_per_iteration > 1 - ): - end_bond_index = ( - starting_bond_index + self.options.dofs_per_iteration - 1 - ) - else: - end_bond_index = min( - starting_bond_index + self.options.dofs_per_iteration, nbonds + # RDKit is bad at finding the corect bond types from pdb files, but good at doing so from SMILES string. Use SMILES string as templete for corecting bond orders + ref_mol = Chem.MolFromSmiles(self.options.smiles) + + # Assign bond orders from the template + ligand = Chem.AllChem.AssignBondOrdersFromTemplate(ref_mol, ligand) + + # Find the terminal atoms in the ligand, to be used in coordinate map + terminal_indices = [] + for atom in ligand.GetAtoms(): + if atom.GetDegree() == 1: # if only one atom is bound to the current atom then it is terminal + terminal_indices.append(atom.GetIdx()) + + ligand = Chem.AddHs(ligand) + + num_conformers = self.num_conf_for_method # Number of conformers you want to generate + # Create a copy of the 'ligand' object to generate conformers off of. They will later be aligned to 'ligand' object + mol = Chem.Mol(ligand) + + logger.info(f"Generating {num_conformers} conformers with terminal atom constraints") + # Generate conformers + AllChem.EmbedMultipleConfs(mol, numConfs=num_conformers, useBasicKnowledge=True, pruneRmsThresh=self.options.rmsd_cutoff, + coordMap={idx: ligand.GetConformer().GetAtomPosition(idx) for idx in terminal_indices}) + + if mol.GetNumConformers() == 0: + logger.error(f"terminal atom constrained search generated no conformers. Moving onto next sampling function.") + if mol.GetNumConformers() != 0: + # Minimize the energy of each conformer to find most stable structure + logger.info("Minimizing energy of each conformer") + mp = AllChem.MMFFGetMoleculeProperties(mol) + for conf_id in mol.GetConformers(): + ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=conf_id.GetId()) + ff.Minimize() + + logger.info("Aligning molecules") + # Align the conformers in "mol" to "ligand" to ensure all structures are properly sitting in the binding site + ligand_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(ligand) + mol_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(mol) + + for conf_id in mol.GetConformers(): + o3a = Chem.rdMolAlign.GetCrippenO3A(mol, ligand, prbCrippenContribs=mol_crippen_contribs, refCrippenContribs=ligand_crippen_contribs, prbCid=conf_id.GetId()) + o3a.Align() + + mol = Chem.RemoveHs(mol) + ligand = Chem.RemoveHs(ligand) + + # Check for internal/external clashes + logger.info("Checking for clashes") + # Store the coordinates of each conformer into numpy array + new_conformer = mol.GetConformers() + new_coors = [] + for i, conformer in enumerate(new_conformer): + coords = conformer.GetPositions() + new_coors.append(coords) + + new_idx_set = [] + new_coor_set = [] + new_bs = [] + + + # loop through each rdkit generated conformer + for idx, conf in enumerate(new_coors): + b = self._bs + self.ligand.coor = conf + self.ligand.b = [b[0]] + if self.options.external_clash: + if not self._cd(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + elif not self.ligand.clashes(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + # Save new conformers to self + merged_arr = np.concatenate((self._coor_set, new_coor_set), axis=0) + merged_bs = np.concatenate((self._bs, new_bs), axis=0) + + self._coor_set = merged_arr + self._bs = merged_bs + + logger.info(f"Terminal atom search generated: {len(self._coor_set)} plausible conformers") + + if len(self._coor_set) < 1: + logger.warning( + f"RDKit conformers not sufficiently diverse. Generated: {len(self._coor_set)} conformers" ) - self.ligand._active[selection] = True - for bond_index in range(starting_bond_index, end_bond_index): - nbonds_sampled = bond_index + 1 + return + + return - bond = bonds[bond_index] - atoms = [self.ligand.name[bond[0]], self.ligand.name[bond[1]]] - new_coor_set = [] - new_bs = [] - for coor, b in zip(self._coor_set, self._bs): - self.ligand._coor[selection] = coor - self.ligand._b[selection] = b - rotator = BondRotator(self.ligand, *atoms) - for angle in sampling_range: - new_coor = rotator(angle) - if opt.remove_conformers_below_cutoff: - values = self.xmap.interpolate( - new_coor[self.ligand._active[selection]] - ) - mask = self.ligand.e[self.ligand._active[selection]] != "H" - if np.min(values[mask]) < self.options.density_cutoff: - continue - if self.options.external_clash: - if not self._cd() and not self.ligand.clashes(): - if new_coor_set: - delta = np.array(new_coor_set) - np.array(new_coor) - if ( - np.sqrt( - min( - np.square((delta)) - .sum(axis=2) - .sum(axis=1) - ) - ) - >= self.options.rmsd_cutoff - ): - new_coor_set.append(new_coor) - new_bs.append(b) - else: - new_coor_set.append(new_coor) - new_bs.append(b) - elif not self.ligand.clashes(): - if new_coor_set: - delta = np.array(new_coor_set) - np.array(new_coor) - if ( - np.sqrt( - min(np.square((delta)).sum(axis=2).sum(axis=1)) - ) - >= self.options.rmsd_cutoff - ): - new_coor_set.append(new_coor) - new_bs.append(b) - else: - new_coor_set.append(new_coor) - new_bs.append(b) - self._coor_set = new_coor_set - self._bs = new_bs + def spherical_search(self): + """ + Define a sphere around the deposited ligand, where the radius is the maximum distance from the geometric center to any atom in the molecule. Constrain conformer + generation to be within this sphere. This is a less restrictive constraint, while still biasing RDKit to generate conformers more likely to sit well in binding site + """ + ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) - self.ligand._active[selection] = False - active = np.zeros_like(self.ligand._active[selection], dtype=bool) - # Activate all the atoms of the ligand that have been sampled - # up until the bond we are currently sampling: - for cluster in self._rigid_clusters: - for sampled_bond in bonds[:nbonds_sampled]: - if sampled_bond[0] in cluster or sampled_bond[1] in cluster: - active[cluster] = True - for atom in cluster: - active[self.ligand.connectivity[atom]] = True - self.ligand._active[selection] = active - self.conformer = self.ligand - - logger.info(f"Nconf: {len(self._coor_set)}") + # Create a sphere around the ligand to constrain the conformation generation + conf = ligand.GetConformer() + atom_positions = [conf.GetAtomPosition(i) for i in range(ligand.GetNumAtoms())] + # Calculate the geometric center of the molecule + geometric_center = np.mean([np.array(atom_position) for atom_position in atom_positions], axis=0) + # Calculate the radius of the sphere (max distance from center to any atom) + radius = max(np.linalg.norm(np.array(atom_position) - geometric_center) for atom_position in atom_positions) + + # RDKit is bad at finding the corect bond types from pdb files, but good at doing so from SMILES string. Use SMILES string as templete for corecting bond orders + ref_mol = Chem.MolFromSmiles(self.options.smiles) + + # Assign bond orders from the template + ligand = Chem.AllChem.AssignBondOrdersFromTemplate(ref_mol, ligand) + ligand = Chem.AddHs(ligand) + num_conformers = self.num_conf_for_method # Number of conformers you want to generate + + # Create a copy of the 'ligand' object to generate conformers off of. They will later be aligned to 'ligand' object + mol = Chem.Mol(ligand) + + diameter = 2 * radius + bounds = Chem.rdDistGeom.GetMoleculeBoundsMatrix(ligand) + # Modify the bounds matrix to set the upper bounds for all atom pairs + num_atoms = ligand.GetNumAtoms() + for i in range(num_atoms): + for j in range(i+1, num_atoms): # Only need to do this for the upper triangle + # Set the upper bound for the distance between atoms i and j + bounds[i, j] = min(bounds[i, j], diameter) + + # Set up the embedding parameters + ps = Chem.rdDistGeom.EmbedParameters() + ps = Chem.rdDistGeom.ETKDGv3() + ps.randomSeed = 0xf00d + ps.SetBoundsMat(bounds) + ps.useBasicKnowledge = True + ps.pruneRmsThresh = self.options.rmsd_cutoff + + logger.info(f"Generating {num_conformers} conformers with no/spherical constraints") + Chem.rdDistGeom.EmbedMultipleConfs(mol, numConfs=num_conformers, params=ps) + + # Minimize the energy of each conformer to find most stable structure + logger.info("Minimizing energy of each conformer") + mp = AllChem.MMFFGetMoleculeProperties(mol) + for conf_id in mol.GetConformers(): + ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=conf_id.GetId()) + ff.Minimize() + + logger.info("Aligning molecules") + # Align the conformers in "mol" to "ligand" to ensure all structures are properly sitting in the binding site + ligand_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(ligand) + mol_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(mol) + + for conf_id in mol.GetConformers(): + o3a = Chem.rdMolAlign.GetCrippenO3A(mol, ligand, prbCrippenContribs=mol_crippen_contribs, refCrippenContribs=ligand_crippen_contribs, prbCid=conf_id.GetId()) + o3a.Align() + + mol = Chem.RemoveHs(mol) + ligand = Chem.RemoveHs(ligand) + + if mol.GetNumConformers() == 0: + logger.error(f"Spherical search generated no conformers. Moving onto next sampling function.") + if mol.GetNumConformers() != 0: + # Check for internal/external clashes + logger.info("Checking for clashes") + # Store the coordinates of each conformer into numpy array + new_conformer = mol.GetConformers() + new_coors = [] + for i, conformer in enumerate(new_conformer): + coords = conformer.GetPositions() + new_coors.append(coords) + + new_idx_set = [] + new_coor_set = [] + new_bs = [] + # loop through each rdkit generated conformer + for idx, conf in enumerate(new_coors): + b = self._bs + self.ligand.coor = conf + self.ligand.b = [b[0]] + if self.options.external_clash: + if not self._cd(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + elif not self.ligand.clashes(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + + # Save new conformers to self + merged_arr = np.concatenate((self._coor_set, new_coor_set), axis=0) + merged_bs = np.concatenate((self._bs, new_bs), axis=0) + + + self._coor_set = merged_arr + self._bs = merged_bs + + logger.info(f"Spherical search generated: {len(self._coor_set)} plausible conformers") + if len(self._coor_set) < 1: logger.warning( - f"{self.ligand.resn[0]}: " - f"DOF search cluster {self._cluster_index} iteration {iteration}: " - f"{len(self._coor_set)} conformers." + f"RDKit conformers not sufficiently diverse. Generated: {len(self._coor_set)} conformers" ) return + + return - # QP score conformer occupancy - self._convert() - self._solve_qp() - self._update_conformers() - if self.options.write_intermediate_conformers: - self._write_intermediate_conformers( - prefix=f"sample_ligand_iter{iteration}_qp" - ) + def branching_search(self): + """ + This function is used to directly address cases where the ligand has branching disorder. Identify the atoms belonging to a branch, and fix all non-branch ligands in place + (by using coordinate map distance constraints). Allow the branches to randomly sample the conformational space. + """ + # Make RDKit mol object from the ligand pdb + # Starting structure + branching_ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) + num_branched_confs = self.num_conf_for_method + + # Identify the branching sections of the ligand + side_chain = self.identify_core_and_sidechain(branching_ligand) + side_chain_atoms = side_chain[0] + + # Define core_atoms as all atoms not in side_chain_atoms + core_indices = [atom.GetIdx() for atom in branching_ligand.GetAtoms() if atom.GetIdx() not in side_chain_atoms] + core_atoms = tuple(core_indices) + + # create reference molecule from smiles string to copy the correct bond order from + ref_mol = Chem.MolFromSmiles(self.options.smiles) + branching_ligand = Chem.AllChem.AssignBondOrdersFromTemplate(ref_mol, branching_ligand) + + # add hydrogens + branching_ligand = Chem.AddHs(branching_ligand) + + # Create a coordinate map for the core atoms using the original ligand + coord_map = {idx: branching_ligand.GetConformer().GetAtomPosition(idx) for idx in core_atoms} + # Create a copy of the molecule for conformer generation + mol_copy = Chem.Mol(branching_ligand) + logger.info(f"Generating {num_branched_confs} conformers for branched ligand sampling") + + # Generate confromers with coordinate map "fixed" + AllChem.EmbedMultipleConfs(mol_copy, numConfs=num_branched_confs, coordMap=coord_map, useBasicKnowledge=True) + + # Minimize energy of each conformer + logger.info("Minimizing energy of new conformers") + mp = AllChem.MMFFGetMoleculeProperties(mol_copy) + for conf_id in mol_copy.GetConformers(): + ff = AllChem.MMFFGetMoleculeForceField(mol_copy, mp, confId=conf_id.GetId()) + ff.Minimize() + + # Compute Crippen contributions for both molecules to align the generated conformers to each other + logger.info("Aligning molecules for branched ligand sampling") + ligand_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(branching_ligand) + mol_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(mol_copy) + # Align + for conf_id in mol_copy.GetConformers(): + o3a = Chem.rdMolAlign.GetCrippenO3A(mol_copy, branching_ligand, prbCrippenContribs=mol_crippen_contribs, refCrippenContribs=ligand_crippen_contribs, prbCid=conf_id.GetId()) + o3a.Align() + + mol_copy = Chem.RemoveHs(mol_copy) + branching_ligand = Chem.RemoveHs(branching_ligand) + + # Check for internal/external clashes + if mol_copy.GetNumConformers() == 0: + logger.error(f"Branching search generated no conformers. Moving onto next sampling function.") + if mol_copy.GetNumConformers() != 0: + logger.info("Checking for clashes") + # Store the coordinates of each conformer into numpy array + new_conformer = mol_copy.GetConformers() + new_coors = [] + for i, conformer in enumerate(new_conformer): + coords = conformer.GetPositions() + new_coors.append(coords) + + new_idx_set = [] + new_coor_set = [] + new_bs = [] + + # loop through each rdkit generated conformer + for idx, conf in enumerate(new_coors): + b = self._bs + self.ligand.coor = conf + self.ligand.b = [b[0]] + if self.options.external_clash: + if not self._cd(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + elif not self.ligand.clashes(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + # Save new conformers to self + merged_arr = np.concatenate((self._coor_set, new_coor_set), axis=0) + merged_bs = np.concatenate((self._bs, new_bs), axis=0) + + self._coor_set = merged_arr + self._bs = merged_bs + logger.info(f"Branched search generated: {len(new_coor_set)} plausible conformers") + + return + + def long_chain_search(self): + """ + When ligands have long branches with a high number of internal degrees of freedom, a random sampling of the conformational space can lead to + wildly undersirable configurations (i.e. the generated long branches are not supported by the density). It is useful to implement distance constraints + for these sections. + """ + # Starting strucutre from PDB + ligand = Chem.MolFromPDBFile(self.ligand_pdb_file) + # Refenerce mol from smiles, used to assign correct bond order + ref_mol = Chem.MolFromSmiles(self.options.smiles) + ligand = Chem.AllChem.AssignBondOrdersFromTemplate(ref_mol, ligand) + + # Identify the side chain/branched sections of the ligand + side_chain = self.identify_core_and_sidechain(ligand) + side_chain_atoms = side_chain[0] + + ligand = Chem.AddHs(ligand) + # Set the coordinate map as the brnaches + coord_map = {idx: ligand.GetConformer().GetAtomPosition(idx) for idx in side_chain_atoms} + + # Create a copy of the 'ligand' object to generate conformers off of. They will later be aligned to 'ligand' object + mol = Chem.Mol(ligand) + logger.info(f"Generating {self.options.numConf} conformers for long chain search") + # Generate conformers + AllChem.EmbedMultipleConfs(mol, numConfs=self.num_conf_for_method, coordMap=coord_map, useBasicKnowledge=True) + + logger.info("Minimizing long chain conformers") + # Minimize the energy of each conformer to find most stable structure + mp = AllChem.MMFFGetMoleculeProperties(mol) + for conf_id in mol.GetConformers(): + ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=conf_id.GetId()) + ff.Minimize() + + logger.info("Aligning long chain conformers") + ligand_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(ligand) + mol_crippen_contribs = Chem.rdMolDescriptors._CalcCrippenContribs(mol) + + for conf_id in mol.GetConformers(): + o3a = Chem.rdMolAlign.GetCrippenO3A(mol, ligand, prbCrippenContribs=mol_crippen_contribs, refCrippenContribs=ligand_crippen_contribs, prbCid=conf_id.GetId()) + o3a.Align() + + mol = Chem.RemoveHs(mol) + ligand = Chem.RemoveHs(ligand) + + if mol.GetNumConformers() == 0: + logger.error(f"Long chain search generated no conformers. Moving onto next sampling function.") + if mol.GetNumConformers() != 0: + # Check for internal/external clashes + logger.info("Checking for clashes") + # Store the coordinates of each conformer into numpy array + new_conformer = mol.GetConformers() + new_coors = [] + for i, conformer in enumerate(new_conformer): + coords = conformer.GetPositions() + new_coors.append(coords) + + new_idx_set = [] + new_coor_set = [] + new_bs = [] + # loop through each rdkit generated conformer + for idx, conf in enumerate(new_coors): + b = self._bs + self.ligand.coor = conf + self.ligand.b = [b[0]] + # self._cd() + if self.options.external_clash: + if not self._cd(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + elif not self.ligand.clashes(): + if new_idx_set: # if there are already conformers in new_idx_set + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + else: + new_idx_set.append(idx) + new_coor_set.append(conf) + new_bs.append(b[0]) + + # Save new conformers to self + merged_arr = np.concatenate((self._coor_set, new_coor_set), axis=0) + merged_bs = np.concatenate((self._bs, new_bs), axis=0) + + + self._coor_set = merged_arr + self._bs = merged_bs + + logger.info(f"Long chain search generated: {len(new_coor_set)} plausible conformers") + logger.info(f"bfactor shape = {np.shape(self._bs)}") + + + # logger.info(f"After long chain QP there are {len(self._coor_set)} conformers") + if len(self._coor_set) < 1: logger.warning( - f"{self.ligand.resn[0]}: " - f"QP search cluster {self._cluster_index} iteration {iteration}: " - f"{len(self._coor_set)} conformers" + f"RDKit conformers not sufficiently diverse. Generated: {len(self._coor_set)} conformers" ) return + + return - # MIQP score conformer occupancy - self._convert() - self._solve_miqp( - threshold=self.options.threshold, cardinality=self.options.cardinality - ) - self._update_conformers() - if self.options.write_intermediate_conformers: - self._write_intermediate_conformers( - prefix=f"sample_ligand_iter{iteration}_miqp" - ) + def rot_trans(self): + """ + Rotate and translate all conformers that pass QP scoring for further sampling of conformational space. Rotate (by default) 15 degrees by 5 degree increments in x, y, z directions + and translate 0.3 angstroms in x, y, z directions. + """ + + # Initialize empty list to store rotated/translated conformers + b-factors + extended_coor_set = [] + extended_bs = [] + rotated_coor_set = [] + rotated_bs = [] + new_coor_set = self._coor_set + new_bs = self._bs + + # rotations + for conf, b in zip(self._coor_set, self._bs): + # Apply rotations to each initial conformation + rotated_conformations = self.apply_rotations(conf, self.options.rot_range, self.options.rotation_step) + rotated_coor_set.extend(rotated_conformations) + rotated_bs.extend([b] * len(rotated_conformations)) # Extend b values for each rotated conformation + + # translations + for conf, b in zip(self._coor_set, self._bs): + # Apply translations to each conformation + translated_conformations = self.apply_translations(conf, self.options.trans_range) + extended_coor_set.extend(translated_conformations) + extended_bs.extend([b] * len(translated_conformations)) # Extend b values for each translated conformation + + + self._coor_set = np.concatenate((new_coor_set, rotated_coor_set, extended_coor_set), axis=0) + self._bs = np.concatenate((new_bs, rotated_bs, extended_bs), axis=0) + + logger.info(f"Trans/rot search generated: {len(self._coor_set)} plausible conformers") + logger.info(f"bfactor shape = {np.shape(self._bs)}") + + self._convert() + logger.info("Solving QP after trans and rot search.") + self._solve_qp() + logger.debug("Updating conformers after trans and rot search.") + self._update_conformers() + # self._write_intermediate_conformers(prefix="trans_rot_sol") - # Check if we are done - if end_bond_index == nbonds: - break + logger.info(f"After rotation and translation QP there are {len(self._coor_set)} conformers") - # Go to the next bonds to be sampled - if ( - iteration == 1 - and self.options.local_search - and self.options.dofs_per_iteration > 1 - ): - starting_bond_index += self.options.dofs_per_iteration - 1 - else: - starting_bond_index += self.options.dofs_per_iteration - iteration += 1 + + if len(self._coor_set) < 1: + logger.warning( + f"RDKit conformers not sufficiently diverse. Generated: {len(self._coor_set)} conformers" + ) + return class QFitCovalentLigand(_BaseQFit): diff --git a/src/qfit/qfit_ligand.py b/src/qfit/qfit_ligand.py index 1ce8a9cc..e33e9bf4 100644 --- a/src/qfit/qfit_ligand.py +++ b/src/qfit/qfit_ligand.py @@ -44,6 +44,51 @@ def build_argparser(): type=str, help="Chain, residue id, and optionally insertion code for residue in structure, e.g. A,105, or A,105:A.", ) + + # RDKit input options + p.add_argument( + "-sm", + "--smiles", + type=str, + help="SMILES string for molecule", + ) + p.add_argument( + "-nc", + "--numConf", + type=int, + default=10000, + help="Number of RDKit conformers to generate", + ) + + p.add_argument( + "-lb", + "--ligand_bic", + action="store_true", + help="Flag to run with ligand BIC on", + ) + + p.add_argument( + "-rr", + "--rot_range", + type=float, + default=15.0, + help="Rotation range for RDKit conformers", + ) + p.add_argument( + "-tr", + "--trans_range", + type=float, + default=0.3, + help="Translation range for RDKit conformers", + ) + + p.add_argument( + "-rs", + "--rotation_step", + type=float, + default=5.0, + help="Rotation step size for RDKit conformers", + ) # Map input options p.add_argument( @@ -131,22 +176,7 @@ def build_argparser(): default=True, help="Consider waters for soft clash detection", ) - - # Sampling options - p.add_argument( - "--build", - action=ToggleActionFlag, - dest="build", - default=True, - help="Build ligand", - ) - p.add_argument( - "--local", - action=ToggleActionFlag, - dest="local_search", - default=True, - help="Perform a local search", - ) + p.add_argument( "--remove-conformers-below-cutoff", action="store_true", @@ -167,10 +197,10 @@ def build_argparser(): ) p.add_argument( "-ec", - "--external-clash", - action="store_true", + "--no-external-clash", + action="store_false", dest="external_clash", - help="Enable external clash detection during sampling", + help="Turn off external clash detection during sampling", ) p.add_argument( "-bs", @@ -180,22 +210,7 @@ def build_argparser(): type=float, help="Bulk solvent level in absolute values", ) - p.add_argument( - "-b", - "--dofs-per-iteration", - default=2, - metavar="", - type=int, - help="Number of internal degrees that are sampled/built per iteration", - ) - p.add_argument( - "-s", - "--dihedral-stepsize", - default=10, - metavar="", - type=float, - help="Stepsize for dihedral angle sampling in degrees", - ) + p.add_argument( "-c", "--cardinality", @@ -276,7 +291,9 @@ def build_argparser(): type=os.path.abspath, help="Directory to store results", ) - p.add_argument("-v", "--verbose", action="store_true", help="Be verbose") + p.add_argument( + "-v", "--verbose", action="store_true", help="Be verbose" + ) p.add_argument( "--debug", action="store_true", help="Log as much information as possible" ) @@ -355,6 +372,17 @@ def prepare_qfit_ligand(options): ligand.altloc = "" ligand.q = 1 + # save ligand pdb file to working directory + try: + os.makedirs(options.directory) + except OSError: + pass + + input_ligand = os.path.join( + options.directory, "ligand.pdb" + ) + ligand.tofile(input_ligand) + logger.info("Ligand atoms selected: {natoms}".format(natoms=ligand.natoms)) # Load and process the electron density map: @@ -389,7 +417,7 @@ def prepare_qfit_ligand(options): ) # this should be an option xmap.tofile(scaled_fname) - return QFitLigand(ligand, structure, xmap, options), chainid, resi, icode, receptor + return QFitLigand(ligand, receptor, xmap, options), chainid, resi, icode, receptor def main(): diff --git a/tests/test_qfit_ligand.py b/tests/test_qfit_ligand.py index c54c0cbb..58a904d7 100644 --- a/tests/test_qfit_ligand.py +++ b/tests/test_qfit_ligand.py @@ -28,6 +28,10 @@ def mock_main(self): "-l", "2FOFCWT,PH2FOFCWT", "B, 801", # selection + "-sm", + "CS(=O)CC(=N)NCCCC(C(=O)O)N", + "-nc", + "1000" ] # TODO: Add options to reduce computational load