Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow qFit to run with multiple backbone inputs #382

Merged
merged 14 commits into from
Jan 29, 2024
168 changes: 84 additions & 84 deletions src/qfit/qfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,15 @@ def _setup_clash_detector(self):

def run(self):
if self.options.sample_backbone:
self._sample_backbone()

for altloc in self.options.backbone_coor_dict['coords']:
self.residue.coor = self.options.backbone_coor_dict['coords'][altloc]
if self.residue.resn[0] == 'GLY':
atom = self.residue.extract("name", "O")
else:
atom = self.residue.extract("name", "CB")
if self.options.backbone_coor_dict['u_matrices'][altloc] is not None:
self.u_matrix = self.options.backbone_coor_dict['u_matrices'][altloc]
self._sample_backbone()
if self.options.sample_angle:
self._sample_angle()

Expand Down Expand Up @@ -703,6 +710,7 @@ def run(self):
)

def _sample_backbone(self):

# Check if residue has enough neighboring residues
index = self.segment.find(self.residue.id)
# active = self.residue.active
Expand All @@ -722,17 +730,18 @@ def _sample_backbone(self):

# Determine directions for backbone sampling
atom = self.residue.extract("name", atom_name)

try:
u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {u_matrix}")
logger.debug(f"[_sample_backbone] directions = {directions}")
if not self.u_matrix:
self.u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(self.u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {self.u_matrix}")
except AttributeError:
logger.info(
logger.debug(
f"[{self.identifier}] Got AttributeError for directions at Cβ. Treating as isotropic B, using Cβ-Cα, C-N,(Cβ-Cα × C-N) vectors."
)
# Choose direction vectors as Cβ-Cα, C-N, and then (Cβ-Cα × C-N)
Expand Down Expand Up @@ -766,56 +775,56 @@ def _sample_backbone(self):
self._coor_set.append(self.segment[index].coor)
self._bs.append(self.conformer.b)
return

# Retrieve the amplitudes and stepsizes from options.
sigma = self.options.sample_backbone_sigma
bba, bbs = (
self.options.sample_backbone_amplitude,
self.options.sample_backbone_step,
)
assert bba >= bbs > 0

# Create an array of amplitudes to scan:
# We start from stepsize, making sure to stop only after bba.
# Also include negative amplitudes.
eps = ((bba / bbs) / 2) * np.finfo(
float
).epsneg # ε to avoid FP errors in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

# Optimize in torsion space to achieve the target atom position
optimizer = NullSpaceOptimizer(segment)
start_coor = atom.coor[0] # We are working on a single atom.
torsion_solutions = []
for amplitude, direction in itertools.product(amplitudes, directions):
endpoint = start_coor + amplitude * direction
optimize_result = optimizer.optimize(atom_name, endpoint)
torsion_solutions.append(optimize_result["x"])

# Capture starting coordinates for the segment, so that we can restart after every rotator
starting_coor = segment.coor
for solution in torsion_solutions:
optimizer.rotator(solution)
self._coor_set.append(self.segment[index].coor)
self._bs.append(self.conformer.b)
segment.coor = starting_coor

logger.debug(
f"[_sample_backbone] Backbone sampling generated {len(self._coor_set)} conformers."
)
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_backbone_segment{index:03d}"

# Retrieve the amplitudes and stepsizes from options.
sigma = self.options.sample_backbone_sigma
bba, bbs = (
self.options.sample_backbone_amplitude,
self.options.sample_backbone_step,
)
assert bba >= bbs > 0

# Create an array of amplitudes to scan:
# We start from stepsize, making sure to stop only after bba.
# Also include negative amplitudes.
eps = ((bba / bbs) / 2) * np.finfo(
float
).epsneg # ε to avoid FP errors in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

# Optimize in torsion space to achieve the target atom position
optimizer = NullSpaceOptimizer(segment)
start_coor = atom.coor[0] # We are working on a single atom.
torsion_solutions = []
for amplitude, direction in itertools.product(amplitudes, directions):
endpoint = start_coor + amplitude * direction
optimize_result = optimizer.optimize(atom_name, endpoint)
torsion_solutions.append(optimize_result["x"])

# Capture starting coordinates for the segment, so that we can restart after every rotator
starting_coor = segment.coor
for solution in torsion_solutions:
optimizer.rotator(solution)
self._coor_set.append(self.segment[index].coor)
self._bs.append(self.conformer.b)
segment.coor = starting_coor

logger.debug(
f"[_sample_backbone] Backbone sampling generated {len(self._coor_set)} conformers."
)
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_backbone_segment{index:03d}"
)

def _sample_angle(self):
"""Sample residue conformations by flexing α-β-γ angle.

Only operates on residues with large aromatic sidechains
(Trp, Tyr, Phe, His) where CG is a member of the aromatic ring.
Here, slight deflections of the ring are likely to lead to better-
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ).
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ) later.

This angle does not exist in {Gly, Ala}, and it does not make sense to
sample this angle in Pro.
Expand Down Expand Up @@ -855,45 +864,36 @@ def _sample_angle(self):
new_bs = []
for coor in self._coor_set:
self.residue.coor = coor
# Initialize rotator
perp_rotator = CBAngleRotator(self.residue)
# Rotate about the axis perpendicular to CB-CA and CB-CG vectors
for perp_angle in angles:
perp_rotator(perp_angle)
coor_rotated = self.residue.coor
# Initialize rotator
bisec_rotator = BisectingAngleRotator(self.residue)
# Rotate about the axis bisecting the CA-CA-CG angle for each angle you sample across the perpendicular axis
for bisec_angle in angles:
self.residue.coor = coor_rotated # Ensure that the second rotation is applied to the updated coordinates from first rotation
bisec_rotator(bisec_angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue

# Move on if these coordinates cause a clash
if self.options.external_clash:
if self._cd() and self.residue.clashes():
continue
elif self.residue.clashes():
rotator = CBAngleRotator(self.residue)
for angle in angles:
rotator(angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue

# Valid, non-clashing conformer found!
new_coor_set.append(self.residue.coor)
new_bs.append(self.conformer.b)

# Move on if these coordinates cause a clash
if self.options.external_clash:
if self._cd() and self.residue.clashes():
continue
elif self.residue.clashes():
continue

# Valid, non-clashing conformer found!
new_coor_set.append(self.residue.coor)
new_bs.append(self.conformer.b)

# Update sampled coords
self._coor_set = new_coor_set
self._bs = new_bs
logger.debug(f"Bond angle sampling generated {len(self._coor_set)} conformers.")
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(prefix=f"sample_angle")

def _sample_sidechain(self):
opt = self.options
start_chi_index = 1
Expand Down
41 changes: 39 additions & 2 deletions src/qfit/qfit_protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os.path
import os
import sys
import numpy as np
import time
import argparse
from .custom_argparsers import (
Expand Down Expand Up @@ -420,7 +421,10 @@ def run(self):
self.pdb = self.options.pdb + "_"
else:
self.pdb = ""

# Get information about all backbone atoms. If the input structure has multiple backbones, we will initiate backbone sampling (and all residue sampling) using all backbone coordinates
if self.options.residue is not None:
chainid, resi = self.options.residue.split(",")

if self.options.residue is not None: #run qFit residue
multiconformer = self._run_qfit_residue_parallel()
elif self.options.only_segment:
Expand Down Expand Up @@ -460,6 +464,7 @@ def _run_qfit_residue_parallel(self):
waters = waters.extract("resn", "HOH", "==")
hetatms = hetatms.combine(waters)


# Create a list of residues from single conformations of proteinaceous residues.
# If we were to loop over all single_conformer_residues, then we end up adding HETATMs in two places
# First as we combine multiconformer_residues into multiconformer_model (because they won't be in ROTAMERS)
Expand Down Expand Up @@ -489,6 +494,31 @@ def _run_qfit_residue_parallel(self):
.single_conformer_residues
)

backbone_coor_dict = {}
for residue in residues:
grouped_coords = {}
grouped_u_matrices = {}
residue_chain_key = (residue.resi[0], residue.chain[0].replace('"', ''))
if residue_chain_key not in backbone_coor_dict:
backbone_coor_dict[residue_chain_key] = {}
for altloc in np.unique(self.structure.extract("chain", residue.chain[0], "==").extract("resi", residue.resi[0], "==").altloc):
grouped_coords[altloc] = self.structure.extract("chain", residue.chain[0], "==").extract("resi", residue.resi[0], "==").extract("altloc", altloc).coor
if residue.resn[0] == "GLY":
atom_name = "O"
else:
atom_name = "CB"
atom = self.structure.extract("chain", residue.chain[0], "==").extract("resi", residue.resi[0], "==").extract("name", atom_name).extract("altloc", altloc)
try:
u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
except:
u_matrix = None
grouped_u_matrices[altloc] = u_matrix
backbone_coor_dict[residue_chain_key]['coords'] = grouped_coords
backbone_coor_dict[residue_chain_key]['u_matrices'] = grouped_u_matrices
# Filter the residues: take only those not containing checkpoints.
def does_multiconformer_checkpoint_exist(residue):
fname = os.path.join(
Expand Down Expand Up @@ -553,6 +583,7 @@ def _error_cb(e):
"xmap": self.get_map_around_substructure(residue),
"options": self.options,
"logqueue": logqueue,
"backbone_coor_dict": backbone_coor_dict
},
callback=_cb,
error_callback=_error_cb,
Expand All @@ -579,6 +610,7 @@ def _error_cb(e):
xmap=self.get_map_around_substructure(residue),
options=self.options,
logqueue=logqueue,
backbone_coor_dict=backbone_coor_dict
)
_cb(result)
except Exception as e:
Expand Down Expand Up @@ -738,7 +770,7 @@ def _create_refine_restraints(self, multiconformer):
f.close()

@staticmethod
def _run_qfit_residue(residue, structure, xmap, options, logqueue):
def _run_qfit_residue(residue, structure, xmap, options, logqueue, backbone_coor_dict):
"""Run qfit on a single residue to determine density-supported conformers."""

# Don't run qfit if we have a ligand or water
Expand Down Expand Up @@ -847,6 +879,11 @@ def _run_qfit_residue(residue, structure, xmap, options, logqueue):
sel_str = f"not ({sel_str})"
structure_new = structure_new.extract(sel_str)

#add multiple backbone positions

chain_resi_id= f"{resi}, '{chainid}'"
chain_resi_id = (resi, chainid)
options.backbone_coor_dict = backbone_coor_dict[chain_resi_id]
# Exception handling in case qFit-residue fails:
qfit = QFitRotamericResidue(residue, structure_new, xmap, options)
try:
Expand Down