Merge pull request #419 from ExcitedStates/split_qp_3000
split QP into 3000 conformers
Stephanie (Mullane) Wankowicz committed Apr 25, 2024
2 parents 474fa9c + 493d4dc commit b2a7425
Showing 2 changed files with 111 additions and 157 deletions.
225 changes: 86 additions & 139 deletions src/qfit/qfit.py
@@ -642,17 +642,8 @@ def _setup_clash_detector(self):

def run(self):
if self.options.sample_backbone:
for altloc in self.options.backbone_coor_dict["coords"]:
self.residue.coor = self.options.backbone_coor_dict["coords"][altloc]
if self.residue.resn[0] == "GLY":
atom = self.residue.extract("name", "O")
else:
atom = self.residue.extract("name", "CB")
if self.options.backbone_coor_dict["u_matrices"][altloc] is not None:
self.u_matrix = self.options.backbone_coor_dict["u_matrices"][
altloc
]
self._sample_backbone()
self._sample_backbone()

if self.options.sample_angle:
self._sample_angle()

@@ -729,18 +720,17 @@ def _sample_backbone(self):

# Determine directions for backbone sampling
atom = self.residue.extract("name", atom_name)

try:
if not self.u_matrix:
self.u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(self.u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {self.u_matrix}")
u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {u_matrix}")
logger.debug(f"[_sample_backbone] directions = {directions}")
except AttributeError:
logger.debug(
logger.info(
f"[{self.identifier}] Got AttributeError for directions at Cβ. Treating as isotropic B, using Cβ-Cα, C-N,(Cβ-Cα × C-N) vectors."
)
# Choose direction vectors as Cβ-Cα, C-N, and then (Cβ-Cα × C-N)
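In the try branch above, the sampling directions come from the atom's anisotropic displacement (ADP) U matrix via adp_ellipsoid_axes. As a rough, non-authoritative illustration of the idea, the sketch below derives principal axes of a symmetric 3×3 U tensor by eigendecomposition; whether adp_ellipsoid_axes is implemented exactly this way is an assumption, and the U values are invented.

```python
# Minimal sketch, not the qFit implementation: one common way to turn a 3x3
# anisotropic displacement (U) tensor into sampling directions is to take its
# eigenvectors. The numbers below are made up for illustration.
import numpy as np

u_matrix = np.array(
    [
        [0.031, 0.004, -0.002],
        [0.004, 0.028, 0.005],
        [-0.002, 0.005, 0.035],
    ]
)

eigvals, eigvecs = np.linalg.eigh(u_matrix)  # symmetric tensor, so eigh applies
order = np.argsort(eigvals)[::-1]            # largest displacement first
directions = eigvecs[:, order].T             # one unit vector per row

print(directions.shape)  # (3, 3): three orthonormal directions to displace along
```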
@@ -777,55 +767,55 @@ def _sample_backbone(self):
self._bs.append(self.conformer.b)
return

# Retrieve the amplitudes and stepsizes from options.
sigma = self.options.sample_backbone_sigma
bba, bbs = (
    self.options.sample_backbone_amplitude,
    self.options.sample_backbone_step,
)
assert bba >= bbs > 0

# Create an array of amplitudes to scan:
# We start from stepsize, making sure to stop only after bba.
# Also include negative amplitudes.
eps = ((bba / bbs) / 2) * np.finfo(
    float
).epsneg  # ε to avoid FP errors in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

# Optimize in torsion space to achieve the target atom position
optimizer = NullSpaceOptimizer(segment)
start_coor = atom.coor[0]  # We are working on a single atom.
torsion_solutions = []
for amplitude, direction in itertools.product(amplitudes, directions):
    endpoint = start_coor + amplitude * direction
    optimize_result = optimizer.optimize(atom_name, endpoint)
    torsion_solutions.append(optimize_result["x"])

# Capture starting coordinates for the segment, so that we can restart after every rotator
starting_coor = segment.coor
for solution in torsion_solutions:
    optimizer.rotator(solution)
    self._coor_set.append(self.segment[index].coor)
    self._bs.append(self.conformer.b)
    segment.coor = starting_coor

logger.debug(
    f"[_sample_backbone] Backbone sampling generated {len(self._coor_set)} conformers."
)
if self.options.write_intermediate_conformers:
    self._write_intermediate_conformers(
        prefix=f"sample_backbone_segment{index:03d}"
    )
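The amplitude scan above builds a symmetric set of displacements from the step size up to the full amplitude, with a small epsilon guarding against floating-point drift in np.arange. A worked example with assumed values (these are not qFit's defaults):

```python
# Worked example of the amplitude construction above; bba and bbs are assumed
# values for illustration only.
import numpy as np

bba, bbs = 0.3, 0.1  # sampling amplitude and step size
eps = ((bba / bbs) / 2) * np.finfo(float).epsneg  # guard against FP drift in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

print(amplitudes)  # approximately [-0.3 -0.2 -0.1  0.1  0.2  0.3]; note there is no 0.0 entry
```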

def _sample_angle(self):
"""Sample residue conformations by flexing α-β-γ angle.
Only operates on residues with large aromatic sidechains
(Trp, Tyr, Phe, His) where CG is a member of the aromatic ring.
Here, slight deflections of the ring are likely to lead to better-
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ) later.
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ).
This angle does not exist in {Gly, Ala}, and it does not make sense to
sample this angle in Pro.
@@ -865,28 +855,37 @@ def _sample_angle(self):
new_bs = []
for coor in self._coor_set:
self.residue.coor = coor
rotator = CBAngleRotator(self.residue)
for angle in angles:
rotator(angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue
# Initialize rotator
perp_rotator = CBAngleRotator(self.residue)
# Rotate about the axis perpendicular to CB-CA and CB-CG vectors
for perp_angle in angles:
perp_rotator(perp_angle)
coor_rotated = self.residue.coor
# Initialize rotator
bisec_rotator = BisectingAngleRotator(self.residue)
# Rotate about the axis bisecting the CA-CB-CG angle for each angle sampled about the perpendicular axis
for bisec_angle in angles:
self.residue.coor = coor_rotated # Ensure that the second rotation is applied to the updated coordinates from first rotation
bisec_rotator(bisec_angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue

# Move on if these coordinates cause a clash
if self.options.external_clash:
    if self._cd() and self.residue.clashes():
        continue
elif self.residue.clashes():
    continue

# Valid, non-clashing conformer found!
new_coor_set.append(self.residue.coor)
new_bs.append(self.conformer.b)

# Update sampled coords
self._coor_set = new_coor_set
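Because the bisecting-axis rotation is nested inside the perpendicular-axis rotation, every input conformer can spawn len(angles)**2 candidates before the density and clash filters, which is part of why downstream QP scoring can see the thousands of conformers this PR splits over. A back-of-envelope sketch with assumed numbers:

```python
# Rough count of candidates generated by the nested rotations; the angle grid
# and the number of incoming conformers are assumptions for illustration.
import numpy as np

angles = np.arange(-15, 16, 5)  # 7 sampled angles (degrees), assumed grid
n_incoming = 100                # conformers entering _sample_angle, assumed

candidates = n_incoming * len(angles) ** 2  # perpendicular x bisecting rotations
print(candidates)  # 4900 candidates before density/clash filtering
```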
@@ -1046,65 +1045,13 @@ def _sample_sidechain(self):
prefix=f"sample_sidechain_iter{iteration}"
)

if len(self._coor_set) <= 15000:
# If <15000 conformers are generated, QP score conformer occupancy normally
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)
if len(self._coor_set) > 15000:
# If >15000 conformers are generated, split the QP conformer scoring into two
temp_coor_set = self._coor_set
temp_bs = self._bs

# Splitting the arrays into two sections
half_index = len(temp_coor_set) // 2 # Integer division for splitting
section_1_coor = temp_coor_set[:half_index]
section_1_bs = temp_bs[:half_index]
section_2_coor = temp_coor_set[half_index:]
section_2_bs = temp_bs[half_index:]

# Process the first section
self._coor_set = section_1_coor
self._bs = section_1_bs

# QP score the first section
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)

# Save results from the first section
qp_temp_coor = self._coor_set
qp_temp_bs = self._bs

# Process the second section
self._coor_set = section_2_coor
self._bs = section_2_bs

# QP score the second section
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)

# Save results from the second section
qp_2_temp_coor = self._coor_set
qp_2_temp_bs = self._bs

# Concatenate the results from both sections
self._coor_set = np.concatenate((qp_temp_coor, qp_2_temp_coor), axis=0)
self._bs = np.concatenate((qp_temp_bs, qp_2_temp_bs), axis=0)

self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)
# MIQP score conformer occupancy
self.sample_b()
self._convert()
43 changes: 25 additions & 18 deletions src/qfit/solvers.py
@@ -160,13 +160,14 @@ def __init__(self, target, models, nthreads=1):
self.quad_obj = None
self.lin_obj = None

self.weights = None
self._objective_value = None

self.nconformers = models.shape[0]
self.valid_indices = []
self.redundant_indices = []

self._weights = None
self._objective_value = 0
self.weights = None

def find_redundant_conformers(self, threshold=1e-6):
for i in range(self.nconformers):
if i in self.redundant_indices:
@@ -200,6 +201,7 @@ def construct_weights(self):
self.weights.append(self._weights[j])
j += 1
self.weights = np.array(self.weights)
self.objective_value = self._objective_value
assert len(self.weights) == self.nconformers
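construct_weights appears to expand the solver's weights, computed only over the non-redundant conformers, back to the full conformer list so the assertion on self.nconformers holds. A minimal sketch of that expansion, assuming redundant conformers simply receive zero weight (the hunk shown here does not spell out how they are handled), with made-up indices:

```python
# Hypothetical illustration of scattering solver weights back over all
# conformers; the index values are invented and the zero-for-redundant
# assumption may not match the actual construct_weights behaviour.
import numpy as np

nconformers = 6
valid_indices = [0, 2, 3, 5]                     # survived redundancy filtering
solver_weights = np.array([0.4, 0.1, 0.3, 0.2])  # one weight per valid conformer

weights = np.zeros(nconformers)
weights[valid_indices] = solver_weights

print(weights)  # [0.4 0.  0.1 0.3 0.  0.2]
assert len(weights) == nconformers
```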

def solve_miqp(self, threshold=0, cardinality=0):
@@ -225,27 +227,32 @@ def solve_miqp(self, threshold=0, cardinality=0):
self.objective_value = prob.value
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self.objective_value = 2 * prob.value + self.target.T @ self.target
self._objective_value = 2 * prob.value + self.target.T @ self.target
self._weights = w.value
self.construct_weights()

def solve_qp(self):
def solve_qp(self, split_threshold=3000):
if self.quad_obj is None or self.lin_obj is None:
self.compute_quadratic_coeffs()

m = len(self.valid_indices)
P = self.quad_obj
q = self.lin_obj
w = cp.Variable(m)
objective = cp.Minimize(0.5 * cp.quad_form(w, cp.psd_wrap(P)) + q.T @ w)
constraints = [w >= np.zeros(m), np.ones(m).T @ w <= 1]
prob = cp.Problem(objective, constraints)
prob.solve()
self.objective_value = prob.value
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self.objective_value = 2 * prob.value + self.target.T @ self.target
self._weights = w.value
valid_conformers = len(self.valid_indices)
self._weights = np.zeros(valid_conformers)
splits = valid_conformers // split_threshold + 1 # number of splits
for split in range(splits):
# take every splits-th element with split as an offset, guaranteeing full coverage
P = self.quad_obj[split::splits, split::splits]
q = self.lin_obj[split::splits]
m = len(P)
w = cp.Variable(m)
objective = cp.Minimize(0.5 * cp.quad_form(w, cp.psd_wrap(P)) + q.T @ w)
constraints = [w >= np.zeros(m), np.ones(m).T @ w <= 1]
prob = cp.Problem(objective, constraints)
prob.solve()
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self._objective_value += 2 * prob.value + self.target.T @ self.target
self._objective_value /= splits
self._weights[split::splits] = w.value / splits
self.construct_weights()
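The new solve_qp splits large problems with strided slicing, so sub-problem k sees conformers k, k+splits, k+2*splits, and so on; those index sets partition the valid conformers, and each sub-problem's weights are written back into the same stride. A small self-check with assumed sizes (split_threshold=3000 matches the new default argument):

```python
# Sanity check of the strided split used above. Sizes are assumed for
# illustration; the uniform sub-weights stand in for each sub-problem's w.value.
import numpy as np

valid_conformers = 7000
split_threshold = 3000
splits = valid_conformers // split_threshold + 1  # -> 3 sub-problems

indices = np.arange(valid_conformers)
covered = np.concatenate([indices[split::splits] for split in range(splits)])
assert len(covered) == valid_conformers             # every conformer is scored once
assert len(np.unique(covered)) == valid_conformers  # and only once

weights = np.zeros(valid_conformers)
for split in range(splits):
    sub = indices[split::splits]
    sub_weights = np.full(len(sub), 1.0 / len(sub))  # stand-in for w.value
    weights[split::splits] = sub_weights / splits    # recombine as in solve_qp

print(weights.sum())  # ~1.0 here; bounded by 1 since each sub-problem enforces sum(w) <= 1
```

Dividing each sub-problem's weights by splits keeps the recombined occupancies bounded by 1, since every sub-problem independently enforces sum(w) <= 1; presumably this trades some optimality for tractability relative to solving one large QP over all conformers.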



