Merge pull request #419 from ExcitedStates/split_qp_3000
split QP into 3000 conformers
Stephanie (Mullane) Wankowicz committed Apr 25, 2024
2 parents 474fa9c + 493d4dc commit b2a7425
Showing 2 changed files with 111 additions and 157 deletions.
225 changes: 86 additions & 139 deletions src/qfit/qfit.py
@@ -642,17 +642,8 @@ def _setup_clash_detector(self):

def run(self):
if self.options.sample_backbone:
for altloc in self.options.backbone_coor_dict["coords"]:
self.residue.coor = self.options.backbone_coor_dict["coords"][altloc]
if self.residue.resn[0] == "GLY":
atom = self.residue.extract("name", "O")
else:
atom = self.residue.extract("name", "CB")
if self.options.backbone_coor_dict["u_matrices"][altloc] is not None:
self.u_matrix = self.options.backbone_coor_dict["u_matrices"][
altloc
]
self._sample_backbone()
self._sample_backbone()

if self.options.sample_angle:
self._sample_angle()

@@ -729,18 +720,17 @@ def _sample_backbone(self):

# Determine directions for backbone sampling
atom = self.residue.extract("name", atom_name)

try:
if not self.u_matrix:
self.u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(self.u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {self.u_matrix}")
u_matrix = [
[atom.u00[0], atom.u01[0], atom.u02[0]],
[atom.u01[0], atom.u11[0], atom.u12[0]],
[atom.u02[0], atom.u12[0], atom.u22[0]],
]
directions = adp_ellipsoid_axes(u_matrix)
logger.debug(f"[_sample_backbone] u_matrix = {u_matrix}")
logger.debug(f"[_sample_backbone] directions = {directions}")
except AttributeError:
logger.debug(
logger.info(
f"[{self.identifier}] Got AttributeError for directions at Cβ. Treating as isotropic B, using Cβ-Cα, C-N,(Cβ-Cα × C-N) vectors."
)
# Choose direction vectors as Cβ-Cα, C-N, and then (Cβ-Cα × C-N)
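In the try branch above, the sampling directions come from the atom's anisotropic displacement (ADP) U matrix via adp_ellipsoid_axes. As a rough, non-authoritative illustration of the idea, the sketch below derives principal axes of a symmetric 3×3 U tensor by eigendecomposition; whether adp_ellipsoid_axes is implemented exactly this way is an assumption, and the U values are invented.

```python
# Minimal sketch, not the qFit implementation: one common way to turn a 3x3
# anisotropic displacement (U) tensor into sampling directions is to take its
# eigenvectors. The numbers below are made up for illustration.
import numpy as np

u_matrix = np.array(
    [
        [0.031, 0.004, -0.002],
        [0.004, 0.028, 0.005],
        [-0.002, 0.005, 0.035],
    ]
)

eigvals, eigvecs = np.linalg.eigh(u_matrix)  # symmetric tensor, so eigh applies
order = np.argsort(eigvals)[::-1]            # largest displacement first
directions = eigvecs[:, order].T             # one unit vector per row

print(directions.shape)  # (3, 3): three orthonormal directions to displace along
```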
@@ -777,55 +767,55 @@ def _sample_backbone(self):
self._bs.append(self.conformer.b)
return

# Retrieve the amplitudes and stepsizes from options.
sigma = self.options.sample_backbone_sigma
bba, bbs = (
    self.options.sample_backbone_amplitude,
    self.options.sample_backbone_step,
)
assert bba >= bbs > 0

# Create an array of amplitudes to scan:
# We start from stepsize, making sure to stop only after bba.
# Also include negative amplitudes.
eps = ((bba / bbs) / 2) * np.finfo(
    float
).epsneg  # ε to avoid FP errors in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

# Optimize in torsion space to achieve the target atom position
optimizer = NullSpaceOptimizer(segment)
start_coor = atom.coor[0]  # We are working on a single atom.
torsion_solutions = []
for amplitude, direction in itertools.product(amplitudes, directions):
    endpoint = start_coor + amplitude * direction
    optimize_result = optimizer.optimize(atom_name, endpoint)
    torsion_solutions.append(optimize_result["x"])

# Capture starting coordinates for the segment, so that we can restart after every rotator
starting_coor = segment.coor
for solution in torsion_solutions:
    optimizer.rotator(solution)
    self._coor_set.append(self.segment[index].coor)
    self._bs.append(self.conformer.b)
    segment.coor = starting_coor

logger.debug(
    f"[_sample_backbone] Backbone sampling generated {len(self._coor_set)} conformers."
)
if self.options.write_intermediate_conformers:
    self._write_intermediate_conformers(
        prefix=f"sample_backbone_segment{index:03d}"
    )
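The amplitude scan above builds a symmetric set of displacements from the step size up to the full amplitude, with a small epsilon guarding against floating-point drift in np.arange. A worked example with assumed values (these are not qFit's defaults):

```python
# Worked example of the amplitude construction above; bba and bbs are assumed
# values for illustration only.
import numpy as np

bba, bbs = 0.3, 0.1  # sampling amplitude and step size
eps = ((bba / bbs) / 2) * np.finfo(float).epsneg  # guard against FP drift in arange
amplitudes = np.arange(start=bbs, stop=bba + bbs - eps, step=bbs)
amplitudes = np.concatenate([-amplitudes[::-1], amplitudes])

print(amplitudes)  # approximately [-0.3 -0.2 -0.1  0.1  0.2  0.3]; note there is no 0.0 entry
```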

def _sample_angle(self):
"""Sample residue conformations by flexing α-β-γ angle.
Only operates on residues with large aromatic sidechains
(Trp, Tyr, Phe, His) where CG is a member of the aromatic ring.
Here, slight deflections of the ring are likely to lead to better-
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ) later.
scoring conformers when we scan χ(Cα-Cβ) and χ(Cβ-Cγ).
This angle does not exist in {Gly, Ala}, and it does not make sense to
sample this angle in Pro.
@@ -865,28 +855,37 @@ def _sample_angle(self):
new_bs = []
for coor in self._coor_set:
self.residue.coor = coor
rotator = CBAngleRotator(self.residue)
for angle in angles:
rotator(angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue
# Initialize rotator
perp_rotator = CBAngleRotator(self.residue)
# Rotate about the axis perpendicular to CB-CA and CB-CG vectors
for perp_angle in angles:
perp_rotator(perp_angle)
coor_rotated = self.residue.coor
# Initialize rotator
bisec_rotator = BisectingAngleRotator(self.residue)
# Rotate about the axis bisecting the CA-CB-CG angle for each angle sampled about the perpendicular axis
for bisec_angle in angles:
self.residue.coor = coor_rotated # Ensure that the second rotation is applied to the updated coordinates from first rotation
bisec_rotator(bisec_angle)
coor = self.residue.coor

# Move on if these coordinates are unsupported by density
if self.options.remove_conformers_below_cutoff:
values = self.xmap.interpolate(coor[active_mask])
mask = self.residue.e[active_mask] != "H"
if np.min(values[mask]) < self.options.density_cutoff:
continue

# Move on if these coordinates cause a clash
if self.options.external_clash:
    if self._cd() and self.residue.clashes():
        continue
elif self.residue.clashes():
    continue

# Valid, non-clashing conformer found!
new_coor_set.append(self.residue.coor)
new_bs.append(self.conformer.b)

# Update sampled coords
self._coor_set = new_coor_set
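Because the bisecting-axis rotation is nested inside the perpendicular-axis rotation, every input conformer can spawn len(angles)**2 candidates before the density and clash filters, which is part of why downstream QP scoring can see the thousands of conformers this PR splits over. A back-of-envelope sketch with assumed numbers:

```python
# Rough count of candidates generated by the nested rotations; the angle grid
# and the number of incoming conformers are assumptions for illustration.
import numpy as np

angles = np.arange(-15, 16, 5)  # 7 sampled angles (degrees), assumed grid
n_incoming = 100                # conformers entering _sample_angle, assumed

candidates = n_incoming * len(angles) ** 2  # perpendicular x bisecting rotations
print(candidates)  # 4900 candidates before density/clash filtering
```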
@@ -1046,65 +1045,13 @@ def _sample_sidechain(self):
prefix=f"sample_sidechain_iter{iteration}"
)

if len(self._coor_set) <= 15000:
# If <15000 conformers are generated, QP score conformer occupancy normally
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)
if len(self._coor_set) > 15000:
# If >15000 conformers are generated, split the QP conformer scoring into two
temp_coor_set = self._coor_set
temp_bs = self._bs

# Splitting the arrays into two sections
half_index = len(temp_coor_set) // 2 # Integer division for splitting
section_1_coor = temp_coor_set[:half_index]
section_1_bs = temp_bs[:half_index]
section_2_coor = temp_coor_set[half_index:]
section_2_bs = temp_bs[half_index:]

# Process the first section
self._coor_set = section_1_coor
self._bs = section_1_bs

# QP score the first section
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)

# Save results from the first section
qp_temp_coor = self._coor_set
qp_temp_bs = self._bs

# Process the second section
self._coor_set = section_2_coor
self._bs = section_2_bs

# QP score the second section
self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)

# Save results from the second section
qp_2_temp_coor = self._coor_set
qp_2_temp_bs = self._bs

# Concatenate the results from both sections
self._coor_set = np.concatenate((qp_temp_coor, qp_2_temp_coor), axis=0)
self._bs = np.concatenate((qp_temp_bs, qp_2_temp_bs), axis=0)

self._convert()
self._solve_qp()
self._update_conformers()
if self.options.write_intermediate_conformers:
self._write_intermediate_conformers(
prefix=f"sample_sidechain_iter{iteration}_qp"
)
# MIQP score conformer occupancy
self.sample_b()
self._convert()
43 changes: 25 additions & 18 deletions src/qfit/solvers.py
@@ -160,13 +160,14 @@ def __init__(self, target, models, nthreads=1):
self.quad_obj = None
self.lin_obj = None

self.weights = None
self._objective_value = None

self.nconformers = models.shape[0]
self.valid_indices = []
self.redundant_indices = []

self._weights = None
self._objective_value = 0
self.weights = None

def find_redundant_conformers(self, threshold=1e-6):
for i in range(self.nconformers):
if i in self.redundant_indices:
@@ -200,6 +201,7 @@ def construct_weights(self):
self.weights.append(self._weights[j])
j += 1
self.weights = np.array(self.weights)
self.objective_value = self._objective_value
assert len(self.weights) == self.nconformers
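construct_weights appears to expand the solver's weights, computed only over the non-redundant conformers, back to the full conformer list so the assertion on self.nconformers holds. A minimal sketch of that expansion, assuming redundant conformers simply receive zero weight (the hunk shown here does not spell out how they are handled), with made-up indices:

```python
# Hypothetical illustration of scattering solver weights back over all
# conformers; the index values are invented and the zero-for-redundant
# assumption may not match the actual construct_weights behaviour.
import numpy as np

nconformers = 6
valid_indices = [0, 2, 3, 5]                     # survived redundancy filtering
solver_weights = np.array([0.4, 0.1, 0.3, 0.2])  # one weight per valid conformer

weights = np.zeros(nconformers)
weights[valid_indices] = solver_weights

print(weights)  # [0.4 0.  0.1 0.3 0.  0.2]
assert len(weights) == nconformers
```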

def solve_miqp(self, threshold=0, cardinality=0):
@@ -225,27 +227,32 @@ def solve_miqp(self, threshold=0, cardinality=0):
self.objective_value = prob.value
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self.objective_value = 2 * prob.value + self.target.T @ self.target
self._objective_value = 2 * prob.value + self.target.T @ self.target
self._weights = w.value
self.construct_weights()

def solve_qp(self):
def solve_qp(self, split_threshold=3000):
if self.quad_obj is None or self.lin_obj is None:
self.compute_quadratic_coeffs()

m = len(self.valid_indices)
P = self.quad_obj
q = self.lin_obj
w = cp.Variable(m)
objective = cp.Minimize(0.5 * cp.quad_form(w, cp.psd_wrap(P)) + q.T @ w)
constraints = [w >= np.zeros(m), np.ones(m).T @ w <= 1]
prob = cp.Problem(objective, constraints)
prob.solve()
self.objective_value = prob.value
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self.objective_value = 2 * prob.value + self.target.T @ self.target
self._weights = w.value
valid_conformers = len(self.valid_indices)
self._weights = np.zeros(valid_conformers)
splits = valid_conformers // split_threshold + 1 # number of splits
for split in range(splits):
# take every splits-th element with split as an offset, guaranteeing full coverage
P = self.quad_obj[split::splits, split::splits]
q = self.lin_obj[split::splits]
m = len(P)
w = cp.Variable(m)
objective = cp.Minimize(0.5 * cp.quad_form(w, cp.psd_wrap(P)) + q.T @ w)
constraints = [w >= np.zeros(m), np.ones(m).T @ w <= 1]
prob = cp.Problem(objective, constraints)
prob.solve()
# I'm not sure why objective_values is calculated this way, but doing
# so to be compatible with the former CPLEXSolver class
self._objective_value += 2 * prob.value + self.target.T @ self.target
self._objective_value /= splits
self._weights[split::splits] = w.value / splits
self.construct_weights()
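The new solve_qp splits large problems with strided slicing, so sub-problem k sees conformers k, k+splits, k+2*splits, and so on; those index sets partition the valid conformers, and each sub-problem's weights are written back into the same stride. A small self-check with assumed sizes (split_threshold=3000 matches the new default argument):

```python
# Sanity check of the strided split used above. Sizes are assumed for
# illustration; the uniform sub-weights stand in for each sub-problem's w.value.
import numpy as np

valid_conformers = 7000
split_threshold = 3000
splits = valid_conformers // split_threshold + 1  # -> 3 sub-problems

indices = np.arange(valid_conformers)
covered = np.concatenate([indices[split::splits] for split in range(splits)])
assert len(covered) == valid_conformers             # every conformer is scored once
assert len(np.unique(covered)) == valid_conformers  # and only once

weights = np.zeros(valid_conformers)
for split in range(splits):
    sub = indices[split::splits]
    sub_weights = np.full(len(sub), 1.0 / len(sub))  # stand-in for w.value
    weights[split::splits] = sub_weights / splits    # recombine as in solve_qp

print(weights.sum())  # ~1.0 here; bounded by 1 since each sub-problem enforces sum(w) <= 1
```

Dividing each sub-problem's weights by splits keeps the recombined occupancies bounded by 1, since every sub-problem independently enforces sum(w) <= 1; presumably this trades some optimality for tractability relative to solving one large QP over all conformers.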



