Skip to content

Commit

Permalink
Merge pull request #401 from ReactionMechanismGenerator/rmse
Browse files Browse the repository at this point in the history
Feature: Enhance conformer comparison with RMSD score
  • Loading branch information
alongd committed Jun 11, 2020
2 parents b10352a + fb4fecf commit 7d19d9e
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 4 deletions.
65 changes: 61 additions & 4 deletions arc/species/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
import os
from typing import Optional, Tuple, Union
from typing import Dict, Iterable, Optional, Tuple, Union

import pybel
import qcelemental as qcel
Expand Down Expand Up @@ -1644,7 +1644,8 @@ def compare_confs(xyz1: dict,
xyz2: dict,
rtol: float = 1e-5,
atol: float = 1e-5,
) -> bool:
rmsd_score: bool = False,
) -> Union[float, bool]:
"""
Compare two Cartesian coordinates representing conformers using distance matrices.
Expand All @@ -1656,14 +1657,70 @@ def compare_confs(xyz1: dict,
xyz2 (dict): Conformer 2.
rtol (float): The relative tolerance parameter (see Notes).
atol (float): The absolute tolerance parameter (see Notes).
rmsd_score (bool): Whether to output a root-mean-square deviation score of the two distance matrices.
Returns:
bool: Whether the two conformers have almost equal atom distances. ``True`` if they do.
Union[float, bool]:
- If ``rmsd_score`` is ``False`` (default): Whether the two conformers have almost equal atom distances.
``True`` if they do.
- If ``rmsd_score`` is ``True``: The RMSD score of two distance matrices.
"""
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2)
dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2)
return almost_equal_lists(dmat1, dmat2, rtol=rtol, atol=atol)
if rmsd_score:
# distance matrix is symmetric, only need the upper triangular part to compute rmsd
rmsd = calc_rmsd(np.triu(dmat1), np.triu(dmat2))
return rmsd
else:
return almost_equal_lists(dmat1, dmat2, rtol=rtol, atol=atol)

def calc_rmsd(x: np.array,
y: np.array,
) -> float:
"""
Compute the root-mean-square deviation between two matrices.
Args:
x (np.array): Matrix 1.
y (np.array): Matrix 2.
Returns:
float: The RMSD score of two matrices.
"""
d = x - y
n = x.shape[0]
sqr_sum = (d**2).sum()
rmsd = np.sqrt(sqr_sum/n)
return float(rmsd)

def cluster_confs_by_rmsd(xyzs: Iterable[Dict[str, tuple]],
rmsd_threshold: float = 1e-2,
) -> Tuple[Dict[str, tuple]]:
"""
Cluster conformers with the same atom orders using RMSD of distance matrices. Work for both TS and non-TS conformers.
Intended for finding structurally distinct conformers from a pool of conformers.
Suitable scenarios:
1. filter a pool of conformers with their geometry optimized at some level.
Not suitable for:
1. cluster conformers (not optimized) that are sampling of a well or a saddle point (these conformers may have
large difference in RMSE, but they really should be representing the same well or saddle point).
Args:
xyzs (Iterable): Conformers with the same atom orders.
rmsd_threshold (float): The minimum RMSD to consider two conformers as distinct
(i.e., if rmsd > rmsd_threshold, then two conformers are considered distinctive).
Returns:
Tuple[Dict[str, tuple]]: Conformers with distinctive geometries.
"""
xyzs = tuple(xyzs)
distinct_xyzs = [xyzs[0]]
for xyz in xyzs:
rmsd_list = [compare_confs(xyz, distinct_xyz, rmsd_score=True) for distinct_xyz in tuple(distinct_xyzs)]
if all([rmsd > rmsd_threshold for rmsd in tuple(rmsd_list)]):
distinct_xyzs.append(xyz)
return tuple(distinct_xyzs)

def ics_to_scan_constraints(ics: list,
software: Optional[str] = 'gaussian',
Expand Down
146 changes: 146 additions & 0 deletions arc/species/converterTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4075,6 +4075,7 @@ def test_compare_confs(self):
(-0.6300326, 0.6300326, -0.6300326),
(0.6300326, -0.6300326, -0.6300326))}
self.assertTrue(converter.compare_confs(ch4_1, ch4_1))
self.assertEqual(converter.compare_confs(ch4_1, ch4_1, rmsd_score=True), 0.0)

ch4_2 = {'symbols': ('C', 'H', 'H', 'H', 'H'),
'isotopes': (12, 1, 1, 1, 1),
Expand All @@ -4084,6 +4085,7 @@ def test_compare_confs(self):
(-0.6300326, 0.6300326, -0.6300326),
(0.6300326, -0.6300326, -0.6300326))}
self.assertTrue(converter.compare_confs(ch4_1, ch4_2))
self.assertAlmostEqual(converter.compare_confs(ch4_1, ch4_2, rmsd_score=True), 0.0, places=4)

ch4_3 = {'symbols': ('C', 'H', 'H', 'H', 'H'),
'isotopes': (12, 1, 1, 1, 1),
Expand All @@ -4093,6 +4095,8 @@ def test_compare_confs(self):
(-0.6300326, 0.6300326, -0.6300326),
(0.6300326, -0.6300326, -0.6300326))}
self.assertFalse(converter.compare_confs(ch4_1, ch4_3))
self.assertAlmostEqual(converter.compare_confs(ch4_1, ch4_3, rmsd_score=True), 0.0004480326076878692)


occco_1 = {'symbols': ('O', 'C', 'C', 'C', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
'isotopes': (16, 12, 12, 12, 16, 1, 1, 1, 1, 1, 1, 1, 1),
Expand All @@ -4110,6 +4114,7 @@ def test_compare_confs(self):
(-0.4846097814890448, 3.4746190180148613, 1.0370826136306412),
(-0.7517118479102434, 2.0995465744609016, -1.4084474547843668))}
self.assertTrue(converter.compare_confs(occco_1, occco_1))
self.assertEqual(converter.compare_confs(occco_1, occco_1, rmsd_score=True), 0.0)

occco_2 = {'symbols': ('O', 'C', 'C', 'C', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
'isotopes': (16, 12, 12, 12, 16, 1, 1, 1, 1, 1, 1, 1, 1),
Expand All @@ -4127,6 +4132,7 @@ def test_compare_confs(self):
(1.0743805139106757, -1.9882575918786236, 1.2595102280098387),
(0.35195568839394714, -2.3791987519096245, -0.81652943836054))}
self.assertFalse(converter.compare_confs(occco_1, occco_2))
self.assertAlmostEqual(converter.compare_confs(occco_1, occco_2, rmsd_score=True), 1.0094079844245747)

occco_3 = {'symbols': ('O', 'C', 'C', 'C', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
'isotopes': (16, 12, 12, 12, 16, 1, 1, 1, 1, 1, 1, 1, 1),
Expand All @@ -4144,6 +4150,8 @@ def test_compare_confs(self):
(-0.6621011473071822, 3.6174144955483376, 0.18956005497753062),
(-1.544021016988015, 1.848253867499642, -1.8191893347265315))}
self.assertTrue(converter.compare_confs(occco_1, occco_3))
self.assertAlmostEqual(converter.compare_confs(occco_1, occco_3, rmsd_score=True), 0.0, places=4)


occco_4 = {'symbols': ('O', 'C', 'C', 'C', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
'isotopes': (16, 12, 12, 12, 16, 1, 1, 1, 1, 1, 1, 1, 1),
Expand All @@ -4161,6 +4169,8 @@ def test_compare_confs(self):
(-1.759053255113454, 3.4819317812208626, 1.3557210162758644),
(-2.0785703072969466, 2.2346795710060765, -1.151280970188824))}
self.assertTrue(converter.compare_confs(occco_1, occco_4))
self.assertAlmostEqual(converter.compare_confs(occco_1, occco_4, rmsd_score=True), 0.0, places=4)


occco_5 = {'symbols': ('O', 'C', 'C', 'C', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
'isotopes': (16, 12, 12, 12, 16, 1, 1, 1, 1, 1, 1, 1, 1),
Expand All @@ -4179,13 +4189,149 @@ def test_compare_confs(self):
(-2.07095189274604, 3.516416462694289, 0.8201309204066712),
(-2.650256378769789, 1.8823222470150054, -1.4017891959903757))}
self.assertTrue(converter.compare_confs(occco_1, occco_5))
self.assertAlmostEqual(converter.compare_confs(occco_1, occco_5, rmsd_score=True), 0.0, places=4)

def test_check_isomorphism(self):
"""Test checking for molecule isomorphism"""
mol1 = Molecule(smiles='[O-][N+]#N')
mol2 = Molecule(smiles='[N-]=[N+]=O')
self.assertTrue(converter.check_isomorphism(mol1, mol2))

def test_calc_rmsd(self):
"""Test compute the root-mean-square deviation between two matrices."""
a1 = np.array([1, 2, 3, 4])
b1 = np.array([1, 2, 3, 4])
rmsd1 = converter.calc_rmsd(a1, b1)
self.assertEqual(rmsd1, 0.0)

a2 = np.array([1, 2, 3, 4])
b2 = np.array([4, 3, 2, 1])
rmsd2 = converter.calc_rmsd(a2, b2)
self.assertAlmostEqual(rmsd2, 2.23606797749979)

a3 = np.array([[1, 2], [3, 4]])
b3 = np.array([[4, 3], [2, 1]])
rmsd3 = converter.calc_rmsd(a3, b3)
self.assertAlmostEqual(rmsd3, 3.1622776601683795)

def test_cluster_confs_by_rmsd(self):
nco_1 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-3.59616665, 3.56051739, 1.66762912),
(-3.12243719, 4.49463523, 1.44875867),
(-4.24266183, 3.67770871, 2.51214684),
(-2.59966081, 2.57831173, 1.96283775),
(-3.00851627, 1.82896095, 2.40205367),
(-4.38319614, 3.12587525, 0.50462835),
(-5.34431488, 3.03821019, 0.76647869),
(-4.29744813, 3.80180325, -0.22733382))}

# translation of nco_1
nco_2 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-3.55754715, 2.23398849, 1.33151513),
(-2.7610548, 2.7943938, 1.77473724),
(-4.16118773, 1.80160519, 2.10194536),
(-3.01210166, 1.19527628, 0.51391734),
(-3.69250942, 0.54458444, 0.32617974),
(-4.38319614, 3.12587525, 0.50462835),
(-5.32180727, 3.12620383, 0.84960522),
(-4.01329851, 4.05411698, 0.54390982))}

# another translation of nco_1
nco_3 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.89223537, 2.53778082, 1.80808843),
(-5.4721348, 3.81174765, 2.85393343),
(-6.79050042, 3.20357545, 1.40413854),
(-7.35590266, 3.93067122, 1.67478971),
(-4.8233168, 4.45029026, 0.97624893),
(-4.5553715, 5.24474862, 1.52126381),
(-4.01329851, 4.05411698, 0.54390982))}

# rotation of 0.01 deg from nco_1
nco_4 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.89221234, 2.53779365, 1.80811982),
(-5.47217144, 3.81176471, 2.85392657),
(-6.79048224, 3.20353551, 1.40410575),
(-7.35590949, 3.93062237, 1.6747286),
(-4.8233168, 4.45029026, 0.97624893),
(-4.55533726, 5.24473268, 1.52127022),
(-4.01332003, 4.05410499, 0.54388049))}

# rotation of 1 deg from nco_1
nco_5 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.88729365, 2.54057333, 1.81487118),
(-5.48004439, 3.81540593, 2.85242358),
(-6.78653401, 3.1949543, 1.39709159),
(-7.35731028, 3.92010236, 1.66161901),
(-4.8233168, 4.45029026, 0.97624893),
(-4.54798889, 5.24128648, 1.52261833),
(-4.01796774, 4.05155464, 0.53758868))}

# rotation of 8 deg from nco_1
nco_6 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.85441593, 2.56143528, 1.86262288),
(-5.53539016, 3.83957757, 2.84021837),
(-6.75650663, 3.13476922, 1.34958552),
(-7.3634016, 3.84494333, 1.57078959),
(-4.8233168, 4.45029026, 0.97624893),
(-4.49686401, 5.2159761, 1.53046347),
(-4.05189413, 4.03504741, 0.49408666))}

# very different conformer
nco_7 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.73789674, 2.77478016, 1.42368122),
(-5.44599565, 3.3513005, 2.91340831),
(-6.75650663, 3.13476922, 1.34958552),
(-7.22860243, 3.94740437, 1.15375478),
(-5.1062077, 4.82800097, 1.47873049),
(-4.81594257, 5.32827901, 2.29449273),
(-4.36133227, 4.8173364, 0.81161216))}

# another very different conformer
nco_8 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323491, 3.44805617, 1.84781649),
(-4.85682312, 3.32188069, 2.72717576),
(-6.21615067, 4.17444368, 2.03548831),
(-6.05851635, 2.20023316, 1.49928799),
(-7.00518267, 2.24987143, 1.65080995),
(-4.60227374, 3.9061932, 0.74017234),
(-3.69226847, 4.12970364, 1.0893621),
(-4.52773502, 3.18138419, 0.05526659))}

# tiny change from nco_8
nco_9 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'),
'isotopes': (12, 1, 1, 16, 1, 14, 1, 1),
'coords': ((-5.45323492, 3.44805618, 1.84781649),
(-4.85682312, 3.32188069, 2.72717576),
(-6.21615067, 4.17444368, 2.03548831),
(-6.05851635, 2.20023316, 1.49928799),
(-7.00518267, 2.24987143, 1.65080995),
(-4.60227374, 3.9061932, 0.74017234),
(-3.69226847, 4.12970364, 1.0893621),
(-4.52773502, 3.18138419, 0.05526659))}

xyzs1 = [nco_1, nco_2, nco_3, nco_4, nco_5]
self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs1)), 1)

xyzs2 = [nco_1, nco_2, nco_3, nco_4, nco_5, nco_6]
self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs2)), 2)

xyzs3 = [nco_1, nco_2, nco_6, nco_7, nco_8, nco_9]
self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs3)), 4)



if __name__ == '__main__':
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

0 comments on commit 7d19d9e

Please sign in to comment.