From f464d3ce713651197cf98f3a98c1879f85df2c27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Mon, 5 Oct 2020 10:32:37 +0200
Subject: [PATCH 01/47] fix carbonyl

---
 package/MDAnalysis/coordinates/RDKit.py             | 6 +++---
 testsuite/MDAnalysisTests/coordinates/test_rdkit.py | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 30175e7fbcf..f370c42e06e 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -735,7 +735,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     """
     mol.UpdatePropertyCache(strict=False)
     Chem.Kekulize(mol)
-    pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]")
+    pattern = Chem.MolFromSmarts("[*-]-[*+0]=[*+0;!O]")
     # number of unique matches with the pattern
     n_matches = len(set([match[0]
                          for match in mol.GetSubstructMatches(pattern)]))
@@ -744,13 +744,13 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
         return
     # check if there's an even number of anion-*=* patterns
     elif n_matches % 2 == 0:
-        end_pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]-[*-]")
+        end_pattern = Chem.MolFromSmarts("[*-]-[*+0]=[*+0]-[*-]")
     else:
         # as a last resort, the only way to standardize is to find a nitrogen
         # that can accept a double bond and a positive charge
         # or a carbonyl that will become an enolate
         end_pattern = Chem.MolFromSmarts(
-            "[*-;!O]-[*+0]=[*+0]-[$([#7;X3;v3]),$([#6+0]=O)]")
+            "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0]=O)]")
     backtrack = []
     for _ in range(max_iter):
         # simplest case where n=1
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 1fc1bea543d..41dc6da40db 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -490,6 +490,8 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "O=C([C@H](CC1=CNC=N1)N)O",
         "O=C([C@H](CC1=CN=CN1)N)O",
         "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
+        # fix conjugated carbonyl/carboxyl
+        "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1",
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders

From d129537244204a83860711c385b374fd34804100 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Mon, 12 Oct 2020 19:27:34 +0200
Subject: [PATCH 02/47] fix charged mols

---
 package/MDAnalysis/coordinates/RDKit.py       | 40 +++++++++++--------
 .../MDAnalysisTests/coordinates/test_rdkit.py | 18 +++++++--
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index f370c42e06e..e625c0b2cc9 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -513,8 +513,10 @@ def _infer_bo_and_charges(mol):
     one will be charged. It will also affect more complex conjugated systems.
     """
 
-    for atom in sorted(mol.GetAtoms(), reverse=True,
-                       key=lambda a: _get_nb_unpaired_electrons(a)[0]):
+    atoms = sorted([a for a in mol.GetAtoms() if a.GetAtomicNum() > 1],
+                   reverse=True,
+                   key=lambda a: _get_nb_unpaired_electrons(a)[0])
+    for atom in atoms:
         # get NUE for each possible valence
         nue = _get_nb_unpaired_electrons(atom)
         # if there's only one possible valence state and the corresponding
@@ -550,11 +552,11 @@ def _infer_bo_and_charges(mol):
                         # recalculate nue for atom
                         nue = _get_nb_unpaired_electrons(atom)
 
-            # if the atom still has unpaired electrons
-            nue = _get_nb_unpaired_electrons(atom)[0]
-            if nue > 0:
-                # transform it to a negative charge
-                atom.SetFormalCharge(-nue)
+            # if atom valence is still not filled
+            nue_list = _get_nb_unpaired_electrons(atom)
+            if not any(nue == 0 for nue in nue_list):
+                # transform nue to charge
+                atom.SetFormalCharge(-nue_list[0])
                 atom.SetNumRadicalElectrons(0)
                 mol.UpdatePropertyCache(strict=False)
 
@@ -611,19 +613,19 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | conjugated-O- | ``[O:1]=[#6:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]``        |
     +---------------+------------------------------------------------------------------------------+
-    | Cterm         | ``[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]``                                           |
+    | Cterm         | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]``                                        |
     +---------------+------------------------------------------------------------------------------+
     | Nterm         | ``[N-;X2;H1:1]>>[N+0:1]``                                                    |
     +---------------+------------------------------------------------------------------------------+
     | keto-enolate  | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]``                             |
     +---------------+------------------------------------------------------------------------------+
-    | arginine      | ``[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]>>[N:1]-[C+0:2](-[N:3])=[N+:4]``  |
+    | arginine      | ``[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]``                               |
     +---------------+------------------------------------------------------------------------------+
     | histidine     | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``|
     +---------------+------------------------------------------------------------------------------+
     | sulfone       | ``[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]>>[S:1](=[O+0:2])=[O+0:3]``               |
     +---------------+------------------------------------------------------------------------------+
-    | nitro         | ``[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]>>[N+:1](-[O-:2])=[O+0:3]``               |
+    | charged N     | ``[#7+0;v3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
     +---------------+------------------------------------------------------------------------------+
  
     """
@@ -635,17 +637,15 @@ def _standardize_patterns(mol, max_iter=200):
     for reactant in Chem.GetMolFrags(mol, asMols=True):
 
         for name, reaction in [
-            ("Cterm", "[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]"),
+            ("Cterm", "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]"),
             ("Nterm", "[N-;X2;H1:1]>>[N+0:1]"),
             ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
-            ("ARG", "[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]"
-                    ">>[N:1]-[C+0:2](-[N:3])=[N+:4]"),
+            ("ARG", "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]"),
             ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
                     ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"),
             ("sulfone", "[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]"
                         ">>[S:1](=[O+0:2])=[O+0:3]"),
-            ("nitro", "[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]"
-                      ">>[N+:1](-[O-:2])=[O+0:3]"),
+            ("charged-N", "[#7+0;v3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
         ]:
             reactant.UpdatePropertyCache(strict=False)
             Chem.Kekulize(reactant)
@@ -789,7 +789,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             # check if we haven't already transformed this triplet
             for match in matches:
                 # sort the indices for the comparison
-                g = tuple(sorted(match))
+                g = set(match)
                 if g in backtrack:
                     # already transformed
                     continue
@@ -799,7 +799,13 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     backtrack.append(g)
                     break
             else:
-                anion, a1, a2 = matches[0]
+                # tried all paths and now blocked from backtracking
+                warnings.warn("The standardization got stuck in a dead-end, "
+                              "likely because of a charge in a conjugated "
+                              "system. Please check the output molecule "
+                              "carefully as it is likely to be an unreasonable"
+                              " resonance structure")
+                return
             # charges
             mol.GetAtomWithIdx(anion).SetFormalCharge(0)
             mol.GetAtomWithIdx(a2).SetFormalCharge(-1)
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 41dc6da40db..017954390f9 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -490,8 +490,11 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "O=C([C@H](CC1=CNC=N1)N)O",
         "O=C([C@H](CC1=CN=CN1)N)O",
         "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
-        # fix conjugated carbonyl/carboxyl
+        # fixes from PR #???
         "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1",
+        "[O-][n+]1cccnc1",
+        "C[n+]1ccccc1",
+        "[PH4+]",
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders
@@ -516,9 +519,16 @@ def test_order_independant(self, smi_in):
             m = _standardize_patterns(m)
             Chem.SanitizeMol(m)
             m = Chem.RemoveHs(m)
-            assert m.HasSubstructMatch(ref) and ref.HasSubstructMatch(
-                m), (f"(input) {Chem.MolToSmiles(ref)} != "
-                     f"{Chem.MolToSmiles(m)} (output) root atom {a.GetIdx()}")
+            match = m.HasSubstructMatch(ref) and ref.HasSubstructMatch(m)
+            if not match:
+                # try resonance structures for charged conjugated systems
+                for mol in Chem.ResonanceMolSupplier(m, maxStructs=20):
+                    match = mol.HasSubstructMatch(ref) and ref.HasSubstructMatch(mol)
+                    if match:
+                        break
+            assert match, (f"(input) {Chem.MolToSmiles(ref)} != "
+                           f"{Chem.MolToSmiles(m)} (output) "
+                           f"root atom {a.GetIdx()}")
 
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"

From 24ddb3ead17fc15b6d7d482f1c1aaf900ff9f077 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 14 Oct 2020 17:59:03 +0200
Subject: [PATCH 03/47] remove false premature return

---
 package/MDAnalysis/coordinates/RDKit.py | 272 ++++++++++++------------
 1 file changed, 133 insertions(+), 139 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index e625c0b2cc9..81e24fcb6ec 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -67,6 +67,7 @@
 import warnings
 import re
 import copy
+from functools import lru_cache
 
 import numpy as np
 
@@ -244,19 +245,13 @@ class RDKitConverter(base.ConverterBase):
     Since one of the main use case of the converter is converting trajectories
     and not just a topology, creating a new molecule from scratch for every
     frame would be too slow so the converter uses a caching system. The cache
-    only remembers the id of the last AtomGroup that was converted, as well
-    as the arguments that were passed to the converter. This means that using
-    ``u.select_atoms("protein").convert_to("RDKIT")`` will not benefit from the
-    cache since the selection is deleted from memory as soon as the conversion
-    is finished. Instead, users should do this in two steps by first saving the
-    selection in a variable and then converting the saved AtomGroup. It also
-    means that ``ag.convert_to("RDKIT")`` followed by
-    ``ag.convert_to("RDKIT", NoImplicit=False)`` will not use the cache.
-    Finally if you're modifying the AtomGroup in place between two conversions,
-    the id of the AtomGroup won't change and thus the converter will use the
-    cached molecule. For this reason, you can pass a ``cache=False`` argument
-    to the converter to bypass the caching system.
-    Note that the cached molecule doesn't contain the coordinates of the atoms.
+    only remembers the 2 most recent AtomGroups that were converted, as well
+    as the arguments that were passed to the converter. The number of objects
+    cached can be changed with the function :func:`set_converter_cache_size`.
+    However, ``ag.convert_to("RDKIT")`` followed by ``ag.convert_to("RDKIT", NoImplicit=False)``
+    will not use the cache since the arguments given are different.
+    You can pass a ``cache=False`` argument to the converter to bypass the
+    caching system.
 
 
     .. versionadded:: 2.0.0
@@ -265,7 +260,6 @@ class RDKitConverter(base.ConverterBase):
 
     lib = 'RDKIT'
     units = {'time': None, 'length': 'Angstrom'}
-    _cache = dict()
 
     def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
         """Write selection at current trajectory frame to
@@ -278,16 +272,14 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
             used, the cached molecule and the new one have to be made from the
-            same AtomGroup object (same id) and with the same arguments passed
-            to the converter (with the exception of this `cache` argument)
+            same AtomGroup selection and with the same arguments passed
+            to the converter
         NoImplicit : bool
             Prevent adding hydrogens to the molecule
         max_iter : int
             Maximum number of iterations to standardize conjugated systems.
             See :func:`_rebuild_conjugated_bonds`
         """
-        # parameters passed to atomgroup_to_mol and used by the cache
-        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
 
         try:
             from rdkit import Chem
@@ -303,22 +295,13 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
                             "please use a valid AtomGroup or Universe".format(
                                 type(obj))) from None
 
+        # parameters passed to atomgroup_to_mol
+        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
         if cache:
-            # key used to search the cache
-            key = f"<{id(ag):#x}>" + ",".join(f"{key}={value}"
-                                            for key, value in kwargs.items())
-            try:
-                mol = self._cache[key]
-            except KeyError:
-                # only keep the current molecule in cache
-                self._cache.clear()
-                # create the topology
-                self._cache[key] = mol = self.atomgroup_to_mol(ag, **kwargs)
-            # continue on copy of the cached molecule
+            mol = atomgroup_to_mol(ag, **kwargs)
             mol = copy.deepcopy(mol)
         else:
-            self._cache.clear()
-            mol = self.atomgroup_to_mol(ag, **kwargs)
+            mol = atomgroup_to_mol.__wrapped__(ag, **kwargs)
 
         # add a conformer for the current Timestep
         if hasattr(ag, "positions"):
@@ -339,108 +322,124 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
 
         return mol
 
-    def atomgroup_to_mol(self, ag, NoImplicit=True, max_iter=200):
-        """Converts an AtomGroup to an RDKit molecule.
 
-        Parameters
-        -----------
-        ag : MDAnalysis.core.groups.AtomGroup
-            The AtomGroup to convert
-        NoImplicit : bool
-            Prevent adding hydrogens to the molecule
-        max_iter : int
-            Maximum number of iterations to standardize conjugated systems.
-            See :func:`_rebuild_conjugated_bonds`
-        """
-        try:
-            elements = ag.elements
-        except NoDataError:
-            raise AttributeError(
-                "The `elements` attribute is required for the RDKitConverter "
-                "but is not present in this AtomGroup. Please refer to the "
-                "documentation to guess elements from other attributes or "
-                "type `help(mda.topology.guessers)`") from None
-
-        if "H" not in ag.elements:
-            warnings.warn(
-                "No hydrogen atom could be found in the topology, but the "
-                "converter requires all hydrogens to be explicit. Please "
-                "check carefully the output molecule as the converter is "
-                "likely to add negative charges and assign incorrect bond "
-                "orders to structures with implicit hydrogens. Alternatively, "
-                "you can use the parameter `NoImplicit=False` when using the "
-                "converter to allow implicit hydrogens and disable inferring "
-                "bond orders and charges."
-            )
-
-        # attributes accepted in PDBResidueInfo object
-        pdb_attrs = {}
-        if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"):
-            raise AttributeError(
-                "Both `tempfactors` and `bfactors` attributes are present but "
-                "only one can be assigned to the RDKit molecule. Please "
-                "delete the unnecessary one and retry."
-            )
-        for attr in RDATTRIBUTES.keys():
-            if hasattr(ag, attr):
-                pdb_attrs[attr] = getattr(ag, attr)
-
-        other_attrs = {}
-        for attr in ["charges", "segids", "types"]:
-            if hasattr(ag, attr):
-                other_attrs[attr] = getattr(ag, attr)
-
-        mol = Chem.RWMol()
-        # map index in universe to index in mol
-        atom_mapper = {}
-
-        for i, (atom, element) in enumerate(zip(ag, elements)):
-            # create atom
-            rdatom = Chem.Atom(element.capitalize())
-            # enable/disable adding implicit H to the molecule
-            rdatom.SetNoImplicit(NoImplicit)
-            # add PDB-like properties
-            mi = Chem.AtomPDBResidueInfo()
-            for attr, values in pdb_attrs.items():
-                _add_mda_attr_to_rdkit(attr, values[i], mi)
-            rdatom.SetMonomerInfo(mi)
-            # other properties
-            for attr in other_attrs.keys():
-                value = other_attrs[attr][i]
-                attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular
-                _set_atom_property(rdatom, attr, value)
-            _set_atom_property(rdatom, "_MDAnalysis_index", i)
-            # add atom
-            index = mol.AddAtom(rdatom)
-            atom_mapper[atom.ix] = index
+@lru_cache(maxsize=2)
+def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200):
+    """Converts an AtomGroup to an RDKit molecule without coordinates.
 
+    Parameters
+    -----------
+    ag : MDAnalysis.core.groups.AtomGroup
+        The AtomGroup to convert
+    NoImplicit : bool
+        Prevent adding hydrogens to the molecule
+    max_iter : int
+        Maximum number of iterations to standardize conjugated systems.
+        See :func:`_rebuild_conjugated_bonds`
+    """
+    try:
+        elements = ag.elements
+    except NoDataError:
+        raise AttributeError(
+            "The `elements` attribute is required for the RDKitConverter "
+            "but is not present in this AtomGroup. Please refer to the "
+            "documentation to guess elements from other attributes or "
+            "type `help(mda.topology.guessers)`") from None
+
+    if "H" not in ag.elements:
+        warnings.warn(
+            "No hydrogen atom could be found in the topology, but the "
+            "converter requires all hydrogens to be explicit. Please "
+            "check carefully the output molecule as the converter is "
+            "likely to add negative charges and assign incorrect bond "
+            "orders to structures with implicit hydrogens. Alternatively, "
+            "you can use the parameter `NoImplicit=False` when using the "
+            "converter to allow implicit hydrogens and disable inferring "
+            "bond orders and charges."
+        )
+
+    # attributes accepted in PDBResidueInfo object
+    pdb_attrs = {}
+    if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"):
+        raise AttributeError(
+            "Both `tempfactors` and `bfactors` attributes are present but "
+            "only one can be assigned to the RDKit molecule. Please "
+            "delete the unnecessary one and retry."
+        )
+    for attr in RDATTRIBUTES.keys():
+        if hasattr(ag, attr):
+            pdb_attrs[attr] = getattr(ag, attr)
+
+    other_attrs = {}
+    for attr in ["charges", "segids", "types"]:
+        if hasattr(ag, attr):
+            other_attrs[attr] = getattr(ag, attr)
+
+    mol = Chem.RWMol()
+    # map index in universe to index in mol
+    atom_mapper = {}
+
+    for i, (atom, element) in enumerate(zip(ag, elements)):
+        # create atom
+        rdatom = Chem.Atom(element.capitalize())
+        # enable/disable adding implicit H to the molecule
+        rdatom.SetNoImplicit(NoImplicit)
+        # add PDB-like properties
+        mi = Chem.AtomPDBResidueInfo()
+        for attr, values in pdb_attrs.items():
+            _add_mda_attr_to_rdkit(attr, values[i], mi)
+        rdatom.SetMonomerInfo(mi)
+        # other properties
+        for attr in other_attrs.keys():
+            value = other_attrs[attr][i]
+            attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular
+            _set_atom_property(rdatom, attr, value)
+        _set_atom_property(rdatom, "_MDAnalysis_index", i)
+        # add atom
+        index = mol.AddAtom(rdatom)
+        atom_mapper[atom.ix] = index
+
+    try:
+        ag.bonds
+    except NoDataError:
+        warnings.warn(
+            "No `bonds` attribute in this AtomGroup. Guessing bonds based "
+            "on atoms coordinates")
+        ag.guess_bonds()
+
+    for bond in ag.bonds:
         try:
-            ag.bonds
-        except NoDataError:
-            warnings.warn(
-                "No `bonds` attribute in this AtomGroup. Guessing bonds based "
-                "on atoms coordinates")
-            ag.guess_bonds()
-
-        for bond in ag.bonds:
-            try:
-                bond_indices = [atom_mapper[i] for i in bond.indices]
-            except KeyError:
-                continue
-            bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE)
-            mol.AddBond(*bond_indices, bond_type)
-
-        mol.UpdatePropertyCache(strict=False)
-
-        if NoImplicit:
-            # infer bond orders and formal charges from the connectivity
-            _infer_bo_and_charges(mol)
-            mol = _standardize_patterns(mol, max_iter)
-
-        # sanitize
-        Chem.SanitizeMol(mol)
+            bond_indices = [atom_mapper[i] for i in bond.indices]
+        except KeyError:
+            continue
+        bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE)
+        mol.AddBond(*bond_indices, bond_type)
+
+    mol.UpdatePropertyCache(strict=False)
+
+    if NoImplicit:
+        # infer bond orders and formal charges from the connectivity
+        _infer_bo_and_charges(mol)
+        mol = _standardize_patterns(mol, max_iter)
+
+    # sanitize
+    Chem.SanitizeMol(mol)
+
+    return mol
 
-        return mol
+
+def set_converter_cache_size(maxsize):
+    """Set the maximum cache size of the RDKit converter
+
+    Parameters
+    ----------
+    maxsize : int or None
+        If int, the cache will only keep the ``maxsize`` most recent
+        conversions in memory. Using ``maxsize=None`` will remove all limits
+        to the cache size, i.e. everything is cached.
+    """
+    global atomgroup_to_mol
+    atomgroup_to_mol = lru_cache(maxsize=maxsize)(atomgroup_to_mol.__wrapped__)
 
 
 def _add_mda_attr_to_rdkit(attr, value, mi):
@@ -753,7 +752,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0]=O)]")
     backtrack = []
     for _ in range(max_iter):
-        # simplest case where n=1
+        # check for ending pattern
         end_match = mol.GetSubstructMatch(end_pattern)
         if end_match:
             # index of each atom
@@ -766,6 +765,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     if neighbor.GetAtomicNum() == 8 and bond.GetBondTypeAsDouble() == 2:
                         bond.SetBondType(Chem.BondType.SINGLE)
                         neighbor.SetFormalCharge(-1)
+                        break
             else:
                 # [*-]-*=*-N
                 if term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 0:
@@ -773,7 +773,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                 # [*-]-*=*-[*-]
                 else:
                     end_charge = 0
-                mol.GetAtomWithIdx(anion2).SetFormalCharge(end_charge)
+                term_atom.SetFormalCharge(end_charge)
             # common part of the conjugated systems: [*-]-*=*
             mol.GetAtomWithIdx(anion1).SetFormalCharge(0)
             mol.GetBondBetweenAtoms(anion1, a1).SetBondType(
@@ -784,6 +784,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             mol.UpdatePropertyCache(strict=False)
 
         # shorten the anion-anion pattern from n to n-1
+        # will not match if ending pattern has been reached
         matches = mol.GetSubstructMatches(pattern)
         if matches:
             # check if we haven't already transformed this triplet
@@ -798,14 +799,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     anion, a1, a2 = match
                     backtrack.append(g)
                     break
-            else:
-                # tried all paths and now blocked from backtracking
-                warnings.warn("The standardization got stuck in a dead-end, "
-                              "likely because of a charge in a conjugated "
-                              "system. Please check the output molecule "
-                              "carefully as it is likely to be an unreasonable"
-                              " resonance structure")
-                return
+
             # charges
             mol.GetAtomWithIdx(anion).SetFormalCharge(0)
             mol.GetAtomWithIdx(a2).SetFormalCharge(-1)

From 4b6afc10fdbce1219f7a097a7b6daf092048264a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 14 Oct 2020 17:59:36 +0200
Subject: [PATCH 04/47] minor comments

---
 package/MDAnalysis/coordinates/RDKit.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 81e24fcb6ec..7f749726750 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -758,7 +758,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             # index of each atom
             anion1, a1, a2, anion2 = end_match
             term_atom = mol.GetAtomWithIdx(anion2)
-            # [*-]-*=*-C=O
+            # [*-]-*=*-C=O --> *=*-*=C-[O-]
             if term_atom.GetAtomicNum() == 6 and term_atom.GetFormalCharge() == 0:
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
@@ -767,10 +767,10 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         neighbor.SetFormalCharge(-1)
                         break
             else:
-                # [*-]-*=*-N
+                # [*-]-*=*-N --> *=*-*=[N+]
                 if term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 0:
                     end_charge = 1
-                # [*-]-*=*-[*-]
+                # [*-]-*=*-[*-] --> *=*-*=*
                 else:
                     end_charge = 0
                 term_atom.SetFormalCharge(end_charge)

From c9513c7dbed2216a4c558d63dfe4ca5327f90c75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 14 Oct 2020 18:18:45 +0200
Subject: [PATCH 05/47] assign most likely charges to monatomic ions

---
 package/MDAnalysis/coordinates/RDKit.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 7f749726750..f0856d85784 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -511,11 +511,25 @@ def _infer_bo_and_charges(mol):
     R-C(-O)-O the first oxygen read will receive a double bond and the other
     one will be charged. It will also affect more complex conjugated systems.
     """
-
     atoms = sorted([a for a in mol.GetAtoms() if a.GetAtomicNum() > 1],
                    reverse=True,
                    key=lambda a: _get_nb_unpaired_electrons(a)[0])
+
+    MONOATOMIC_ION_CHARGES = {
+        3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1,
+        12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2,
+        26: 2, # Fe could also be 3
+        13: 3,
+        9: -1, 17: -1, 35: -1, 53: -1,
+    }
+
     for atom in atoms:
+        # monatomic ions
+        if atom.GetDegree() == 0:
+            atom.SetFormalCharge(MONOATOMIC_ION_CHARGES.get(
+                                 atom.GetAtomicNum(), 0))
+            mol.UpdatePropertyCache(strict=False)
+            continue
         # get NUE for each possible valence
         nue = _get_nb_unpaired_electrons(atom)
         # if there's only one possible valence state and the corresponding

From 338dad3fba447739f71edabb97dea94f144844f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 14 Oct 2020 18:46:17 +0200
Subject: [PATCH 06/47] fix when 2 ending patterns are matched

---
 package/MDAnalysis/coordinates/RDKit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index f0856d85784..da42c3b8cf4 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -796,9 +796,9 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             mol.GetBondBetweenAtoms(a2, anion2).SetBondType(
                 Chem.BondType.DOUBLE)
             mol.UpdatePropertyCache(strict=False)
+            continue
 
         # shorten the anion-anion pattern from n to n-1
-        # will not match if ending pattern has been reached
         matches = mol.GetSubstructMatches(pattern)
         if matches:
             # check if we haven't already transformed this triplet

From 029ab1f1d68eb7d85ff18db651adf5412661b435 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Thu, 15 Oct 2020 15:41:50 +0200
Subject: [PATCH 07/47] skip when backtrack prevents changes

---
 package/MDAnalysis/coordinates/RDKit.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index da42c3b8cf4..dbf1afae268 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -813,6 +813,9 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     anion, a1, a2 = match
                     backtrack.append(g)
                     break
+            else:
+                # already performed all changes
+                continue
 
             # charges
             mol.GetAtomWithIdx(anion).SetFormalCharge(0)

From 046383ecfd155b2c20d4f03455e8c823d7bab9be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Tue, 27 Oct 2020 19:25:45 +0100
Subject: [PATCH 08/47] improvements for Nterm, sulfones, and nitrogen next to
 a charged atom

---
 package/MDAnalysis/coordinates/RDKit.py       | 64 +++++++++----------
 .../MDAnalysisTests/coordinates/test_rdkit.py | 10 +++
 2 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index dbf1afae268..c93e98ba696 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -628,7 +628,7 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | Cterm         | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]``                                        |
     +---------------+------------------------------------------------------------------------------+
-    | Nterm         | ``[N-;X2;H1:1]>>[N+0:1]``                                                    |
+    | Nterm         | ``[N-;H1;$(N-[*^3]):1]>>[N+0:1]``                                            |
     +---------------+------------------------------------------------------------------------------+
     | keto-enolate  | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]``                             |
     +---------------+------------------------------------------------------------------------------+
@@ -636,9 +636,9 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | histidine     | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``|
     +---------------+------------------------------------------------------------------------------+
-    | sulfone       | ``[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]>>[S:1](=[O+0:2])=[O+0:3]``               |
+    | sulfone       | ``[S;X4;v4:1](-[*-:2])-[*-:3]>>[S:1](=[*+0:2])=[*+0:3]``                     |
     +---------------+------------------------------------------------------------------------------+
-    | charged N     | ``[#7+0;v3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
+    | charged N     | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
     +---------------+------------------------------------------------------------------------------+
  
     """
@@ -646,24 +646,26 @@ def _standardize_patterns(mol, max_iter=200):
     # standardize conjugated systems
     _rebuild_conjugated_bonds(mol, max_iter)
 
+    # list of sanitized reactions
+    reactions = []
+    for name, reaction in [
+        ("Cterm", "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]"),
+        ("Nterm", "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]"),
+        ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
+        ("ARG", "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]"),
+        ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
+                ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"),
+        ("sulfone", "[S;X4;v4:1](-[*-:2])-[*-:3]"
+                    ">>[S:1](=[*+0:2])=[*+0:3]"),
+        ("charged-N", "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
+    ]:
+        rxn = AllChem.ReactionFromSmarts(reaction)
+        reactions.append(rxn)
+
     fragments = []
     for reactant in Chem.GetMolFrags(mol, asMols=True):
-
-        for name, reaction in [
-            ("Cterm", "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]"),
-            ("Nterm", "[N-;X2;H1:1]>>[N+0:1]"),
-            ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
-            ("ARG", "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]"),
-            ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
-                    ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"),
-            ("sulfone", "[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]"
-                        ">>[S:1](=[O+0:2])=[O+0:3]"),
-            ("charged-N", "[#7+0;v3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
-        ]:
-            reactant.UpdatePropertyCache(strict=False)
-            Chem.Kekulize(reactant)
+        for reaction in reactions:
             reactant = _run_reaction(reaction, reactant)
-
         fragments.append(reactant)
 
     # recombine fragments
@@ -683,8 +685,8 @@ def _run_reaction(reaction, reactant):
 
     Parameters
     ----------
-    reaction : str
-        SMARTS reaction
+    reaction : rdkit.Chem.rdChemReactions.ChemicalReaction
+        Reaction from SMARTS
     reactant : rdkit.Chem.rdchem.Mol
         The molecule to transform
 
@@ -693,27 +695,22 @@ def _run_reaction(reaction, reactant):
     product : rdkit.Chem.rdchem.Mol
         The final product of the reaction
     """
-    # count how many times the reaction should be run
-    pattern = Chem.MolFromSmarts(reaction.split(">>")[0])
-    n_matches = len(reactant.GetSubstructMatches(pattern))
-
-    # run the reaction for each matched pattern
-    rxn = AllChem.ReactionFromSmarts(reaction)
-    for n in range(n_matches):
-        products = rxn.RunReactants((reactant,))
+    for n in range(reactant.GetNumAtoms()):
+        reactant.UpdatePropertyCache(strict=False)
+        Chem.Kekulize(reactant)
+        products = reaction.RunReactants((reactant,))
         # only keep the first product
         if products:
             product = products[0][0]
             # map back atom properties from the reactant to the product
             _reassign_props_after_reaction(reactant, product)
             # apply the next reaction to the product
-            product.UpdatePropertyCache(strict=False)
             reactant = product
         else:
-            # exit the n_matches loop if there's no product. Example
-            # where this is needed: SO^{4}_{2-} will match the sulfone
-            # pattern 6 times but the reaction is only needed once
+            # exit the loop if there's no product
             break
+    reactant.UpdatePropertyCache(strict=False)
+    Chem.Kekulize(reactant)
     return reactant
 
 
@@ -815,7 +812,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     break
             else:
                 # already performed all changes
-                continue
+                anion, a1, a2 = matches[0]
 
             # charges
             mol.GetAtomWithIdx(anion).SetFormalCharge(0)
@@ -829,6 +826,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             continue
 
         # no more changes to apply
+        mol.UpdatePropertyCache(strict=False)
         return
 
     # reached max_iter
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 017954390f9..a88d910a399 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -495,6 +495,14 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "[O-][n+]1cccnc1",
         "C[n+]1ccccc1",
         "[PH4+]",
+        "c1nc[nH]n1",
+        "CC(=O)C=C(C)N",
+        "CC(C)=CC=C[O-]",
+        "O=S(C)(C)=NC",
+        "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
+        # still needs fixing
+        # "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
+        # "C(C#CCCCCCCCC(=O)O)#CC=CCCCC",
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders
@@ -506,6 +514,7 @@ def test_order_independant(self, smi_in):
         for bond in template.GetBonds():
             bond.SetIsAromatic(False)
             bond.SetBondType(Chem.BondType.SINGLE)
+        template.UpdatePropertyCache(strict=False)
 
         # go through each possible starting atom
         for a in template.GetAtoms():
@@ -513,6 +522,7 @@ def test_order_independant(self, smi_in):
             m = Chem.MolFromSmiles(smi, sanitize=False)
             for atom in m.GetAtoms():
                 atom.SetFormalCharge(0)
+                atom.SetIsAromatic(False)
                 atom.SetNoImplicit(True)
             m.UpdatePropertyCache(strict=False)
             _infer_bo_and_charges(m)

From bd12bc1d0f5de7739ded8ce8a65231251a91e50c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 28 Oct 2020 19:56:35 +0100
Subject: [PATCH 09/47] better handling of ring conjugated systems and
 conjugated triple bonds

---
 package/MDAnalysis/coordinates/RDKit.py       | 71 ++++++++++---------
 .../MDAnalysisTests/coordinates/test_rdkit.py |  5 +-
 2 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index c93e98ba696..15441883c32 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -515,7 +515,7 @@ def _infer_bo_and_charges(mol):
                    reverse=True,
                    key=lambda a: _get_nb_unpaired_electrons(a)[0])
 
-    MONOATOMIC_ION_CHARGES = {
+    MONATOMIC_ION_CHARGES = {
         3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1,
         12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2,
         26: 2, # Fe could also be 3
@@ -526,8 +526,9 @@ def _infer_bo_and_charges(mol):
     for atom in atoms:
         # monatomic ions
         if atom.GetDegree() == 0:
-            atom.SetFormalCharge(MONOATOMIC_ION_CHARGES.get(
-                                 atom.GetAtomicNum(), 0))
+            atom.SetFormalCharge(MONATOMIC_ION_CHARGES.get(
+                                 atom.GetAtomicNum(),
+                                 _get_nb_unpaired_electrons(atom)[0]))
             mol.UpdatePropertyCache(strict=False)
             continue
         # get NUE for each possible valence
@@ -545,7 +546,7 @@ def _infer_bo_and_charges(mol):
             neighbors = sorted(atom.GetNeighbors(), reverse=True,
                                key=lambda a: _get_nb_unpaired_electrons(a)[0])
             # check if one of the neighbors has a common NUE
-            for i, na in enumerate(neighbors, start=1):
+            for na in neighbors:
                 # get NUE for the neighbor
                 na_nue = _get_nb_unpaired_electrons(na)
                 # smallest common NUE
@@ -561,15 +562,16 @@ def _infer_bo_and_charges(mol):
                     order = common_nue + 1
                     bond.SetBondType(RDBONDORDER[order])
                     mol.UpdatePropertyCache(strict=False)
-                    if i < len(neighbors):
-                        # recalculate nue for atom
-                        nue = _get_nb_unpaired_electrons(atom)
+                    # go to next atom if one of the valences is complete
+                    nue = _get_nb_unpaired_electrons(atom)
+                    if any([n == 0 for n in nue]):
+                        break
 
             # if atom valence is still not filled
-            nue_list = _get_nb_unpaired_electrons(atom)
-            if not any(nue == 0 for nue in nue_list):
+            nue = _get_nb_unpaired_electrons(atom)
+            if not any([n == 0 for n in nue]):
                 # transform nue to charge
-                atom.SetFormalCharge(-nue_list[0])
+                atom.SetFormalCharge(-nue[0])
                 atom.SetNumRadicalElectrons(0)
                 mol.UpdatePropertyCache(strict=False)
 
@@ -620,7 +622,7 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | Name          | Reaction                                                                     |
     +===============+==============================================================================+
-    | conjugated    | ``[*-;!O:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]``                |
+    | conjugated    | ``[*-:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]``                   |
     +---------------+------------------------------------------------------------------------------+
     | conjugated-N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]``               |
     +---------------+------------------------------------------------------------------------------+
@@ -648,7 +650,7 @@ def _standardize_patterns(mol, max_iter=200):
 
     # list of sanitized reactions
     reactions = []
-    for name, reaction in [
+    for name, rxn in [
         ("Cterm", "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]"),
         ("Nterm", "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]"),
         ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
@@ -659,8 +661,8 @@ def _standardize_patterns(mol, max_iter=200):
                     ">>[S:1](=[*+0:2])=[*+0:3]"),
         ("charged-N", "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
     ]:
-        rxn = AllChem.ReactionFromSmarts(reaction)
-        reactions.append(rxn)
+        reaction = AllChem.ReactionFromSmarts(rxn)
+        reactions.append(reaction)
 
     fragments = []
     for reactant in Chem.GetMolFrags(mol, asMols=True):
@@ -733,6 +735,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS, we can
     control that we don't transform the same triplet of atoms back and forth by
     adding their indices to a list.
+    This function also handles conjugated systems with triple bonds.
     The molecule needs to be kekulized first to also cover systems
     with aromatic rings.
 
@@ -745,7 +748,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     """
     mol.UpdatePropertyCache(strict=False)
     Chem.Kekulize(mol)
-    pattern = Chem.MolFromSmarts("[*-]-[*+0]=[*+0;!O]")
+    pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0;!O]")
     # number of unique matches with the pattern
     n_matches = len(set([match[0]
                          for match in mol.GetSubstructMatches(pattern)]))
@@ -754,7 +757,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
         return
     # check if there's an even number of anion-*=* patterns
     elif n_matches % 2 == 0:
-        end_pattern = Chem.MolFromSmarts("[*-]-[*+0]=[*+0]-[*-]")
+        end_pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
     else:
         # as a last resort, the only way to standardize is to find a nitrogen
         # that can accept a double bond and a positive charge
@@ -778,20 +781,16 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         neighbor.SetFormalCharge(-1)
                         break
             else:
-                # [*-]-*=*-N --> *=*-*=[N+]
-                if term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 0:
-                    end_charge = 1
-                # [*-]-*=*-[*-] --> *=*-*=*
-                else:
-                    end_charge = 0
-                term_atom.SetFormalCharge(end_charge)
+                term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
             # common part of the conjugated systems: [*-]-*=*
-            mol.GetAtomWithIdx(anion1).SetFormalCharge(0)
-            mol.GetBondBetweenAtoms(anion1, a1).SetBondType(
-                Chem.BondType.DOUBLE)
-            mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE)
-            mol.GetBondBetweenAtoms(a2, anion2).SetBondType(
-                Chem.BondType.DOUBLE)
+            a = mol.GetAtomWithIdx(anion1)
+            a.SetFormalCharge(a.GetFormalCharge() + 1)
+            b = mol.GetBondBetweenAtoms(anion1, a1)
+            b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
+            b = mol.GetBondBetweenAtoms(a1, a2)
+            b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
+            b = mol.GetBondBetweenAtoms(a2, anion2)
+            b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
             mol.UpdatePropertyCache(strict=False)
             continue
 
@@ -813,14 +812,18 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             else:
                 # already performed all changes
                 anion, a1, a2 = matches[0]
+                backtrack = []
 
             # charges
-            mol.GetAtomWithIdx(anion).SetFormalCharge(0)
-            mol.GetAtomWithIdx(a2).SetFormalCharge(-1)
+            a = mol.GetAtomWithIdx(anion)
+            a.SetFormalCharge(a.GetFormalCharge() + 1)
+            a = mol.GetAtomWithIdx(a2)
+            a.SetFormalCharge(a.GetFormalCharge() - 1)
             # bonds
-            mol.GetBondBetweenAtoms(anion, a1).SetBondType(
-                Chem.BondType.DOUBLE)
-            mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE)
+            b = mol.GetBondBetweenAtoms(anion, a1)
+            b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
+            b = mol.GetBondBetweenAtoms(a1, a2)
+            b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
             mol.UpdatePropertyCache(strict=False)
             # start new iteration
             continue
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index a88d910a399..647f51ca632 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -500,9 +500,8 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "CC(C)=CC=C[O-]",
         "O=S(C)(C)=NC",
         "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
-        # still needs fixing
-        # "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
-        # "C(C#CCCCCCCCC(=O)O)#CC=CCCCC",
+        "CCCC/C=C/C#CC#CCCCCCCCC(=O)O",
+        "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders

From bd14836837eba579b996e504b044f1390c578b7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 28 Oct 2020 20:04:59 +0100
Subject: [PATCH 10/47] revert rdkitcache branch changes

---
 package/MDAnalysis/coordinates/RDKit.py | 249 ++++++++++++------------
 1 file changed, 126 insertions(+), 123 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 15441883c32..bf104fda9c9 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -245,13 +245,19 @@ class RDKitConverter(base.ConverterBase):
     Since one of the main use case of the converter is converting trajectories
     and not just a topology, creating a new molecule from scratch for every
     frame would be too slow so the converter uses a caching system. The cache
-    only remembers the 2 most recent AtomGroups that were converted, as well
-    as the arguments that were passed to the converter. The number of objects
-    cached can be changed with the function :func:`set_converter_cache_size`.
-    However, ``ag.convert_to("RDKIT")`` followed by ``ag.convert_to("RDKIT", NoImplicit=False)``
-    will not use the cache since the arguments given are different.
-    You can pass a ``cache=False`` argument to the converter to bypass the
-    caching system.
+    only remembers the id of the last AtomGroup that was converted, as well
+    as the arguments that were passed to the converter. This means that using
+    ``u.select_atoms("protein").convert_to("RDKIT")`` will not benefit from the
+    cache since the selection is deleted from memory as soon as the conversion
+    is finished. Instead, users should do this in two steps by first saving the
+    selection in a variable and then converting the saved AtomGroup. It also
+    means that ``ag.convert_to("RDKIT")`` followed by
+    ``ag.convert_to("RDKIT", NoImplicit=False)`` will not use the cache.
+    Finally if you're modifying the AtomGroup in place between two conversions,
+    the id of the AtomGroup won't change and thus the converter will use the
+    cached molecule. For this reason, you can pass a ``cache=False`` argument
+    to the converter to bypass the caching system.
+    Note that the cached molecule doesn't contain the coordinates of the atoms.
 
 
     .. versionadded:: 2.0.0
@@ -260,6 +266,7 @@ class RDKitConverter(base.ConverterBase):
 
     lib = 'RDKIT'
     units = {'time': None, 'length': 'Angstrom'}
+    _cache = dict()
 
     def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
         """Write selection at current trajectory frame to
@@ -272,14 +279,16 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
             used, the cached molecule and the new one have to be made from the
-            same AtomGroup selection and with the same arguments passed
-            to the converter
+            same AtomGroup object (same id) and with the same arguments passed
+            to the converter (with the exception of this `cache` argument)
         NoImplicit : bool
             Prevent adding hydrogens to the molecule
         max_iter : int
             Maximum number of iterations to standardize conjugated systems.
             See :func:`_rebuild_conjugated_bonds`
         """
+        # parameters passed to atomgroup_to_mol and used by the cache
+        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
 
         try:
             from rdkit import Chem
@@ -295,13 +304,22 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
                             "please use a valid AtomGroup or Universe".format(
                                 type(obj))) from None
 
-        # parameters passed to atomgroup_to_mol
-        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
         if cache:
-            mol = atomgroup_to_mol(ag, **kwargs)
+            # key used to search the cache
+            key = f"<{id(ag):#x}>" + ",".join(f"{key}={value}"
+                                            for key, value in kwargs.items())
+            try:
+                mol = self._cache[key]
+            except KeyError:
+                # only keep the current molecule in cache
+                self._cache.clear()
+                # create the topology
+                self._cache[key] = mol = self.atomgroup_to_mol(ag, **kwargs)
+            # continue on copy of the cached molecule
             mol = copy.deepcopy(mol)
         else:
-            mol = atomgroup_to_mol.__wrapped__(ag, **kwargs)
+            self._cache.clear()
+            mol = self.atomgroup_to_mol(ag, **kwargs)
 
         # add a conformer for the current Timestep
         if hasattr(ag, "positions"):
@@ -323,123 +341,108 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
         return mol
 
 
-@lru_cache(maxsize=2)
-def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200):
-    """Converts an AtomGroup to an RDKit molecule without coordinates.
+    def atomgroup_to_mol(self, ag, NoImplicit=True, max_iter=200):
+        """Converts an AtomGroup to an RDKit molecule without coordinates.
 
-    Parameters
-    -----------
-    ag : MDAnalysis.core.groups.AtomGroup
-        The AtomGroup to convert
-    NoImplicit : bool
-        Prevent adding hydrogens to the molecule
-    max_iter : int
-        Maximum number of iterations to standardize conjugated systems.
-        See :func:`_rebuild_conjugated_bonds`
-    """
-    try:
-        elements = ag.elements
-    except NoDataError:
-        raise AttributeError(
-            "The `elements` attribute is required for the RDKitConverter "
-            "but is not present in this AtomGroup. Please refer to the "
-            "documentation to guess elements from other attributes or "
-            "type `help(mda.topology.guessers)`") from None
-
-    if "H" not in ag.elements:
-        warnings.warn(
-            "No hydrogen atom could be found in the topology, but the "
-            "converter requires all hydrogens to be explicit. Please "
-            "check carefully the output molecule as the converter is "
-            "likely to add negative charges and assign incorrect bond "
-            "orders to structures with implicit hydrogens. Alternatively, "
-            "you can use the parameter `NoImplicit=False` when using the "
-            "converter to allow implicit hydrogens and disable inferring "
-            "bond orders and charges."
-        )
-
-    # attributes accepted in PDBResidueInfo object
-    pdb_attrs = {}
-    if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"):
-        raise AttributeError(
-            "Both `tempfactors` and `bfactors` attributes are present but "
-            "only one can be assigned to the RDKit molecule. Please "
-            "delete the unnecessary one and retry."
-        )
-    for attr in RDATTRIBUTES.keys():
-        if hasattr(ag, attr):
-            pdb_attrs[attr] = getattr(ag, attr)
-
-    other_attrs = {}
-    for attr in ["charges", "segids", "types"]:
-        if hasattr(ag, attr):
-            other_attrs[attr] = getattr(ag, attr)
-
-    mol = Chem.RWMol()
-    # map index in universe to index in mol
-    atom_mapper = {}
-
-    for i, (atom, element) in enumerate(zip(ag, elements)):
-        # create atom
-        rdatom = Chem.Atom(element.capitalize())
-        # enable/disable adding implicit H to the molecule
-        rdatom.SetNoImplicit(NoImplicit)
-        # add PDB-like properties
-        mi = Chem.AtomPDBResidueInfo()
-        for attr, values in pdb_attrs.items():
-            _add_mda_attr_to_rdkit(attr, values[i], mi)
-        rdatom.SetMonomerInfo(mi)
-        # other properties
-        for attr in other_attrs.keys():
-            value = other_attrs[attr][i]
-            attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular
-            _set_atom_property(rdatom, attr, value)
-        _set_atom_property(rdatom, "_MDAnalysis_index", i)
-        # add atom
-        index = mol.AddAtom(rdatom)
-        atom_mapper[atom.ix] = index
-
-    try:
-        ag.bonds
-    except NoDataError:
-        warnings.warn(
-            "No `bonds` attribute in this AtomGroup. Guessing bonds based "
-            "on atoms coordinates")
-        ag.guess_bonds()
-
-    for bond in ag.bonds:
+        Parameters
+        -----------
+        ag : MDAnalysis.core.groups.AtomGroup
+            The AtomGroup to convert
+        NoImplicit : bool
+            Prevent adding hydrogens to the molecule
+        max_iter : int
+            Maximum number of iterations to standardize conjugated systems.
+            See :func:`_rebuild_conjugated_bonds`
+        """
         try:
-            bond_indices = [atom_mapper[i] for i in bond.indices]
-        except KeyError:
-            continue
-        bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE)
-        mol.AddBond(*bond_indices, bond_type)
-
-    mol.UpdatePropertyCache(strict=False)
-
-    if NoImplicit:
-        # infer bond orders and formal charges from the connectivity
-        _infer_bo_and_charges(mol)
-        mol = _standardize_patterns(mol, max_iter)
+            elements = ag.elements
+        except NoDataError:
+            raise AttributeError(
+                "The `elements` attribute is required for the RDKitConverter "
+                "but is not present in this AtomGroup. Please refer to the "
+                "documentation to guess elements from other attributes or "
+                "type `help(mda.topology.guessers)`") from None
+
+        if "H" not in ag.elements:
+            warnings.warn(
+                "No hydrogen atom could be found in the topology, but the "
+                "converter requires all hydrogens to be explicit. Please "
+                "check carefully the output molecule as the converter is "
+                "likely to add negative charges and assign incorrect bond "
+                "orders to structures with implicit hydrogens. Alternatively, "
+                "you can use the parameter `NoImplicit=False` when using the "
+                "converter to allow implicit hydrogens and disable inferring "
+                "bond orders and charges."
+            )
+
+        # attributes accepted in PDBResidueInfo object
+        pdb_attrs = {}
+        if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"):
+            raise AttributeError(
+                "Both `tempfactors` and `bfactors` attributes are present but "
+                "only one can be assigned to the RDKit molecule. Please "
+                "delete the unnecessary one and retry."
+            )
+        for attr in RDATTRIBUTES.keys():
+            if hasattr(ag, attr):
+                pdb_attrs[attr] = getattr(ag, attr)
+
+        other_attrs = {}
+        for attr in ["charges", "segids", "types"]:
+            if hasattr(ag, attr):
+                other_attrs[attr] = getattr(ag, attr)
+
+        mol = Chem.RWMol()
+        # map index in universe to index in mol
+        atom_mapper = {}
+
+        for i, (atom, element) in enumerate(zip(ag, elements)):
+            # create atom
+            rdatom = Chem.Atom(element.capitalize())
+            # enable/disable adding implicit H to the molecule
+            rdatom.SetNoImplicit(NoImplicit)
+            # add PDB-like properties
+            mi = Chem.AtomPDBResidueInfo()
+            for attr, values in pdb_attrs.items():
+                _add_mda_attr_to_rdkit(attr, values[i], mi)
+            rdatom.SetMonomerInfo(mi)
+            # other properties
+            for attr in other_attrs.keys():
+                value = other_attrs[attr][i]
+                attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular
+                _set_atom_property(rdatom, attr, value)
+            _set_atom_property(rdatom, "_MDAnalysis_index", i)
+            # add atom
+            index = mol.AddAtom(rdatom)
+            atom_mapper[atom.ix] = index
 
-    # sanitize
-    Chem.SanitizeMol(mol)
+        try:
+            ag.bonds
+        except NoDataError:
+            warnings.warn(
+                "No `bonds` attribute in this AtomGroup. Guessing bonds based "
+                "on atoms coordinates")
+            ag.guess_bonds()
+
+        for bond in ag.bonds:
+            try:
+                bond_indices = [atom_mapper[i] for i in bond.indices]
+            except KeyError:
+                continue
+            bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE)
+            mol.AddBond(*bond_indices, bond_type)
 
-    return mol
+        mol.UpdatePropertyCache(strict=False)
 
+        if NoImplicit:
+            # infer bond orders and formal charges from the connectivity
+            _infer_bo_and_charges(mol)
+            mol = _standardize_patterns(mol, max_iter)
 
-def set_converter_cache_size(maxsize):
-    """Set the maximum cache size of the RDKit converter
+        # sanitize
+        Chem.SanitizeMol(mol)
 
-    Parameters
-    ----------
-    maxsize : int or None
-        If int, the cache will only keep the ``maxsize`` most recent
-        conversions in memory. Using ``maxsize=None`` will remove all limits
-        to the cache size, i.e. everything is cached.
-    """
-    global atomgroup_to_mol
-    atomgroup_to_mol = lru_cache(maxsize=maxsize)(atomgroup_to_mol.__wrapped__)
+        return mol
 
 
 def _add_mda_attr_to_rdkit(attr, value, mi):

From 0e7d4e486a950c0a45a90b1539c67be0f22193d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 13 Nov 2020 02:51:14 +0100
Subject: [PATCH 11/47] monatomic default charges handling

---
 package/MDAnalysis/coordinates/RDKit.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index bf104fda9c9..8d0b93e1e30 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -518,20 +518,19 @@ def _infer_bo_and_charges(mol):
                    reverse=True,
                    key=lambda a: _get_nb_unpaired_electrons(a)[0])
 
-    MONATOMIC_ION_CHARGES = {
+    MONATOMIC_CATION_CHARGES = {
         3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1,
         12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2,
-        26: 2, # Fe could also be 3
+        26: 2, # Fe could also be +3
         13: 3,
-        9: -1, 17: -1, 35: -1, 53: -1,
     }
 
     for atom in atoms:
         # monatomic ions
         if atom.GetDegree() == 0:
-            atom.SetFormalCharge(MONATOMIC_ION_CHARGES.get(
+            atom.SetFormalCharge(MONATOMIC_CATION_CHARGES.get(
                                  atom.GetAtomicNum(),
-                                 _get_nb_unpaired_electrons(atom)[0]))
+                                 -_get_nb_unpaired_electrons(atom)[0]))
             mol.UpdatePropertyCache(strict=False)
             continue
         # get NUE for each possible valence

From d9b4d95024353eb7877755921d6258e40c904fa0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Tue, 17 Nov 2020 19:43:37 +0100
Subject: [PATCH 12/47] fix multiple types of end pattern + test

---
 package/MDAnalysis/coordinates/RDKit.py       | 26 +++++++++++++------
 .../MDAnalysisTests/coordinates/test_rdkit.py |  1 +
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 8d0b93e1e30..7fc2ddbf522 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -751,21 +751,25 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     mol.UpdatePropertyCache(strict=False)
     Chem.Kekulize(mol)
     pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0;!O]")
+    base_end_pattern = Chem.MolFromSmarts(
+        "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
+    odd_end_pattern = Chem.MolFromSmarts(
+        "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0]=O)]")
     # number of unique matches with the pattern
     n_matches = len(set([match[0]
                          for match in mol.GetSubstructMatches(pattern)]))
     if n_matches == 0:
         # nothing to standardize
         return
-    # check if there's an even number of anion-*=* patterns
-    elif n_matches % 2 == 0:
-        end_pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
-    else:
+    elif n_matches == 1:
         # as a last resort, the only way to standardize is to find a nitrogen
         # that can accept a double bond and a positive charge
         # or a carbonyl that will become an enolate
-        end_pattern = Chem.MolFromSmarts(
-            "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0]=O)]")
+        end_pattern = odd_end_pattern
+    else:
+        # give priority to base end pattern and then deal with the odd one if
+        # necessary
+        end_pattern = base_end_pattern
     backtrack = []
     for _ in range(max_iter):
         # check for ending pattern
@@ -775,10 +779,12 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             anion1, a1, a2, anion2 = end_match
             term_atom = mol.GetAtomWithIdx(anion2)
             # [*-]-*=*-C=O --> *=*-*=C-[O-]
-            if term_atom.GetAtomicNum() == 6 and term_atom.GetFormalCharge() == 0:
+            if (term_atom.GetAtomicNum() == 6 and
+                term_atom.GetFormalCharge() == 0):
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
-                    if neighbor.GetAtomicNum() == 8 and bond.GetBondTypeAsDouble() == 2:
+                    if (neighbor.GetAtomicNum() == 8 and
+                        bond.GetBondTypeAsDouble() == 2):
                         bond.SetBondType(Chem.BondType.SINGLE)
                         neighbor.SetFormalCharge(-1)
                         break
@@ -827,6 +833,10 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             b = mol.GetBondBetweenAtoms(a1, a2)
             b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
             mol.UpdatePropertyCache(strict=False)
+            # update number of matches for the end pattern
+            n_matches = len(set([match[0] for match in matches]))
+            if n_matches == 1:
+                end_pattern = odd_end_pattern
             # start new iteration
             continue
 
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 647f51ca632..7c5d1e95dc6 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -502,6 +502,7 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
         "CCCC/C=C/C#CC#CCCCCCCCC(=O)O",
         "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
+        "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders

From b36eab62a1ffb697bc56b0a585edf9376e9e40d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 18 Nov 2020 15:51:02 +0100
Subject: [PATCH 13/47] fix some sulfur and nitrogen compounds

---
 package/MDAnalysis/coordinates/RDKit.py       | 32 ++++++++++++++-----
 .../MDAnalysisTests/coordinates/test_rdkit.py |  5 ++-
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 7fc2ddbf522..99c97cb1349 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -659,8 +659,7 @@ def _standardize_patterns(mol, max_iter=200):
         ("ARG", "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]"),
         ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
                 ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"),
-        ("sulfone", "[S;X4;v4:1](-[*-:2])-[*-:3]"
-                    ">>[S:1](=[*+0:2])=[*+0:3]"),
+        ("sulfone", "[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]"),
         ("charged-N", "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
     ]:
         reaction = AllChem.ReactionFromSmarts(rxn)
@@ -754,7 +753,8 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     base_end_pattern = Chem.MolFromSmarts(
         "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
     odd_end_pattern = Chem.MolFromSmarts(
-        "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0]=O)]")
+        "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O),"
+        "$([S;D4;v4]-[O-])]")
     # number of unique matches with the pattern
     n_matches = len(set([match[0]
                          for match in mol.GetSubstructMatches(pattern)]))
@@ -778,9 +778,14 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             # index of each atom
             anion1, a1, a2, anion2 = end_match
             term_atom = mol.GetAtomWithIdx(anion2)
-            # [*-]-*=*-C=O --> *=*-*=C-[O-]
-            if (term_atom.GetAtomicNum() == 6 and
-                term_atom.GetFormalCharge() == 0):
+            # [*-]-*=*-[C,N+]=O --> *=*-*=[C,N+]-[O-]
+            if (
+                term_atom.GetAtomicNum() == 6
+                and term_atom.GetFormalCharge() == 0
+            ) or (
+                term_atom.GetAtomicNum() == 7
+                and term_atom.GetFormalCharge() == 1
+            ):
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
                     if (neighbor.GetAtomicNum() == 8 and
@@ -788,6 +793,16 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         bond.SetBondType(Chem.BondType.SINGLE)
                         neighbor.SetFormalCharge(-1)
                         break
+            # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
+            elif term_atom.GetAtomicNum() == 16:
+                for neighbor in term_atom.GetNeighbors():
+                    bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
+                    if (neighbor.GetAtomicNum() == 8 and
+                        neighbor.GetFormalCharge() == -1 and
+                        bond.GetBondTypeAsDouble() == 1):
+                        bond.SetBondType(Chem.BondType.DOUBLE)
+                        neighbor.SetFormalCharge(0)
+                        break
             else:
                 term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
             # common part of the conjugated systems: [*-]-*=*
@@ -819,8 +834,9 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     break
             else:
                 # already performed all changes
-                anion, a1, a2 = matches[0]
-                backtrack = []
+                match = matches[0]
+                anion, a1, a2 = match
+                backtrack = [set(match)]
 
             # charges
             a = mol.GetAtomWithIdx(anion)
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 7c5d1e95dc6..395ade91b07 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -502,7 +502,10 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
         "CCCC/C=C/C#CC#CCCCCCCCC(=O)O",
         "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
-        "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"
+        "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1",
+        "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N",
+        "[O-][n+]1onc2ccccc21",
+
     ])
     def test_order_independant(self, smi_in):
         # generate mol with hydrogens but without bond orders

From 555eab302fa35fb2921e59bd4f2514cfea91f091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 18 Nov 2020 16:09:52 +0100
Subject: [PATCH 14/47] add amino and nucleic acids test

---
 .../MDAnalysisTests/coordinates/test_rdkit.py | 123 ++++++++++++------
 1 file changed, 80 insertions(+), 43 deletions(-)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 395ade91b07..20cf7881ae3 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -363,6 +363,20 @@ def test_cache(self):
 
 @requires_rdkit
 class TestRDKitFunctions(object):
+
+    def mol_to_template(self, mol):
+        """Remove bond orders and charges from molecule"""
+        template = Chem.AddHs(mol)
+        for atom in template.GetAtoms():
+            atom.SetIsAromatic(False)
+            atom.SetFormalCharge(0)
+            atom.SetNoImplicit(True)
+        for bond in template.GetBonds():
+            bond.SetIsAromatic(False)
+            bond.SetBondType(Chem.BondType.SINGLE)
+        template.UpdatePropertyCache(strict=False)
+        return template
+
     @pytest.mark.parametrize("smi, out", [
         ("C(-[H])(-[H])(-[H])-[H]", "C"),
         ("[C](-[H])(-[H])-[C](-[H])-[H]", "C=C"),
@@ -473,52 +487,75 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
             with pytest.raises(KeyError, match="_MDAnalysis_index"):
                 atom.GetIntProp("_MDAnalysis_index")
 
-    @pytest.mark.parametrize("smi_in", [
-        "c1ccc(cc1)-c1ccccc1-c1ccccc1",
-        "c1cc[nH]c1",
-        "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",
-        "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21",
-        "c1csc(c1)-c1ccoc1-c1cc[nH]c1",
-        "C1=C2C(=NC=N1)N=CN2",
-        "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]",
-        "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1",
-        "C=CC=CC=CC=CC=CC=C",
-        "NCCCCC([NH3+])C(=O)[O-]",
-        "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C",
-        "C#CC=C",
+    @pytest.mark.parametrize("input_type, input_str", [
+        ("smi","c1ccc(cc1)-c1ccccc1-c1ccccc1"),
+        ("smi","c1cc[nH]c1"),
+        ("smi","c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1"),
+        ("smi","c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21"),
+        ("smi","c1csc(c1)-c1ccoc1-c1cc[nH]c1"),
+        ("smi","C1=C2C(=NC=N1)N=CN2"),
+        ("smi","CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]"),
+        ("smi","c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1"),
+        ("smi","C=CC=CC=CC=CC=CC=C"),
+        ("smi","NCCCCC([NH3+])C(=O)[O-]"),
+        ("smi","CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C"),
+        ("smi","C#CC=C"),
         # HID HIE HIP residues, see PR #2941
-        "O=C([C@H](CC1=CNC=N1)N)O",
-        "O=C([C@H](CC1=CN=CN1)N)O",
-        "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
+        ("smi","O=C([C@H](CC1=CNC=N1)N)O"),
+        ("smi","O=C([C@H](CC1=CN=CN1)N)O"),
+        ("smi","O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]"),
         # fixes from PR #???
-        "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1",
-        "[O-][n+]1cccnc1",
-        "C[n+]1ccccc1",
-        "[PH4+]",
-        "c1nc[nH]n1",
-        "CC(=O)C=C(C)N",
-        "CC(C)=CC=C[O-]",
-        "O=S(C)(C)=NC",
-        "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
-        "CCCC/C=C/C#CC#CCCCCCCCC(=O)O",
-        "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
-        "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1",
-        "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N",
-        "[O-][n+]1onc2ccccc21",
-
+        ("smi","CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1"),
+        ("smi","[O-][n+]1cccnc1"),
+        ("smi","C[n+]1ccccc1"),
+        ("smi","[PH4+]"),
+        ("smi","c1nc[nH]n1"),
+        ("smi","CC(=O)C=C(C)N"),
+        ("smi","CC(C)=CC=C[O-]"),
+        ("smi","O=S(C)(C)=NC"),
+        ("smi","Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1"),
+        ("smi","CCCC/C=C/C#CC#CCCCCCCCC(=O)O"),
+        ("smi","c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1"),
+        ("smi","N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"),
+        ("smi","C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N"),
+        ("smi","[O-][n+]1onc2ccccc21"),
+        # test amino acids
+        ("aa", "A"),
+        ("aa", "G"),
+        ("aa", "I"),
+        ("aa", "L"),
+        ("aa", "P"),
+        ("aa", "V"),
+        ("aa", "F"),
+        ("aa", "W"),
+        ("aa", "Y"),
+        ("aa", "D"),
+        ("aa", "E"),
+        ("aa", "R"),
+        ("aa", "H"),
+        ("aa", "K"),
+        ("aa", "S"),
+        ("aa", "T"),
+        ("aa", "C"),
+        ("aa", "M"),
+        ("aa", "N"),
+        ("aa", "Q"),
+        # test nucleic acids
+        ("na", "A"),
+        ("na", "T"),
+        ("na", "U"),
+        ("na", "C"),
+        ("na", "G"),
     ])
-    def test_order_independant(self, smi_in):
+    def test_order_independant(self, input_type, input_str):
         # generate mol with hydrogens but without bond orders
-        ref = Chem.MolFromSmiles(smi_in)
-        template = Chem.AddHs(ref)
-        for atom in template.GetAtoms():
-            atom.SetIsAromatic(False)
-            atom.SetFormalCharge(0)
-        for bond in template.GetBonds():
-            bond.SetIsAromatic(False)
-            bond.SetBondType(Chem.BondType.SINGLE)
-        template.UpdatePropertyCache(strict=False)
-
+        if input_type == "smi":
+            ref = Chem.MolFromSmiles(input_str)
+        elif input_type == "na":
+            ref = Chem.MolFromSequence(input_str, flavor=9)
+        else:
+            ref = Chem.MolFromSequence(input_str)
+        template = self.mol_to_template(ref)
         # go through each possible starting atom
         for a in template.GetAtoms():
             smi = Chem.MolToSmiles(template, rootedAtAtom=a.GetIdx())
@@ -542,7 +579,7 @@ def test_order_independant(self, smi_in):
             assert match, (f"(input) {Chem.MolToSmiles(ref)} != "
                            f"{Chem.MolToSmiles(m)} (output) "
                            f"root atom {a.GetIdx()}")
-
+    
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"
         mol = Chem.MolFromSmiles(smi)

From 3e4d066affd0cec479664e47967e2cbb200949f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 18 Nov 2020 16:27:21 +0100
Subject: [PATCH 15/47] add test for ions

---
 .../MDAnalysisTests/coordinates/test_rdkit.py    | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 20cf7881ae3..82c464af284 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -586,3 +586,19 @@ def test_warn_conjugated_max_iter(self):
         with pytest.warns(UserWarning,
                           match="reasonable number of iterations"):
             _rebuild_conjugated_bonds(mol, 2)
+
+    @pytest.mark.parametrize("smi", [
+        "[Li+]", "[Na+]", "[K+]", "[Ag+]",
+        "[Mg+2]", "[Ca+2]", "[Cu+2]", "[Zn+2]", "[Fe+2]",
+        "[Al+3]",
+        "[Cl-]",
+        "[O-2]",
+        "[Na+].[Cl-]",
+    ])
+    def test_ions(self, smi):
+        ref = Chem.MolFromSmiles(smi)
+        mol = self.mol_to_template(ref)
+        _infer_bo_and_charges(mol)
+        mol = _standardize_patterns(mol)
+        Chem.SanitizeMol(mol)
+        assert mol.HasSubstructMatch(ref) and ref.HasSubstructMatch(mol)

From ec54125c363f63dcd10d1e97c86c63c79494230b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Thu, 19 Nov 2020 15:46:19 +0100
Subject: [PATCH 16/47] final fixes

---
 package/MDAnalysis/coordinates/RDKit.py          | 16 ++++++++++++++--
 .../MDAnalysisTests/coordinates/test_rdkit.py    |  2 ++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 99c97cb1349..fca91f4acfd 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -749,7 +749,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
     """
     mol.UpdatePropertyCache(strict=False)
     Chem.Kekulize(mol)
-    pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0;!O]")
+    pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]")
     base_end_pattern = Chem.MolFromSmarts(
         "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
     odd_end_pattern = Chem.MolFromSmarts(
@@ -771,6 +771,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
         # necessary
         end_pattern = base_end_pattern
     backtrack = []
+    backtrack_cycles = 0
     for _ in range(max_iter):
         # check for ending pattern
         end_match = mol.GetSubstructMatch(end_pattern)
@@ -794,7 +795,8 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         neighbor.SetFormalCharge(-1)
                         break
             # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
-            elif term_atom.GetAtomicNum() == 16:
+            elif (term_atom.GetAtomicNum() == 16 and 
+                  term_atom.GetFormalCharge() == 0):
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
                     if (neighbor.GetAtomicNum() == 8 and
@@ -834,9 +836,19 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     break
             else:
                 # already performed all changes
+                if backtrack_cycles == 1:
+                    # might be stuck because there were 2 "odd" end patterns
+                    # misqualified as a single base one
+                    end_pattern = odd_end_pattern
+                elif backtrack_cycles > 1:
+                    # likely stuck changing the same bonds over and over so
+                    # let's stop here
+                    mol.UpdatePropertyCache(strict=False)
+                    return
                 match = matches[0]
                 anion, a1, a2 = match
                 backtrack = [set(match)]
+                backtrack_cycles += 1
 
             # charges
             a = mol.GetAtomWithIdx(anion)
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 82c464af284..c800909c012 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -519,6 +519,8 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
         ("smi","N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"),
         ("smi","C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N"),
         ("smi","[O-][n+]1onc2ccccc21"),
+        ("smi", "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1"),
+        ("smi", "[O-]c1ccccc1"),
         # test amino acids
         ("aa", "A"),
         ("aa", "G"),

From 3167e43db6f60816a83a451d8ad31c4baa56ecf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Thu, 19 Nov 2020 20:17:48 +0100
Subject: [PATCH 17/47] fix docs and unused import

---
 package/MDAnalysis/coordinates/RDKit.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index fca91f4acfd..9470f8f3e95 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -67,7 +67,6 @@
 import warnings
 import re
 import copy
-from functools import lru_cache
 
 import numpy as np
 
@@ -626,13 +625,15 @@ def _standardize_patterns(mol, max_iter=200):
     +===============+==============================================================================+
     | conjugated    | ``[*-:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]``                   |
     +---------------+------------------------------------------------------------------------------+
-    | conjugated-N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]``               |
+    | conjugated N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]``               |
     +---------------+------------------------------------------------------------------------------+
-    | conjugated-O- | ``[O:1]=[#6:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]``        |
+    | conjugated O- | ``[O:1]=[#6+0,#7+:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]``  |
+    +---------------+------------------------------------------------------------------------------+
+    | conjug. S=O   | ``[O-:1]-[S;D4;v4:2]-[*:3]=[*:4]-[*-:5]>>[O+0:1]=[*:2]=[*:3]-[*:4]=[*+0:5]`` |
     +---------------+------------------------------------------------------------------------------+
     | Cterm         | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]``                                        |
     +---------------+------------------------------------------------------------------------------+
-    | Nterm         | ``[N-;H1;$(N-[*^3]):1]>>[N+0:1]``                                            |
+    | Nterm         | ``[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]``                                         |
     +---------------+------------------------------------------------------------------------------+
     | keto-enolate  | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]``                             |
     +---------------+------------------------------------------------------------------------------+
@@ -640,7 +641,7 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | histidine     | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``|
     +---------------+------------------------------------------------------------------------------+
-    | sulfone       | ``[S;X4;v4:1](-[*-:2])-[*-:3]>>[S:1](=[*+0:2])=[*+0:3]``                     |
+    | sulfone       | ``[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]``                                    |
     +---------------+------------------------------------------------------------------------------+
     | charged N     | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
     +---------------+------------------------------------------------------------------------------+

From 7b6823b93d64998e97c72f6a39d6305bb2ed836a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Thu, 19 Nov 2020 20:34:32 +0100
Subject: [PATCH 18/47] pep8

---
 package/MDAnalysis/coordinates/RDKit.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 9470f8f3e95..de77186d499 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -273,8 +273,8 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
 
         Parameters
         -----------
-        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe`
-
+        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or 
+            :class:`~MDAnalysis.core.universe.Universe`
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
             used, the cached molecule and the new one have to be made from the
@@ -339,9 +339,8 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
 
         return mol
 
-
     def atomgroup_to_mol(self, ag, NoImplicit=True, max_iter=200):
-        """Converts an AtomGroup to an RDKit molecule without coordinates.
+        """Converts an AtomGroup to an RDKit molecule
 
         Parameters
         -----------
@@ -645,7 +644,7 @@ def _standardize_patterns(mol, max_iter=200):
     +---------------+------------------------------------------------------------------------------+
     | charged N     | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
     +---------------+------------------------------------------------------------------------------+
- 
+
     """
 
     # standardize conjugated systems
@@ -796,7 +795,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         neighbor.SetFormalCharge(-1)
                         break
             # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
-            elif (term_atom.GetAtomicNum() == 16 and 
+            elif (term_atom.GetAtomicNum() == 16 and
                   term_atom.GetFormalCharge() == 0):
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())

From 3f4645fb3f0ba3fea6722f1434952483f5b6a6ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Tue, 11 May 2021 22:10:25 +0200
Subject: [PATCH 19/47] pep8

---
 package/MDAnalysis/converters/RDKit.py        |  8 +--
 .../MDAnalysisTests/converters/test_rdkit.py  | 63 ++++++++++---------
 2 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index fac25a82d88..ffe9758530c 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -268,7 +268,7 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200,
 
         Parameters
         -----------
-        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or 
+        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or
             :class:`~MDAnalysis.core.universe.Universe`
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
@@ -529,7 +529,7 @@ def _infer_bo_and_charges(mol):
     MONATOMIC_CATION_CHARGES = {
         3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1,
         12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2,
-        26: 2, # Fe could also be +3
+        26: 2,  # Fe could also be +3
         13: 3,
     }
 
@@ -800,7 +800,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                 for neighbor in term_atom.GetNeighbors():
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
                     if (neighbor.GetAtomicNum() == 8 and
-                        bond.GetBondTypeAsDouble() == 2):
+                            bond.GetBondTypeAsDouble() == 2):
                         bond.SetBondType(Chem.BondType.SINGLE)
                         neighbor.SetFormalCharge(-1)
                         break
@@ -811,7 +811,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                     bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
                     if (neighbor.GetAtomicNum() == 8 and
                         neighbor.GetFormalCharge() == -1 and
-                        bond.GetBondTypeAsDouble() == 1):
+                            bond.GetBondTypeAsDouble() == 1):
                         bond.SetBondType(Chem.BondType.DOUBLE)
                         neighbor.SetFormalCharge(0)
                         break
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 7b583bbfeef..b8bbe9c2198 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -535,37 +535,37 @@ def test_reassign_props_after_reaction(self, rdmol, product, name):
                 atom.GetIntProp("_MDAnalysis_index")
 
     @pytest.mark.parametrize("input_type, input_str", [
-        ("smi","c1ccc(cc1)-c1ccccc1-c1ccccc1"),
-        ("smi","c1cc[nH]c1"),
-        ("smi","c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1"),
-        ("smi","c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21"),
-        ("smi","c1csc(c1)-c1ccoc1-c1cc[nH]c1"),
-        ("smi","C1=C2C(=NC=N1)N=CN2"),
-        ("smi","CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]"),
-        ("smi","c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1"),
-        ("smi","C=CC=CC=CC=CC=CC=C"),
-        ("smi","NCCCCC([NH3+])C(=O)[O-]"),
-        ("smi","CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C"),
-        ("smi","C#CC=C"),
+        ("smi", "c1ccc(cc1)-c1ccccc1-c1ccccc1"),
+        ("smi", "c1cc[nH]c1"),
+        ("smi", "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1"),
+        ("smi", "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21"),
+        ("smi", "c1csc(c1)-c1ccoc1-c1cc[nH]c1"),
+        ("smi", "C1=C2C(=NC=N1)N=CN2"),
+        ("smi", "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]"),
+        ("smi", "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1"),
+        ("smi", "C=CC=CC=CC=CC=CC=C"),
+        ("smi", "NCCCCC([NH3+])C(=O)[O-]"),
+        ("smi", "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C"),
+        ("smi", "C#CC=C"),
         # HID HIE HIP residues, see PR #2941
-        ("smi","O=C([C@H](CC1=CNC=N1)N)O"),
-        ("smi","O=C([C@H](CC1=CN=CN1)N)O"),
-        ("smi","O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]"),
+        ("smi", "O=C([C@H](CC1=CNC=N1)N)O"),
+        ("smi", "O=C([C@H](CC1=CN=CN1)N)O"),
+        ("smi", "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]"),
         # fixes from PR #???
-        ("smi","CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1"),
-        ("smi","[O-][n+]1cccnc1"),
-        ("smi","C[n+]1ccccc1"),
-        ("smi","[PH4+]"),
-        ("smi","c1nc[nH]n1"),
-        ("smi","CC(=O)C=C(C)N"),
-        ("smi","CC(C)=CC=C[O-]"),
-        ("smi","O=S(C)(C)=NC"),
-        ("smi","Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1"),
-        ("smi","CCCC/C=C/C#CC#CCCCCCCCC(=O)O"),
-        ("smi","c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1"),
-        ("smi","N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"),
-        ("smi","C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N"),
-        ("smi","[O-][n+]1onc2ccccc21"),
+        ("smi", "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1"),
+        ("smi", "[O-][n+]1cccnc1"),
+        ("smi", "C[n+]1ccccc1"),
+        ("smi", "[PH4+]"),
+        ("smi", "c1nc[nH]n1"),
+        ("smi", "CC(=O)C=C(C)N"),
+        ("smi", "CC(C)=CC=C[O-]"),
+        ("smi", "O=S(C)(C)=NC"),
+        ("smi", "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1"),
+        ("smi", "CCCC/C=C/C#CC#CCCCCCCCC(=O)O"),
+        ("smi", "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1"),
+        ("smi", "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"),
+        ("smi", "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N"),
+        ("smi", "[O-][n+]1onc2ccccc21"),
         ("smi", "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1"),
         ("smi", "[O-]c1ccccc1"),
         # test amino acids
@@ -622,13 +622,14 @@ def test_order_independant(self, input_type, input_str):
             if not match:
                 # try resonance structures for charged conjugated systems
                 for mol in Chem.ResonanceMolSupplier(m, maxStructs=20):
-                    match = mol.HasSubstructMatch(ref) and ref.HasSubstructMatch(mol)
+                    match = (mol.HasSubstructMatch(ref) and
+                             ref.HasSubstructMatch(mol))
                     if match:
                         break
             assert match, (f"(input) {Chem.MolToSmiles(ref)} != "
                            f"{Chem.MolToSmiles(m)} (output) "
                            f"root atom {a.GetIdx()}")
-    
+
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"
         mol = Chem.MolFromSmiles(smi)

From 20a64ae222828448a6fd978d6425687f652d572d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Tue, 11 May 2021 22:49:21 +0200
Subject: [PATCH 20/47] comments

---
 package/MDAnalysis/converters/RDKit.py | 28 +++++++++++++++++---------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index ffe9758530c..3436c4246e6 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -662,15 +662,15 @@ def _standardize_patterns(mol, max_iter=200):
 
     # list of sanitized reactions
     reactions = []
-    for name, rxn in [
-        ("Cterm", "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]"),
-        ("Nterm", "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]"),
-        ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
-        ("ARG", "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]"),
-        ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
-                ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"),
-        ("sulfone", "[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]"),
-        ("charged-N", "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]"),
+    for rxn in [
+        "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]",  # Cterm
+        "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]",  # Nterm
+        "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]",  # keto-enolate
+        "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]",  # ARG
+        "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
+        ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]",  # HIP
+        "[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]",  # sulfone
+        "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]",  # charged-N
     ]:
         reaction = AllChem.ReactionFromSmarts(rxn)
         reactions.append(reaction)
@@ -789,7 +789,9 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             # index of each atom
             anion1, a1, a2, anion2 = end_match
             term_atom = mol.GetAtomWithIdx(anion2)
+            # edge-case 1: C-[O-] or [N+]-[O-]
             # [*-]-*=*-[C,N+]=O --> *=*-*=[C,N+]-[O-]
+            # transform the =O to -[O-]
             if (
                 term_atom.GetAtomicNum() == 6
                 and term_atom.GetFormalCharge() == 0
@@ -804,7 +806,9 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         bond.SetBondType(Chem.BondType.SINGLE)
                         neighbor.SetFormalCharge(-1)
                         break
+            # edge-case 2: S=O
             # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
+            # transform -[O-] to =O
             elif (term_atom.GetAtomicNum() == 16 and
                   term_atom.GetFormalCharge() == 0):
                 for neighbor in term_atom.GetNeighbors():
@@ -815,9 +819,13 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                         bond.SetBondType(Chem.BondType.DOUBLE)
                         neighbor.SetFormalCharge(0)
                         break
+            # not an edge case:
+            # increment the charge
             else:
                 term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
-            # common part of the conjugated systems: [*-]-*=*
+            # common to all cases: 
+            # [*-]-*=*-[R] --> *=*-*=[R]
+            # increment the charge and switch single and double bonds
             a = mol.GetAtomWithIdx(anion1)
             a.SetFormalCharge(a.GetFormalCharge() + 1)
             b = mol.GetBondBetweenAtoms(anion1, a1)

From 5c03ee8a3c89ab7e942fd296ef0c24d548d805ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Tue, 11 May 2021 22:54:46 +0200
Subject: [PATCH 21/47] fix rdkit pdb atom names formatting

---
 package/MDAnalysis/converters/RDKit.py             | 10 +++++-----
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 11 ++++++++---
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 3436c4246e6..39ba51a9e2d 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -474,14 +474,14 @@ def _add_mda_attr_to_rdkit(attr, value, mi):
         # PDB file (1 letter elements start at col 14)
         name = re.findall(r'(\D+|\d+)', value)
         if len(name) == 2:
-            symbol, number = name
-            if len(number) > 2 and len(symbol) == 1:
-                value = "{}{}".format(symbol, number)
+            if len(value) == 2:
+                value = f" {value} "
             else:
-                value = "{:>2.2}{:<2.2}".format(symbol, number)
+                symbol, number = name
+                value = f"{symbol + number:>4.4}"
         else:
             # no number in the name
-            value = " {:<}".format(name[0])
+            value = f" {name[0]:<3.3}"
 
     # set attribute value in RDKit MonomerInfo
     rdattr = RDATTRIBUTES[attr]
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index b8bbe9c2198..beb27918cb5 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -263,9 +263,14 @@ def test_warning_no_hydrogen_force(self, uo2):
             uo2.atoms.convert_to.rdkit(NoImplicit=False, force=True)
 
     @pytest.mark.parametrize("attr, value, expected", [
-        ("names", "C1", " C1 "),
-        ("names", "C12", " C12"),
-        ("names", "Cl1", "Cl1 "),
+        ("names", "N",    " N  "),
+        ("names", "CA",   " CA "),
+        ("names", "CAT",  " CAT"),
+        ("names", "N1",   " N1 "),
+        ("names", "CE2",  " CE2"),
+        ("names", "C12",  " C12"),
+        ("names", "HD12", "HD12"),
+        ("names", "C123", "C123"),
         ("altLocs", "A", "A"),
         ("chainIDs", "B", "B"),
         ("icodes", "C", "C"),

From 44c523e5f1926386ce05cf957359fdab66145153 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Tue, 11 May 2021 23:46:04 +0200
Subject: [PATCH 22/47] skip sanitization if fails

---
 package/MDAnalysis/converters/RDKit.py             |  7 +++++--
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 13 +++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 39ba51a9e2d..122ff1db446 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -433,8 +433,11 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         _infer_bo_and_charges(mol)
         mol = _standardize_patterns(mol, max_iter)
 
-    # sanitize
-    Chem.SanitizeMol(mol)
+    # sanitize if possible
+    err = Chem.SanitizeMol(mol, catchErrors=True)
+    if err:
+        warnings.warn("Could not sanitize molecule: "
+                      f"failed during step {err!r}")
 
     return mol
 
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index beb27918cb5..f93600b3b20 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -410,6 +410,19 @@ def test_cache(self):
         new_cache = cached_func.cache_info()
         assert cache == new_cache
 
+    def test_sanitize_fail_warning(self):
+        mol = Chem.MolFromSmiles("[H]-N(-[H])(-[H])-[H]", sanitize=False)
+        mol.UpdatePropertyCache(strict=False)
+        mol = Chem.AddHs(mol)
+        u = mda.Universe(mol)
+        with pytest.warns(UserWarning, match="Could not sanitize molecule"):
+            u.atoms.convert_to.rdkit(NoImplicit=False)
+        with pytest.warns(None) as record:
+            u.atoms.convert_to.rdkit()
+        if record:
+            assert all("Could not sanitize molecule" not in str(w.message)
+                       for w in record.list)
+
 
 @requires_rdkit
 class TestRDKitFunctions(object):

From 0ce32710ef8f9b3b56e91f1d4df456dbfd429c8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Tue, 11 May 2021 23:55:13 +0200
Subject: [PATCH 23/47] reorder rdkit atoms to match mdanalysis

---
 package/MDAnalysis/converters/RDKit.py        |  5 +++++
 .../MDAnalysisTests/converters/test_rdkit.py  | 19 +++++--------------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 122ff1db446..d8931b7af3e 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -439,6 +439,11 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         warnings.warn("Could not sanitize molecule: "
                       f"failed during step {err!r}")
 
+    # reorder atoms to match MDAnalysis
+    order = np.argsort([atom.GetIntProp("_MDAnalysis_index")
+                        for atom in mol.GetAtoms()])
+    mol = Chem.RenumberAtoms(mol, order.astype(int).tolist())
+
     return mol
 
 
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index f93600b3b20..9e5ab09cb2d 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -204,13 +204,12 @@ def test_monomer_info(self, pdb, sel_str, atom_index):
         sel = pdb.select_atoms(sel_str)
         umol = sel.convert_to.rdkit(NoImplicit=False)
         atom = umol.GetAtomWithIdx(atom_index)
-        mda_index = atom.GetIntProp("_MDAnalysis_index")
         mi = atom.GetMonomerInfo()
 
         for mda_attr, rd_attr in RDATTRIBUTES.items():
             rd_value = getattr(mi, "Get%s" % rd_attr)()
             if hasattr(sel, mda_attr):
-                mda_value = getattr(sel, mda_attr)[mda_index]
+                mda_value = getattr(sel, mda_attr)[atom_index]
                 if mda_attr == "names":
                     rd_value = rd_value.strip()
                 assert rd_value == mda_value
@@ -303,10 +302,9 @@ def test_other_attributes(self, mol2, idx):
         mol = mol2.atoms.convert_to("RDKIT")
         rdatom = mol.GetAtomWithIdx(idx)
         rdprops = rdatom.GetPropsAsDict()
-        mda_idx = int(rdprops["_MDAnalysis_index"])
         for prop in ["charge", "segid", "type"]:
             rdprop = rdprops["_MDAnalysis_%s" % prop]
-            mdaprop = getattr(mol2.atoms[mda_idx], prop)
+            mdaprop = getattr(mol2.atoms[idx], prop)
             assert rdprop == mdaprop
 
     @pytest.mark.parametrize("sel_str", [
@@ -317,17 +315,13 @@ def test_index_property(self, pdb, sel_str):
         ag = pdb.select_atoms(sel_str)
         mol = ag.convert_to.rdkit(NoImplicit=False)
         expected = [i for i in range(len(ag))]
-        indices = sorted([a.GetIntProp("_MDAnalysis_index")
-                          for a in mol.GetAtoms()])
+        indices = [a.GetIntProp("_MDAnalysis_index") for a in mol.GetAtoms()]
         assert_equal(indices, expected)
 
     def test_assign_coordinates(self, pdb):
         mol = pdb.atoms.convert_to.rdkit(NoImplicit=False)
         positions = mol.GetConformer().GetPositions()
-        indices = sorted(mol.GetAtoms(),
-                         key=lambda a: a.GetIntProp("_MDAnalysis_index"))
-        indices = [a.GetIdx() for a in indices]
-        assert_almost_equal(positions[indices], pdb.atoms.positions)
+        assert_almost_equal(positions, pdb.atoms.positions)
 
     def test_assign_stereochemistry(self, mol2):
         umol = mol2.atoms.convert_to("RDKIT")
@@ -342,10 +336,7 @@ def test_trajectory_coords(self):
         for ts in u.trajectory:
             mol = u.atoms.convert_to("RDKIT")
             positions = mol.GetConformer().GetPositions()
-            indices = sorted(mol.GetAtoms(),
-                             key=lambda a: a.GetIntProp("_MDAnalysis_index"))
-            indices = [a.GetIdx() for a in indices]
-            assert_almost_equal(positions[indices], ts.positions)
+            assert_almost_equal(positions, ts.positions)
 
     def test_nan_coords(self):
         u = mda.Universe.from_smiles("CCO")

From 7477e7ff7aad75a1539ff9a744b8c94a629cf462 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 12 May 2021 02:24:57 +0200
Subject: [PATCH 24/47] change setting and transferring atom properties

---
 package/MDAnalysis/converters/RDKit.py        | 111 ++++++++++++------
 .../MDAnalysisTests/converters/test_rdkit.py  |  71 +++++------
 2 files changed, 106 insertions(+), 76 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index d8931b7af3e..8d251a64374 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -72,7 +72,6 @@
 import numpy as np
 
 from ..exceptions import NoDataError
-from ..topology.guessers import guess_atom_element
 from ..core.topologyattrs import _TOPOLOGY_ATTRS
 from ..coordinates import memory
 from ..coordinates import base
@@ -316,9 +315,9 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200,
                 # assign coordinates
                 conf = Chem.Conformer(mol.GetNumAtoms())
                 for atom in mol.GetAtoms():
-                    idx = atom.GetIntProp("_MDAnalysis_index")
+                    idx = atom.GetIdx()
                     xyz = ag.positions[idx].astype(float)
-                    conf.SetAtomPosition(atom.GetIdx(), xyz)
+                    conf.SetAtomPosition(idx, xyz)
                 mol.AddConformer(conf)
                 # assign R/S to atoms and Z/E to bonds
                 Chem.AssignStereochemistryFrom3D(mol)
@@ -432,6 +431,10 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         # infer bond orders and formal charges from the connectivity
         _infer_bo_and_charges(mol)
         mol = _standardize_patterns(mol, max_iter)
+        # reorder atoms to match MDAnalysis
+        order = np.argsort([atom.GetIntProp("_MDAnalysis_index")
+                            for atom in mol.GetAtoms()])
+        mol = Chem.RenumberAtoms(mol, order.astype(int).tolist())
 
     # sanitize if possible
     err = Chem.SanitizeMol(mol, catchErrors=True)
@@ -439,11 +442,6 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         warnings.warn("Could not sanitize molecule: "
                       f"failed during step {err!r}")
 
-    # reorder atoms to match MDAnalysis
-    order = np.argsort([atom.GetIntProp("_MDAnalysis_index")
-                        for atom in mol.GetAtoms()])
-    mol = Chem.RenumberAtoms(mol, order.astype(int).tolist())
-
     return mol
 
 
@@ -496,14 +494,40 @@ def _add_mda_attr_to_rdkit(attr, value, mi):
     getattr(mi, "Set%s" % rdattr)(value)
 
 
+def _set_str_prop(atom, attr, value):
+    atom.SetProp(attr, value)
+
+def _set_float_prop(atom, attr, value):
+    atom.SetDoubleProp(attr, value)
+
+def _set_np_float_prop(atom, attr, value):
+    atom.SetDoubleProp(attr, float(value))
+
+def _set_int_prop(atom, attr, value):
+    atom.SetIntProp(attr, value)
+
+def _set_np_int_prop(atom, attr, value):
+    atom.SetIntProp(attr, int(value))
+
+_atom_property_dispatcher = {
+    str: _set_str_prop,
+    float: _set_float_prop,
+    np.float32: _set_np_float_prop,
+    np.float64: _set_np_float_prop,
+    int: _set_int_prop,
+    np.int8: _set_np_int_prop,
+    np.int16: _set_np_int_prop,
+    np.int32: _set_np_int_prop,
+    np.int64: _set_np_int_prop,
+    np.uint8: _set_np_int_prop,
+    np.uint16: _set_np_int_prop,
+    np.uint32: _set_np_int_prop,
+    np.uint64: _set_np_int_prop,
+}
+
 def _set_atom_property(atom, attr, value):
     """Saves any attribute and value into an RDKit atom property"""
-    if isinstance(value, (float, np.floating)):
-        atom.SetDoubleProp(attr, float(value))
-    elif isinstance(value, (int, np.integer)):
-        atom.SetIntProp(attr, int(value))
-    else:
-        atom.SetProp(attr, value)
+    _atom_property_dispatcher[type(value)](atom, attr, value)
 
 
 def _infer_bo_and_charges(mol):
@@ -690,11 +714,19 @@ def _standardize_patterns(mol, max_iter=200):
         fragments.append(reactant)
 
     # recombine fragments
-    mol = fragments.pop(0)
+    newmol = fragments.pop(0)
     for fragment in fragments:
-        mol = Chem.CombineMols(mol, fragment)
+        newmol = Chem.CombineMols(newmol, fragment)
 
-    return mol
+    # reassign all properties
+    _transfer_properties(mol, newmol)
+
+    # fix bonds with "crossed" stereo
+    for bond in newmol.GetBonds():
+        if bond.GetStereo() == Chem.BondStereo.STEREOANY:
+            bond.SetStereo(Chem.BondStereo.STEREONONE)
+
+    return newmol
 
 
 def _run_reaction(reaction, reactant):
@@ -724,7 +756,7 @@ def _run_reaction(reaction, reactant):
         if products:
             product = products[0][0]
             # map back atom properties from the reactant to the product
-            _reassign_props_after_reaction(reactant, product)
+            _reassign_index_after_reaction(reactant, product)
             # apply the next reaction to the product
             reactant = product
         else:
@@ -903,23 +935,34 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                   "reasonable number of iterations")
 
 
-def _reassign_props_after_reaction(reactant, product):
-    """Maps back atomic properties from the reactant to the product.
+def _reassign_index_after_reaction(reactant, product):
+    """Maps back MDAnalysis index from the reactant to the product.
     The product molecule is modified inplace.
     """
-    for atom in product.GetAtoms():
-        try:
-            atom.GetIntProp("old_mapno")
-        except KeyError:
-            pass
-        else:
-            atom.ClearProp("old_mapno")
+    prop = "_MDAnalysis_index"
+    if reactant.GetAtoms()[0].HasProp(prop):
+        for atom in product.GetAtoms():
             idx = atom.GetUnsignedProp("react_atom_idx")
             old_atom = reactant.GetAtomWithIdx(idx)
-            for prop, value in old_atom.GetPropsAsDict().items():
-                _set_atom_property(atom, prop, value)
-            # fix bonds with "crossed" stereo
-            for bond in atom.GetBonds():
-                if bond.GetStereo() == Chem.BondStereo.STEREOANY:
-                    bond.SetStereo(Chem.BondStereo.STEREONONE)
-        atom.ClearProp("react_atom_idx")
+            value = old_atom.GetIntProp(prop)
+            _set_atom_property(atom, prop, value)
+
+
+def _transfer_properties(mol, newmol):
+    """Transfer properties between two RDKit molecules. Requires the
+    `_MDAnalysis_index` property to be present. Modifies `newmol` inplace.
+    """
+    atoms = mol.GetAtoms()
+    if atoms[0].HasProp("_MDAnalysis_index"):
+        props = {}
+        for atom in atoms:
+            ix = atom.GetIntProp("_MDAnalysis_index")
+            p = atom.GetPropsAsDict()
+            # remove properties assigned during reactions
+            p.pop("old_mapno", None)
+            p.pop("react_atom_idx", None)
+            props[ix] = p
+        for atom in newmol.GetAtoms():
+            ix = atom.GetIntProp("_MDAnalysis_index")
+            for attr, value in props[ix].items():
+                _set_atom_property(atom, attr, value)
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 9e5ab09cb2d..5a096d3ed09 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -43,7 +43,7 @@
         _standardize_patterns,
         _rebuild_conjugated_bonds,
         _set_atom_property,
-        _reassign_props_after_reaction,
+        _reassign_index_after_reaction,
     )
 except ImportError:
     pass
@@ -82,26 +82,10 @@ def dummy_product():
         mol.AddAtom(atom)
         return mol
 
-    def dummy_product_nomap():
-        mol = Chem.RWMol()
-        atom = Chem.Atom(1)
-        atom.SetUnsignedProp("react_atom_idx", 0)
-        mol.AddAtom(atom)
-        return mol
-
-    def dummy_reactant_noprops():
-        mol = Chem.RWMol()
-        atom = Chem.Atom(1)
-        mol.AddAtom(atom)
-        return mol
-
     def dummy_reactant():
         mol = Chem.RWMol()
         atom = Chem.Atom(1)
-        atom.SetProp("foo", "bar")
         atom.SetIntProp("_MDAnalysis_index", 1)
-        atom.SetDoubleProp("_MDAnalysis_charge", 4.2)
-        atom.SetProp("_MDAnalysis_type", "C.3")
         mol.AddAtom(atom)
         return mol
 
@@ -515,33 +499,36 @@ def test_set_atom_property(self, attr, value, getter):
         _set_atom_property(atom, prop, value)
         assert getattr(atom, getter)(prop) == value
 
-    @pytest.mark.parametrize("rdmol, product, name", [
-        ("dummy_reactant", "dummy_product", "props"),
-        ("dummy_reactant_noprops", "dummy_product", "noprops"),
-        ("dummy_reactant", "dummy_product_nomap", "nomap"),
+    @pytest.mark.parametrize("rdmol, product", [
+        ("dummy_reactant", "dummy_product"),
     ], indirect=["rdmol", "product"])
-    def test_reassign_props_after_reaction(self, rdmol, product, name):
-        _reassign_props_after_reaction(rdmol, product)
+    def test_reassign_index_after_reaction(self, rdmol, product):
+        _reassign_index_after_reaction(rdmol, product)
         atom = product.GetAtomWithIdx(0)
-        if name == "props":
-            assert atom.GetProp("foo") == "bar"
-            assert atom.GetIntProp("_MDAnalysis_index") == 1
-            assert atom.GetDoubleProp("_MDAnalysis_charge") == 4.2
-            assert atom.GetProp("_MDAnalysis_type") == "C.3"
-            with pytest.raises(KeyError, match="old_mapno"):
-                atom.GetIntProp("old_mapno")
-            with pytest.raises(KeyError, match="react_atom_idx"):
-                atom.GetUnsignedProp("react_atom_idx")
-        elif name == "noprops":
-            with pytest.raises(KeyError, match="old_mapno"):
-                atom.GetIntProp("old_mapno")
-            with pytest.raises(KeyError, match="react_atom_idx"):
-                atom.GetUnsignedProp("react_atom_idx")
-        elif name == "nomap":
-            with pytest.raises(KeyError, match="react_atom_idx"):
-                atom.GetUnsignedProp("react_atom_idx")
-            with pytest.raises(KeyError, match="_MDAnalysis_index"):
-                atom.GetIntProp("_MDAnalysis_index")
+        assert atom.GetIntProp("_MDAnalysis_index") == 1
+
+    @pytest.mark.parametrize("smi", [
+        "c1ccc(cc1)-c1ccccc1-c1ccccc1",
+        "c1cc[nH]c1",
+        "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
+    ])
+    def test_transfer_properties(self, smi):
+        mol = Chem.MolFromSmiles(smi)
+        mol = self.mol_to_template(mol)
+        old = {}
+        for atom in mol.GetAtoms():
+            ix = atom.GetIdx()
+            atom.SetIntProp("_MDAnalysis_index", ix)
+            atom.SetProp("dummy", f"foo_{ix}")
+            old[ix] = {"_MDAnalysis_index": ix, "dummy": f"foo_{ix}"}
+        _infer_bo_and_charges(mol)
+        newmol = _standardize_patterns(mol)
+        new = {}
+        for a in newmol.GetAtoms():
+            ix = a.GetIntProp("_MDAnalysis_index")
+            new[ix] = {"_MDAnalysis_index": ix,
+                       "dummy": a.GetProp("dummy")}
+        assert new == old
 
     @pytest.mark.parametrize("input_type, input_str", [
         ("smi", "c1ccc(cc1)-c1ccccc1-c1ccccc1"),

From df4feb4781cf999d74f687e96d14b41b8ae64f2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 12 May 2021 12:27:51 +0200
Subject: [PATCH 25/47] fix tests + pep8

---
 package/MDAnalysis/converters/RDKit.py             | 13 +++++++++++--
 testsuite/MDAnalysisTests/converters/test_rdkit.py |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 8d251a64374..70ff046fbcb 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -497,18 +497,27 @@ def _add_mda_attr_to_rdkit(attr, value, mi):
 def _set_str_prop(atom, attr, value):
     atom.SetProp(attr, value)
 
+
 def _set_float_prop(atom, attr, value):
     atom.SetDoubleProp(attr, value)
 
+
 def _set_np_float_prop(atom, attr, value):
     atom.SetDoubleProp(attr, float(value))
 
+
 def _set_int_prop(atom, attr, value):
     atom.SetIntProp(attr, value)
 
+
 def _set_np_int_prop(atom, attr, value):
     atom.SetIntProp(attr, int(value))
 
+
+def _ignore_prop(atom, attr, value):
+    pass
+
+
 _atom_property_dispatcher = {
     str: _set_str_prop,
     float: _set_float_prop,
@@ -527,7 +536,7 @@ def _set_np_int_prop(atom, attr, value):
 
 def _set_atom_property(atom, attr, value):
     """Saves any attribute and value into an RDKit atom property"""
-    _atom_property_dispatcher[type(value)](atom, attr, value)
+    _atom_property_dispatcher.get(type(value), _ignore_prop)(atom, attr, value)
 
 
 def _infer_bo_and_charges(mol):
@@ -863,7 +872,7 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             # increment the charge
             else:
                 term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
-            # common to all cases: 
+            # common to all cases:
             # [*-]-*=*-[R] --> *=*-*=[R]
             # increment the charge and switch single and double bonds
             a = mol.GetAtomWithIdx(anion1)
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 5a096d3ed09..c61fb48cec0 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -547,7 +547,7 @@ def test_transfer_properties(self, smi):
         ("smi", "O=C([C@H](CC1=CNC=N1)N)O"),
         ("smi", "O=C([C@H](CC1=CN=CN1)N)O"),
         ("smi", "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]"),
-        # fixes from PR #???
+        # fixes from PR #3044
         ("smi", "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1"),
         ("smi", "[O-][n+]1cccnc1"),
         ("smi", "C[n+]1ccccc1"),

From 359fdf4526904bbb915c76e808ed90050419cb01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 12 May 2021 13:26:30 +0200
Subject: [PATCH 26/47] fix tests

---
 package/MDAnalysis/converters/RDKit.py        |  9 ++++-----
 .../MDAnalysisTests/converters/test_rdkit.py  | 20 ++++++++++++++++---
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 70ff046fbcb..339e413d3bf 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -966,12 +966,11 @@ def _transfer_properties(mol, newmol):
         props = {}
         for atom in atoms:
             ix = atom.GetIntProp("_MDAnalysis_index")
-            p = atom.GetPropsAsDict()
-            # remove properties assigned during reactions
-            p.pop("old_mapno", None)
-            p.pop("react_atom_idx", None)
-            props[ix] = p
+            props[ix] = atom.GetPropsAsDict()
         for atom in newmol.GetAtoms():
             ix = atom.GetIntProp("_MDAnalysis_index")
             for attr, value in props[ix].items():
                 _set_atom_property(atom, attr, value)
+            # remove properties assigned during reactions
+            atom.ClearProp("old_mapno")
+            atom.ClearProp("react_atom_idx")
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index c61fb48cec0..0281de4cf58 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -486,8 +486,14 @@ def test_standardize_patterns(self, smi, out):
 
     @pytest.mark.parametrize("attr, value, getter", [
         ("index", 42, "GetIntProp"),
+        ("index", np.int8(42), "GetIntProp"),
+        ("index", np.int16(42), "GetIntProp"),
         ("index", np.int32(42), "GetIntProp"),
         ("index", np.int64(42), "GetIntProp"),
+        ("index", np.uint8(42), "GetIntProp"),
+        ("index", np.uint16(42), "GetIntProp"),
+        ("index", np.uint32(42), "GetIntProp"),
+        ("index", np.uint64(42), "GetIntProp"),
         ("charge", 4.2, "GetDoubleProp"),
         ("charge", np.float32(4.2), "GetDoubleProp"),
         ("charge", np.float64(4.2), "GetDoubleProp"),
@@ -499,6 +505,11 @@ def test_set_atom_property(self, attr, value, getter):
         _set_atom_property(atom, prop, value)
         assert getattr(atom, getter)(prop) == value
 
+    def test_ignore_prop(self):
+        atom = Chem.Atom(1)
+        _set_atom_property(atom, "foo", {"bar": "baz"})
+        assert "foo" not in atom.GetPropsAsDict().items()
+
     @pytest.mark.parametrize("rdmol, product", [
         ("dummy_reactant", "dummy_product"),
     ], indirect=["rdmol", "product"])
@@ -528,6 +539,9 @@ def test_transfer_properties(self, smi):
             ix = a.GetIntProp("_MDAnalysis_index")
             new[ix] = {"_MDAnalysis_index": ix,
                        "dummy": a.GetProp("dummy")}
+            props = a.GetPropsAsDict().keys()
+            assert "old_mapno" not in props
+            assert "react_atom_idx" not in props
         assert new == old
 
     @pytest.mark.parametrize("input_type, input_str", [
@@ -634,9 +648,9 @@ def test_warn_conjugated_max_iter(self):
             _rebuild_conjugated_bonds(mol, 2)
 
     @pytest.mark.parametrize("smi", [
-        "[Li+]", "[Na+]", "[K+]", "[Ag+]",
-        "[Mg+2]", "[Ca+2]", "[Cu+2]", "[Zn+2]", "[Fe+2]",
-        "[Al+3]",
+        "[Li+]", "[Na+]", "[K+]", "[Rb+]", "[Ag+]", "[Cs+]",
+        "[Mg+2]", "[Ca+2]", "[Cu+2]", "[Zn+2]", "[Sr+2]", "[Ba+2]",
+        "[Al+3]", "[Fe+2]",
         "[Cl-]",
         "[O-2]",
         "[Na+].[Cl-]",

From e3585043a7e09e950c12d51f61474617bd16c475 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Thu, 13 May 2021 22:41:40 +0200
Subject: [PATCH 27/47] add more comments and UGM presentation

---
 package/MDAnalysis/converters/RDKit.py | 55 ++++++++++++++++++++------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 339e413d3bf..d3cfc051c68 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -252,6 +252,11 @@ class RDKitConverter(base.ConverterBase):
     cache since the arguments given are different. You can pass a
     ``cache=False`` argument to the converter to bypass the caching system.
 
+    To get a better understanding of how the converter works under the hood,
+    please refer to the following RDKit UGM presentation:
+    - `Video (4:55 to 8:05) <https://youtu.be/5b5wYmK4URU>`__
+    - Slides <https://github.com/rdkit/UGM_2020/blob/master/Presentations/C%C3%A9dricBouysset_From_RDKit_to_the_Universe.pdf>`__
+
 
     .. versionadded:: 2.0.0
 
@@ -478,15 +483,18 @@ def _add_mda_attr_to_rdkit(attr, value, mi):
     if attr == "names":
         # RDKit needs the name to be properly formatted for a
         # PDB file (1 letter elements start at col 14)
-        name = re.findall(r'(\D+|\d+)', value)
+        name = re.findall(r'(\D+|\d+)', value)  # splits alphabet from numeric
+        # alpha-numeric name
         if len(name) == 2:
+            # only two characters
             if len(value) == 2:
                 value = f" {value} "
+            # more than two --> cut to 4 characters
             else:
                 symbol, number = name
                 value = f"{symbol + number:>4.4}"
+        # no number in the name
         else:
-            # no number in the name
             value = f" {name[0]:<3.3}"
 
     # set attribute value in RDKit MonomerInfo
@@ -563,10 +571,15 @@ def _infer_bo_and_charges(mol):
     R-C(-O)-O the first oxygen read will receive a double bond and the other
     one will be charged. It will also affect more complex conjugated systems.
     """
+    # sort atoms according to their NUE
     atoms = sorted([a for a in mol.GetAtoms() if a.GetAtomicNum() > 1],
                    reverse=True,
                    key=lambda a: _get_nb_unpaired_electrons(a)[0])
 
+    # charges that should be assigned to monatomic cations
+    # structure --> atomic number : formal charge
+    # anion charges are directly handled by the code using the typical valence
+    # of the atom
     MONATOMIC_CATION_CHARGES = {
         3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1,
         12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2,
@@ -757,19 +770,27 @@ def _run_reaction(reaction, reactant):
     product : rdkit.Chem.rdchem.Mol
         The final product of the reaction
     """
+    # repeat the reaction until all matching moeities have been transformed
+    # note: this loop is meant to be ended by a `break` statement
+    # but let's avoid using `while` loops just in case
     for n in range(reactant.GetNumAtoms()):
         reactant.UpdatePropertyCache(strict=False)
         Chem.Kekulize(reactant)
         products = reaction.RunReactants((reactant,))
-        # only keep the first product
         if products:
+            # structure: tuple[tuple[mol]]
+            # length of 1st tuple: number of matches in reactant
+            # length of 2nd tuple: number of products yielded by the reaction
+            # if there's no match in reactant, returns an empty tuple
+            # here we have at least one match, and the reaction used yield
+            # a single product hence `products[0][0]`
             product = products[0][0]
             # map back atom properties from the reactant to the product
             _reassign_index_after_reaction(reactant, product)
             # apply the next reaction to the product
             reactant = product
+        # exit the loop if there was nothing to transform
         else:
-            # exit the loop if there's no product
             break
     reactant.UpdatePropertyCache(strict=False)
     Chem.Kekulize(reactant)
@@ -805,26 +826,36 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
         The molecule to transform, modified inplace
     max_iter : int
         Maximum number of iterations performed by the function
+
+    Notes
+    -----
+    The molecule is modified inplace
     """
     mol.UpdatePropertyCache(strict=False)
     Chem.Kekulize(mol)
+    # pattern used to find problematic conjugated bonds
+    # there's usually an even number of matches for this
     pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]")
+    # pattern used to finish fixing a series of conjugated bonds
     base_end_pattern = Chem.MolFromSmarts(
         "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]")
+    # used when there's an odd number of matches for `pattern`
     odd_end_pattern = Chem.MolFromSmarts(
         "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O),"
         "$([S;D4;v4]-[O-])]")
     # number of unique matches with the pattern
     n_matches = len(set([match[0]
                          for match in mol.GetSubstructMatches(pattern)]))
+    # nothing to standardize
     if n_matches == 0:
-        # nothing to standardize
         return
+    # single match (unusual)
     elif n_matches == 1:
         # as a last resort, the only way to standardize is to find a nitrogen
         # that can accept a double bond and a positive charge
         # or a carbonyl that will become an enolate
         end_pattern = odd_end_pattern
+    # at least 2 matches
     else:
         # give priority to base end pattern and then deal with the odd one if
         # necessary
@@ -886,23 +917,23 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
             mol.UpdatePropertyCache(strict=False)
             continue
 
-        # shorten the anion-anion pattern from n to n-1
+        # switch the position of the charge and the double bond
         matches = mol.GetSubstructMatches(pattern)
         if matches:
             # check if we haven't already transformed this triplet
             for match in matches:
-                # sort the indices for the comparison
+                # store order-independent atom indices
                 g = set(match)
+                # already transformed --> try the next one
                 if g in backtrack:
-                    # already transformed
                     continue
+                # add to backtracking and start the switch
                 else:
-                    # take the first one that hasn't been tried
                     anion, a1, a2 = match
                     backtrack.append(g)
                     break
+            # already performed all changes
             else:
-                # already performed all changes
                 if backtrack_cycles == 1:
                     # might be stuck because there were 2 "odd" end patterns
                     # misqualified as a single base one
@@ -917,12 +948,12 @@ def _rebuild_conjugated_bonds(mol, max_iter=200):
                 backtrack = [set(match)]
                 backtrack_cycles += 1
 
-            # charges
+            # switch charges
             a = mol.GetAtomWithIdx(anion)
             a.SetFormalCharge(a.GetFormalCharge() + 1)
             a = mol.GetAtomWithIdx(a2)
             a.SetFormalCharge(a.GetFormalCharge() - 1)
-            # bonds
+            # switch bond orders
             b = mol.GetBondBetweenAtoms(anion, a1)
             b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
             b = mol.GetBondBetweenAtoms(a1, a2)

From 8fbc8520df7b398195a0620a90f14d77788bfe65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Thu, 13 May 2021 23:04:15 +0200
Subject: [PATCH 28/47] fix links

---
 package/MDAnalysis/converters/RDKit.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index d3cfc051c68..bc8e822e7b1 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -254,8 +254,9 @@ class RDKitConverter(base.ConverterBase):
 
     To get a better understanding of how the converter works under the hood,
     please refer to the following RDKit UGM presentation:
-    - `Video (4:55 to 8:05) <https://youtu.be/5b5wYmK4URU>`__
-    - Slides <https://github.com/rdkit/UGM_2020/blob/master/Presentations/C%C3%A9dricBouysset_From_RDKit_to_the_Universe.pdf>`__
+
+    * `Video (4:55 to 8:05) <https://youtu.be/5b5wYmK4URU>`__
+    * `Slides <https://github.com/rdkit/UGM_2020/blob/master/Presentations/C%C3%A9dricBouysset_From_RDKit_to_the_Universe.pdf>`__
 
 
     .. versionadded:: 2.0.0

From 2c3e2fab60210a205c9440e817eeb6bccb8890dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Mon, 24 May 2021 18:52:37 +0200
Subject: [PATCH 29/47] add test for new atom reordering

---
 .../MDAnalysisTests/converters/test_rdkit.py   | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 0281de4cf58..667e8e306cf 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -22,6 +22,7 @@
 #
 
 import copy
+from io import StringIO
 import pytest
 import MDAnalysis as mda
 from MDAnalysis.topology.guessers import guess_atom_element
@@ -662,3 +663,20 @@ def test_ions(self, smi):
         mol = _standardize_patterns(mol)
         Chem.SanitizeMol(mol)
         assert mol.HasSubstructMatch(ref) and ref.HasSubstructMatch(mol)
+
+    @pytest.mark.parametrize("smi", [
+        "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
+        "O=S(C)(C)=NC",
+    ])
+    def test_reorder_atoms(self, smi):
+        mol = Chem.MolFromSmiles(smi)
+        mol = Chem.AddHs(mol)
+        # remove bond order and charges info
+        pdb = Chem.MolToPDBBlock(mol)
+        u = mda.Universe(StringIO(pdb), format="PDB")
+        # atoms are reordered during infering, and will be reordered back to
+        # their original order with Chem.RenumberAtoms
+        mu = u.atoms.convert_to.rdkit()
+        values = [a.GetSymbol() for a in mu.GetAtoms()]
+        expected = [a.GetSymbol() for a in mol.GetAtoms()]
+        assert values == expected

From 9ba698d9666a7512c22537134b486917079345ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Mon, 24 May 2021 18:54:54 +0200
Subject: [PATCH 30/47] remove unnecessary sorting when using rdkit-based
 guessers or selection

---
 package/MDAnalysis/core/selection.py    | 8 +++-----
 package/MDAnalysis/topology/guessers.py | 9 +++------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index d72ec4f6c03..f28c9c0d1fd 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -660,12 +660,10 @@ def apply(self, group):
             raise ValueError(f"{self.pattern!r} is not a valid SMARTS query")
         mol = group.convert_to("RDKIT")
         matches = mol.GetSubstructMatches(pattern, useChirality=True)
-        # convert rdkit indices to mdanalysis'
-        indices = [
-            mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index")
-            for match in matches for idx in match]
+        # flatten all matches and remove duplicated indices
+        indices = np.unique([idx for match in matches for idx in match])
         # create boolean mask for atoms based on index
-        mask = np.in1d(range(group.n_atoms), np.unique(indices))
+        mask = np.in1d(range(group.n_atoms), indices)
         return group[mask]
 
 
diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py
index b85842bc439..948b91fa7bf 100644
--- a/package/MDAnalysis/topology/guessers.py
+++ b/package/MDAnalysis/topology/guessers.py
@@ -494,9 +494,7 @@ def guess_aromaticities(atomgroup):
     .. versionadded:: 2.0.0
     """
     mol = atomgroup.convert_to("RDKIT")
-    atoms = sorted(mol.GetAtoms(),
-                   key=lambda a: a.GetIntProp("_MDAnalysis_index"))
-    return np.array([atom.GetIsAromatic() for atom in atoms])
+    return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()])
 
 
 def guess_gasteiger_charges(atomgroup):
@@ -518,7 +516,6 @@ def guess_gasteiger_charges(atomgroup):
     mol = atomgroup.convert_to("RDKIT")
     from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges
     ComputeGasteigerCharges(mol, throwOnParamFailure=True)
-    atoms = sorted(mol.GetAtoms(),
-                   key=lambda a: a.GetIntProp("_MDAnalysis_index"))
-    return np.array([atom.GetDoubleProp("_GasteigerCharge") for atom in atoms],
+    return np.array([atom.GetDoubleProp("_GasteigerCharge")
+                     for atom in mol.GetAtoms()],
                     dtype=np.float32)

From 3f6ff64dc0c512e793783ad32b56bfb3526b2471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Feb 2022 20:21:05 +0100
Subject: [PATCH 31/47] add missing test

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index d260551e1d4..ca5c10eb760 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -571,6 +571,7 @@ def test_transfer_properties(self, smi):
         ("smi", "[O-][n+]1onc2ccccc21"),
         ("smi", "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1"),
         ("smi", "[O-]c1ccccc1"),
+        ("smi", "[O-]C=CC=CCC=CC=[N+](C)C"),
         # test amino acids
         ("aa", "A"),
         ("aa", "G"),

From 58725594aed4540079abb617bf5018c9fe37e66f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Thu, 17 Feb 2022 18:09:10 +0100
Subject: [PATCH 32/47] add bond stereo test

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index ca5c10eb760..372a5bc62e5 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -673,3 +673,13 @@ def test_reorder_atoms(self, smi):
         values = [a.GetSymbol() for a in mu.GetAtoms()]
         expected = [a.GetSymbol() for a in mol.GetAtoms()]
         assert values == expected
+
+    def test_convert_stereoany_to_stereonone(self):
+        mol = Chem.MolFromSmiles("FC(Cl)=C(Br)I")
+        bond = next(b for b in mol.GetBonds()
+                    if b.GetBondTypeAsDouble() == 2)
+        bond.SetStereo(Chem.BondStereo.STEREOANY)
+        newmol = _standardize_patterns(mol)
+        bond = next(b for b in newmol.GetBonds()
+                    if b.GetBondTypeAsDouble() == 2)
+        assert bond.GetStereo() == Chem.BondStereo.STEREONONE

From 428d341f48e84e9a0b3b22dcd6ae745021a3d8b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Mon, 7 Mar 2022 20:08:51 +0100
Subject: [PATCH 33/47] more docs and comments

---
 package/MDAnalysis/converters/RDKit.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 3e6a8ff7f51..3cc6f684cfa 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -53,7 +53,7 @@
     infers the structures of approximately 90% of the `ChEMBL27`_ dataset.
     Work is currently ongoing on further improving this and updates to the
     converter are expected in future releases of MDAnalysis.
-    Please see `Pull Request #3044`_ for more details.
+    Please see `Pull Request #3044`_ and `Issue #3339`_ for more details.
 
 
 
@@ -77,6 +77,7 @@
 
 .. _`ChEMBL27`: https://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_27/
 .. _`Pull Request #3044`: https://github.com/MDAnalysis/mdanalysis/pull/3044
+.. _`Issue #3339`: https://github.com/MDAnalysis/mdanalysis/issues/3339
 
 """
 
@@ -261,6 +262,10 @@ class RDKitConverter(base.ConverterBase):
     cache since the arguments given are different. You can pass a
     ``cache=False`` argument to the converter to bypass the caching system.
 
+    The ``_MDAnalysis_index`` property of the resulting molecule corresponds
+    to the index of the specific :class:`~MDAnalysis.core.groups.AtomGroup`
+    that was converted, which may not always match the ``index`` property.
+
     To get a better understanding of how the converter works under the hood,
     please refer to the following RDKit UGM presentation:
 
@@ -270,6 +275,11 @@ class RDKitConverter(base.ConverterBase):
 
     .. versionadded:: 2.0.0
 
+    .. versionchanged:: 2.2.0
+        Improved the accuracy of the converter. Atoms in the resulting molecule
+        follow the same order as in the AtomGroup. Fixed a
+        ``SanitizationError`` when disabling the bond order inferring step.
+
     """
 
     lib = 'RDKIT'
@@ -440,7 +450,8 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         # infer bond orders and formal charges from the connectivity
         _infer_bo_and_charges(mol)
         mol = _standardize_patterns(mol, max_iter)
-        # reorder atoms to match MDAnalysis
+        # reorder atoms to match MDAnalysis, since the reactions from
+        # _standardize_patterns will mess up the original order
         order = np.argsort([atom.GetIntProp("_MDAnalysis_index")
                             for atom in mol.GetAtoms()])
         mol = Chem.RenumberAtoms(mol, order.astype(int).tolist())

From 50de418b8df968660e5ec0cf507f25efdc32edc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Mon, 7 Mar 2022 20:10:39 +0100
Subject: [PATCH 34/47] use PDBWriter.deduce_pdb_atom_name instead of custom
 code

---
 package/MDAnalysis/converters/RDKit.py | 36 +++++++++++++-------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 3cc6f684cfa..1b2b366cdbd 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -82,9 +82,9 @@
 """
 
 import warnings
-import re
 import copy
 from functools import lru_cache
+from io import StringIO
 
 import numpy as np
 
@@ -92,6 +92,7 @@
 from ..core.topologyattrs import _TOPOLOGY_ATTRS
 from ..coordinates import memory
 from ..coordinates import base
+from ..coordinates.PDB import PDBWriter
 
 try:
     from rdkit import Chem
@@ -122,6 +123,9 @@
     PERIODIC_TABLE = Chem.GetPeriodicTable()
 
 
+_deduce_PDB_atom_name = PDBWriter(StringIO())._deduce_PDB_atom_name
+
+
 class RDKitReader(memory.MemoryReader):
     """Coordinate reader for RDKit.
 
@@ -398,6 +402,13 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
     for attr in RDATTRIBUTES.keys():
         if hasattr(ag, attr):
             pdb_attrs[attr] = getattr(ag, attr)
+    resnames = pdb_attrs.get("resnames", None)
+    if resnames is None:
+        def get_resname(idx):
+            return ""
+    else:
+        def get_resname(idx):
+            return resnames[idx]
 
     other_attrs = {}
     for attr in ["charges", "segids", "types"]:
@@ -416,7 +427,7 @@ def atomgroup_to_mol(ag, NoImplicit=True, max_iter=200, force=False):
         # add PDB-like properties
         mi = Chem.AtomPDBResidueInfo()
         for attr, values in pdb_attrs.items():
-            _add_mda_attr_to_rdkit(attr, values[i], mi)
+            _add_mda_attr_to_rdkit(attr, values[i], mi, get_resname(i))
         rdatom.SetMonomerInfo(mi)
         # other properties
         for attr in other_attrs.keys():
@@ -479,7 +490,7 @@ def set_converter_cache_size(maxsize):
     atomgroup_to_mol = lru_cache(maxsize=maxsize)(atomgroup_to_mol.__wrapped__)
 
 
-def _add_mda_attr_to_rdkit(attr, value, mi):
+def _add_mda_attr_to_rdkit(attr, value, mi, resname=""):
     """Converts an MDAnalysis atom attribute into the RDKit equivalent and
     stores it into an RDKit :class:`~rdkit.Chem.rdchem.AtomPDBResidueInfo`.
 
@@ -491,26 +502,15 @@ def _add_mda_attr_to_rdkit(attr, value, mi):
         Attribute value as found in the AtomGroup
     mi : rdkit.Chem.rdchem.AtomPDBResidueInfo
         MonomerInfo object that will store the relevant atom attributes
+    resname : str
+        Residue name of the atom, if available
     """
     if isinstance(value, np.generic):
         # convert numpy types to python standard types
         value = value.item()
     if attr == "names":
-        # RDKit needs the name to be properly formatted for a
-        # PDB file (1 letter elements start at col 14)
-        name = re.findall(r'(\D+|\d+)', value)  # splits alphabet from numeric
-        # alpha-numeric name
-        if len(name) == 2:
-            # only two characters
-            if len(value) == 2:
-                value = f" {value} "
-            # more than two --> cut to 4 characters
-            else:
-                symbol, number = name
-                value = f"{symbol + number:>4.4}"
-        # no number in the name
-        else:
-            value = f" {name[0]:<3.3}"
+        # RDKit needs the name to be properly formatted for a PDB file
+        value = _deduce_PDB_atom_name(value, resname)
 
     # set attribute value in RDKit MonomerInfo
     rdattr = RDATTRIBUTES[attr]

From eec6b1d56cf26f16e0bf7896e267e4add0320a11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Mon, 7 Mar 2022 20:11:35 +0100
Subject: [PATCH 35/47] remove unnecessary bond stereo assignment

---
 package/MDAnalysis/converters/RDKit.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 1b2b366cdbd..bb063962f1e 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -758,11 +758,6 @@ def _standardize_patterns(mol, max_iter=200):
     # reassign all properties
     _transfer_properties(mol, newmol)
 
-    # fix bonds with "crossed" stereo
-    for bond in newmol.GetBonds():
-        if bond.GetStereo() == Chem.BondStereo.STEREOANY:
-            bond.SetStereo(Chem.BondStereo.STEREONONE)
-
     return newmol
 
 

From 393d1419e48e4b7ef6150aae833b3060c2dc376c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Fri, 11 Mar 2022 13:17:51 +0100
Subject: [PATCH 36/47] remove unused bond test

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 372a5bc62e5..ca5c10eb760 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -673,13 +673,3 @@ def test_reorder_atoms(self, smi):
         values = [a.GetSymbol() for a in mu.GetAtoms()]
         expected = [a.GetSymbol() for a in mol.GetAtoms()]
         assert values == expected
-
-    def test_convert_stereoany_to_stereonone(self):
-        mol = Chem.MolFromSmiles("FC(Cl)=C(Br)I")
-        bond = next(b for b in mol.GetBonds()
-                    if b.GetBondTypeAsDouble() == 2)
-        bond.SetStereo(Chem.BondStereo.STEREOANY)
-        newmol = _standardize_patterns(mol)
-        bond = next(b for b in newmol.GetBonds()
-                    if b.GetBondTypeAsDouble() == 2)
-        assert bond.GetStereo() == Chem.BondStereo.STEREONONE

From a2886e5e5ed8fe8bd845bcb158e939b0f6ac3af8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 17:57:06 +0100
Subject: [PATCH 37/47] add _MDAnalysis_name atom property

---
 package/MDAnalysis/converters/RDKit.py             | 13 +++++++++----
 testsuite/MDAnalysisTests/converters/test_rdkit.py |  8 ++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index bb063962f1e..fdaa8d34e91 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -195,6 +195,7 @@ class RDKitConverter(base.ConverterBase):
     | icodes                | atom.GetMonomerInfo().GetInsertionCode()  |
     +-----------------------+-------------------------------------------+
     | names                 | atom.GetMonomerInfo().GetName()           |
+    |                       | atom.GetProp("_MDAnalysis_name")          |
     +-----------------------+-------------------------------------------+
     | occupancies           | atom.GetMonomerInfo().GetOccupancy()      |
     +-----------------------+-------------------------------------------+
@@ -280,9 +281,13 @@ class RDKitConverter(base.ConverterBase):
     .. versionadded:: 2.0.0
 
     .. versionchanged:: 2.2.0
-        Improved the accuracy of the converter. Atoms in the resulting molecule
-        follow the same order as in the AtomGroup. Fixed a
-        ``SanitizationError`` when disabling the bond order inferring step.
+        See the full changelog for more details:
+
+        - Improved the accuracy of the converter
+        - Atoms in the resulting molecule follow the same order as in the
+          AtomGroup
+        - Fixed a ``SanitizationError`` when disabling the bond order inferring
+          step
 
     """
 
@@ -411,7 +416,7 @@ def get_resname(idx):
             return resnames[idx]
 
     other_attrs = {}
-    for attr in ["charges", "segids", "types"]:
+    for attr in ["charges", "segids", "types", "names"]:
         if hasattr(ag, attr):
             other_attrs[attr] = getattr(ag, attr)
 
diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index ca5c10eb760..883fd1158b3 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -673,3 +673,11 @@ def test_reorder_atoms(self, smi):
         values = [a.GetSymbol() for a in mu.GetAtoms()]
         expected = [a.GetSymbol() for a in mol.GetAtoms()]
         assert values == expected
+
+    def test_pdb_names(self):
+        u = mda.Universe(PDB_helix)
+        mol = u.atoms.convert_to.rdkit()
+        names = u.atoms.names
+        rd_names = np.array([a.GetProp("_MDAnalysis_name")
+                                 for a in mol.GetAtoms()])
+        assert (names == rd_names).all()

From 055692bf0941ad9d414eeec49e729ae849289ad0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 19:05:50 +0100
Subject: [PATCH 38/47] add bond stereo test

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 883fd1158b3..7434ab5e2f6 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -681,3 +681,15 @@ def test_pdb_names(self):
         rd_names = np.array([a.GetProp("_MDAnalysis_name")
                                  for a in mol.GetAtoms()])
         assert (names == rd_names).all()
+
+    @pytest.mark.parametrize("smi", [
+        r"F/C(Br)=C(Cl)/I",
+        r"F\C(Br)=C(Cl)\I",
+        "F-C(Br)=C(Cl)-I",
+    ])
+    def test_bond_stereo_not_stereoany(self, smi):
+        u = mda.Universe.from_smiles(smi)
+        mol = u.atoms.convert_to.rdkit(force=True)
+        for bond in mol.GetBonds():
+            if bond.GetBondTypeAsDouble() == 2:
+                assert bond.GetStereo() != Chem.BondStereo.STEREOANY

From 3c9d874fd06eb05a656083c2ceb76714d912e62b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 19:06:05 +0100
Subject: [PATCH 39/47] mention failing molecules in docs

---
 package/MDAnalysis/converters/RDKit.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index fdaa8d34e91..5bf73a8bd8e 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -277,6 +277,10 @@ class RDKitConverter(base.ConverterBase):
     * `Video (4:55 to 8:05) <https://youtu.be/5b5wYmK4URU>`__
     * `Slides <https://github.com/rdkit/UGM_2020/blob/master/Presentations/C%C3%A9dricBouysset_From_RDKit_to_the_Universe.pdf>`__
 
+    Their are some molecules containing specific patterns that the converter
+    cannot currently tackle correctly. See
+    `Issue #3339 <https://github.com/MDAnalysis/mdanalysis/issues/3339>`__ for
+    more info.
 
     .. versionadded:: 2.0.0
 

From 1a094faa8af0cbb4638aeac57193fc0ef6a21be8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 19:27:59 +0100
Subject: [PATCH 40/47] update changelog

---
 package/CHANGELOG | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index 3e68e15fecc..9f6412b85e0 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -13,13 +13,23 @@ The rules for this file:
   * release numbers follow "Semantic Versioning" http://semver.org
 
 ------------------------------------------------------------------------------
-??/??/?? IAlibay
+??/??/?? IAlibay, cbouy
 
  * 2.2.0
 
 Fixes
+  * When converting an AtomGroup to an RDKit molecule (PR #3044):
+    - the atoms are now in the same order
+    - the output of `atom.GetMonomerInfo().GetName()` now follows the
+      guidelines for PDB files while the original name is still available
+      through `atom.GetProp("_MDAnalysis_name")`
+    - using `NoImplicit=False` should no longer throw a `SanitizationError`
 
 Enhancements
+  * Improves the RDKitConverter's accuracy (PR #3044): AtomGroups containing
+    monatomic ion charges or edge cases with nitrogen, sulfur, phosphorus and
+    conjugated systems should now have correctly assigned bond orders and
+    charges.
 
 Changes
 

From 9b3b7b5972e0d8b90c7f6e62b710cd531bf61e93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 21:15:04 +0100
Subject: [PATCH 41/47] improve readability

---
 .../MDAnalysisTests/converters/test_rdkit.py  | 234 +++++++++---------
 1 file changed, 117 insertions(+), 117 deletions(-)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 7434ab5e2f6..a49f5767f08 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -101,6 +101,11 @@ def product(request):
     return getattr(MolFactory, request.param)()
 
 
+def is_isomorphic(mol, ref, useChirality=False):
+    return (mol.HasSubstructMatch(ref, useChirality=useChirality)
+            and ref.HasSubstructMatch(mol, useChirality=useChirality))
+
+
 @requires_rdkit
 class TestRDKitReader(object):
     @pytest.mark.parametrize("rdmol, n_frames", [
@@ -204,7 +209,7 @@ def test_monomer_info(self, pdb, sel_str, atom_index):
     def test_identical_topology(self, rdmol):
         u = mda.Universe(rdmol)
         umol = u.atoms.convert_to("RDKIT")
-        assert rdmol.HasSubstructMatch(umol) and umol.HasSubstructMatch(rdmol)
+        assert is_isomorphic(rdmol, umol)
         u2 = mda.Universe(umol)
         assert_equal(u.atoms.bonds, u2.atoms.bonds)
         assert_equal(u.atoms.elements, u2.atoms.elements)
@@ -303,9 +308,7 @@ def test_assign_coordinates(self, pdb):
     def test_assign_stereochemistry(self, mol2):
         umol = mol2.atoms.convert_to("RDKIT")
         rdmol = Chem.MolFromMol2File(mol2_molecule, removeHs=False)
-        assert rdmol.HasSubstructMatch(
-            umol, useChirality=True) and umol.HasSubstructMatch(
-            rdmol, useChirality=True)
+        assert is_isomorphic(rdmol, umol, useChirality=True)
 
     def test_trajectory_coords(self):
         u = mda.Universe.from_smiles(
@@ -395,18 +398,46 @@ def test_sanitize_fail_warning(self):
 @requires_rdkit
 class TestRDKitFunctions(object):
 
-    def mol_to_template(self, mol):
-        """Remove bond orders and charges from molecule"""
-        template = Chem.AddHs(mol)
-        for atom in template.GetAtoms():
+    def add_Hs_remove_bo_and_charges(self, mol):
+        """Add hydrogens and remove bond orders and charges from a molecule"""
+        mH = Chem.AddHs(mol)
+        for atom in mH.GetAtoms():
             atom.SetIsAromatic(False)
             atom.SetFormalCharge(0)
             atom.SetNoImplicit(True)
-        for bond in template.GetBonds():
+        for bond in mH.GetBonds():
             bond.SetIsAromatic(False)
             bond.SetBondType(Chem.BondType.SINGLE)
-        template.UpdatePropertyCache(strict=False)
-        return template
+        mH.UpdatePropertyCache(strict=False)
+        return mH
+    
+    def enumerate_reordered_mol(self, mol):
+        """Enumerates all possible starting atoms for a given molecule"""
+        # go through each possible starting atom
+        for root_atom in mol.GetAtoms():
+            smi = Chem.MolToSmiles(mol, rootedAtAtom=root_atom.GetIdx())
+            reordered_mol = Chem.MolFromSmiles(smi, sanitize=False)
+            for atom in reordered_mol.GetAtoms():
+                atom.SetNoImplicit(True)
+            reordered_mol.UpdatePropertyCache(strict=False)
+            yield reordered_mol
+    
+    def assign_bond_orders_and_charges(self, mol):
+        """Returns a sanitized molecule with infered bond orders and charges"""
+        _infer_bo_and_charges(mol)
+        mol = _standardize_patterns(mol)
+        Chem.SanitizeMol(mol)
+        return mol
+
+    def assert_mols_equal_or_resonance_structure(self, mol, ref):
+        """Checks if 2 molecules are equal. If not, enumerates the resonance
+        structures of the first molecule and checks again"""
+        isomorphic = is_isomorphic(mol, ref)
+        if not is_isomorphic:
+            # try resonance structures
+            isomorphic = any(is_isomorphic(res_mol, ref)
+                             for res_mol in Chem.ResonanceMolSupplier(mol))
+        assert isomorphic
 
     @pytest.mark.parametrize("smi, out", [
         ("C(-[H])(-[H])(-[H])-[H]", "C"),
@@ -431,8 +462,7 @@ def test_infer_bond_orders(self, smi, out):
         Chem.SanitizeMol(mol)
         mol = Chem.RemoveHs(mol)
         molref = Chem.MolFromSmiles(out)
-        assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch(
-            mol), "{} != {}".format(Chem.MolToSmiles(mol), out)
+        assert is_isomorphic(mol, molref), "{} != {}".format(Chem.MolToSmiles(mol), out)
 
     @pytest.mark.parametrize("smi, atom_idx, charge", [
         ("[C](-[H])(-[H])(-[H])-[O]", 4, -1),
@@ -469,13 +499,10 @@ def test_infer_charges(self, smi, atom_idx, charge):
     def test_standardize_patterns(self, smi, out):
         mol = Chem.MolFromSmiles(smi, sanitize=False)
         mol.UpdatePropertyCache(strict=False)
-        _infer_bo_and_charges(mol)
-        mol = _standardize_patterns(mol)
-        Chem.SanitizeMol(mol)
+        mol = self.assign_bond_orders_and_charges(mol)
         mol = Chem.RemoveHs(mol)
         molref = Chem.MolFromSmiles(out)
-        assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch(
-            mol), "{} != {}".format(Chem.MolToSmiles(mol), out)
+        assert is_isomorphic(mol, molref), "{} != {}".format(Chem.MolToSmiles(mol), out)
 
     @pytest.mark.parametrize("attr, value, getter", [
         ("index", 42, "GetIntProp"),
@@ -518,15 +545,14 @@ def test_reassign_index_after_reaction(self, rdmol, product):
     ])
     def test_transfer_properties(self, smi):
         mol = Chem.MolFromSmiles(smi)
-        mol = self.mol_to_template(mol)
+        mol = self.add_Hs_remove_bo_and_charges(mol)
         old = {}
         for atom in mol.GetAtoms():
             ix = atom.GetIdx()
             atom.SetIntProp("_MDAnalysis_index", ix)
             atom.SetProp("dummy", f"foo_{ix}")
             old[ix] = {"_MDAnalysis_index": ix, "dummy": f"foo_{ix}"}
-        _infer_bo_and_charges(mol)
-        newmol = _standardize_patterns(mol)
+        newmol = self.assign_bond_orders_and_charges(mol)
         new = {}
         for a in newmol.GetAtoms():
             ix = a.GetIntProp("_MDAnalysis_index")
@@ -537,102 +563,78 @@ def test_transfer_properties(self, smi):
             assert "react_atom_idx" not in props
         assert new == old
 
-    @pytest.mark.parametrize("input_type, input_str", [
-        ("smi", "c1ccc(cc1)-c1ccccc1-c1ccccc1"),
-        ("smi", "c1cc[nH]c1"),
-        ("smi", "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1"),
-        ("smi", "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21"),
-        ("smi", "c1csc(c1)-c1ccoc1-c1cc[nH]c1"),
-        ("smi", "C1=C2C(=NC=N1)N=CN2"),
-        ("smi", "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]"),
-        ("smi", "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1"),
-        ("smi", "C=CC=CC=CC=CC=CC=C"),
-        ("smi", "NCCCCC([NH3+])C(=O)[O-]"),
-        ("smi", "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C"),
-        ("smi", "C#CC=C"),
+    @pytest.mark.parametrize("smi", [
+        "c1ccc(cc1)-c1ccccc1-c1ccccc1",
+        "c1cc[nH]c1",
+        "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",
+        "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21",
+        "c1csc(c1)-c1ccoc1-c1cc[nH]c1",
+        "C1=C2C(=NC=N1)N=CN2",
+        "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]",
+        "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1",
+        "C=CC=CC=CC=CC=CC=C",
+        "NCCCCC([NH3+])C(=O)[O-]",
+        "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C",
+        "C#CC=C",
         # HID HIE HIP residues, see PR #2941
-        ("smi", "O=C([C@H](CC1=CNC=N1)N)O"),
-        ("smi", "O=C([C@H](CC1=CN=CN1)N)O"),
-        ("smi", "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]"),
+        "O=C([C@H](CC1=CNC=N1)N)O",
+        "O=C([C@H](CC1=CN=CN1)N)O",
+        "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
         # fixes from PR #3044
-        ("smi", "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1"),
-        ("smi", "[O-][n+]1cccnc1"),
-        ("smi", "C[n+]1ccccc1"),
-        ("smi", "[PH4+]"),
-        ("smi", "c1nc[nH]n1"),
-        ("smi", "CC(=O)C=C(C)N"),
-        ("smi", "CC(C)=CC=C[O-]"),
-        ("smi", "O=S(C)(C)=NC"),
-        ("smi", "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1"),
-        ("smi", "CCCC/C=C/C#CC#CCCCCCCCC(=O)O"),
-        ("smi", "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1"),
-        ("smi", "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1"),
-        ("smi", "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N"),
-        ("smi", "[O-][n+]1onc2ccccc21"),
-        ("smi", "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1"),
-        ("smi", "[O-]c1ccccc1"),
-        ("smi", "[O-]C=CC=CCC=CC=[N+](C)C"),
-        # test amino acids
-        ("aa", "A"),
-        ("aa", "G"),
-        ("aa", "I"),
-        ("aa", "L"),
-        ("aa", "P"),
-        ("aa", "V"),
-        ("aa", "F"),
-        ("aa", "W"),
-        ("aa", "Y"),
-        ("aa", "D"),
-        ("aa", "E"),
-        ("aa", "R"),
-        ("aa", "H"),
-        ("aa", "K"),
-        ("aa", "S"),
-        ("aa", "T"),
-        ("aa", "C"),
-        ("aa", "M"),
-        ("aa", "N"),
-        ("aa", "Q"),
-        # test nucleic acids
-        ("na", "A"),
-        ("na", "T"),
-        ("na", "U"),
-        ("na", "C"),
-        ("na", "G"),
+        "CCOC(=O)c1cc2cc(C(=O)O)ccc2[nH]1",
+        "[O-][n+]1cccnc1",
+        "C[n+]1ccccc1",
+        "[PH4+]",
+        "c1nc[nH]n1",
+        "CC(=O)C=C(C)N",
+        "CC(C)=CC=C[O-]",
+        "O=S(C)(C)=NC",
+        "Cc1ccc2c3ncn(Cc4ccco4)c(O)c-3nc2c1",
+        "CCCC/C=C/C#CC#CCCCCCCCC(=O)O",
+        "c1c2c(=O)n3cccc(C)c3nc2n(C)c(=N)c1C(=O)NCc1cnccc1",
+        "N#Cc1c[nH]c(C(=O)NC(=O)c2cc[n+]([O-])cc2)n1",
+        "C[C@@H](Oc1cc(F)ccc1Nc1ncnc2cc(N=S3(=O)CCC3)cc(F)c12)C(=O)NCC#N",
+        "[O-][n+]1onc2ccccc21",
+        "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1",
+        "[O-]c1ccccc1",
+        "[O-]C=CC=CCC=CC=[N+](C)C",
+        # amino acids
+        "C[C@H](N)C(=O)O",  # A
+        "NCC(=O)O",  # G
+        "CC[C@H](C)[C@H](N)C(=O)O",  # I
+        "CC(C)C[C@H](N)C(=O)O",  # L
+        "O=C(O)[C@@H]1CCCN1",  # P
+        "CC(C)[C@H](N)C(=O)O",  # V
+        "N[C@@H](Cc1ccccc1)C(=O)O",  # F
+        "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O",  # W
+        "N[C@@H](Cc1ccc(O)cc1)C(=O)O",  # Y
+        "N[C@@H](CC(=O)O)C(=O)O",  # D
+        "N[C@@H](CCC(=O)O)C(=O)O",  # E
+        "N=C(N)NCCC[C@H](N)C(=O)O",  # R
+        "N[C@@H](Cc1c[nH]cn1)C(=O)O",  # H
+        "NCCCC[C@H](N)C(=O)O",  # K
+        "N[C@@H](CO)C(=O)O",  # S
+        "C[C@@H](O)[C@H](N)C(=O)O",  # T
+        "N[C@@H](CS)C(=O)O",  # C
+        "CSCC[C@H](N)C(=O)O",  # M
+        "NC(=O)C[C@H](N)C(=O)O",  # N
+        "NC(=O)CC[C@H](N)C(=O)O",  # Q
+        # nucleic acids
+        "Nc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O1",  # A
+        "Cc1cn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]c1=O",  # T
+        "O=c1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1",  # U
+        "Nc1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)n1",  # C
+        "Nc1nc2c(ncn2[C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1",  # G
     ])
-    def test_order_independant(self, input_type, input_str):
+    def test_order_independant(self, smi):
         # generate mol with hydrogens but without bond orders
-        if input_type == "smi":
-            ref = Chem.MolFromSmiles(input_str)
-        elif input_type == "na":
-            ref = Chem.MolFromSequence(input_str, flavor=9)
-        else:
-            ref = Chem.MolFromSequence(input_str)
-        template = self.mol_to_template(ref)
-        # go through each possible starting atom
-        for a in template.GetAtoms():
-            smi = Chem.MolToSmiles(template, rootedAtAtom=a.GetIdx())
-            m = Chem.MolFromSmiles(smi, sanitize=False)
-            for atom in m.GetAtoms():
-                atom.SetFormalCharge(0)
-                atom.SetIsAromatic(False)
-                atom.SetNoImplicit(True)
-            m.UpdatePropertyCache(strict=False)
-            _infer_bo_and_charges(m)
-            m = _standardize_patterns(m)
-            Chem.SanitizeMol(m)
+        ref = Chem.MolFromSmiles(smi)
+        stripped_mol = self.add_Hs_remove_bo_and_charges(ref)
+        # enumerate mols with different starting atoms
+        for m in self.enumerate_reordered_mol(stripped_mol):
+            m = self.assign_bond_orders_and_charges(m)
             m = Chem.RemoveHs(m)
-            match = m.HasSubstructMatch(ref) and ref.HasSubstructMatch(m)
-            if not match:
-                # try resonance structures for charged conjugated systems
-                for mol in Chem.ResonanceMolSupplier(m, maxStructs=20):
-                    match = (mol.HasSubstructMatch(ref) and
-                             ref.HasSubstructMatch(mol))
-                    if match:
-                        break
-            assert match, (f"(input) {Chem.MolToSmiles(ref)} != "
-                           f"{Chem.MolToSmiles(m)} (output) "
-                           f"root atom {a.GetIdx()}")
+            self.assert_mols_equal_or_resonance_structure(m, ref)
 
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"
@@ -651,11 +653,9 @@ def test_warn_conjugated_max_iter(self):
     ])
     def test_ions(self, smi):
         ref = Chem.MolFromSmiles(smi)
-        mol = self.mol_to_template(ref)
-        _infer_bo_and_charges(mol)
-        mol = _standardize_patterns(mol)
-        Chem.SanitizeMol(mol)
-        assert mol.HasSubstructMatch(ref) and ref.HasSubstructMatch(mol)
+        stripped_mol = self.add_Hs_remove_bo_and_charges(ref)
+        mol = self.assign_bond_orders_and_charges(stripped_mol)
+        assert is_isomorphic(mol, ref)
 
     @pytest.mark.parametrize("smi", [
         "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",

From b18150f5720569fb0a4ffc8307ed14d78f8728ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 16 Mar 2022 22:00:35 +0100
Subject: [PATCH 42/47] improve tests readability

---
 .../MDAnalysisTests/converters/test_rdkit.py  | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index a49f5767f08..a039e021bba 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -192,17 +192,19 @@ def test_mol_from_selection(self, peptide, resname, n_atoms, n_fragments):
     ])
     def test_monomer_info(self, pdb, sel_str, atom_index):
         sel = pdb.select_atoms(sel_str)
+        mda_atom = sel.atoms[atom_index]
         umol = sel.convert_to.rdkit(NoImplicit=False)
         atom = umol.GetAtomWithIdx(atom_index)
         mi = atom.GetMonomerInfo()
-
-        for mda_attr, rd_attr in RDATTRIBUTES.items():
-            rd_value = getattr(mi, "Get%s" % rd_attr)()
-            if hasattr(sel, mda_attr):
-                mda_value = getattr(sel, mda_attr)[atom_index]
-                if mda_attr == "names":
-                    rd_value = rd_value.strip()
-                assert rd_value == mda_value
+        assert mda_atom.altLoc == mi.GetAltLoc()
+        assert mda_atom.chainID == mi.GetChainId()
+        assert mda_atom.icode == mi.GetInsertionCode()
+        assert mda_atom.name == mi.GetName().strip()
+        assert mda_atom.occupancy == mi.GetOccupancy()
+        assert mda_atom.resname == mi.GetResidueName()
+        assert mda_atom.resid == mi.GetResidueNumber()
+        assert mda_atom.segindex == mi.GetSegmentNumber()
+        assert mda_atom.tempfactor == mi.GetTempFactor()
 
     @pytest.mark.parametrize("rdmol", ["mol2_mol", "smiles_mol"],
                              indirect=True)
@@ -283,11 +285,10 @@ def test_add_mda_attr_to_rdkit(self, attr, value, expected):
     def test_other_attributes(self, mol2, idx):
         mol = mol2.atoms.convert_to("RDKIT")
         rdatom = mol.GetAtomWithIdx(idx)
-        rdprops = rdatom.GetPropsAsDict()
-        for prop in ["charge", "segid", "type"]:
-            rdprop = rdprops["_MDAnalysis_%s" % prop]
-            mdaprop = getattr(mol2.atoms[idx], prop)
-            assert rdprop == mdaprop
+        mda_atom = mol2.atoms[idx]
+        assert mda_atom.charge == rdatom.GetDoubleProp("_MDAnalysis_charge")
+        assert mda_atom.segid == rdatom.GetProp("_MDAnalysis_segid")
+        assert mda_atom.type == rdatom.GetProp("_MDAnalysis_type")
 
     @pytest.mark.parametrize("sel_str", [
         "resname ALA",

From e01fc07e704449688f357250684ce8f07b4cfb87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Thu, 17 Mar 2022 20:59:41 +0100
Subject: [PATCH 43/47] fix resonance struct test

---
 .../MDAnalysisTests/converters/test_rdkit.py  | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index a039e021bba..b799c8ffb83 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -430,15 +430,15 @@ def assign_bond_orders_and_charges(self, mol):
         Chem.SanitizeMol(mol)
         return mol
 
-    def assert_mols_equal_or_resonance_structure(self, mol, ref):
-        """Checks if 2 molecules are equal. If not, enumerates the resonance
-        structures of the first molecule and checks again"""
-        isomorphic = is_isomorphic(mol, ref)
-        if not is_isomorphic:
-            # try resonance structures
-            isomorphic = any(is_isomorphic(res_mol, ref)
-                             for res_mol in Chem.ResonanceMolSupplier(mol))
-        assert isomorphic
+    def assert_isomorphic_resonance_structure(self, mol, ref):
+        """Checks if 2 molecules are isomorphic using their resonance
+        structures
+        """
+        isomorphic = mol.HasSubstructMatch(ref)
+        if not isomorphic:
+            isomorphic = bool(Chem.ResonanceMolSupplier(mol)
+                              .GetSubstructMatch(ref))
+        assert isomorphic, f"{Chem.MolToSmiles(ref)} != {Chem.MolToSmiles(mol)}"
 
     @pytest.mark.parametrize("smi, out", [
         ("C(-[H])(-[H])(-[H])-[H]", "C"),
@@ -635,7 +635,8 @@ def test_order_independant(self, smi):
         for m in self.enumerate_reordered_mol(stripped_mol):
             m = self.assign_bond_orders_and_charges(m)
             m = Chem.RemoveHs(m)
-            self.assert_mols_equal_or_resonance_structure(m, ref)
+            self.assert_isomorphic_resonance_structure(m, ref)
+
 
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"

From 4e4a40bc70a0c3b1175926a63dc06e9fe5a6ef3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Thu, 17 Mar 2022 20:59:55 +0100
Subject: [PATCH 44/47] add xfail test

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index b799c8ffb83..3c57c4dce6d 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -599,6 +599,8 @@ def test_transfer_properties(self, smi):
         "Cc1cc[n+](CC[n+]2ccc(C)cc2)cc1",
         "[O-]c1ccccc1",
         "[O-]C=CC=CCC=CC=[N+](C)C",
+        "C=[N+](-[O-])-C",
+        "C-[N-]-C(=O)C",
         # amino acids
         "C[C@H](N)C(=O)O",  # A
         "NCC(=O)O",  # G
@@ -637,6 +639,15 @@ def test_order_independant(self, smi):
             m = Chem.RemoveHs(m)
             self.assert_isomorphic_resonance_structure(m, ref)
 
+    @pytest.mark.xfail(reason="Not currently tackled by the RDKitConverter")
+    @pytest.mark.parametrize("smi", [
+        "C-[N+]#N",
+        "C-N=[N+]=[N-]",
+        "C-[O+]=C",
+        "C-[N+]#[C-]",
+    ])
+    def test_order_independant_issue_3339(self, smi):
+        self.test_order_independant(smi)
 
     def test_warn_conjugated_max_iter(self):
         smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"

From 5a48bd693ecef9d1400b13f4feb5494e7afab179 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 30 Mar 2022 14:52:58 +0200
Subject: [PATCH 45/47] lint

---
 testsuite/MDAnalysisTests/converters/test_rdkit.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py
index 3c57c4dce6d..24a6066b4eb 100644
--- a/testsuite/MDAnalysisTests/converters/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py
@@ -411,7 +411,7 @@ def add_Hs_remove_bo_and_charges(self, mol):
             bond.SetBondType(Chem.BondType.SINGLE)
         mH.UpdatePropertyCache(strict=False)
         return mH
-    
+
     def enumerate_reordered_mol(self, mol):
         """Enumerates all possible starting atoms for a given molecule"""
         # go through each possible starting atom
@@ -422,7 +422,7 @@ def enumerate_reordered_mol(self, mol):
                 atom.SetNoImplicit(True)
             reordered_mol.UpdatePropertyCache(strict=False)
             yield reordered_mol
-    
+
     def assign_bond_orders_and_charges(self, mol):
         """Returns a sanitized molecule with infered bond orders and charges"""
         _infer_bo_and_charges(mol)
@@ -692,7 +692,7 @@ def test_pdb_names(self):
         mol = u.atoms.convert_to.rdkit()
         names = u.atoms.names
         rd_names = np.array([a.GetProp("_MDAnalysis_name")
-                                 for a in mol.GetAtoms()])
+                             for a in mol.GetAtoms()])
         assert (names == rd_names).all()
 
     @pytest.mark.parametrize("smi", [

From 430f3e6a78442cc6555945f0ce846048db315f1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 30 Mar 2022 14:53:48 +0200
Subject: [PATCH 46/47] docs update from code review

---
 package/MDAnalysis/converters/RDKit.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index 5bf73a8bd8e..ed209362c94 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -50,10 +50,11 @@
 .. warning::
     The RDKit converter is currently *experimental* and may not work as
     expected for all molecules. Currently the converter accurately
-    infers the structures of approximately 90% of the `ChEMBL27`_ dataset.
+    infers the structures of approximately 99% of the `ChEMBL27`_ dataset.
     Work is currently ongoing on further improving this and updates to the
     converter are expected in future releases of MDAnalysis.
-    Please see `Pull Request #3044`_ and `Issue #3339`_ for more details.
+    Please see `Issue #3339`_ and the `RDKitConverter benchmark`_ for more
+    details.
 
 
 
@@ -76,9 +77,8 @@
 .. Links
 
 .. _`ChEMBL27`: https://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_27/
-.. _`Pull Request #3044`: https://github.com/MDAnalysis/mdanalysis/pull/3044
 .. _`Issue #3339`: https://github.com/MDAnalysis/mdanalysis/issues/3339
-
+.. _`RDKitConverter benchmark`: https://github.com/MDAnalysis/RDKitConverter-benchmark
 """
 
 import warnings
@@ -285,13 +285,11 @@ class RDKitConverter(base.ConverterBase):
     .. versionadded:: 2.0.0
 
     .. versionchanged:: 2.2.0
-        See the full changelog for more details:
-
-        - Improved the accuracy of the converter
-        - Atoms in the resulting molecule follow the same order as in the
-          AtomGroup
-        - Fixed a ``SanitizationError`` when disabling the bond order inferring
-          step
+        Improved the accuracy of the converter. Atoms in the resulting molecule
+        now follow the same order as in the AtomGroup. The output of
+        `atom.GetMonomerInfo().GetName()` now follows the guidelines for PDB
+        files while the original name is still available through
+        `atom.GetProp("_MDAnalysis_name")`
 
     """
 
@@ -305,8 +303,7 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200,
 
         Parameters
         -----------
-        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or
-            :class:`~MDAnalysis.core.universe.Universe`
+        obj : `AtomGroup` or `Universe`
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
             used, the cached molecule and the new one have to be made from the

From f019d28350ef8adb9ba778b20dc1200e3fc4b426 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <bouysset.cedric@gmail.com>
Date: Wed, 30 Mar 2022 23:49:31 +0200
Subject: [PATCH 47/47] fix docs

---
 package/MDAnalysis/converters/RDKit.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py
index ed209362c94..782cd82f238 100644
--- a/package/MDAnalysis/converters/RDKit.py
+++ b/package/MDAnalysis/converters/RDKit.py
@@ -277,7 +277,7 @@ class RDKitConverter(base.ConverterBase):
     * `Video (4:55 to 8:05) <https://youtu.be/5b5wYmK4URU>`__
     * `Slides <https://github.com/rdkit/UGM_2020/blob/master/Presentations/C%C3%A9dricBouysset_From_RDKit_to_the_Universe.pdf>`__
 
-    Their are some molecules containing specific patterns that the converter
+    There are some molecules containing specific patterns that the converter
     cannot currently tackle correctly. See
     `Issue #3339 <https://github.com/MDAnalysis/mdanalysis/issues/3339>`__ for
     more info.
@@ -287,9 +287,9 @@ class RDKitConverter(base.ConverterBase):
     .. versionchanged:: 2.2.0
         Improved the accuracy of the converter. Atoms in the resulting molecule
         now follow the same order as in the AtomGroup. The output of
-        `atom.GetMonomerInfo().GetName()` now follows the guidelines for PDB
+        ``atom.GetMonomerInfo().GetName()`` now follows the guidelines for PDB
         files while the original name is still available through
-        `atom.GetProp("_MDAnalysis_name")`
+        ``atom.GetProp("_MDAnalysis_name")``
 
     """
 
@@ -303,7 +303,7 @@ def convert(self, obj, cache=True, NoImplicit=True, max_iter=200,
 
         Parameters
         -----------
-        obj : `AtomGroup` or `Universe`
+        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe`
         cache : bool
             Use a cached copy of the molecule's topology when available. To be
             used, the cached molecule and the new one have to be made from the