From 9ebc1b23e83eaa270a3241fe2ede21afacbdb17a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 21 Aug 2020 17:14:30 -0700
Subject: [PATCH] Towards #2468 (RDKit interoperability GSOC project)

* Overview of work done in PR

  This PR adds the `RDKitConverter` class which converts MDAnalysis
  `Universe`/`Atomgroup` objects to `RDKit rdchem.Mol` objects.

* Limitations
  - Bonds and elements must be present (the former will be inferred if not
    present).
  - This converter mainly aims at supporting cases where explicit hydrogens
    are present.

* Extra implementation details
  - Bond order and formal charges are inferred via atomic valencies & the
    number of unpaired electrons (see `_infer_bo_and_charges`).
  - In part due to the influence of atom read order on inferring, bond
    conjugation and functional groups are standardized using SMARTS
    reactions (see `_standardize_patterns`).

*  Other changes

   Also includes some PEP8 changes and some cleaning up of the tests for the
   `RDKITReader`.

* References
  For more information on the development process for this PR, see:
  1) https://cbouy.github.io/blog/2020/07/01/rdkit-converter
  2) https://cbouy.github.io/blog/2020/07/22/rdkit-converter-part2
---
 package/CHANGELOG                             |   2 +
 package/MDAnalysis/coordinates/RDKit.py       | 769 +++++++++++++++++-
 package/doc/sphinx/source/conf.py             |   1 +
 .../source/documentation_pages/converters.rst |   1 +
 .../converters/RDKitParser.rst                |   3 +
 .../topology/RDKitParser.rst                  |   1 -
 .../documentation_pages/topology_modules.rst  |   1 -
 .../MDAnalysisTests/coordinates/test_rdkit.py | 498 +++++++++++-
 8 files changed, 1229 insertions(+), 47 deletions(-)
 create mode 100644 package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst
 delete mode 100644 package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst
diff --git a/package/CHANGELOG b/package/CHANGELOG
index 1df27904a8e..9df19de4a28 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -79,6 +79,8 @@ Enhancements
   * Added new kwargs `select_remove` and `select_protein` to
     analysis.dihedrals.Janin analysis to give user more fine grained control
     over selections (PR #2899)
+  * Added an RDKit converter that works for any input with all hydrogens
+    explicit in the topology (Issue #2468, PR #2775)
 
 Changes
   * deprecated NumPy type aliases have been replaced with their actual types
diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py
index 3c64241d109..54184371912 100644
--- a/package/MDAnalysis/coordinates/RDKit.py
+++ b/package/MDAnalysis/coordinates/RDKit.py
@@ -21,25 +21,30 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
-"""RDKit molecule --- :mod:`MDAnalysis.coordinates.RDKit`
+"""RDKit molecule I/O --- :mod:`MDAnalysis.coordinates.RDKit`
 ================================================================
 
-Read coordinates data from an `RDKit <https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol>`_ :class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitReader` 
-into a MDAnalysis Universe. Convert it back to a :class:`rdkit.Chem.rdchem.Mol` with 
-:class:`RDKitConverter`.
+Read coordinates data from an `RDKit <https://www.rdkit.org/docs/>`__ :class:`rdkit.Chem.rdchem.Mol` with
+:class:`RDKitReader` into an MDAnalysis Universe. Convert it back to a
+:class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitConverter`.
 
 
 Example
 -------
 
->>> from rdkit import Chem
->>> import MDAnalysis as mda
->>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False)
->>> u = mda.Universe(mol)
->>> u
-<Universe with 42 atoms>
->>> u.trajectory
-<RDKitReader with 10 frames of 42 atoms>
+To read an RDKit molecule and then convert the AtomGroup back to an RDKit
+molecule::
+
+    >>> from rdkit import Chem
+    >>> import MDAnalysis as mda
+    >>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False)
+    >>> u = mda.Universe(mol)
+    >>> u
+    <Universe with 42 atoms>
+    >>> u.trajectory
+    <RDKitReader with 10 frames of 42 atoms>
+    >>> u.atoms.convert_to("RDKIT")
+    <rdkit.Chem.rdchem.Mol object at 0x7fcebb958148>
 
 
 Classes
@@ -51,19 +56,59 @@
 .. autoclass:: RDKitConverter
    :members:
 
+.. autofunction:: _infer_bo_and_charges
+
+.. autofunction:: _standardize_patterns
+
+.. autofunction:: _rebuild_conjugated_bonds
 
 """
 
 import warnings
+import re
+import copy
 
 import numpy as np
 
+from ..exceptions import NoDataError
+from ..topology.guessers import guess_atom_element
+from ..core.topologyattrs import _TOPOLOGY_ATTRS
 from . import memory
+from . import base
+
+try:
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+except ImportError:
+    pass
+else:
+    RDBONDORDER = {
+        1: Chem.BondType.SINGLE,
+        1.5: Chem.BondType.AROMATIC,
+        "ar": Chem.BondType.AROMATIC,
+        2: Chem.BondType.DOUBLE,
+        3: Chem.BondType.TRIPLE,
+    }
+    # add string version of the key for each bond
+    RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()})
+    RDATTRIBUTES = {
+        "altLocs": "AltLoc",
+        "chainIDs": "ChainId",
+        "icodes": "InsertionCode",
+        "names": "Name",
+        "occupancies": "Occupancy",
+        "resnames": "ResidueName",
+        "resids": "ResidueNumber",
+        "segindices": "SegmentNumber",
+        "tempfactors": "TempFactor",
+        "bfactors": "TempFactor",
+    }
+    PERIODIC_TABLE = Chem.GetPeriodicTable()
 
 
 class RDKitReader(memory.MemoryReader):
     """Coordinate reader for RDKit.
-    
+
     .. versionadded:: 2.0.0
     """
     format = 'RDKIT'
@@ -84,21 +129,709 @@ def _format_hint(thing):
 
     def __init__(self, filename, **kwargs):
         """Read coordinates from an RDKit molecule.
-        Each conformer in the original RDKit molecule will be read as a frame 
+        Each conformer in the original RDKit molecule will be read as a frame
         in the resulting universe.
 
         Parameters
         ----------
-
         filename : rdkit.Chem.rdchem.Mol
             RDKit molecule
         """
         n_atoms = filename.GetNumAtoms()
         coordinates = np.array([
-            conf.GetPositions() for conf in filename.GetConformers()], 
+            conf.GetPositions() for conf in filename.GetConformers()],
             dtype=np.float32)
         if coordinates.size == 0:
             warnings.warn("No coordinates found in the RDKit molecule")
-            coordinates = np.empty((1,n_atoms,3), dtype=np.float32)
+            coordinates = np.empty((1, n_atoms, 3), dtype=np.float32)
             coordinates[:] = np.nan
-        super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs)
\ No newline at end of file
+        super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs)
+
+
+class RDKitConverter(base.ConverterBase):
+    """Convert MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` or
+    :class:`~MDAnalysis.core.universe.Universe` to RDKit
+    :class:`~rdkit.Chem.rdchem.Mol`
+
+    MDanalysis attributes are stored in each RDKit
+    :class:`~rdkit.Chem.rdchem.Atom` of the resulting molecule in two different
+    ways:
+
+    * in an :class:`~rdkit.Chem.rdchem.AtomPDBResidueInfo` object available
+      through the :meth:`~rdkit.Chem.rdchem.Atom.GetMonomerInfo` method if it's
+      an attribute that is typically found in a PDB file,
+    * directly as an atom property available through the
+      :meth:`~rdkit.Chem.rdchem.Atom.GetProp` methods for the others.
+
+    Supported attributes:
+
+    +-----------------------+-------------------------------------------+
+    | MDAnalysis attribute  | RDKit                                     |
+    +=======================+===========================================+
+    | altLocs               | atom.GetMonomerInfo().GetAltLoc()         |
+    +-----------------------+-------------------------------------------+
+    | chainIDs              | atom.GetMonomerInfo().GetChainId()        |
+    +-----------------------+-------------------------------------------+
+    | icodes                | atom.GetMonomerInfo().GetInsertionCode()  |
+    +-----------------------+-------------------------------------------+
+    | names                 | atom.GetMonomerInfo().GetName()           |
+    +-----------------------+-------------------------------------------+
+    | occupancies           | atom.GetMonomerInfo().GetOccupancy()      |
+    +-----------------------+-------------------------------------------+
+    | resnames              | atom.GetMonomerInfo().GetResidueName()    |
+    +-----------------------+-------------------------------------------+
+    | resids                | atom.GetMonomerInfo().GetResidueNumber()  |
+    +-----------------------+-------------------------------------------+
+    | segindices            | atom.GetMonomerInfo().GetSegmentNumber()  |
+    +-----------------------+-------------------------------------------+
+    | tempfactors           | atom.GetMonomerInfo().GetTempFactor()     |
+    +-----------------------+-------------------------------------------+
+    | bfactors              | atom.GetMonomerInfo().GetTempFactor()     |
+    +-----------------------+-------------------------------------------+
+    | charges               | atom.GetDoubleProp("_MDAnalysis_charge")  |
+    +-----------------------+-------------------------------------------+
+    | indices               | atom.GetIntProp("_MDAnalysis_index")      |
+    +-----------------------+-------------------------------------------+
+    | segids                | atom.GetProp("_MDAnalysis_segid")         |
+    +-----------------------+-------------------------------------------+
+    | types                 | atom.GetProp("_MDAnalysis_type")          |
+    +-----------------------+-------------------------------------------+
+
+    Example
+    -------
+
+    To access MDAnalysis properties::
+
+        >>> import MDAnalysis as mda
+        >>> from MDAnalysis.tests.datafiles import PDB_full
+        >>> u = mda.Universe(PDB_full)
+        >>> mol = u.select_atoms('resname DMS').convert_to('RDKIT')
+        >>> mol.GetAtomWithIdx(0).GetMonomerInfo().GetResidueName()
+        'DMS'
+
+    To create a molecule for each frame of a trajectory::
+
+        from MDAnalysisTests.datafiles import PSF, DCD
+        from rdkit.Chem.Descriptors3D import Asphericity
+
+        u = mda.Universe(PSF, DCD)
+        elements = mda.topology.guessers.guess_types(u.atoms.names)
+        u.add_TopologyAttr('elements', elements)
+        ag = u.select_atoms("resid 1-10")
+
+        for ts in u.trajectory:
+            mol = ag.convert_to("RDKIT")
+            x = Asphericity(mol)
+
+
+    Notes
+    -----
+
+    The converter requires the :class:`~MDAnalysis.core.topologyattrs.Elements`
+    attribute to be present in the topology, else it will fail.
+
+    It also requires the `bonds` attribute, although they will be automatically
+    guessed if not present.
+
+    If both ``tempfactors`` and ``bfactors`` attributes are present, the
+    conversion will fail, since only one of these should be present.
+    Refer to Issue #1901 for a solution
+
+    Hydrogens should be explicit in the topology file. If this is not the case,
+    use the parameter ``NoImplicit=False`` when using the converter to allow
+    implicit hydrogens and disable inferring bond orders and charges.
+
+    Since one of the main use case of the converter is converting trajectories
+    and not just a topology, creating a new molecule from scratch for every
+    frame would be too slow so the converter uses a caching system. The cache
+    only remembers the id of the last AtomGroup that was converted, as well
+    as the arguments that were passed to the converter. This means that using
+    ``u.select_atoms("protein").convert_to("RDKIT")`` will not benefit from the
+    cache since the selection is deleted from memory as soon as the conversion
+    is finished. Instead, users should do this in two steps by first saving the
+    selection in a variable and then converting the saved AtomGroup. It also
+    means that ``ag.convert_to("RDKIT")`` followed by
+    ``ag.convert_to("RDKIT", NoImplicit=False)`` will not use the cache.
+    Finally if you're modifying the AtomGroup in place between two conversions,
+    the id of the AtomGroup won't change and thus the converter will use the
+    cached molecule. For this reason, you can pass a ``cache=False`` argument
+    to the converter to bypass the caching system.
+    Note that the cached molecule doesn't contain the coordinates of the atoms.
+
+
+    .. versionadded:: 2.0.0
+
+    """
+
+    lib = 'RDKIT'
+    units = {'time': None, 'length': 'Angstrom'}
+    _cache = dict()
+
+    def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
+        """Write selection at current trajectory frame to
+        :class:`~rdkit.Chem.rdchem.Mol`.
+
+        Parameters
+        -----------
+        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe`
+
+        cache : bool
+            Use a cached copy of the molecule's topology when available. To be
+            used, the cached molecule and the new one have to be made from the
+            same AtomGroup object (same id) and with the same arguments passed
+            to the converter (with the exception of this `cache` argument)
+        NoImplicit : bool
+            Prevent adding hydrogens to the molecule
+        max_iter : int
+            Maximum number of iterations to standardize conjugated systems.
+            See :func:`_rebuild_conjugated_bonds`
+        """
+        # parameters passed to atomgroup_to_mol and used by the cache
+        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
+
+        try:
+            from rdkit import Chem
+        except ImportError:
+            raise ImportError("RDKit is required for the RDKitConverter but "
+                              "it's not installed. Try installing it with \n"
+                              "conda install -c conda-forge rdkit")
+        try:
+            # make sure to use atoms (Issue 46)
+            ag = obj.atoms
+        except AttributeError:
+            raise TypeError("No `atoms` attribute in object of type {}, "
+                            "please use a valid AtomGroup or Universe".format(
+                                type(obj))) from None
+
+        if cache:
+            # key used to search the cache
+            key = f"<{id(ag):#x}>" + ",".join(f"{key}={value}"
+                                            for key, value in kwargs.items())
+            try:
+                mol = self._cache[key]
+            except KeyError:
+                # only keep the current molecule in cache
+                self._cache.clear()
+                # create the topology
+                self._cache[key] = mol = self.atomgroup_to_mol(ag, **kwargs)
+            # continue on copy of the cached molecule
+            mol = copy.deepcopy(mol)
+        else:
+            self._cache.clear()
+            mol = self.atomgroup_to_mol(ag, **kwargs)
+
+        # add a conformer for the current Timestep
+        if hasattr(ag, "positions"):
+            if np.isnan(ag.positions).any():
+                warnings.warn("NaN detected in coordinates, the output "
+                              "molecule will not have 3D coordinates assigned")
+            else:
+                # assign coordinates
+                conf = Chem.Conformer(mol.GetNumAtoms())
+                for atom in mol.GetAtoms():
+                    idx = atom.GetIntProp("_MDAnalysis_index")
+                    xyz = ag.positions[idx].astype(float)
+                    conf.SetAtomPosition(atom.GetIdx(), xyz)
+                mol.AddConformer(conf)
+                # assign R/S to atoms and Z/E to bonds
+                Chem.AssignStereochemistryFrom3D(mol)
+                Chem.SetDoubleBondNeighborDirections(mol)
+
+        return mol
+
+    def atomgroup_to_mol(self, ag, NoImplicit=True, max_iter=200):
+        """Converts an AtomGroup to an RDKit molecule.
+
+        Parameters
+        -----------
+        ag : MDAnalysis.core.groups.AtomGroup
+            The AtomGroup to convert
+        NoImplicit : bool
+            Prevent adding hydrogens to the molecule
+        max_iter : int
+            Maximum number of iterations to standardize conjugated systems.
+            See :func:`_rebuild_conjugated_bonds`
+        """
+        try:
+            elements = ag.elements
+        except NoDataError:
+            raise AttributeError(
+                "The `elements` attribute is required for the RDKitConverter "
+                "but is not present in this AtomGroup. Please refer to the "
+                "documentation to guess elements from other attributes or "
+                "type `help(mda.topology.guessers)`") from None
+
+        if "H" not in ag.elements:
+            warnings.warn(
+                "No hydrogen atom could be found in the topology, but the "
+                "converter requires all hydrogens to be explicit. Please "
+                "check carefully the output molecule as the converter is "
+                "likely to add negative charges and assign incorrect bond "
+                "orders to structures with implicit hydrogens. Alternatively, "
+                "you can use the parameter `NoImplicit=False` when using the "
+                "converter to allow implicit hydrogens and disable inferring "
+                "bond orders and charges."
+            )
+
+        # attributes accepted in PDBResidueInfo object
+        pdb_attrs = {}
+        if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"):
+            raise AttributeError(
+                "Both `tempfactors` and `bfactors` attributes are present but "
+                "only one can be assigned to the RDKit molecule. Please "
+                "delete the unnecessary one and retry."
+            )
+        for attr in RDATTRIBUTES.keys():
+            if hasattr(ag, attr):
+                pdb_attrs[attr] = getattr(ag, attr)
+
+        other_attrs = {}
+        for attr in ["charges", "segids", "types"]:
+            if hasattr(ag, attr):
+                other_attrs[attr] = getattr(ag, attr)
+
+        mol = Chem.RWMol()
+        # map index in universe to index in mol
+        atom_mapper = {}
+
+        for i, (atom, element) in enumerate(zip(ag, elements)):
+            # create atom
+            rdatom = Chem.Atom(element.capitalize())
+            # enable/disable adding implicit H to the molecule
+            rdatom.SetNoImplicit(NoImplicit)
+            # add PDB-like properties
+            mi = Chem.AtomPDBResidueInfo()
+            for attr, values in pdb_attrs.items():
+                _add_mda_attr_to_rdkit(attr, values[i], mi)
+            rdatom.SetMonomerInfo(mi)
+            # other properties
+            for attr in other_attrs.keys():
+                value = other_attrs[attr][i]
+                attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular
+                _set_atom_property(rdatom, attr, value)
+            _set_atom_property(rdatom, "_MDAnalysis_index", i)
+            # add atom
+            index = mol.AddAtom(rdatom)
+            atom_mapper[atom.ix] = index
+
+        try:
+            ag.bonds
+        except NoDataError:
+            warnings.warn(
+                "No `bonds` attribute in this AtomGroup. Guessing bonds based "
+                "on atoms coordinates")
+            ag.guess_bonds()
+
+        for bond in ag.bonds:
+            try:
+                bond_indices = [atom_mapper[i] for i in bond.indices]
+            except KeyError:
+                continue
+            bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE)
+            mol.AddBond(*bond_indices, bond_type)
+
+        mol.UpdatePropertyCache(strict=False)
+
+        if NoImplicit:
+            # infer bond orders and formal charges from the connectivity
+            _infer_bo_and_charges(mol)
+            mol = _standardize_patterns(mol, max_iter)
+
+        # sanitize
+        Chem.SanitizeMol(mol)
+
+        return mol
+
+
+def _add_mda_attr_to_rdkit(attr, value, mi):
+    """Converts an MDAnalysis atom attribute into the RDKit equivalent and
+    stores it into an RDKit :class:`~rdkit.Chem.rdchem.AtomPDBResidueInfo`.
+
+    Parameters
+    ----------
+    attr : str
+        Name of the atom attribute in MDAnalysis in the singular form
+    value : object, np.int or np.float
+        Attribute value as found in the AtomGroup
+    mi : rdkit.Chem.rdchem.AtomPDBResidueInfo
+        MonomerInfo object that will store the relevant atom attributes
+    """
+    if isinstance(value, np.generic):
+        # convert numpy types to python standard types
+        value = value.item()
+    if attr == "names":
+        # RDKit needs the name to be properly formatted for a
+        # PDB file (1 letter elements start at col 14)
+        name = re.findall(r'(\D+|\d+)', value)
+        if len(name) == 2:
+            symbol, number = name
+            if len(number) > 2 and len(symbol) == 1:
+                value = "{}{}".format(symbol, number)
+            else:
+                value = "{:>2.2}{:<2.2}".format(symbol, number)
+        else:
+            # no number in the name
+            value = " {:<}".format(name[0])
+
+    # set attribute value in RDKit MonomerInfo
+    rdattr = RDATTRIBUTES[attr]
+    getattr(mi, "Set%s" % rdattr)(value)
+
+
+def _set_atom_property(atom, attr, value):
+    """Saves any attribute and value into an RDKit atom property"""
+    if isinstance(value, (float, np.float)):
+        atom.SetDoubleProp(attr, float(value))
+    elif isinstance(value, (int, np.int)):
+        atom.SetIntProp(attr, int(value))
+    else:
+        atom.SetProp(attr, value)
+
+
+def _infer_bo_and_charges(mol):
+    """Infer bond orders and formal charges from a molecule.
+
+    Since most MD topology files don't explicitly retain information on bond
+    orders or charges, it has to be guessed from the topology. This is done by
+    looping over each atom and comparing its expected valence to the current
+    valence to get the Number of Unpaired Electrons (NUE).
+    If an atom has a negative NUE, it needs a positive formal charge (-NUE).
+    If two neighbouring atoms have UEs, the bond between them most
+    likely has to be increased by the value of the smallest NUE.
+    If after this process, an atom still has UEs, it needs a negative formal
+    charge of -NUE.
+
+    Parameters
+    ----------
+    mol : rdkit.Chem.rdchem.RWMol
+        The molecule is modified inplace and must have all hydrogens added
+
+    Notes
+    -----
+    This algorithm is order dependant. For example, for a carboxylate group
+    R-C(-O)-O the first oxygen read will receive a double bond and the other
+    one will be charged. It will also affect more complex conjugated systems.
+    """
+
+    for atom in sorted(mol.GetAtoms(), reverse=True,
+                       key=lambda a: _get_nb_unpaired_electrons(a)[0]):
+        # get NUE for each possible valence
+        nue = _get_nb_unpaired_electrons(atom)
+        # if there's only one possible valence state and the corresponding
+        # NUE is negative, it means we can only add a positive charge to
+        # the atom
+        if (len(nue) == 1) and (nue[0] < 0):
+            atom.SetFormalCharge(-nue[0])
+            mol.UpdatePropertyCache(strict=False)
+        # go to next atom if above case or atom has no unpaired electron
+        if (len(nue) == 1) and (nue[0] <= 0):
+            continue
+        else:
+            neighbors = sorted(atom.GetNeighbors(), reverse=True,
+                               key=lambda a: _get_nb_unpaired_electrons(a)[0])
+            # check if one of the neighbors has a common NUE
+            for i, na in enumerate(neighbors, start=1):
+                # get NUE for the neighbor
+                na_nue = _get_nb_unpaired_electrons(na)
+                # smallest common NUE
+                common_nue = min(
+                    min([i for i in nue if i >= 0], default=0),
+                    min([i for i in na_nue if i >= 0], default=0)
+                )
+                # a common NUE of 0 means we don't need to do anything
+                if common_nue != 0:
+                    # increase bond order
+                    bond = mol.GetBondBetweenAtoms(
+                        atom.GetIdx(), na.GetIdx())
+                    order = common_nue + 1
+                    bond.SetBondType(RDBONDORDER[order])
+                    mol.UpdatePropertyCache(strict=False)
+                    if i < len(neighbors):
+                        # recalculate nue for atom
+                        nue = _get_nb_unpaired_electrons(atom)
+
+            # if the atom still has unpaired electrons
+            nue = _get_nb_unpaired_electrons(atom)[0]
+            if nue > 0:
+                # transform it to a negative charge
+                atom.SetFormalCharge(-nue)
+                atom.SetNumRadicalElectrons(0)
+                mol.UpdatePropertyCache(strict=False)
+
+
+def _get_nb_unpaired_electrons(atom):
+    """Calculate the number of unpaired electrons (NUE) of an atom
+
+    Parameters
+    ----------
+    atom: rdkit.Chem.rdchem.Atom
+        The atom for which the NUE will be computed
+
+    Returns
+    -------
+    nue : list
+        The NUE for each possible valence of the atom
+    """
+    expected_vs = PERIODIC_TABLE.GetValenceList(atom.GetAtomicNum())
+    current_v = atom.GetTotalValence() - atom.GetFormalCharge()
+    return [v - current_v for v in expected_vs]
+
+
+def _standardize_patterns(mol, max_iter=200):
+    """Standardizes functional groups
+
+    Uses :func:`_rebuild_conjugated_bonds` to standardize conjugated systems,
+    and SMARTS reactions for other functional groups.
+    Due to the way reactions work, we first have to split the molecule by
+    fragments. Then, for each fragment, we apply the standardization reactions.
+    Finally, the fragments are recombined.
+
+    Parameters
+    ----------
+    mol : rdkit.Chem.rdchem.RWMol
+        The molecule to standardize
+    max_iter : int
+        Maximum number of iterations to standardize conjugated systems
+
+    Returns
+    -------
+    mol : rdkit.Chem.rdchem.Mol
+        The standardized molecule
+
+    Notes
+    -----
+    The following functional groups are transformed in this order:
+
+    +---------------+-----------------------------------------------------------------------------+
+    | Name          | Reaction                                                                    |
+    +===============+=============================================================================+
+    | conjugated    | ``[*-;!O:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]``               |
+    +---------------+-----------------------------------------------------------------------------+
+    | conjugated-N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]``              |
+    +---------------+-----------------------------------------------------------------------------+
+    | conjugated-O- | ``[O:1]=[#6:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]``       |
+    +---------------+-----------------------------------------------------------------------------+
+    | Cterm         | ``[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]``                                          |
+    +---------------+-----------------------------------------------------------------------------+
+    | Nterm         | ``[N-;X2;H1:1]>>[N+0:1]``                                                   |
+    +---------------+-----------------------------------------------------------------------------+
+    | keto-enolate  | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]``                            |
+    +---------------+-----------------------------------------------------------------------------+
+    | arginine      | ``[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]>>[N:1]-[C+0:2](-[N:3])=[N+:4]`` |
+    +---------------+-----------------------------------------------------------------------------+
+    | sulfone       | ``[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]>>[S:1](=[O+0:2])=[O+0:3]``              |
+    +---------------+-----------------------------------------------------------------------------+
+    | nitro         | ``[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]>>[N+:1](-[O-:2])=[O+0:3]``              |
+    +---------------+-----------------------------------------------------------------------------+
+
+    """
+
+    # standardize conjugated systems
+    _rebuild_conjugated_bonds(mol, max_iter)
+
+    fragments = []
+    for reactant in Chem.GetMolFrags(mol, asMols=True):
+
+        for name, reaction in [
+            ("Cterm", "[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]"),
+            ("Nterm", "[N-;X2;H1:1]>>[N+0:1]"),
+            ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"),
+            ("ARG", "[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]"
+                    ">>[N:1]-[C+0:2](-[N:3])=[N+:4]"),
+            ("sulfone", "[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]"
+                        ">>[S:1](=[O+0:2])=[O+0:3]"),
+            ("nitro", "[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]"
+                      ">>[N+:1](-[O-:2])=[O+0:3]"),
+        ]:
+            reactant.UpdatePropertyCache(strict=False)
+            Chem.Kekulize(reactant)
+            reactant = _run_reaction(reaction, reactant)
+
+        fragments.append(reactant)
+
+    # recombine fragments
+    mol = fragments.pop(0)
+    for fragment in fragments:
+        mol = Chem.CombineMols(mol, fragment)
+
+    return mol
+
+
+def _run_reaction(reaction, reactant):
+    """Runs a reaction until all reactants are transformed
+
+    If a pattern is matched N times in the molecule, the reaction will return N
+    products as an array of shape (N, 1). Only the first product will be kept
+    and the same reaction will be reapplied to the product N times in total.
+
+    Parameters
+    ----------
+    reaction : str
+        SMARTS reaction
+    reactant : rdkit.Chem.rdchem.Mol
+        The molecule to transform
+
+    Returns
+    -------
+    product : rdkit.Chem.rdchem.Mol
+        The final product of the reaction
+    """
+    # count how many times the reaction should be run
+    pattern = Chem.MolFromSmarts(reaction.split(">>")[0])
+    n_matches = len(reactant.GetSubstructMatches(pattern))
+
+    # run the reaction for each matched pattern
+    rxn = AllChem.ReactionFromSmarts(reaction)
+    for n in range(n_matches):
+        products = rxn.RunReactants((reactant,))
+        # only keep the first product
+        if products:
+            product = products[0][0]
+            # map back atom properties from the reactant to the product
+            _reassign_props_after_reaction(reactant, product)
+            # apply the next reaction to the product
+            product.UpdatePropertyCache(strict=False)
+            reactant = product
+        else:
+            # exit the n_matches loop if there's no product. Example
+            # where this is needed: SO^{4}_{2-} will match the sulfone
+            # pattern 6 times but the reaction is only needed once
+            break
+    return reactant
+
+
+def _rebuild_conjugated_bonds(mol, max_iter=200):
+    """Rebuild conjugated bonds without negatively charged atoms at the
+    beginning and end of the conjugated system
+
+    Depending on the order in which atoms are read during the conversion, the
+    :func:`_infer_bo_and_charges` function might write conjugated systems with
+    a double bond less and both edges of the system as anions instead of the
+    usual alternating single and double bonds. This function corrects this
+    behaviour by using an iterative procedure.
+    The problematic molecules always follow the same pattern:
+    ``anion[-*=*]n-anion`` instead of ``*=[*-*=]n*``, where ``n`` is the number
+    of successive single and double bonds. The goal of the iterative procedure
+    is to make ``n`` as small as possible by consecutively transforming
+    ``anion-*=*`` into ``*=*-anion`` until it reaches the smallest pattern with
+    ``n=1``. This last pattern is then transformed from ``anion-*=*-anion`` to
+    ``*=*-*=*``.
+    Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS, we can
+    control that we don't transform the same triplet of atoms back and forth by
+    adding their indices to a list.
+    The molecule needs to be kekulized first to also cover systems
+    with aromatic rings.
+
+    Parameters
+    ----------
+    mol : rdkit.Chem.rdchem.RWMol
+        The molecule to transform, modified inplace
+    max_iter : int
+        Maximum number of iterations performed by the function
+    """
+    mol.UpdatePropertyCache(strict=False)
+    Chem.Kekulize(mol)
+    pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]")
+    # number of unique matches with the pattern
+    n_matches = len(set([match[0]
+                         for match in mol.GetSubstructMatches(pattern)]))
+    if n_matches == 0:
+        # nothing to standardize
+        return
+    # check if there's an even number of anion-*=* patterns
+    elif n_matches % 2 == 0:
+        end_pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]-[*-]")
+    else:
+        # as a last resort, the only way to standardize is to find a nitrogen
+        # that can accept a double bond and a positive charge
+        # or a carbonyl that will become an enolate
+        end_pattern = Chem.MolFromSmarts(
+            "[*-;!O]-[*+0]=[*+0]-[$([#7;X3;v3]),$([#6+0]=O)]")
+    backtrack = []
+    for _ in range(max_iter):
+        # simplest case where n=1
+        end_match = mol.GetSubstructMatch(end_pattern)
+        if end_match:
+            # index of each atom
+            anion1, a1, a2, anion2 = end_match
+            term_atom = mol.GetAtomWithIdx(anion2)
+            # [*-]-*=*-C=O
+            if term_atom.GetAtomicNum() == 6 and term_atom.GetFormalCharge() == 0:
+                for neighbor in term_atom.GetNeighbors():
+                    bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx())
+                    if neighbor.GetAtomicNum() == 8 and bond.GetBondTypeAsDouble() == 2:
+                        bond.SetBondType(Chem.BondType.SINGLE)
+                        neighbor.SetFormalCharge(-1)
+            else:
+                # [*-]-*=*-N
+                if term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 0:
+                    end_charge = 1
+                # [*-]-*=*-[*-]
+                else:
+                    end_charge = 0
+                mol.GetAtomWithIdx(anion2).SetFormalCharge(end_charge)
+            # common part of the conjugated systems: [*-]-*=*
+            mol.GetAtomWithIdx(anion1).SetFormalCharge(0)
+            mol.GetBondBetweenAtoms(anion1, a1).SetBondType(
+                Chem.BondType.DOUBLE)
+            mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE)
+            mol.GetBondBetweenAtoms(a2, anion2).SetBondType(
+                Chem.BondType.DOUBLE)
+            mol.UpdatePropertyCache(strict=False)
+
+        # shorten the anion-anion pattern from n to n-1
+        matches = mol.GetSubstructMatches(pattern)
+        if matches:
+            # check if we haven't already transformed this triplet
+            for match in matches:
+                # sort the indices for the comparison
+                g = tuple(sorted(match))
+                if g in backtrack:
+                    # already transformed
+                    continue
+                else:
+                    # take the first one that hasn't been tried
+                    anion, a1, a2 = match
+                    backtrack.append(g)
+                    break
+            else:
+                anion, a1, a2 = matches[0]
+            # charges
+            mol.GetAtomWithIdx(anion).SetFormalCharge(0)
+            mol.GetAtomWithIdx(a2).SetFormalCharge(-1)
+            # bonds
+            mol.GetBondBetweenAtoms(anion, a1).SetBondType(
+                Chem.BondType.DOUBLE)
+            mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE)
+            mol.UpdatePropertyCache(strict=False)
+            # start new iteration
+            continue
+
+        # no more changes to apply
+        return
+
+    # reached max_iter
+    warnings.warn("The standardization could not be completed within a "
+                  "reasonable number of iterations")
+
+
+def _reassign_props_after_reaction(reactant, product):
+    """Maps back atomic properties from the reactant to the product.
+    The product molecule is modified inplace.
+    """
+    for atom in product.GetAtoms():
+        try:
+            atom.GetIntProp("old_mapno")
+        except KeyError:
+            pass
+        else:
+            atom.ClearProp("old_mapno")
+            idx = atom.GetUnsignedProp("react_atom_idx")
+            old_atom = reactant.GetAtomWithIdx(idx)
+            for prop, value in old_atom.GetPropsAsDict().items():
+                _set_atom_property(atom, prop, value)
+            # fix bonds with "crossed" stereo
+            for bond in atom.GetBonds():
+                if bond.GetStereo() == Chem.BondStereo.STEREOANY:
+                    bond.SetStereo(Chem.BondStereo.STEREONONE)
+        atom.ClearProp("react_atom_idx")
diff --git a/package/doc/sphinx/source/conf.py b/package/doc/sphinx/source/conf.py
index 296b479d6f9..a149c769a8f 100644
--- a/package/doc/sphinx/source/conf.py
+++ b/package/doc/sphinx/source/conf.py
@@ -346,4 +346,5 @@
                        'https://gsd.readthedocs.io/en/stable/': None,
                        'https://parmed.github.io/ParmEd/html/': None,
                        'https://docs.h5py.org/en/stable': None,
+                       'https://www.rdkit.org/docs/': None,
                        }
diff --git a/package/doc/sphinx/source/documentation_pages/converters.rst b/package/doc/sphinx/source/documentation_pages/converters.rst
index 8bdcb913fc0..c70d2324184 100644
--- a/package/doc/sphinx/source/documentation_pages/converters.rst
+++ b/package/doc/sphinx/source/documentation_pages/converters.rst
@@ -33,4 +33,5 @@ you will have to specify a package name (case-insensitive). ::
    :maxdepth: 1
 
    converters/ParmEdParser
+   converters/RDKitParser
 
diff --git a/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst
new file mode 100644
index 00000000000..174a1ff1115
--- /dev/null
+++ b/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst
@@ -0,0 +1,3 @@
+.. automodule:: MDAnalysis.topology.RDKitParser
+
+.. automodule:: MDAnalysis.coordinates.RDKit
diff --git a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst
deleted file mode 100644
index 9f6bbd1dac9..00000000000
--- a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. automodule:: MDAnalysis.topology.RDKitParser
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst
index 04ebf433e27..ed8caba8ce6 100644
--- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst
+++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst
@@ -43,7 +43,6 @@ topology file format in the *topology_format* keyword argument to
    topology/PDBQTParser
    topology/PQRParser
    topology/PSFParser
-   topology/RDKitParser
    topology/TOPParser
    topology/TPRParser
    topology/TXYZParser
diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
index 7bdb845f95a..930e65fa730 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py
@@ -20,59 +20,503 @@
 # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
-import warnings
 
+import copy
 import pytest
 import MDAnalysis as mda
+from MDAnalysis.topology.guessers import guess_atom_element
 import numpy as np
 from numpy.testing import (assert_equal,
                            assert_almost_equal)
 
-from MDAnalysisTests.datafiles import mol2_molecule
+from MDAnalysisTests.datafiles import mol2_molecule, PDB_full, GRO, PDB_helix
+from MDAnalysisTests.util import import_not_available
 
-Chem = pytest.importorskip("rdkit.Chem")
-AllChem = pytest.importorskip("rdkit.Chem.AllChem")
 
-def mol2_mol():
-    return Chem.MolFromMol2File(mol2_molecule, removeHs=False)
+try:
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+    from MDAnalysis.coordinates.RDKit import (
+        RDATTRIBUTES,
+        _add_mda_attr_to_rdkit,
+        _infer_bo_and_charges,
+        _standardize_patterns,
+        _rebuild_conjugated_bonds,
+        _set_atom_property,
+        _reassign_props_after_reaction,
+    )
+except ImportError:
+    pass
 
-def smiles_mol():
-    mol = Chem.MolFromSmiles("CCO")
-    mol = Chem.AddHs(mol)
-    cids = AllChem.EmbedMultipleConfs(mol, numConfs=3)
-    return mol
 
+requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"),
+                                    reason="requires RDKit")
+
+
+@pytest.mark.skipif(not import_not_available("rdkit"),
+                    reason="only for min dependencies build")
+class TestRequiresRDKit(object):
+    def test_converter_requires_rdkit(self):
+        u = mda.Universe(PDB_full)
+        with pytest.raises(ImportError,
+                           match="RDKit is required for the RDKitConverter"):
+            u.atoms.convert_to("RDKIT")
+
+
+@requires_rdkit
+class MolFactory:
+    def mol2_mol():
+        return Chem.MolFromMol2File(mol2_molecule, removeHs=False)
+
+    def smiles_mol():
+        mol = Chem.MolFromSmiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")
+        mol = Chem.AddHs(mol)
+        cids = AllChem.EmbedMultipleConfs(mol, numConfs=3)
+        return mol
+
+    def dummy_product():
+        mol = Chem.RWMol()
+        atom = Chem.Atom(1)
+        atom.SetIntProp("old_mapno", 0)
+        atom.SetUnsignedProp("react_atom_idx", 0)
+        mol.AddAtom(atom)
+        return mol
+
+    def dummy_product_nomap():
+        mol = Chem.RWMol()
+        atom = Chem.Atom(1)
+        atom.SetUnsignedProp("react_atom_idx", 0)
+        mol.AddAtom(atom)
+        return mol
+
+    def dummy_reactant_noprops():
+        mol = Chem.RWMol()
+        atom = Chem.Atom(1)
+        mol.AddAtom(atom)
+        return mol
+
+    def dummy_reactant():
+        mol = Chem.RWMol()
+        atom = Chem.Atom(1)
+        atom.SetProp("foo", "bar")
+        atom.SetIntProp("_MDAnalysis_index", 1)
+        atom.SetDoubleProp("_MDAnalysis_charge", 4.2)
+        atom.SetProp("_MDAnalysis_type", "C.3")
+        mol.AddAtom(atom)
+        return mol
+
+
+@pytest.fixture(scope="function")
+def rdmol(request):
+    return getattr(MolFactory, request.param)()
+
+
+@pytest.fixture(scope="function")
+def product(request):
+    return getattr(MolFactory, request.param)()
+
+
+@requires_rdkit
 class TestRDKitReader(object):
     @pytest.mark.parametrize("rdmol, n_frames", [
-        (mol2_mol(), 1),
-        (smiles_mol(), 3),
-    ])
+        ("mol2_mol", 1),
+        ("smiles_mol", 3),
+    ], indirect=["rdmol"])
     def test_coordinates(self, rdmol, n_frames):
         universe = mda.Universe(rdmol)
         assert universe.trajectory.n_frames == n_frames
         expected = np.array([
-            conf.GetPositions() for conf in rdmol.GetConformers()], 
+            conf.GetPositions() for conf in rdmol.GetConformers()],
             dtype=np.float32)
         assert_equal(expected, universe.trajectory.coordinate_array)
 
     def test_no_coordinates(self):
-        with warnings.catch_warnings(record=True) as w:
-            # Cause all warnings to always be triggered.
-            warnings.simplefilter("always")
-            # Trigger a warning.
+        with pytest.warns(UserWarning, match="No coordinates found"):
             u = mda.Universe.from_smiles("CCO", generate_coordinates=False)
-            # Verify the warning
-            assert len(w) == 1
-            assert "No coordinates found" in str(
-                w[-1].message)
-        expected = np.empty((1,u.atoms.n_atoms,3), dtype=np.float32)
+        expected = np.empty((1, u.atoms.n_atoms, 3), dtype=np.float32)
         expected[:] = np.nan
         assert_equal(u.trajectory.coordinate_array, expected)
 
     def test_compare_mol2reader(self):
-        universe = mda.Universe(mol2_mol())
+        universe = mda.Universe(MolFactory.mol2_mol())
         mol2 = mda.Universe(mol2_molecule)
         assert universe.trajectory.n_frames == mol2.trajectory.n_frames
-        assert_equal(universe.trajectory.ts.positions, 
+        assert_equal(universe.trajectory.ts.positions,
                      mol2.trajectory.ts.positions)
-        
+
+
+@requires_rdkit
+class TestRDKitConverter(object):
+    @pytest.fixture
+    def pdb(self):
+        return mda.Universe(PDB_full)
+
+    @pytest.fixture
+    def mol2(self):
+        u = mda.Universe(mol2_molecule)
+        # add elements
+        elements = np.array([guess_atom_element(x) for x in u.atoms.types],
+                            dtype=object)
+        u.add_TopologyAttr('elements', elements)
+        return u
+
+    @pytest.fixture
+    def peptide(self):
+        u = mda.Universe(GRO)
+        elements = mda.topology.guessers.guess_types(u.atoms.names)
+        u.add_TopologyAttr('elements', elements)
+        return u.select_atoms("resid 2-12")
+
+    @pytest.mark.parametrize("smi", ["[H]", "C", "O", "[He]"])
+    def test_single_atom_mol(self, smi):
+        u = mda.Universe.from_smiles(smi, addHs=False,
+                                     generate_coordinates=False)
+        mol = u.atoms.convert_to("RDKIT")
+        assert mol.GetNumAtoms() == 1
+        assert mol.GetAtomWithIdx(0).GetSymbol() == smi.strip("[]")
+
+    @pytest.mark.parametrize("resname, n_atoms, n_fragments", [
+        ("PRO", 14, 1),
+        ("ILE", 38, 1),
+        ("ALA", 20, 2),
+        ("GLY", 21, 3),
+    ])
+    def test_mol_from_selection(self, peptide, resname, n_atoms, n_fragments):
+        mol = peptide.select_atoms("resname %s" % resname).convert_to("RDKIT")
+        assert n_atoms == mol.GetNumAtoms()
+        assert n_fragments == len(Chem.GetMolFrags(mol))
+
+    @pytest.mark.parametrize("sel_str, atom_index", [
+        ("resid 1", 0),
+        ("resname LYS and name NZ", 1),
+        ("resid 34 and altloc B", 2),
+    ])
+    def test_monomer_info(self, pdb, sel_str, atom_index):
+        sel = pdb.select_atoms(sel_str)
+        umol = sel.convert_to("RDKIT")
+        atom = umol.GetAtomWithIdx(atom_index)
+        mda_index = atom.GetIntProp("_MDAnalysis_index")
+        mi = atom.GetMonomerInfo()
+
+        for mda_attr, rd_attr in RDATTRIBUTES.items():
+            rd_value = getattr(mi, "Get%s" % rd_attr)()
+            if hasattr(sel, mda_attr):
+                mda_value = getattr(sel, mda_attr)[mda_index]
+                if mda_attr == "names":
+                    rd_value = rd_value.strip()
+                assert rd_value == mda_value
+
+    @pytest.mark.parametrize("rdmol", ["mol2_mol", "smiles_mol"],
+                             indirect=True)
+    def test_identical_topology(self, rdmol):
+        u = mda.Universe(rdmol)
+        umol = u.atoms.convert_to("RDKIT")
+        assert rdmol.HasSubstructMatch(umol) and umol.HasSubstructMatch(rdmol)
+        u2 = mda.Universe(umol)
+        assert_equal(u.atoms.bonds, u2.atoms.bonds)
+        assert_equal(u.atoms.elements, u2.atoms.elements)
+        assert_equal(u.atoms.names, u2.atoms.names)
+        assert_almost_equal(u.atoms.positions, u2.atoms.positions, decimal=7)
+
+    def test_raise_requires_elements(self):
+        u = mda.Universe(mol2_molecule)
+        with pytest.raises(
+            AttributeError,
+            match="`elements` attribute is required for the RDKitConverter"
+        ):
+            u.atoms.convert_to("RDKIT")
+
+    def test_warn_guess_bonds(self):
+        u = mda.Universe(PDB_helix)
+        with pytest.warns(UserWarning,
+                          match="No `bonds` attribute in this AtomGroup"):
+            u.atoms.convert_to("RDKIT")
+
+    def test_warn_no_hydrogen(self):
+        u = mda.Universe.from_smiles("O=O")
+        with pytest.warns(
+            UserWarning,
+            match="No hydrogen atom could be found in the topology"
+        ):
+            u.atoms.convert_to("RDKIT")
+
+    @pytest.mark.parametrize("attr, value, expected", [
+        ("names", "C1", " C1 "),
+        ("names", "C12", " C12"),
+        ("names", "Cl1", "Cl1 "),
+        ("altLocs", "A", "A"),
+        ("chainIDs", "B", "B"),
+        ("icodes", "C", "C"),
+        ("occupancies", 0.5, 0.5),
+        ("resnames", "LIG", "LIG"),
+        ("resids", 123, 123),
+        ("segindices", 1, 1),
+        ("tempfactors", 0.8, 0.8),
+        ("bfactors", 0.8, 0.8),
+    ])
+    def test_add_mda_attr_to_rdkit(self, attr, value, expected):
+        mi = Chem.AtomPDBResidueInfo()
+        _add_mda_attr_to_rdkit(attr, value, mi)
+        rdvalue = getattr(mi, "Get%s" % RDATTRIBUTES[attr])()
+        assert rdvalue == expected
+
+    def test_bfactors_tempfactors_raises_error(self):
+        u = mda.Universe.from_smiles("C")
+        bfactors = np.array(u.atoms.n_atoms*[1.0], dtype=np.float32)
+        u.add_TopologyAttr('bfactors', bfactors)
+        u.add_TopologyAttr('tempfactors', bfactors)
+        with pytest.raises(
+            AttributeError,
+            match="Both `tempfactors` and `bfactors` attributes are present"
+        ):
+            u.atoms.convert_to("RDKIT")
+
+    @pytest.mark.parametrize("idx", [0, 10, 42])
+    def test_other_attributes(self, mol2, idx):
+        mol = mol2.atoms.convert_to("RDKIT")
+        rdatom = mol.GetAtomWithIdx(idx)
+        rdprops = rdatom.GetPropsAsDict()
+        mda_idx = int(rdprops["_MDAnalysis_index"])
+        for prop in ["charge", "segid", "type"]:
+            rdprop = rdprops["_MDAnalysis_%s" % prop]
+            mdaprop = getattr(mol2.atoms[mda_idx], prop)
+            assert rdprop == mdaprop
+
+    @pytest.mark.parametrize("sel_str", [
+        "resname ALA",
+        "resname PRO and segid A",
+    ])
+    def test_index_property(self, pdb, sel_str):
+        ag = pdb.select_atoms(sel_str)
+        mol = ag.convert_to("RDKIT")
+        expected = [i for i in range(len(ag))]
+        indices = sorted([a.GetIntProp("_MDAnalysis_index")
+                          for a in mol.GetAtoms()])
+        assert_equal(indices, expected)
+
+    def test_assign_coordinates(self, pdb):
+        mol = pdb.atoms.convert_to("RDKIT")
+        positions = mol.GetConformer().GetPositions()
+        indices = sorted(mol.GetAtoms(),
+                         key=lambda a: a.GetIntProp("_MDAnalysis_index"))
+        indices = [a.GetIdx() for a in indices]
+        assert_almost_equal(positions[indices], pdb.atoms.positions)
+
+    def test_assign_stereochemistry(self, mol2):
+        umol = mol2.atoms.convert_to("RDKIT")
+        rdmol = Chem.MolFromMol2File(mol2_molecule, removeHs=False)
+        assert rdmol.HasSubstructMatch(
+            umol, useChirality=True) and umol.HasSubstructMatch(
+            rdmol, useChirality=True)
+
+    def test_trajectory_coords(self):
+        u = mda.Universe.from_smiles(
+            "CCO", numConfs=3, rdkit_kwargs=dict(randomSeed=42))
+        for ts in u.trajectory:
+            mol = u.atoms.convert_to("RDKIT")
+            positions = mol.GetConformer().GetPositions()
+            indices = sorted(mol.GetAtoms(),
+                             key=lambda a: a.GetIntProp("_MDAnalysis_index"))
+            indices = [a.GetIdx() for a in indices]
+            assert_almost_equal(positions[indices], ts.positions)
+
+    def test_nan_coords(self):
+        u = mda.Universe.from_smiles("CCO")
+        xyz = u.atoms.positions
+        xyz[0][2] = np.nan
+        u.atoms.positions = xyz
+        with pytest.warns(UserWarning, match="NaN detected"):
+            mol = u.atoms.convert_to("RDKIT")
+        with pytest.raises(ValueError, match="Bad Conformer Id"):
+            mol.GetConformer()
+
+    def test_cache(self):
+        u = mda.Universe.from_smiles("CCO", numConfs=5)
+        ag = u.atoms
+        cache = mda.coordinates.RDKit.RDKitConverter._cache
+        previous_cache = None
+        for ts in u.trajectory:
+            mol = ag.convert_to("RDKIT")
+            if previous_cache:
+                # the cache shouldn't change when iterating on timesteps
+                assert cache == previous_cache
+                previous_cache = copy.deepcopy(cache)
+        # cached molecule shouldn't store coordinates
+        mol = list(cache.values())[0]
+        with pytest.raises(ValueError, match="Bad Conformer Id"):
+            mol.GetConformer()
+        # only 1 molecule should be cached
+        u = mda.Universe.from_smiles("C")
+        mol = u.atoms.convert_to("RDKIT")
+        assert len(cache) == 1
+        assert cache != previous_cache
+        # converter with kwargs
+        rdkit_converter = mda.coordinates.RDKit.RDKitConverter().convert
+        # cache should depend on passed arguments
+        previous_cache = copy.deepcopy(cache)
+        mol = rdkit_converter(u.atoms, NoImplicit=False)
+        assert cache != previous_cache
+        # skip cache
+        mol = rdkit_converter(u.atoms, cache=False)
+        assert cache == {}
+
+
+@requires_rdkit
+class TestRDKitFunctions(object):
+    @pytest.mark.parametrize("smi, out", [
+        ("C(-[H])(-[H])(-[H])-[H]", "C"),
+        ("[C](-[H])(-[H])-[C](-[H])-[H]", "C=C"),
+        ("[C]1(-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C]1(-[H])",
+         "c1ccccc1"),
+        ("C-[C](-[H])-[O]", "C(=O)C"),
+        ("[H]-[C](-[O])-[N](-[H])-[H]", "C(=O)N"),
+        ("[N]-[C]-[H]", "N#C"),
+        ("C-[C](-[O]-[H])-[O]", "CC(=O)O"),
+        ("[P](-[O]-[H])(-[O]-[H])(-[O]-[H])-[O]", "P(O)(O)(O)=O"),
+        ("[P](-[O]-[H])(-[O]-[H])(-[O])-[O]", "P([O-])(O)(O)=O"),
+        ("[P](-[O]-[H])(-[O])(-[O])-[O]", "P([O-])([O-])(O)=O"),
+        ("[P](-[O])(-[O])(-[O])-[O]", "P([O-])([O-])([O-])=O"),
+        ("[H]-[O]-[N]-[O]", "ON=O"),
+        ("[N]-[C]-[O]", "N#C[O-]"),
+    ])
+    def test_infer_bond_orders(self, smi, out):
+        mol = Chem.MolFromSmiles(smi, sanitize=False)
+        mol.UpdatePropertyCache(strict=False)
+        _infer_bo_and_charges(mol)
+        Chem.SanitizeMol(mol)
+        mol = Chem.RemoveHs(mol)
+        molref = Chem.MolFromSmiles(out)
+        assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch(
+            mol), "{} != {}".format(Chem.MolToSmiles(mol), out)
+
+    @pytest.mark.parametrize("smi, atom_idx, charge", [
+        ("[C](-[H])(-[H])(-[H])-[O]", 4, -1),
+        ("[N]-[C]-[O]", 2, -1),
+        ("[N](-[H])(-[H])(-[H])-[H]", 0, 1),
+        ("C-[C](-[O])-[O]", 3, -1),
+    ])
+    def test_infer_charges(self, smi, atom_idx, charge):
+        mol = Chem.MolFromSmiles(smi, sanitize=False)
+        mol.UpdatePropertyCache(strict=False)
+        _infer_bo_and_charges(mol)
+        Chem.SanitizeMol(mol)
+        assert mol.GetAtomWithIdx(atom_idx).GetFormalCharge() == charge
+
+    @pytest.mark.parametrize("smi, out", [
+        ("[S](-[O]-[H])(-[O]-[H])(-[O])-[O]", "S(=O)(=O)(O)O"),
+        ("[S](-[O]-[H])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])O"),
+        ("[S](-[O])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])[O-]"),
+        ("C-[N](-[H])-[C](-[N](-[H])-[H])-[N](-[H])-[H]",
+         "CNC(N)=[N+](-[H])-[H]"),
+        ("[O]-[C](-[H])-[C](-[H])-[H]", "C([O-])=C"),
+        ("C-[N](-[O])-[O]", "C[N+](=O)[O-]"),
+        ("C(-[N](-[O])-[O])-[N](-[O])-[O]", "C([N+](=O)[O-])[N+](=O)[O-]"),
+        ("C-[N](-[O])-[O].C-[N](-[O])-[O]", "C[N+](=O)[O-].C[N+](=O)[O-]"),
+        ("[C-](=O)-C", "[C](=O)-C"),
+        ("[H]-[N-]-C", "[H]-[N]-C"),
+        ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])1", 
+         "[O-]c1ccccc1"),
+        ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C]1-[O]", 
+         "[O-]C1=CC=CC1=O"),
+        ("[H]-[C]-[C]-[C](-[H])-[C](-[H])-[H]", "C#CC=C"),
+        ("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"),
+    ])
+    def test_standardize_patterns(self, smi, out):
+        mol = Chem.MolFromSmiles(smi, sanitize=False)
+        mol.UpdatePropertyCache(strict=False)
+        _infer_bo_and_charges(mol)
+        mol = _standardize_patterns(mol)
+        Chem.SanitizeMol(mol)
+        mol = Chem.RemoveHs(mol)
+        molref = Chem.MolFromSmiles(out)
+        assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch(
+            mol), "{} != {}".format(Chem.MolToSmiles(mol), out)
+
+    @pytest.mark.parametrize("attr, value, getter", [
+        ("index", 42, "GetIntProp"),
+        ("index", np.int(42), "GetIntProp"),
+        ("charge", 4.2, "GetDoubleProp"),
+        ("charge", np.float(4.2), "GetDoubleProp"),
+        ("type", "C.3", "GetProp"),
+    ])
+    def test_set_atom_property(self, attr, value, getter):
+        atom = Chem.Atom(1)
+        prop = "_MDAnalysis_%s" % attr
+        _set_atom_property(atom, prop, value)
+        assert getattr(atom, getter)(prop) == value
+
+    @pytest.mark.parametrize("rdmol, product, name", [
+        ("dummy_reactant", "dummy_product", "props"),
+        ("dummy_reactant_noprops", "dummy_product", "noprops"),
+        ("dummy_reactant", "dummy_product_nomap", "nomap"),
+    ], indirect=["rdmol", "product"])
+    def test_reassign_props_after_reaction(self, rdmol, product, name):
+        _reassign_props_after_reaction(rdmol, product)
+        atom = product.GetAtomWithIdx(0)
+        if name == "props":
+            assert atom.GetProp("foo") == "bar"
+            assert atom.GetIntProp("_MDAnalysis_index") == 1
+            assert atom.GetDoubleProp("_MDAnalysis_charge") == 4.2
+            assert atom.GetProp("_MDAnalysis_type") == "C.3"
+            with pytest.raises(KeyError, match="old_mapno"):
+                atom.GetIntProp("old_mapno")
+            with pytest.raises(KeyError, match="react_atom_idx"):
+                atom.GetUnsignedProp("react_atom_idx")
+        elif name == "noprops":
+            with pytest.raises(KeyError, match="old_mapno"):
+                atom.GetIntProp("old_mapno")
+            with pytest.raises(KeyError, match="react_atom_idx"):
+                atom.GetUnsignedProp("react_atom_idx")
+        elif name == "nomap":
+            with pytest.raises(KeyError, match="react_atom_idx"):
+                atom.GetUnsignedProp("react_atom_idx")
+            with pytest.raises(KeyError, match="_MDAnalysis_index"):
+                atom.GetIntProp("_MDAnalysis_index")
+
+    @pytest.mark.parametrize("smi_in", [
+        "c1ccc(cc1)-c1ccccc1-c1ccccc1",
+        "c1cc[nH]c1",
+        "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",
+        "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21",
+        "c1csc(c1)-c1ccoc1-c1cc[nH]c1",
+        "C1=C2C(=NC=N1)N=CN2",
+        "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]",
+        "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1",
+        "C=CC=CC=CC=CC=CC=C",
+        "NCCCCC([NH3+])C(=O)[O-]",
+        "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C",
+        "C#CC=C",
+    ])
+    def test_order_independant(self, smi_in):
+        # generate mol with hydrogens but without bond orders
+        ref = Chem.MolFromSmiles(smi_in)
+        template = Chem.AddHs(ref)
+        for atom in template.GetAtoms():
+            atom.SetIsAromatic(False)
+            atom.SetFormalCharge(0)
+        for bond in template.GetBonds():
+            bond.SetIsAromatic(False)
+            bond.SetBondType(Chem.BondType.SINGLE)
+
+        # go through each possible starting atom
+        for a in template.GetAtoms():
+            smi = Chem.MolToSmiles(template, rootedAtAtom=a.GetIdx())
+            m = Chem.MolFromSmiles(smi, sanitize=False)
+            for atom in m.GetAtoms():
+                atom.SetFormalCharge(0)
+                atom.SetNoImplicit(True)
+            m.UpdatePropertyCache(strict=False)
+            _infer_bo_and_charges(m)
+            m = _standardize_patterns(m)
+            Chem.SanitizeMol(m)
+            m = Chem.RemoveHs(m)
+            assert m.HasSubstructMatch(ref) and ref.HasSubstructMatch(
+                m), "Failed when starting from atom %s%d" % (
+                    a.GetSymbol(), a.GetIdx())
+
+    def test_warn_conjugated_max_iter(self):
+        smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]"
+        mol = Chem.MolFromSmiles(smi)
+        with pytest.warns(UserWarning,
+                          match="reasonable number of iterations"):
+            _rebuild_conjugated_bonds(mol, 2)