Merge pull request #99 from ReactionMechanismGenerator/multiplicity

Preserve multiplicity throughout Species representations
ReactionMechanismGenerator · Mar 20, 2019 · 11b68e8 · 11b68e8
2 parents 1ff052f + 7fc0755
commit 11b68e8
Show file tree

Hide file tree

Showing 7 changed files with 164 additions and 30 deletions.
diff --git a/arc/processor.py b/arc/processor.py
@@ -5,6 +5,7 @@
 import os
 import shutil
 import logging
+from random import randint
 
 from arkane.input import species as arkane_species, transitionState as arkane_transition_state,\
     reaction as arkane_reaction
@@ -177,7 +178,14 @@ def process(self):
             if not species.is_ts and 'ALL converged' in self.output[species.label]['status']:
                 species_for_thermo_lib.append(species)
                 output_file_path = self._generate_arkane_species_file(species)
-                arkane_spc = arkane_species(str(species.label), species.arkane_file)
+                unique_arkane_species_label = False
+                while not unique_arkane_species_label:
+                    try:
+                        arkane_spc = arkane_species(str(species.label), species.arkane_file)
+                    except ValueError:
+                        species.label += '_(' + str(randint(0, 999)) + ')'
+                    else:
+                        unique_arkane_species_label = True
                 if species.mol_list:
                     arkane_spc.molecule = species.mol_list
                 stat_mech_job = StatMechJob(arkane_spc, species.arkane_file)

diff --git a/arc/rmgdb.py b/arc/rmgdb.py
@@ -9,6 +9,7 @@
 from rmgpy.data.rmg import RMGDatabase
 from rmgpy.reaction import isomorphic_species_lists
 from rmgpy.data.kinetics.common import find_degenerate_reactions
+from rmgpy.exceptions import KineticsError
 
 from arc.arc_exceptions import InputError
 
@@ -118,8 +119,12 @@ def load_rmg_database(rmgdb, thermo_libraries=None, reaction_libraries=None, kin
         depository=False,
     )
     for family in rmgdb.kinetics.families.values():
-        family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo)
-        family.fillKineticsRulesByAveragingUp(verbose=False)
+        try:
+            family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo)
+        except KineticsError:
+            logging.info('Could not train family {0}'.format(family))
+        else:
+            family.fillKineticsRulesByAveragingUp(verbose=False)
     logging.info('\n\n')
 
 

diff --git a/arc/scheduler.py b/arc/scheduler.py
@@ -503,9 +503,11 @@ def end_job(self, job, label, job_name):
                          fine=job.fine, software=job.software, shift=job.shift, trsh=job.trsh, memory=job.memory,
                          conformer=job.conformer, ess_trsh_methods=job.ess_trsh_methods, scan=job.scan,
                          pivots=job.pivots, occ=job.occ, scan_trsh=job.scan_trsh, scan_res=job.scan_res)
-            self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
+            if job_name in self.running_jobs[label]:
+                self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
         if job.job_status[0] != 'running' and job.job_status[1] != 'running':
-            self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
+            if job_name in self.running_jobs[label]:
+                self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
             self.timer = False
             job.write_completed_job_to_csv_file()
             logging.info('  Ending job {name} for {label} (run time: {time})'.format(name=job.job_name, label=label,
@@ -724,7 +726,7 @@ def determine_most_stable_conformer(self, label):
             energies, xyzs = (list(t) for t in zip(*sorted(zip(self.species_dict[label].conformer_energies, xyzs))))
             smiles_list = list()
             for xyz in xyzs:
-                _, b_mol = molecules_from_xyz(xyz)
+                b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1]
                 smiles = b_mol.toSMILES() if b_mol is not None else 'no 2D structure'
                 smiles_list.append(smiles)
             geo_dir = os.path.join(self.project_directory, 'output', 'Species', label, 'geometry')
@@ -743,7 +745,7 @@ def determine_most_stable_conformer(self, label):
             # Run isomorphism checks if a 2D representation is available
             if self.species_dict[label].mol is not None:
                 for i, xyz in enumerate(xyzs):
-                    _, b_mol = molecules_from_xyz(xyz)
+                    b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1]
                     if b_mol is not None:
                         if check_isomorphism(self.species_dict[label].mol, b_mol):
                             if i == 0:
@@ -754,30 +756,35 @@ def determine_most_stable_conformer(self, label):
                             else:
                                 logging.info('A conformer for species {0} was found to be isomorphic '
                                              'with the 2D graph representation {1}. This conformer is {2} kJ/mol '
-                                             'above the most stable one (which is not isomorphic). Using the '
-                                             'isomorphic conformer for further geometry optimization.'.format(
-                                              label, self.species_dict[label].mol.toSMILES(),
-                                              (energies[i] - energies[0]) * 0.001))
+                                             'above the most stable one which correspods to  {3} (and is not'
+                                             ' isomorphic). Using the isomorphic conformer for further geometry '
+                                             'optimization.'.format(label, self.species_dict[label].mol.toSMILES(),
+                                                                    (energies[i] - energies[0]) * 0.001,
+                                 molecules_from_xyz(xyzs[0], multiplicity=self.species_dict[label].multiplicity)[1]))
                                 conformer_xyz = xyz
                                 self.output[label]['status'] += 'passed isomorphism check but not for the most stable' \
                                                                 ' conformer; '
                             break
                         else:
                             if i == 0:
-                                logging.warn('Most stable conformer for species {0} was found to be NON-isomorphic '
-                                             'with the 2D graph representation {1}. Searching for a different '
-                                             'conformer that is isomorphic'.format(label, b_mol.toSMILES()))
+                                logging.warn('Most stable conformer for species {0} with structure {1} was found to '
+                                             'be NON-isomorphic with the 2D graph representation {2}. Searching for a '
+                                             'different conformer that is isomorphic...'.format(label, b_mol.toSMILES(),
+                                                                            self.species_dict[label].mol.toSMILES()))
                 else:
+                    smiles_list = list()
+                    for xyz in xyzs:
+                        smiles_list.append(molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1])
                     if self.allow_nonisomorphic_2d:
                         # we'll optimize the most stable conformer even if it not isomorphic to the 2D graph
                         logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation'
-                                      ' {1}. Optimizing the most stable conformer anyway.'.format(
-                                       label, self.species_dict[label].mol.toSMILES()))
+                                      ' {1} (got: {2}). Optimizing the most stable conformer anyway.'.format(
+                                       label, self.species_dict[label].mol.toSMILES(), smiles_list))
                         conformer_xyz = xyzs[0]
                     else:
                         logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation'
-                                      ' {1}. NOT optimizing this species.'.format(
-                                       label, self.species_dict[label].mol.toSMILES()))
+                                      ' {1} (got: {2}). NOT optimizing this species.'.format(
+                                       label, self.species_dict[label].mol.toSMILES(), smiles_list))
                         self.output[label]['status'] += 'Error: No conformer was found to be isomorphic with the 2D' \
                                                         ' graph representation! '
             else:

diff --git a/arc/species/converter.py b/arc/species/converter.py
@@ -169,13 +169,14 @@ def elementize(atom):
         atom.atomType = atom_type[0]
 
 
-def molecules_from_xyz(xyz):
+def molecules_from_xyz(xyz, multiplicity=None):
     """
     Creating RMG:Molecule objects from xyz with correct atom labeling
     `xyz` is in a string format
     returns `s_mol` (with only single bonds) and `b_mol` (with best guesses for bond orders)
     This function is based on the MolGraph.perceive_smiles method
     Returns None for b_mol is unsuccessful to infer bond orders
+    If `multiplicity` is given, the returned species multiplicity will be set to it.
     """
     if xyz is None:
         return None, None
@@ -197,13 +198,85 @@ def molecules_from_xyz(xyz):
     if pybel_mol is not None:
         inchi = pybel_to_inchi(pybel_mol)
         mol_bo = rmg_mol_from_inchi(inchi)  # An RMG Molecule with bond orders, but without preserved atom order
+        if mol_bo is not None:
+            if multiplicity is not None:
+                set_multiplicity(mol_bo, multiplicity)
+            mol_s1_updated.multiplicity = mol_bo.multiplicity
+            order_atoms(ref_mol=mol_s1_updated, mol=mol_bo)
+            set_multiplicity(mol_s1_updated, mol_bo.multiplicity, radical_map=mol_bo)
     else:
         mol_bo = None
-    order_atoms(ref_mol=mol_s1_updated, mol=mol_bo)
     s_mol, b_mol = mol_s1_updated, mol_bo
     return s_mol, b_mol
 
 
+def set_multiplicity(mol, multiplicity, radical_map=None):
+    """
+    Set the multiplicity of `mol` to `multiplicity` and change radicals as needed
+    if a `radical_map`, which is an RMG Molecule object with the same atom order, is given,
+    it'll be used to set radicals (useful if bond orders aren't known for a molecule)
+    """
+    mol.multiplicity = multiplicity
+    if radical_map is not None:
+        if not isinstance(radical_map, Molecule):
+            raise TypeError('radical_map sent to set_multiplicity() has to be a Molecule object. Got {0}'.format(
+                type(radical_map)))
+        set_radicals_by_map(mol, radical_map)
+    radicals = mol.getRadicalCount()
+    if mol.multiplicity != radicals + 1:
+        # this is not the trivial "multiplicity = number of radicals + 1" case
+        # either the number of radicals was not identified correctly from the 3D structure (i.e., should be lone pairs),
+        # or their spin isn't determined correctly
+        if mol.multiplicity > radicals + 1:
+            # there are sites that should have radicals, but were'nt identified as such.
+            # try adding radicals according to missing valances
+            add_rads_by_atom_valance(mol)
+            if mol.multiplicity > radicals + 1:
+                # still problematic, currently there's no automated solution to this case, raise an error
+                raise SpeciesError('A multiplicity of {0} was given, but only {1} radicals were identified. '
+                                   'Cannot infer 2D graph representation for this species.\nMore info:{2}\n{3}'.format(
+                                    mol.multiplicity, radicals, mol.toSMILES(), mol.toAdjacencyList()))
+        if len(mol.atoms) == 1 and mol.multiplicity == 1 and mol.atoms[0].radicalElectrons == 4:
+            # This is a singlet atomic C or Si
+            mol.atoms[0].radicalElectrons = 0
+            mol.atoms[0].lonePairs = 2
+        if mol.multiplicity < radicals + 1:
+            # make sure all cabene and nitrene sites, if exist, have lone pairs rather than two unpaired electrons
+            for atom in mol.atoms:
+                if atom.radicalElectrons == 2:
+                    atom.radicalElectrons = 0
+                    atom.lonePairs += 1
+    # final check: an even number of radicals results in an odd multiplicity, and vice versa
+    if divmod(mol.multiplicity, 2)[1] == divmod(radicals, 2)[1]:
+        raise SpeciesError('Number of radicals ({0}) and multiplicity ({1}) for {2} do not match.\n{3}'.format(
+            radicals, mol.multiplicity, mol.toSMILES(), mol.toAdjacencyList()))
+
+
+def add_rads_by_atom_valance(mol):
+    """
+    A helper function for assigning radicals if not identified automatically
+    and missing according to the given multiplicity
+    We assume here that all partial charges are already set, but this assumption could be wrong
+    This implementation might also be problematic for aromatic species with undefined bond orders
+    """
+    for atom in mol.atoms:
+        if atom.isNonHydrogen():
+            atomic_orbitals = atom.lonePairs + atom.radicalElectrons + atom.getBondOrdersForAtom()
+            missing_electrons = 4 - atomic_orbitals
+            if missing_electrons:
+                atom.radicalElectrons = missing_electrons
+            # print(mol.toAdjacencyList())
+
+
+def set_radicals_by_map(mol, radical_map):
+    """Set radicals in `mol` by `radical_map`, bot are RMG Molecule objects with the same atom order"""
+    for i, atom in enumerate(mol.atoms):
+        if atom.element.number != radical_map.atoms[i].element.number:
+            raise ValueError('Atom order in mol and radical_map in set_radicals_by_map() do not match. '
+                             '{0} is not {1}.'.format(atom.element.symbol, radical_map.atoms[i].symbol))
+        atom.radicalElectrons = radical_map.atoms[i].radicalElectrons
+
+
 def order_atoms_in_mol_list(ref_mol, mol_list):
     """Order the atoms in all molecules of mol_list by the atom order in ref_mol"""
     for mol in mol_list:
@@ -260,6 +333,7 @@ def update_molecule(mol, to_single_bonds=False):
             bond = Bond(atom_mapping[atom1], atom_mapping[atom2], bond_order)
             new_mol.addBond(bond)
     new_mol.updateAtomTypes()
+    new_mol.multiplicity = mol.multiplicity
     return new_mol
 
 

diff --git a/arc/species/converterTest.py b/arc/species/converterTest.py
@@ -9,6 +9,7 @@
 import unittest
 
 from rmgpy.molecule.molecule import Molecule
+from rmgpy.species import Species
 
 import arc.species.converter as converter
 from arc.species.species import ARCSpecies
@@ -97,6 +98,18 @@ def setUpClass(cls):
         H       1.1294795781    -0.8708998550     0.7537444446
         H       1.4015274689    -0.6230592706    -0.8487058662"""
 
+        nh_s_adj = str("""1 N u0 p2 c0 {2,S}
+                          2 H u0 p0 c0 {1,S}""")
+        nh_s_xyz = str("""N       0.50949998    0.00000000    0.00000000
+                          H      -0.50949998    0.00000000    0.00000000""")
+        cls.spc1 = ARCSpecies(label=str('NH2(S)'), adjlist=nh_s_adj, xyz=nh_s_xyz, multiplicity=1, charge=0)
+        spc = Species().fromAdjacencyList(nh_s_adj)
+        cls.spc2 = ARCSpecies(label=str('NH2(S)'), rmg_species=spc, xyz=nh_s_xyz)
+
+        cls.spc3 = ARCSpecies(label=str('NCN(S)'), smiles=str('[N]=C=[N]'), multiplicity=1, charge=0)
+
+        cls.spc4 = ARCSpecies(label=str('NCN(T)'), smiles=str('[N]=C=[N]'), multiplicity=3, charge=0)
+
     def test_get_xyz_string(self):
         """Test conversion of xyz array to string format"""
         xyz_array = [[-0.06618943, -0.12360663, -0.07631983],
@@ -801,6 +814,17 @@ def test_s_bonds_mol_from_xyz(self):
         self.assertEqual(len(mol4.atoms), 24)
         self.assertEqual(len(mol5.atoms), 3)
 
+    def set_radicals_correctly_from_xyz(self):
+        """Test that we determine the number of radicals correctly from given xyz and multiplicity"""
+        self.assertEqual(self.spc1.multiplicity, 1)  # NH(S), a nitrene
+        self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc1.mol.atoms]))
+        self.assertEqual(self.spc2.multiplicity, 1)  # NH(S), a nitrene
+        self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc2.mol.atoms]))
+        self.assertEqual(self.spc3.multiplicity, 1)  # NCN(S), a singlet birad
+        self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()]))
+        self.assertEqual(self.spc3.multiplicity, 3)  # NCN(T)
+        self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()]))
+
 
 ################################################################################
 

diff --git a/arc/species/species.py b/arc/species/species.py
@@ -811,22 +811,23 @@ def mol_from_xyz(self, xyz=None):
         if self.mol is not None:
             # self.mol should have come from another source, e.g. SMILES or yml
             original_mol = self.mol
-            self.mol = molecules_from_xyz(xyz)[1]
+            self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1]
 
             if self.mol is not None and not check_isomorphism(original_mol, self.mol):
                 raise InputError('XYZ and the 2D graph representation of the Molecule are not isomorphic.\n'
                                  'Got xyz:\n{0}\n\nwhich corresponds to {1}\n{2}\n\nand: {3}\n{4}'.format(
                                    xyz, self.mol.toSMILES(), self.mol.toAdjacencyList(),
                                    original_mol.toSMILES(), original_mol.toAdjacencyList()))
         else:
-            self.mol = molecules_from_xyz(xyz)[1]
+            self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1]
 
         if self.mol_list is None:
             # Assign atom ids first, so they carry through to the resonance structures
             self.mol.assignAtomIDs()
             # The generate_resonance_structures method changes atom order
             # Make a copy so we don't disturb the original order from xyz
-            self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False, filter_structures=True)
+            self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False,
+                                                                                   filter_structures=True)
         order_atoms_in_mol_list(ref_mol=self.mol, mol_list=self.mol_list)