diff --git a/arc/processor.py b/arc/processor.py index 3e4c1202e9..695aea3f19 100644 --- a/arc/processor.py +++ b/arc/processor.py @@ -5,6 +5,7 @@ import os import shutil import logging +from random import randint from arkane.input import species as arkane_species, transitionState as arkane_transition_state,\ reaction as arkane_reaction @@ -177,7 +178,14 @@ def process(self): if not species.is_ts and 'ALL converged' in self.output[species.label]['status']: species_for_thermo_lib.append(species) output_file_path = self._generate_arkane_species_file(species) - arkane_spc = arkane_species(str(species.label), species.arkane_file) + unique_arkane_species_label = False + while not unique_arkane_species_label: + try: + arkane_spc = arkane_species(str(species.label), species.arkane_file) + except ValueError: + species.label += '_(' + str(randint(0, 999)) + ')' + else: + unique_arkane_species_label = True if species.mol_list: arkane_spc.molecule = species.mol_list stat_mech_job = StatMechJob(arkane_spc, species.arkane_file) diff --git a/arc/rmgdb.py b/arc/rmgdb.py index d60694d00e..0edc870888 100644 --- a/arc/rmgdb.py +++ b/arc/rmgdb.py @@ -9,6 +9,7 @@ from rmgpy.data.rmg import RMGDatabase from rmgpy.reaction import isomorphic_species_lists from rmgpy.data.kinetics.common import find_degenerate_reactions +from rmgpy.exceptions import KineticsError from arc.arc_exceptions import InputError @@ -118,8 +119,12 @@ def load_rmg_database(rmgdb, thermo_libraries=None, reaction_libraries=None, kin depository=False, ) for family in rmgdb.kinetics.families.values(): - family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo) - family.fillKineticsRulesByAveragingUp(verbose=False) + try: + family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo) + except KineticsError: + logging.info('Could not train family {0}'.format(family)) + else: + family.fillKineticsRulesByAveragingUp(verbose=False) logging.info('\n\n') diff --git a/arc/scheduler.py b/arc/scheduler.py index 7cc87381c6..e5ff7f742e 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -503,9 +503,11 @@ def end_job(self, job, label, job_name): fine=job.fine, software=job.software, shift=job.shift, trsh=job.trsh, memory=job.memory, conformer=job.conformer, ess_trsh_methods=job.ess_trsh_methods, scan=job.scan, pivots=job.pivots, occ=job.occ, scan_trsh=job.scan_trsh, scan_res=job.scan_res) - self.running_jobs[label].pop(self.running_jobs[label].index(job_name)) + if job_name in self.running_jobs[label]: + self.running_jobs[label].pop(self.running_jobs[label].index(job_name)) if job.job_status[0] != 'running' and job.job_status[1] != 'running': - self.running_jobs[label].pop(self.running_jobs[label].index(job_name)) + if job_name in self.running_jobs[label]: + self.running_jobs[label].pop(self.running_jobs[label].index(job_name)) self.timer = False job.write_completed_job_to_csv_file() logging.info(' Ending job {name} for {label} (run time: {time})'.format(name=job.job_name, label=label, @@ -724,7 +726,7 @@ def determine_most_stable_conformer(self, label): energies, xyzs = (list(t) for t in zip(*sorted(zip(self.species_dict[label].conformer_energies, xyzs)))) smiles_list = list() for xyz in xyzs: - _, b_mol = molecules_from_xyz(xyz) + b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1] smiles = b_mol.toSMILES() if b_mol is not None else 'no 2D structure' smiles_list.append(smiles) geo_dir = os.path.join(self.project_directory, 'output', 'Species', label, 'geometry') @@ -743,7 +745,7 @@ def determine_most_stable_conformer(self, label): # Run isomorphism checks if a 2D representation is available if self.species_dict[label].mol is not None: for i, xyz in enumerate(xyzs): - _, b_mol = molecules_from_xyz(xyz) + b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1] if b_mol is not None: if check_isomorphism(self.species_dict[label].mol, b_mol): if i == 0: @@ -754,30 +756,35 @@ def determine_most_stable_conformer(self, label): else: logging.info('A conformer for species {0} was found to be isomorphic ' 'with the 2D graph representation {1}. This conformer is {2} kJ/mol ' - 'above the most stable one (which is not isomorphic). Using the ' - 'isomorphic conformer for further geometry optimization.'.format( - label, self.species_dict[label].mol.toSMILES(), - (energies[i] - energies[0]) * 0.001)) + 'above the most stable one which correspods to {3} (and is not' + ' isomorphic). Using the isomorphic conformer for further geometry ' + 'optimization.'.format(label, self.species_dict[label].mol.toSMILES(), + (energies[i] - energies[0]) * 0.001, + molecules_from_xyz(xyzs[0], multiplicity=self.species_dict[label].multiplicity)[1])) conformer_xyz = xyz self.output[label]['status'] += 'passed isomorphism check but not for the most stable' \ ' conformer; ' break else: if i == 0: - logging.warn('Most stable conformer for species {0} was found to be NON-isomorphic ' - 'with the 2D graph representation {1}. Searching for a different ' - 'conformer that is isomorphic'.format(label, b_mol.toSMILES())) + logging.warn('Most stable conformer for species {0} with structure {1} was found to ' + 'be NON-isomorphic with the 2D graph representation {2}. Searching for a ' + 'different conformer that is isomorphic...'.format(label, b_mol.toSMILES(), + self.species_dict[label].mol.toSMILES())) else: + smiles_list = list() + for xyz in xyzs: + smiles_list.append(molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1]) if self.allow_nonisomorphic_2d: # we'll optimize the most stable conformer even if it not isomorphic to the 2D graph logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation' - ' {1}. Optimizing the most stable conformer anyway.'.format( - label, self.species_dict[label].mol.toSMILES())) + ' {1} (got: {2}). Optimizing the most stable conformer anyway.'.format( + label, self.species_dict[label].mol.toSMILES(), smiles_list)) conformer_xyz = xyzs[0] else: logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation' - ' {1}. NOT optimizing this species.'.format( - label, self.species_dict[label].mol.toSMILES())) + ' {1} (got: {2}). NOT optimizing this species.'.format( + label, self.species_dict[label].mol.toSMILES(), smiles_list)) self.output[label]['status'] += 'Error: No conformer was found to be isomorphic with the 2D' \ ' graph representation! ' else: diff --git a/arc/species/converter.py b/arc/species/converter.py index 2a6a061f86..dd8f43f996 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -169,13 +169,14 @@ def elementize(atom): atom.atomType = atom_type[0] -def molecules_from_xyz(xyz): +def molecules_from_xyz(xyz, multiplicity=None): """ Creating RMG:Molecule objects from xyz with correct atom labeling `xyz` is in a string format returns `s_mol` (with only single bonds) and `b_mol` (with best guesses for bond orders) This function is based on the MolGraph.perceive_smiles method Returns None for b_mol is unsuccessful to infer bond orders + If `multiplicity` is given, the returned species multiplicity will be set to it. """ if xyz is None: return None, None @@ -197,13 +198,85 @@ def molecules_from_xyz(xyz): if pybel_mol is not None: inchi = pybel_to_inchi(pybel_mol) mol_bo = rmg_mol_from_inchi(inchi) # An RMG Molecule with bond orders, but without preserved atom order + if mol_bo is not None: + if multiplicity is not None: + set_multiplicity(mol_bo, multiplicity) + mol_s1_updated.multiplicity = mol_bo.multiplicity + order_atoms(ref_mol=mol_s1_updated, mol=mol_bo) + set_multiplicity(mol_s1_updated, mol_bo.multiplicity, radical_map=mol_bo) else: mol_bo = None - order_atoms(ref_mol=mol_s1_updated, mol=mol_bo) s_mol, b_mol = mol_s1_updated, mol_bo return s_mol, b_mol +def set_multiplicity(mol, multiplicity, radical_map=None): + """ + Set the multiplicity of `mol` to `multiplicity` and change radicals as needed + if a `radical_map`, which is an RMG Molecule object with the same atom order, is given, + it'll be used to set radicals (useful if bond orders aren't known for a molecule) + """ + mol.multiplicity = multiplicity + if radical_map is not None: + if not isinstance(radical_map, Molecule): + raise TypeError('radical_map sent to set_multiplicity() has to be a Molecule object. Got {0}'.format( + type(radical_map))) + set_radicals_by_map(mol, radical_map) + radicals = mol.getRadicalCount() + if mol.multiplicity != radicals + 1: + # this is not the trivial "multiplicity = number of radicals + 1" case + # either the number of radicals was not identified correctly from the 3D structure (i.e., should be lone pairs), + # or their spin isn't determined correctly + if mol.multiplicity > radicals + 1: + # there are sites that should have radicals, but were'nt identified as such. + # try adding radicals according to missing valances + add_rads_by_atom_valance(mol) + if mol.multiplicity > radicals + 1: + # still problematic, currently there's no automated solution to this case, raise an error + raise SpeciesError('A multiplicity of {0} was given, but only {1} radicals were identified. ' + 'Cannot infer 2D graph representation for this species.\nMore info:{2}\n{3}'.format( + mol.multiplicity, radicals, mol.toSMILES(), mol.toAdjacencyList())) + if len(mol.atoms) == 1 and mol.multiplicity == 1 and mol.atoms[0].radicalElectrons == 4: + # This is a singlet atomic C or Si + mol.atoms[0].radicalElectrons = 0 + mol.atoms[0].lonePairs = 2 + if mol.multiplicity < radicals + 1: + # make sure all cabene and nitrene sites, if exist, have lone pairs rather than two unpaired electrons + for atom in mol.atoms: + if atom.radicalElectrons == 2: + atom.radicalElectrons = 0 + atom.lonePairs += 1 + # final check: an even number of radicals results in an odd multiplicity, and vice versa + if divmod(mol.multiplicity, 2)[1] == divmod(radicals, 2)[1]: + raise SpeciesError('Number of radicals ({0}) and multiplicity ({1}) for {2} do not match.\n{3}'.format( + radicals, mol.multiplicity, mol.toSMILES(), mol.toAdjacencyList())) + + +def add_rads_by_atom_valance(mol): + """ + A helper function for assigning radicals if not identified automatically + and missing according to the given multiplicity + We assume here that all partial charges are already set, but this assumption could be wrong + This implementation might also be problematic for aromatic species with undefined bond orders + """ + for atom in mol.atoms: + if atom.isNonHydrogen(): + atomic_orbitals = atom.lonePairs + atom.radicalElectrons + atom.getBondOrdersForAtom() + missing_electrons = 4 - atomic_orbitals + if missing_electrons: + atom.radicalElectrons = missing_electrons + # print(mol.toAdjacencyList()) + + +def set_radicals_by_map(mol, radical_map): + """Set radicals in `mol` by `radical_map`, bot are RMG Molecule objects with the same atom order""" + for i, atom in enumerate(mol.atoms): + if atom.element.number != radical_map.atoms[i].element.number: + raise ValueError('Atom order in mol and radical_map in set_radicals_by_map() do not match. ' + '{0} is not {1}.'.format(atom.element.symbol, radical_map.atoms[i].symbol)) + atom.radicalElectrons = radical_map.atoms[i].radicalElectrons + + def order_atoms_in_mol_list(ref_mol, mol_list): """Order the atoms in all molecules of mol_list by the atom order in ref_mol""" for mol in mol_list: @@ -260,6 +333,7 @@ def update_molecule(mol, to_single_bonds=False): bond = Bond(atom_mapping[atom1], atom_mapping[atom2], bond_order) new_mol.addBond(bond) new_mol.updateAtomTypes() + new_mol.multiplicity = mol.multiplicity return new_mol diff --git a/arc/species/converterTest.py b/arc/species/converterTest.py index 67a90860ca..08d85c293e 100644 --- a/arc/species/converterTest.py +++ b/arc/species/converterTest.py @@ -9,6 +9,7 @@ import unittest from rmgpy.molecule.molecule import Molecule +from rmgpy.species import Species import arc.species.converter as converter from arc.species.species import ARCSpecies @@ -97,6 +98,18 @@ def setUpClass(cls): H 1.1294795781 -0.8708998550 0.7537444446 H 1.4015274689 -0.6230592706 -0.8487058662""" + nh_s_adj = str("""1 N u0 p2 c0 {2,S} + 2 H u0 p0 c0 {1,S}""") + nh_s_xyz = str("""N 0.50949998 0.00000000 0.00000000 + H -0.50949998 0.00000000 0.00000000""") + cls.spc1 = ARCSpecies(label=str('NH2(S)'), adjlist=nh_s_adj, xyz=nh_s_xyz, multiplicity=1, charge=0) + spc = Species().fromAdjacencyList(nh_s_adj) + cls.spc2 = ARCSpecies(label=str('NH2(S)'), rmg_species=spc, xyz=nh_s_xyz) + + cls.spc3 = ARCSpecies(label=str('NCN(S)'), smiles=str('[N]=C=[N]'), multiplicity=1, charge=0) + + cls.spc4 = ARCSpecies(label=str('NCN(T)'), smiles=str('[N]=C=[N]'), multiplicity=3, charge=0) + def test_get_xyz_string(self): """Test conversion of xyz array to string format""" xyz_array = [[-0.06618943, -0.12360663, -0.07631983], @@ -801,6 +814,17 @@ def test_s_bonds_mol_from_xyz(self): self.assertEqual(len(mol4.atoms), 24) self.assertEqual(len(mol5.atoms), 3) + def set_radicals_correctly_from_xyz(self): + """Test that we determine the number of radicals correctly from given xyz and multiplicity""" + self.assertEqual(self.spc1.multiplicity, 1) # NH(S), a nitrene + self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc1.mol.atoms])) + self.assertEqual(self.spc2.multiplicity, 1) # NH(S), a nitrene + self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc2.mol.atoms])) + self.assertEqual(self.spc3.multiplicity, 1) # NCN(S), a singlet birad + self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()])) + self.assertEqual(self.spc3.multiplicity, 3) # NCN(T) + self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()])) + ################################################################################ diff --git a/arc/species/species.py b/arc/species/species.py index 346b41a77c..b089782667 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -811,7 +811,7 @@ def mol_from_xyz(self, xyz=None): if self.mol is not None: # self.mol should have come from another source, e.g. SMILES or yml original_mol = self.mol - self.mol = molecules_from_xyz(xyz)[1] + self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1] if self.mol is not None and not check_isomorphism(original_mol, self.mol): raise InputError('XYZ and the 2D graph representation of the Molecule are not isomorphic.\n' @@ -819,14 +819,15 @@ def mol_from_xyz(self, xyz=None): xyz, self.mol.toSMILES(), self.mol.toAdjacencyList(), original_mol.toSMILES(), original_mol.toAdjacencyList())) else: - self.mol = molecules_from_xyz(xyz)[1] + self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1] if self.mol_list is None: # Assign atom ids first, so they carry through to the resonance structures self.mol.assignAtomIDs() # The generate_resonance_structures method changes atom order # Make a copy so we don't disturb the original order from xyz - self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False, filter_structures=True) + self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False, + filter_structures=True) order_atoms_in_mol_list(ref_mol=self.mol, mol_list=self.mol_list) diff --git a/arc/species/speciesTest.py b/arc/species/speciesTest.py index a54ce876a9..4da7b8fced 100644 --- a/arc/species/speciesTest.py +++ b/arc/species/speciesTest.py @@ -77,6 +77,12 @@ def setUpClass(cls): H 1.84477700 -0.57224200 0.35517700""") cls.spc8 = ARCSpecies(label=str('HSO3'), xyz=hso3_xyz, multiplicity=2, charge=0, smiles=str('O[S](=O)=O')) + nh_s_adj = str("""1 N u0 p2 c0 {2,S} + 2 H u0 p0 c0 {1,S}""") + nh_s_xyz = str("""N 0.50949998 0.00000000 0.00000000 + H -0.50949998 0.00000000 0.00000000""") + cls.spc9 = ARCSpecies(label=str('NH2(S)'), adjlist=nh_s_adj, xyz=nh_s_xyz, multiplicity=1, charge=0) + def test_conformers(self): """Test conformer generation""" self.spc1.generate_conformers() # vinoxy has two res. structures, each is assigned two conformers (RDkit/ob) @@ -260,18 +266,18 @@ def test_is_linear(self): C -0.77966935 0.95278385 0.00000000 H -1.23666197 3.17751246 0.00000000 H -0.56023545 -0.09447399 0.00000000""" # just 0.5 degree off from linearity, so NOT linear... - xyz9 = """C -1.1998 0.1610 0.0275 - C -1.4021 0.6223 -0.8489 - C -1.48302 0.80682 -1.19946""" # just 3 points in space on a straight line (not a physical molecule) + xyz9 = """O -1.1998 0.1610 0.0275 + O -1.4021 0.6223 -0.8489 + O -1.48302 0.80682 -1.19946""" # just 3 points in space on a straight line (not a physical molecule) spc1 = ARCSpecies(label=str('test_spc'), xyz=xyz1, multiplicity=1, charge=0, smiles=str('O=C=O')) spc2 = ARCSpecies(label=str('test_spc'), xyz=xyz2, multiplicity=1, charge=0, smiles=str('[NH-][S+](=O)(O)C')) - spc3 = ARCSpecies(label=str('test_spc'), xyz=xyz3, multiplicity=1, charge=0, smiles=str('[O]N=O')) - spc4 = ARCSpecies(label=str('test_spc'), xyz=xyz4, multiplicity=1, charge=0, smiles=str('[NH2]')) - spc5 = ARCSpecies(label=str('test_spc'), xyz=xyz5, multiplicity=1, charge=0, smiles=str('[O]S')) + spc3 = ARCSpecies(label=str('test_spc'), xyz=xyz3, multiplicity=2, charge=0, smiles=str('[O]N=O')) + spc4 = ARCSpecies(label=str('test_spc'), xyz=xyz4, multiplicity=2, charge=0, smiles=str('[NH2]')) + spc5 = ARCSpecies(label=str('test_spc'), xyz=xyz5, multiplicity=2, charge=0, smiles=str('[O]S')) spc6 = ARCSpecies(label=str('test_spc'), xyz=xyz6, multiplicity=1, charge=0, smiles=str('C#N')) spc7 = ARCSpecies(label=str('test_spc'), xyz=xyz7, multiplicity=1, charge=0, smiles=str('C#C')) spc8 = ARCSpecies(label=str('test_spc'), xyz=xyz8, multiplicity=1, charge=0, smiles=str('C#C')) - spc9 = ARCSpecies(label=str('test_spc'), xyz=xyz9, multiplicity=1, charge=0, smiles=str('[C+]C#[C-]')) + spc9 = ARCSpecies(label=str('test_spc'), xyz=xyz9, multiplicity=1, charge=0, smiles=str('[O-][O+]=O')) self.assertTrue(spc1.is_linear()) self.assertTrue(spc6.is_linear()) @@ -497,6 +503,15 @@ def test_mol_from_xyz_atom_id_2(self): self.assertEqual(mol_ids, res1_ids) self.assertEqual(mol_ids, res2_ids) + def test_preserving_multiplicity(self): + """Test that multiplicity is being preserved, especially when it is guessed differently from xyz""" + multiplicity_list = [2, 2, 1, 1, 1, 1, 1, 2, 1] + for i, spc in enumerate([self.spc1, self.spc2, self.spc3, self.spc4, self.spc5, self.spc6, self.spc7, + self.spc8, self.spc9]): + self.assertEqual(spc.multiplicity, multiplicity_list[i]) + self.assertEqual(spc.mol.multiplicity, multiplicity_list[i]) + self.assertTrue(all([structure.multiplicity == spc.multiplicity for structure in spc.mol_list])) + class TestTSGuess(unittest.TestCase): """