Skip to content

Commit

Permalink
Merge pull request #99 from ReactionMechanismGenerator/multiplicity
Browse files Browse the repository at this point in the history
 Preserve multiplicity throughout Species representations
  • Loading branch information
alongd committed Mar 20, 2019
2 parents 1ff052f + 7fc0755 commit 11b68e8
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 30 deletions.
10 changes: 9 additions & 1 deletion arc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import shutil
import logging
from random import randint

from arkane.input import species as arkane_species, transitionState as arkane_transition_state,\
reaction as arkane_reaction
Expand Down Expand Up @@ -177,7 +178,14 @@ def process(self):
if not species.is_ts and 'ALL converged' in self.output[species.label]['status']:
species_for_thermo_lib.append(species)
output_file_path = self._generate_arkane_species_file(species)
arkane_spc = arkane_species(str(species.label), species.arkane_file)
unique_arkane_species_label = False
while not unique_arkane_species_label:
try:
arkane_spc = arkane_species(str(species.label), species.arkane_file)
except ValueError:
species.label += '_(' + str(randint(0, 999)) + ')'
else:
unique_arkane_species_label = True
if species.mol_list:
arkane_spc.molecule = species.mol_list
stat_mech_job = StatMechJob(arkane_spc, species.arkane_file)
Expand Down
9 changes: 7 additions & 2 deletions arc/rmgdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rmgpy.data.rmg import RMGDatabase
from rmgpy.reaction import isomorphic_species_lists
from rmgpy.data.kinetics.common import find_degenerate_reactions
from rmgpy.exceptions import KineticsError

from arc.arc_exceptions import InputError

Expand Down Expand Up @@ -118,8 +119,12 @@ def load_rmg_database(rmgdb, thermo_libraries=None, reaction_libraries=None, kin
depository=False,
)
for family in rmgdb.kinetics.families.values():
family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo)
family.fillKineticsRulesByAveragingUp(verbose=False)
try:
family.addKineticsRulesFromTrainingSet(thermoDatabase=rmgdb.thermo)
except KineticsError:
logging.info('Could not train family {0}'.format(family))
else:
family.fillKineticsRulesByAveragingUp(verbose=False)
logging.info('\n\n')


Expand Down
37 changes: 22 additions & 15 deletions arc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,9 +503,11 @@ def end_job(self, job, label, job_name):
fine=job.fine, software=job.software, shift=job.shift, trsh=job.trsh, memory=job.memory,
conformer=job.conformer, ess_trsh_methods=job.ess_trsh_methods, scan=job.scan,
pivots=job.pivots, occ=job.occ, scan_trsh=job.scan_trsh, scan_res=job.scan_res)
self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
if job_name in self.running_jobs[label]:
self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
if job.job_status[0] != 'running' and job.job_status[1] != 'running':
self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
if job_name in self.running_jobs[label]:
self.running_jobs[label].pop(self.running_jobs[label].index(job_name))
self.timer = False
job.write_completed_job_to_csv_file()
logging.info(' Ending job {name} for {label} (run time: {time})'.format(name=job.job_name, label=label,
Expand Down Expand Up @@ -724,7 +726,7 @@ def determine_most_stable_conformer(self, label):
energies, xyzs = (list(t) for t in zip(*sorted(zip(self.species_dict[label].conformer_energies, xyzs))))
smiles_list = list()
for xyz in xyzs:
_, b_mol = molecules_from_xyz(xyz)
b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1]
smiles = b_mol.toSMILES() if b_mol is not None else 'no 2D structure'
smiles_list.append(smiles)
geo_dir = os.path.join(self.project_directory, 'output', 'Species', label, 'geometry')
Expand All @@ -743,7 +745,7 @@ def determine_most_stable_conformer(self, label):
# Run isomorphism checks if a 2D representation is available
if self.species_dict[label].mol is not None:
for i, xyz in enumerate(xyzs):
_, b_mol = molecules_from_xyz(xyz)
b_mol = molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1]
if b_mol is not None:
if check_isomorphism(self.species_dict[label].mol, b_mol):
if i == 0:
Expand All @@ -754,30 +756,35 @@ def determine_most_stable_conformer(self, label):
else:
logging.info('A conformer for species {0} was found to be isomorphic '
'with the 2D graph representation {1}. This conformer is {2} kJ/mol '
'above the most stable one (which is not isomorphic). Using the '
'isomorphic conformer for further geometry optimization.'.format(
label, self.species_dict[label].mol.toSMILES(),
(energies[i] - energies[0]) * 0.001))
'above the most stable one which correspods to {3} (and is not'
' isomorphic). Using the isomorphic conformer for further geometry '
'optimization.'.format(label, self.species_dict[label].mol.toSMILES(),
(energies[i] - energies[0]) * 0.001,
molecules_from_xyz(xyzs[0], multiplicity=self.species_dict[label].multiplicity)[1]))
conformer_xyz = xyz
self.output[label]['status'] += 'passed isomorphism check but not for the most stable' \
' conformer; '
break
else:
if i == 0:
logging.warn('Most stable conformer for species {0} was found to be NON-isomorphic '
'with the 2D graph representation {1}. Searching for a different '
'conformer that is isomorphic'.format(label, b_mol.toSMILES()))
logging.warn('Most stable conformer for species {0} with structure {1} was found to '
'be NON-isomorphic with the 2D graph representation {2}. Searching for a '
'different conformer that is isomorphic...'.format(label, b_mol.toSMILES(),
self.species_dict[label].mol.toSMILES()))
else:
smiles_list = list()
for xyz in xyzs:
smiles_list.append(molecules_from_xyz(xyz, multiplicity=self.species_dict[label].multiplicity)[1])
if self.allow_nonisomorphic_2d:
# we'll optimize the most stable conformer even if it not isomorphic to the 2D graph
logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation'
' {1}. Optimizing the most stable conformer anyway.'.format(
label, self.species_dict[label].mol.toSMILES()))
' {1} (got: {2}). Optimizing the most stable conformer anyway.'.format(
label, self.species_dict[label].mol.toSMILES(), smiles_list))
conformer_xyz = xyzs[0]
else:
logging.error('No conformer for {0} was found to be isomorphic with the 2D graph representation'
' {1}. NOT optimizing this species.'.format(
label, self.species_dict[label].mol.toSMILES()))
' {1} (got: {2}). NOT optimizing this species.'.format(
label, self.species_dict[label].mol.toSMILES(), smiles_list))
self.output[label]['status'] += 'Error: No conformer was found to be isomorphic with the 2D' \
' graph representation! '
else:
Expand Down
78 changes: 76 additions & 2 deletions arc/species/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,14 @@ def elementize(atom):
atom.atomType = atom_type[0]


def molecules_from_xyz(xyz):
def molecules_from_xyz(xyz, multiplicity=None):
"""
Creating RMG:Molecule objects from xyz with correct atom labeling
`xyz` is in a string format
returns `s_mol` (with only single bonds) and `b_mol` (with best guesses for bond orders)
This function is based on the MolGraph.perceive_smiles method
Returns None for b_mol is unsuccessful to infer bond orders
If `multiplicity` is given, the returned species multiplicity will be set to it.
"""
if xyz is None:
return None, None
Expand All @@ -197,13 +198,85 @@ def molecules_from_xyz(xyz):
if pybel_mol is not None:
inchi = pybel_to_inchi(pybel_mol)
mol_bo = rmg_mol_from_inchi(inchi) # An RMG Molecule with bond orders, but without preserved atom order
if mol_bo is not None:
if multiplicity is not None:
set_multiplicity(mol_bo, multiplicity)
mol_s1_updated.multiplicity = mol_bo.multiplicity
order_atoms(ref_mol=mol_s1_updated, mol=mol_bo)
set_multiplicity(mol_s1_updated, mol_bo.multiplicity, radical_map=mol_bo)
else:
mol_bo = None
order_atoms(ref_mol=mol_s1_updated, mol=mol_bo)
s_mol, b_mol = mol_s1_updated, mol_bo
return s_mol, b_mol


def set_multiplicity(mol, multiplicity, radical_map=None):
"""
Set the multiplicity of `mol` to `multiplicity` and change radicals as needed
if a `radical_map`, which is an RMG Molecule object with the same atom order, is given,
it'll be used to set radicals (useful if bond orders aren't known for a molecule)
"""
mol.multiplicity = multiplicity
if radical_map is not None:
if not isinstance(radical_map, Molecule):
raise TypeError('radical_map sent to set_multiplicity() has to be a Molecule object. Got {0}'.format(
type(radical_map)))
set_radicals_by_map(mol, radical_map)
radicals = mol.getRadicalCount()
if mol.multiplicity != radicals + 1:
# this is not the trivial "multiplicity = number of radicals + 1" case
# either the number of radicals was not identified correctly from the 3D structure (i.e., should be lone pairs),
# or their spin isn't determined correctly
if mol.multiplicity > radicals + 1:
# there are sites that should have radicals, but were'nt identified as such.
# try adding radicals according to missing valances
add_rads_by_atom_valance(mol)
if mol.multiplicity > radicals + 1:
# still problematic, currently there's no automated solution to this case, raise an error
raise SpeciesError('A multiplicity of {0} was given, but only {1} radicals were identified. '
'Cannot infer 2D graph representation for this species.\nMore info:{2}\n{3}'.format(
mol.multiplicity, radicals, mol.toSMILES(), mol.toAdjacencyList()))
if len(mol.atoms) == 1 and mol.multiplicity == 1 and mol.atoms[0].radicalElectrons == 4:
# This is a singlet atomic C or Si
mol.atoms[0].radicalElectrons = 0
mol.atoms[0].lonePairs = 2
if mol.multiplicity < radicals + 1:
# make sure all cabene and nitrene sites, if exist, have lone pairs rather than two unpaired electrons
for atom in mol.atoms:
if atom.radicalElectrons == 2:
atom.radicalElectrons = 0
atom.lonePairs += 1
# final check: an even number of radicals results in an odd multiplicity, and vice versa
if divmod(mol.multiplicity, 2)[1] == divmod(radicals, 2)[1]:
raise SpeciesError('Number of radicals ({0}) and multiplicity ({1}) for {2} do not match.\n{3}'.format(
radicals, mol.multiplicity, mol.toSMILES(), mol.toAdjacencyList()))


def add_rads_by_atom_valance(mol):
"""
A helper function for assigning radicals if not identified automatically
and missing according to the given multiplicity
We assume here that all partial charges are already set, but this assumption could be wrong
This implementation might also be problematic for aromatic species with undefined bond orders
"""
for atom in mol.atoms:
if atom.isNonHydrogen():
atomic_orbitals = atom.lonePairs + atom.radicalElectrons + atom.getBondOrdersForAtom()
missing_electrons = 4 - atomic_orbitals
if missing_electrons:
atom.radicalElectrons = missing_electrons
# print(mol.toAdjacencyList())


def set_radicals_by_map(mol, radical_map):
"""Set radicals in `mol` by `radical_map`, bot are RMG Molecule objects with the same atom order"""
for i, atom in enumerate(mol.atoms):
if atom.element.number != radical_map.atoms[i].element.number:
raise ValueError('Atom order in mol and radical_map in set_radicals_by_map() do not match. '
'{0} is not {1}.'.format(atom.element.symbol, radical_map.atoms[i].symbol))
atom.radicalElectrons = radical_map.atoms[i].radicalElectrons


def order_atoms_in_mol_list(ref_mol, mol_list):
"""Order the atoms in all molecules of mol_list by the atom order in ref_mol"""
for mol in mol_list:
Expand Down Expand Up @@ -260,6 +333,7 @@ def update_molecule(mol, to_single_bonds=False):
bond = Bond(atom_mapping[atom1], atom_mapping[atom2], bond_order)
new_mol.addBond(bond)
new_mol.updateAtomTypes()
new_mol.multiplicity = mol.multiplicity
return new_mol


Expand Down
24 changes: 24 additions & 0 deletions arc/species/converterTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import unittest

from rmgpy.molecule.molecule import Molecule
from rmgpy.species import Species

import arc.species.converter as converter
from arc.species.species import ARCSpecies
Expand Down Expand Up @@ -97,6 +98,18 @@ def setUpClass(cls):
H 1.1294795781 -0.8708998550 0.7537444446
H 1.4015274689 -0.6230592706 -0.8487058662"""

nh_s_adj = str("""1 N u0 p2 c0 {2,S}
2 H u0 p0 c0 {1,S}""")
nh_s_xyz = str("""N 0.50949998 0.00000000 0.00000000
H -0.50949998 0.00000000 0.00000000""")
cls.spc1 = ARCSpecies(label=str('NH2(S)'), adjlist=nh_s_adj, xyz=nh_s_xyz, multiplicity=1, charge=0)
spc = Species().fromAdjacencyList(nh_s_adj)
cls.spc2 = ARCSpecies(label=str('NH2(S)'), rmg_species=spc, xyz=nh_s_xyz)

cls.spc3 = ARCSpecies(label=str('NCN(S)'), smiles=str('[N]=C=[N]'), multiplicity=1, charge=0)

cls.spc4 = ARCSpecies(label=str('NCN(T)'), smiles=str('[N]=C=[N]'), multiplicity=3, charge=0)

def test_get_xyz_string(self):
"""Test conversion of xyz array to string format"""
xyz_array = [[-0.06618943, -0.12360663, -0.07631983],
Expand Down Expand Up @@ -801,6 +814,17 @@ def test_s_bonds_mol_from_xyz(self):
self.assertEqual(len(mol4.atoms), 24)
self.assertEqual(len(mol5.atoms), 3)

def set_radicals_correctly_from_xyz(self):
"""Test that we determine the number of radicals correctly from given xyz and multiplicity"""
self.assertEqual(self.spc1.multiplicity, 1) # NH(S), a nitrene
self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc1.mol.atoms]))
self.assertEqual(self.spc2.multiplicity, 1) # NH(S), a nitrene
self.assertTrue(all([atom.radicalElectrons == 0 for atom in self.spc2.mol.atoms]))
self.assertEqual(self.spc3.multiplicity, 1) # NCN(S), a singlet birad
self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()]))
self.assertEqual(self.spc3.multiplicity, 3) # NCN(T)
self.assertTrue(all([atom.radicalElectrons == 1 for atom in self.spc3.mol.atoms if atom.isNitrogen()]))


################################################################################

Expand Down
7 changes: 4 additions & 3 deletions arc/species/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,22 +811,23 @@ def mol_from_xyz(self, xyz=None):
if self.mol is not None:
# self.mol should have come from another source, e.g. SMILES or yml
original_mol = self.mol
self.mol = molecules_from_xyz(xyz)[1]
self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1]

if self.mol is not None and not check_isomorphism(original_mol, self.mol):
raise InputError('XYZ and the 2D graph representation of the Molecule are not isomorphic.\n'
'Got xyz:\n{0}\n\nwhich corresponds to {1}\n{2}\n\nand: {3}\n{4}'.format(
xyz, self.mol.toSMILES(), self.mol.toAdjacencyList(),
original_mol.toSMILES(), original_mol.toAdjacencyList()))
else:
self.mol = molecules_from_xyz(xyz)[1]
self.mol = molecules_from_xyz(xyz, multiplicity=self.multiplicity)[1]

if self.mol_list is None:
# Assign atom ids first, so they carry through to the resonance structures
self.mol.assignAtomIDs()
# The generate_resonance_structures method changes atom order
# Make a copy so we don't disturb the original order from xyz
self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False, filter_structures=True)
self.mol_list = self.mol.copy(deep=True).generate_resonance_structures(keep_isomorphic=False,
filter_structures=True)
order_atoms_in_mol_list(ref_mol=self.mol, mol_list=self.mol_list)


Expand Down
Loading

0 comments on commit 11b68e8

Please sign in to comment.