Skip to content

Commit

Permalink
Check for chemkin duplicate reactions only once.
Browse files Browse the repository at this point in the history
Previously, every time we wrote a chemkin file (twice per iteration) we checked
every reaction in it for to see if it was a duplicate, in a method that scaled
with N^2 and ended up taking almost a third of a 7-hour RMG-Py run (See issue #78).

This commit changes it so that reactions being added to the core in an enlarge step
are checked for duplicate status at that time, and the entire set is checked after
adding a seed mechanism or reaction library. This way we should be sure to catch
all the duplicates, but only need to check new reactions once at each iteration, 
instead of *all* reactions twice per iteration.

The saveChemkinFile method can now be called without checking for duplicates.
Hopefully this speeds things up.

If called without the 'checkForDuplicates=False' option, then saveChemkinFile 
still does the check, so that other utilities (eg. for merging reaction sets) 
should not be messed up (nor sped up) by this.
  • Loading branch information
rwest committed Apr 22, 2013
1 parent aeb076a commit 44f742b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 22 deletions.
53 changes: 35 additions & 18 deletions rmgpy/chemkin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,29 +1302,43 @@ def writeKineticsEntry(reaction, speciesList, verbose = True, javaLibrary = Fals

################################################################################

def markDuplicateReaction(test_reaction, reaction_list):
"""
If the test_reaction is a duplicate (in Chemkin terms) of one in reaction_list, then set `duplicate=True` on both instances.
`reaction_list` can be any iterator.
It does not add the testReaction to the reactionList - you probably want to do this yourself afterwards.
"""
reaction1 = test_reaction
for reaction2 in reaction_list:
if reaction1.__class__ != reaction2.__class__:
# TemplateReaction, LibraryReaction, and PDepReaction cannot be
# duplicates of one another.
# RHW question: why can't TemplateReaction be duplicate of LibraryReaction, in Chemkin terms? I guess it shouldn't happen in RMG.
continue
if reaction1.reactants == reaction2.reactants and reaction1.products == reaction2.products:
if reaction1.duplicate and reaction2.duplicate:
continue
else:
if reaction1.kinetics.isPressureDependent() == reaction2.kinetics.isPressureDependent():
# Only mark as duplicate if both reactions are pressure dependent or both are
# not pressure dependent. Do not mark as duplicates otherwise.
logging.warning('Marked reaction {0} as duplicate for saving to Chemkin file.'.format(reaction1))
reaction1.duplicate = True
reaction2.duplicate = True

def markDuplicateReactions(reactions):
"""
For a given list of `reactions`, mark all of the duplicate reactions as
understood by Chemkin.
This is pretty slow (quadratic in size of reactions list) so only call it if you're really worried
you may have undetected duplicate reactions.
"""
for index1 in range(len(reactions)):
reaction1 = reactions[index1]
for index2 in range(index1+1, len(reactions)):
reaction2 = reactions[index2]
if reaction1.__class__ != reaction2.__class__:
# TemplateReaction, LibraryReaction, and PDepReaction cannot be
# duplicates of one another
continue
if reaction1.reactants == reaction2.reactants and reaction1.products == reaction2.products:
if reaction1.duplicate and reaction2.duplicate:
continue
else:
if reaction1.kinetics.isPressureDependent() == reaction2.kinetics.isPressureDependent():
# Only mark as duplicate if both reactions are pressure dependent or both are
# not pressure dependent. Do not mark as duplicates otherwise.
logging.warning('Marked reaction {0} as duplicate for saving to Chemkin file.'.format(reaction1))
reaction1.duplicate = True
reaction2.duplicate = True
remainingList = reactions[index1+1:]
markDuplicateReaction(reaction1, remainingList)


def saveSpeciesDictionary(path, species):
"""
Expand Down Expand Up @@ -1369,13 +1383,16 @@ def saveTransportFile(path, species):
spec.Zrot.value_si,
))

def saveChemkinFile(path, species, reactions, verbose = True):
def saveChemkinFile(path, species, reactions, verbose = True, checkForDuplicates=True):
"""
Save a Chemkin input file to `path` on disk containing the provided lists
of `species` and `reactions`.
If checkForDuplicates is False then we don't check for unlabeled duplicate reactions,
thus saving time (eg. if you are sure you've already labeled them as duplicate).
"""
# Check for duplicate
markDuplicateReactions(reactions)
if checkForDuplicates:
markDuplicateReactions(reactions)

f = open(path, 'w')

Expand Down
34 changes: 30 additions & 4 deletions rmgpy/rmg/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import itertools

from rmgpy.display import display

import rmgpy.chemkin
import rmgpy.constants as constants
from rmgpy.quantity import Quantity
import rmgpy.species
Expand Down Expand Up @@ -654,6 +654,14 @@ def enlarge(self, newObject):
# Recalculate k(T,P) values for modified networks
self.updateUnimolecularReactionNetworks(database)
logging.info('')

# Check new core reactions for Chemkin duplicates
newCoreReactions = self.core.reactions[numOldCoreReactions:]
checkedCoreReactions = self.core.reactions[:numOldCoreReactions]
from rmgpy.chemkin import markDuplicateReaction
for rxn in newCoreReactions:
markDuplicateReaction(rxn, itertools.chain(checkedCoreReactions,self.outputReactionList) )
checkedCoreReactions.append(rxn)

self.printEnlargeSummary(
newCoreSpecies=self.core.species[numOldCoreSpecies:],
Expand Down Expand Up @@ -1168,12 +1176,16 @@ def addSeedMechanismToCore(self, seedMechanism, react=False):
if self.pressureDependence and rxn.isUnimolecular():
# If this is going to be run through pressure dependence code,
# we need to make sure the barrier is positive.
# ...but are Seed Mechanisms run through PDep? Perhaps not.
for spec in itertools.chain(rxn.reactants, rxn.products):
if spec.thermo is None:
spec.generateThermoData(database)
rxn.fixBarrierHeight(forcePositive=True)
self.addReactionToCore(rxn)


# Check we didn't introduce unmarked duplicates
self.markChemkinDuplicates()

self.printEnlargeSummary(
newCoreSpecies=self.core.species[numOldCoreSpecies:],
newCoreReactions=self.core.reactions[numOldCoreReactions:],
Expand Down Expand Up @@ -1260,6 +1272,7 @@ def addReactionLibraryToOutput(self, reactionLib):
else:
rxn.kinetics.comment = ("RMG did not find reaction rate to be high enough to be included in model core.")
self.outputReactionList.append(rxn)
self.markChemkinDuplicates()


def addReactionToUnimolecularNetworks(self, newReaction, newSpecies, network=None):
Expand Down Expand Up @@ -1515,6 +1528,19 @@ def loadSeedMechanism(self, path):
for rxn in seedReactionList:
self.addReactionToCore(rxn)

def markChemkinDuplicates(self):
"""
Check that all reactions that will appear the chemkin output have been checked as duplicates.
Call this if you've done something that may have introduced undetected duplicate reactions,
like add a reaction library or seed mechanism.
Anything added via the :meth:`expand` method should already be detected.
"""
from rmgpy.chemkin import markDuplicateReactions
rxnList = self.core.reactions + self.outputReactionList
markDuplicateReactions(rxnList)


def saveChemkinFile(self, path, verbose_path, dictionaryPath=None):
"""
Save a Chemkin file for the current model core as well as any desired output
Expand All @@ -1523,7 +1549,7 @@ def saveChemkinFile(self, path, verbose_path, dictionaryPath=None):
from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary
speciesList = self.core.species + self.outputSpeciesList
rxnList = self.core.reactions + self.outputReactionList
saveChemkinFile(path, speciesList, rxnList, verbose = False)
saveChemkinFile(verbose_path, speciesList, rxnList, verbose = True)
saveChemkinFile(path, speciesList, rxnList, verbose = False, checkForDuplicates=False) # We should already have marked everything as duplicates by now
saveChemkinFile(verbose_path, speciesList, rxnList, verbose = True, checkForDuplicates=False)
if dictionaryPath:
saveSpeciesDictionary(dictionaryPath, speciesList)

0 comments on commit 44f742b

Please sign in to comment.