Check for chemkin duplicate reactions only once.

Previously, every time we wrote a chemkin file (twice per iteration) we checked every reaction in it for to see if it was a duplicate, in a method that scaled with N^2 and ended up taking almost a third of a 7-hour RMG-Py run (See issue #78). This commit changes it so that reactions being added to the core in an enlarge step are checked for duplicate status at that time, and the entire set is checked after adding a seed mechanism or reaction library. This way we should be sure to catch all the duplicates, but only need to check new reactions once at each iteration, instead of *all* reactions twice per iteration. The saveChemkinFile method can now be called without checking for duplicates. Hopefully this speeds things up. If called without the 'checkForDuplicates=False' option, then saveChemkinFile still does the check, so that other utilities (eg. for merging reaction sets) should not be messed up (nor sped up) by this.
ReactionMechanismGenerator · Apr 22, 2013 · 44f742b · 44f742b
1 parent aeb076a
commit 44f742b
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 22 deletions.
diff --git a/rmgpy/chemkin.py b/rmgpy/chemkin.py
@@ -1302,29 +1302,43 @@ def writeKineticsEntry(reaction, speciesList, verbose = True, javaLibrary = Fals
 
 ################################################################################
 
+def markDuplicateReaction(test_reaction, reaction_list):
+    """
+    If the test_reaction is a duplicate (in Chemkin terms) of one in reaction_list, then set `duplicate=True` on both instances.
+    `reaction_list` can be any iterator.
+    It does not add the testReaction to the reactionList - you probably want to do this yourself afterwards.
+    """
+    reaction1 = test_reaction
+    for reaction2 in reaction_list:
+        if reaction1.__class__ != reaction2.__class__:
+            # TemplateReaction, LibraryReaction, and PDepReaction cannot be
+            # duplicates of one another.
+            # RHW question: why can't TemplateReaction be duplicate of LibraryReaction, in Chemkin terms? I guess it shouldn't happen in RMG.
+            continue
+        if reaction1.reactants == reaction2.reactants and reaction1.products == reaction2.products:
+            if reaction1.duplicate and reaction2.duplicate:
+                continue
+            else:
+                if reaction1.kinetics.isPressureDependent() == reaction2.kinetics.isPressureDependent():
+                    # Only mark as duplicate if both reactions are pressure dependent or both are
+                    # not pressure dependent.  Do not mark as duplicates otherwise.
+                    logging.warning('Marked reaction {0} as duplicate for saving to Chemkin file.'.format(reaction1))
+                    reaction1.duplicate = True
+                    reaction2.duplicate = True
+
 def markDuplicateReactions(reactions):
     """
     For a given list of `reactions`, mark all of the duplicate reactions as
     understood by Chemkin.
+    
+    This is pretty slow (quadratic in size of reactions list) so only call it if you're really worried
+    you may have undetected duplicate reactions.
     """
     for index1 in range(len(reactions)):
         reaction1 = reactions[index1]
-        for index2 in range(index1+1, len(reactions)):
-            reaction2 = reactions[index2]
-            if reaction1.__class__ != reaction2.__class__:
-                # TemplateReaction, LibraryReaction, and PDepReaction cannot be
-                # duplicates of one another
-                continue
-            if reaction1.reactants == reaction2.reactants and reaction1.products == reaction2.products:
-                if reaction1.duplicate and reaction2.duplicate:
-                    continue
-                else:
-                    if reaction1.kinetics.isPressureDependent() == reaction2.kinetics.isPressureDependent():
-                        # Only mark as duplicate if both reactions are pressure dependent or both are
-                        # not pressure dependent.  Do not mark as duplicates otherwise.
-                        logging.warning('Marked reaction {0} as duplicate for saving to Chemkin file.'.format(reaction1))
-                        reaction1.duplicate = True
-                        reaction2.duplicate = True     
+        remainingList = reactions[index1+1:]
+        markDuplicateReaction(reaction1, remainingList)
+
 
 def saveSpeciesDictionary(path, species):
     """
@@ -1369,13 +1383,16 @@ def saveTransportFile(path, species):
                 spec.Zrot.value_si,
             ))
 
-def saveChemkinFile(path, species, reactions, verbose = True):
+def saveChemkinFile(path, species, reactions, verbose = True, checkForDuplicates=True):
     """
     Save a Chemkin input file to `path` on disk containing the provided lists
     of `species` and `reactions`.
+    If checkForDuplicates is False then we don't check for unlabeled duplicate reactions,
+    thus saving time (eg. if you are sure you've already labeled them as duplicate).
     """
     # Check for duplicate
-    markDuplicateReactions(reactions)
+    if checkForDuplicates:
+        markDuplicateReactions(reactions)
 
     f = open(path, 'w')
 

diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py
@@ -39,7 +39,7 @@
 import itertools
 
 from rmgpy.display import display
-
+import rmgpy.chemkin
 import rmgpy.constants as constants
 from rmgpy.quantity import Quantity
 import rmgpy.species
@@ -654,6 +654,14 @@ def enlarge(self, newObject):
             # Recalculate k(T,P) values for modified networks
             self.updateUnimolecularReactionNetworks(database)
             logging.info('')
+
+        # Check new core reactions for Chemkin duplicates
+        newCoreReactions = self.core.reactions[numOldCoreReactions:]
+        checkedCoreReactions = self.core.reactions[:numOldCoreReactions]
+        from rmgpy.chemkin import markDuplicateReaction
+        for rxn in newCoreReactions:
+            markDuplicateReaction(rxn, itertools.chain(checkedCoreReactions,self.outputReactionList) )
+            checkedCoreReactions.append(rxn)
 
         self.printEnlargeSummary(
             newCoreSpecies=self.core.species[numOldCoreSpecies:],
@@ -1168,12 +1176,16 @@ def addSeedMechanismToCore(self, seedMechanism, react=False):
             if self.pressureDependence and rxn.isUnimolecular():
                 # If this is going to be run through pressure dependence code,
                 # we need to make sure the barrier is positive.
+                # ...but are Seed Mechanisms run through PDep? Perhaps not.
                 for spec in itertools.chain(rxn.reactants, rxn.products):
                     if spec.thermo is None:
                         spec.generateThermoData(database)
                 rxn.fixBarrierHeight(forcePositive=True)
             self.addReactionToCore(rxn)
-
+
+        # Check we didn't introduce unmarked duplicates
+        self.markChemkinDuplicates()
+
         self.printEnlargeSummary(
             newCoreSpecies=self.core.species[numOldCoreSpecies:],
             newCoreReactions=self.core.reactions[numOldCoreReactions:],
@@ -1260,6 +1272,7 @@ def addReactionLibraryToOutput(self, reactionLib):
             else:
                 rxn.kinetics.comment = ("RMG did not find reaction rate to be high enough to be included in model core.")
                 self.outputReactionList.append(rxn)
+        self.markChemkinDuplicates()
 
 
     def addReactionToUnimolecularNetworks(self, newReaction, newSpecies, network=None):
@@ -1515,6 +1528,19 @@ def loadSeedMechanism(self, path):
         for rxn in seedReactionList:
             self.addReactionToCore(rxn)
 
+    def markChemkinDuplicates(self):
+        """
+        Check that all reactions that will appear the chemkin output have been checked as duplicates.
+        
+        Call this if you've done something that may have introduced undetected duplicate reactions,
+        like add a reaction library or seed mechanism.
+        Anything added via the :meth:`expand` method should already be detected.
+        """
+        from rmgpy.chemkin import markDuplicateReactions
+        rxnList = self.core.reactions + self.outputReactionList
+        markDuplicateReactions(rxnList)
+
+
     def saveChemkinFile(self, path, verbose_path, dictionaryPath=None):
         """
         Save a Chemkin file for the current model core as well as any desired output
@@ -1523,7 +1549,7 @@ def saveChemkinFile(self, path, verbose_path, dictionaryPath=None):
         from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary
         speciesList = self.core.species + self.outputSpeciesList
         rxnList = self.core.reactions + self.outputReactionList
-        saveChemkinFile(path, speciesList, rxnList, verbose = False)
-        saveChemkinFile(verbose_path, speciesList, rxnList, verbose = True)
+        saveChemkinFile(path, speciesList, rxnList, verbose = False, checkForDuplicates=False) # We should already have marked everything as duplicates by now
+        saveChemkinFile(verbose_path, speciesList, rxnList, verbose = True, checkForDuplicates=False)
         if dictionaryPath:
             saveSpeciesDictionary(dictionaryPath, speciesList)