Skip to content

Commit

Permalink
Merge pull request #196 from nyee/rebaseDatabaseTest
Browse files Browse the repository at this point in the history
Rebase database test
  • Loading branch information
connie committed Mar 31, 2014
2 parents abf0c74 + bd42226 commit 57fdfaa
Show file tree
Hide file tree
Showing 4 changed files with 312 additions and 38 deletions.
160 changes: 124 additions & 36 deletions rmgpy/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ def descendants(self, node):
descendants.extend(self.descendants(child))
return descendants

def isWellFormed(self):
def checkWellFormed(self):
"""
Return :data:`True` if the database is well-formed. A well-formed
database has an entry in the dictionary for every entry in the tree, and
Expand All @@ -797,47 +797,123 @@ def isWellFormed(self):
nodes in the tree, if the tree is present; this is for databases with
multiple trees, e.g. the kinetics databases.
"""

wellFormed = True


from rmgpy.data.kinetics.family import KineticsFamily

#list of nodes that are not wellFormed
noGroup=[]
noMatchingGroup={}
notInTree=[]
notSubgroup=[]
probablyProduct=[]

# Give correct arguments for each type of database
if isinstance(self, KineticsFamily):
library=self.rules.entries
groups=self.groups.entries
treeIsPresent=True
topNodes=self.getRootTemplate()

# Make list of all nodes in library
libraryNodes = []
for nodes in self.library:
libraryNodes.extend(nodes.split(';'))
libraryNodes = list(set(libraryNodes))
libraryNodes=[]
libraryNodesSplit = []
for nodes in library:
libraryNodes.append(nodes)
libraryNodesSplit.extend(nodes.split(';'))
libraryNodesSplit = list(set(libraryNodesSplit))



for node in libraryNodes:
try:
for node in libraryNodesSplit:

# All nodes in library must be in dictionary
try:
if node not in self.entries:
raise DatabaseError('Node "{0}" in library is not present in dictionary.'.format(node))
except DatabaseError, e:
wellFormed = False
logging.error(str(e))

# If a tree is present, all nodes in library should be in tree
# (Technically the database is still well-formed, but let's warn
# the user anyway
if len(self.tree.parent) > 0:
try:
if node not in self.tree.parent:
raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node))
except DatabaseError, e:
logging.warning(str(e))

# If a tree is present, all nodes in tree must be in dictionary
if self.tree is not None:
for node in self.tree.parent:
try:
if node not in self.entries:
raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node))
except DatabaseError, e:
wellFormed = False
logging.error(str(e))

return wellFormed
if node not in groups:
noGroup.append(node)

#no point checking in tree if it doesn't even exist in groups
for libraryNode in libraryNodes:
nodes=libraryNode.split(';')
for libraryEntry in library[libraryNode]:
for node in nodes:
for libraryGroup in libraryEntry.item.reactants:
try:
if groups[node].item.isIsomorphic(libraryGroup):
break
except AttributeError:
if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr):
if groups[node].item==libraryGroup:
break
except TypeError:
print libraryGroup, type(libraryGroup)
except KeyError:
noGroup.append(node)
else:
noMatchingGroup[node]=libraryNode

if treeIsPresent:
# All nodes need to be in the tree
# This is true when ascending through parents leads to a top node
for nodeName in groups:
ascendParent=self.groups.entries[nodeName]

while ascendParent not in topNodes:
child=ascendParent
ascendParent=ascendParent.parent
if ascendParent is None or child not in ascendParent.children:
if child.index==-1:
probablyProduct.append(child.label)
break
else:
# If a group is not in a tree, we want to save the uppermost parent, not necessarily the original node
notInTree.append(child.label)
break
#check if child is actually subgroup of parent
ascendParent=self.groups.entries[nodeName].parent
if ascendParent is not None:
try:
if not ascendParent.item.isSubgraphIsomorphic(self.groups.entries[nodeName].item):
notSubgroup.append(nodeName)
except AttributeError:
if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr):
if groups[node].item==libraryGroup:
break
except TypeError:
print libraryGroup, type(libraryGroup)
# The adj list of each node actually needs to be subset of its parent's adjlist
#More to come later -nyee
except DatabaseError, e:
logging.error(str(e))

# # If a tree is present, all nodes in library should be in tree
# # (Technically the database is still well-formed, but let's warn
# # the user anyway
# if len(self.tree.parent) > 0:
# try:
# if node not in self.tree.parent:
# raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node))
# except DatabaseError, e:
# logging.warning(str(e))
#
# # If a tree is present, all nodes in tree must be in dictionary
# if self.tree is not None:
# for node in self.tree.parent:
# try:
# if node not in self.entries:
# raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node))
# except DatabaseError, e:
# wellFormed = False
# logging.error(str(e))

# for libraryRule in library:
#check the groups

#eliminate duplicates
noGroup=list(set(noGroup))
notInTree=list(set(notInTree))

return (noGroup, noMatchingGroup, notInTree, notSubgroup, probablyProduct)

def matchNodeToStructure(self, node, structure, atoms):
"""
Expand Down Expand Up @@ -988,6 +1064,18 @@ def matchToStructure(self,database,structure,atoms):
return True != self.invert
return False != self.invert

def matchToLogicOr(self, other):
    """
    Return :data:`True` if `other` is the same LogicOr group as `self`.

    Two LogicOr nodes are considered the same when they hold the same number
    of components and every component of each one also appears in the other.
    The order of the components does not matter.
    """
    ownComponents = self.components
    otherComponents = other.components
    if len(ownComponents) != len(otherComponents):
        return False
    # Test membership in both directions. A one-way test is fooled by repeated
    # components (e.g. ['X', 'X'] would "match" ['X', 'Y']) and makes the
    # answer depend on which node the method is called on.
    return (all(component in otherComponents for component in ownComponents)
            and all(component in ownComponents for component in otherComponents))

def getPossibleStructures(self, entries):
"""
Return a list of the possible structures below this node.
Expand Down
147 changes: 146 additions & 1 deletion rmgpy/data/kinetics/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from copy import copy, deepcopy

from rmgpy.data.base import Database, Entry, LogicNode, LogicOr, ForbiddenStructures,\
ForbiddenStructureException, getAllCombinations
ForbiddenStructureException, getAllCombinations, DatabaseError
from rmgpy.reaction import Reaction
from rmgpy.kinetics import Arrhenius, ArrheniusEP, ThirdBody, Lindemann, Troe, \
PDepArrhenius, MultiArrhenius, MultiPDepArrhenius, \
Expand Down Expand Up @@ -1941,3 +1941,148 @@ def getRateCoefficientUnits(self):
return 's^-1'
else:
raise ValueError('Unable to determine units of rate coefficient for reaction family "{0}".'.format(self.label))

def checkWellFormed(self):
    """
    Check the rules and groups of this family for malformed entries.

    Returns the tuple ``(noGroup, noMatchingGroup, notInTree, notUnique,
    notSubgroup, probablyProduct)`` where:

    * `noGroup` is a list (without duplicates) of group labels that are
      named in a rule label (``group1;group2;...``) but have no entry in
      the groups.
    * `noMatchingGroup` is a dictionary with rule labels as keys and, as
      values, lists of the group labels in that rule label for which none
      of the rule's reactants matches the group of the same name
      (isomorphic for adjacency lists, same components for LogicOr nodes).
    * `notInTree` is a list (without duplicates) of group labels that are
      not connected to a top node. When ascending from a group hits a
      broken link, the uppermost entry reached is recorded, not
      necessarily the group the ascent started from.
    * `notUnique` is a dictionary with group labels as keys and, as values,
      lists of labels of other groups that are identical to the key group.
    * `notSubgroup` is a dictionary with group labels as keys and the label
      of the parent as value, for every group that is not a true child of
      that parent.
    * `probablyProduct` is a list of group labels that do not appear in
      the tree but have an index of -1. These are probably products (as
      opposed to reactants) created while loading the database, so they
      are not necessarily malformed; they are listed separately because
      their origin is uncertain.
    """
    noGroup = []
    noMatchingGroup = {}
    tempNoMatchingGroup = {}
    notInTree = []
    notUnique = {}
    notSubgroup = {}
    probablyProduct = []

    library = self.rules.entries
    groups = self.groups.entries
    # Shallow copy that shrinks as groups are visited, so each pair of groups
    # is compared for uniqueness only once.
    groupsCopy = copy(groups)
    topNodes = self.getRootTemplate()

    try:
        # Each label in the rules should be of the form group1;group2;group3
        # and each of those groups must appear in the groups.
        for libraryNode in library:
            nodes = libraryNode.split(';')
            for libraryEntry in library[libraryNode]:
                for nodeName in nodes:
                    if nodeName not in groups:
                        # Without a group entry the adjacency lists cannot
                        # be compared, so skip the rest of the check.
                        noGroup.append(nodeName)
                        continue
                    groupItem = groups[nodeName].item
                    # Each adjacency list in the rules should match the
                    # adjacency list of the group with the same name.
                    for libraryGroup in libraryEntry.item.reactants:
                        if isinstance(groupItem, Group) and isinstance(libraryGroup, Group):
                            if groupItem.isIsomorphic(libraryGroup):
                                break
                        elif isinstance(groupItem, LogicOr) and isinstance(libraryGroup, LogicOr):
                            if groupItem.matchToLogicOr(libraryGroup):
                                break
                    else:
                        # The loop finished without a break: no reactant
                        # of this rule matches the group.
                        tempNoMatchingGroup.setdefault(libraryNode, []).append(nodeName)

        # Eliminate duplicates once, after all rules have been examined.
        for key, nodeList in tempNoMatchingGroup.items():
            noMatchingGroup[key] = list(set(nodeList))

        for nodeName in groups:
            nodeGroup = groups[nodeName]
            nodeGroupItem = nodeGroup.item

            # Each group should appear in the tree. This is true when
            # ascending through the parents leads to a top node.
            ascendParent = nodeGroup
            while ascendParent not in topNodes:
                child = ascendParent
                ascendParent = ascendParent.parent
                if ascendParent is None or child not in ascendParent.children:
                    # Broken link: record the uppermost entry reached.
                    if child.index == -1:
                        probablyProduct.append(child.label)
                    else:
                        notInTree.append(child.label)
                    break

            # Each group should also be unique.
            del groupsCopy[nodeName]
            for nodeName2 in groupsCopy:
                nodeGroup2Item = groups[nodeName2].item
                if isinstance(nodeGroup2Item, Group) and isinstance(nodeGroupItem, Group):
                    if nodeGroupItem.isIdentical(nodeGroup2Item):
                        notUnique.setdefault(nodeName, []).append(nodeName2)
                if isinstance(nodeGroup2Item, LogicOr) and isinstance(nodeGroupItem, LogicOr):
                    if nodeGroupItem.matchToLogicOr(nodeGroup2Item):
                        notUnique.setdefault(nodeName, []).append(nodeName2)

            # For a correct child-parent relationship the child must be a
            # true child of its parent. Top nodes have no parent to check.
            nodeParent = nodeGroup.parent
            if nodeParent is None:
                continue
            if isinstance(nodeParent.item, LogicOr):
                if nodeGroup.label not in nodeParent.item.components:
                    # The child is not listed in the parent LogicOr.
                    notSubgroup[nodeName] = nodeParent.label
                    continue
                # Keep ascending until a non-LogicOr parent is found or the
                # chain ends (malformed tree, or ascending past the top).
                while isinstance(nodeParent.item, LogicOr):
                    nodeParent = nodeParent.parent
                    if nodeParent is None:
                        break
                if nodeParent is None:
                    continue
            elif isinstance(nodeGroup.item, LogicOr):
                # Same output as the former print statement (which put a
                # separating space before the leading space of the text).
                print('{0}  is an intermediate LogicOr. See if it can be replaced with a adj list.'.format(nodeGroup))
                continue
            # NOTE(review): a LogicOr child found under a LogicOr parent also
            # reaches this call; confirm that its item supports
            # isSubgraphIsomorphic.
            if not nodeGroup.item.isSubgraphIsomorphic(nodeParent.item):
                notSubgroup[nodeName] = nodeParent.label
    except DatabaseError as e:
        logging.error(str(e))

    # Eliminate duplicates.
    noGroup = list(set(noGroup))
    notInTree = list(set(notInTree))

    return (noGroup, noMatchingGroup, notInTree, notUnique, notSubgroup, probablyProduct)
2 changes: 2 additions & 0 deletions rmgpy/molecule/group.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,5 @@ cdef class Group(Graph):
# Declared with an explicit error return value (`except -2`); `initialMap`
# is an optional argument.
cpdef bint isSubgraphIsomorphic(self, Graph other, dict initialMap=?) except -2

# Declared to return a list; `initialMap` is an optional argument.
cpdef list findSubgraphIsomorphisms(self, Graph other, dict initialMap=?)

# NOTE(review): unlike isSubgraphIsomorphic above, this bint-returning
# declaration has no `except` clause. Confirm against the .pyx definition
# whether exceptions raised inside isIdentical are meant to propagate.
cpdef bint isIdentical(self, Graph other)
Loading

0 comments on commit 57fdfaa

Please sign in to comment.