In [1]:
import numpy as np
import sys
from __future__ import division #to have float division
import copy

In [2]:
def readInput(filename):
    f = open(filename)
    triples = []
    entityToIntDict = {}
    relationToIntDict = {}
    intToEntityDict = {}
    intToRelationDict = {}
    
    numTriples = sum(1 for triple in f)/2
    f = open(filename)
    
    # Number of relations are the number of triples
    # We add rows for each entity node as we see it
    
    outboundAdjacencyMatrix = np.zeros((0, numTriples))
    inboundAdjacencyMatrix = np.zeros((0, numTriples))
        
    for counter, triple in enumerate(f):
        # We skip every odd number
#         print counter
        if triple[0] == '_':
            continue
        realCounter = np.floor(counter/2)
#         print "realCounter: ", realCounter
        split = triple.split()
        firstEntity = split[1][2:]
        secondEntity = split[2][2:]
        relation = split[0]+"_"+ firstEntity +"_" + secondEntity
#         print relation
        triples.append(split)
        
        # Add it to the sets and create the graph nodes
        relationToIntDict[relation] = realCounter
        intToRelationDict[realCounter] = relation
        
        # if it's not there, add it
        
        if (firstEntity not in entityToIntDict):
            outboundAdjacencyMatrix = np.vstack((outboundAdjacencyMatrix, np.zeros((1, numTriples))))
            inboundAdjacencyMatrix = np.vstack((inboundAdjacencyMatrix, np.zeros((1, numTriples))))
            length = len(outboundAdjacencyMatrix) - 1
            # its assignment is simply the length of the matrices 
            entityToIntDict[firstEntity] = length
            intToEntityDict[length] = firstEntity
                                                
        # same for second entity
        if (secondEntity not in entityToIntDict):
            outboundAdjacencyMatrix = np.vstack((outboundAdjacencyMatrix, np.zeros((1, numTriples))))
            inboundAdjacencyMatrix = np.vstack((inboundAdjacencyMatrix, np.zeros((1, numTriples))))
            length = len(outboundAdjacencyMatrix) - 1
            # its assignment is simply the length of the matrices 
            entityToIntDict[secondEntity] = length
            intToEntityDict[length] = secondEntity
             
        
        numFirstEntity = entityToIntDict[firstEntity]
        numSecondEntity = entityToIntDict[secondEntity]
        
        # the current relation is counter
        outboundAdjacencyMatrix[numFirstEntity][realCounter] = 1
        inboundAdjacencyMatrix[numSecondEntity][realCounter] = 1
#         print outboundAdjacencyMatrix
#         print inboundAdjacencyMatrix
#         print ""
        
        
    return [triples, entityToIntDict, relationToIntDict, intToEntityDict, intToRelationDict, outboundAdjacencyMatrix, inboundAdjacencyMatrix]

In [18]:
# We have an entity-relation graph modelled after a set of entity-relation triplets (e_1, r, e_2).
# We have N nodes, N_e entity nodes and N_r relation nodes.
# We have two 2d lists of communities, where each row is a community that contains the nodes in that community.
# One matrix is the K_e matrix for entity communities, and one is the K_r matrix for relation communities.

def initialize(outboundAdjacencyMatrix, inboundAdjacencyMatrix, numEntityNodes, numRelationNodes):
#     numEntityNodes = 6;
#     numRelationNodes = 5;
    numTotalNodes = numEntityNodes + numRelationNodes;
#     outboundAdjacencyMatrix = np.zeros((numEntityNodes, numRelationNodes)) # cell (i,j) = 1 if edge, 0 otherwise
#     inboundAdjacencyMatrix = np.zeros((numEntityNodes, numRelationNodes)) # cell (i,j) = 1 if edge, 0 otherwise

    K_entity = 2;
    K_relation = 2;
    
    # cell (i,j) = 1 if node j belongs to community i. It is 0 else
    commEntityMatrix = np.zeros((K_entity, numEntityNodes)) #Matrix of entity communities.
    commRelationMatrix = np.zeros((K_relation, numRelationNodes)) #Matrix of relation communities
    
    commsByEntity = np.zeros(K_entity) #Index i of this array stores the community that entity i belongs to
    commsByRelation = np.zeros(K_relation) #Index i of this array stores the community that relation i belongs to
    
    entityCommOutboundEdges = np.zeros((K_entity, K_relation))
    entityCommInboundEdges = np.zeros((K_entity, K_relation))
    relationCommOutboundEdges = np.zeros((K_relation, K_entity))
    relationCommInboundEdges = np.zeros((K_relation, K_entity))
    
    # Randomly assign a node to one of the communities
    # FOR TESTING ONLY
#     commsByEntity = np.array([0,0,1,0,0,1])
#     commsByRelation = np.array([0,0,1,0,1])
    commsByEntity = np.random.randint(0, K_entity, numEntityNodes)
    commsByRelation = np.random.randint(0, K_relation, numRelationNodes)
    
#     # FOR TESTING ONLY FOR TESTING
#     outboundAdjacencyMatrix = np.array([[1,0,0,0,0],[0,1,0,0,0],[0,0,1,0,0],[0,0,0,1,0],[0,0,0,0,1],[0,0,0,0,0]])
#     inboundAdjacencyMatrix = np.array([[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0],[1,0,0,0,0],[0,1,0,1,0],[0,0,1,0,1]])

    #CREATE THE COMMUNITY MATRICES
    for entityNode in xrange(numEntityNodes):
        commEntityMatrix[commsByEntity[entityNode]][entityNode] = 1
    for relationNode in xrange(numRelationNodes):
        commRelationMatrix[commsByRelation[relationNode]][relationNode] = 1
    
    # Store the inboundEdges and outboundEdges for each community
    for entityNode in xrange(numEntityNodes):
        entityComm = commsByEntity[entityNode]
        curOutboundEdges = outboundAdjacencyMatrix[entityNode]
        curInboundEdges = inboundAdjacencyMatrix[entityNode]
        for relationNode in xrange(numRelationNodes):
            relComm = commsByRelation[relationNode]
            
            # If there is an outbound edge to that relation node, update the outbound/inbound edges on both sides
            if curOutboundEdges[relationNode] == 1:
                entityCommOutboundEdges[entityComm][relComm] += 1
                relationCommInboundEdges[relComm][entityComm] += 1
            if curInboundEdges[relationNode] == 1:
                entityCommInboundEdges[entityComm][relComm] += 1
                relationCommOutboundEdges[relComm][entityComm] += 1
    
    return [numEntityNodes, numRelationNodes, numTotalNodes,
           K_entity, K_relation, commEntityMatrix, commRelationMatrix, entityCommOutboundEdges, 
           entityCommInboundEdges, relationCommOutboundEdges, relationCommInboundEdges,
           commsByEntity, commsByRelation]

In [None]:
# Randomize initial communities
def initializeAssignments():
    # Randomly assign a node to one of the communities
    commsByEntity = np.random.randint(0, K_entity, numEntityNodes)
    commsByRelation = np.random.randint(0, K_relation, numRelationNodes)
    
    # Store the inboundEdges and outboundEdges for each community
    for entityNode in xrange(numEntityNodes):
        entityComm = commsByEntity[entityNode]
        curOutboundEdges = outboundAdjacencyMatrix[entityNode]
        curInboundEdges = inboundAdjacencyMatrix[entityNode]
        for relationNode in xrange(numRelationNodes):
            relComm = commsByRelation[relationNode]
            
            # If there is an outbound edge to that relation node, update the outbound/inbound edges on both sides
            if curOutboundEdges[relationNode] == 1:
                entityCommOutboundEdges[entityComm][relComm] += 1
                relationCommInboundEdges[relComm][entityComm] += 1
            if curInboundEdges[relationNode] == 1:
                entityCommInboundEdges[entityComm][relComm] += 1
                relationCommOutboundEdges[relComm][entityComm] += 1

In [9]:
# Define the algorithm for one iteration
def algoIteration(numEntityNodes, numRelationNodes, numTotalNodes, outboundAdjacencyMatrix, inboundAdjacencyMatrix, 
               K_entity, K_relation, commEntityMatrix, commRelationMatrix, entityCommOutboundEdges,
               entityCommInboundEdges, relationCommOutboundEdges, relationCommInboundEdges,commsByEntity, 
               commsByRelation, bestStateCommsByEntity, bestStateCommsByRelation, bestStateTotalPenalty):
    
#     print "BestStateTotalPenalty: ", bestStateTotalPenalty
#     print "bestStateCommsByEntity: ", bestStateCommsByEntity
#     print "bestStateCommsByRelation: ", bestStateCommsByRelation
    
    
    # calculate entity node penalties
    totalEntityPenalty = 0
    maxEntityPenalty = -sys.maxint - 1
    entityNodeToChange = -1
    changeEntityNodeOutboundEdges = np.zeros(K_relation)
    changeEntityNodeInboundEdges = np.zeros(K_relation)
    
    # Calculate outbound edges and inbound edges for this node
    for entityNode in xrange(numEntityNodes):
#         print "curEntityNode: ", entityNode
        
        curNodeOutboundEdges = np.zeros(K_relation)
        curNodeInboundEdges = np.zeros(K_relation)
        
        # We calculate its outbound and inbound edges to all relation communities
        outboundEdges = outboundAdjacencyMatrix[entityNode]
        for relationNode in xrange(numRelationNodes):
            outboundComm = commsByRelation[relationNode]
            if outboundEdges[relationNode] == 1:
                curNodeOutboundEdges[outboundComm] += 1
        
        inboundEdges = inboundAdjacencyMatrix[entityNode]
        for relationNode in xrange(numRelationNodes):
            inboundComm = commsByRelation[relationNode]
            if inboundEdges[relationNode] == 1:
                curNodeInboundEdges[inboundComm] += 1
            
#         print "outboundEdges: ", curNodeOutboundEdges
#         print "inboundEdges: ", curNodeInboundEdges
        
        # Calculate the penalty for this entity node
        # Its current community is stored in commsByEntity
        curComm = commsByEntity[entityNode]
        curCommOutboundEdges = entityCommOutboundEdges[curComm]
        curCommInboundEdges = entityCommInboundEdges[curComm]
        numNodesInComm = np.sum(commEntityMatrix[curComm]) + 1 #We add +1 in case there are 0 nodes in this comm
#         print "curComm: ", curComm
#         print "curCommOutboundEdges: ", curCommOutboundEdges
#         print "curCommInboundEdges: ", curCommInboundEdges
#         print "numNodesInComm: ", numNodesInComm
        
        # The penalty is the sum of squared differences between the mean outbound of this community and
        # current node outbound plus mean inbound minus current node inbound
        outboundDiff = np.sum(np.square(curCommOutboundEdges/numNodesInComm - curNodeOutboundEdges))
        inboundDiff = np.sum(np.square(curCommInboundEdges/numNodesInComm - curNodeInboundEdges))
#         print "outboundDiff: ", outboundDiff
#         print "inboundDiff: ", inboundDiff
        curPenalty = outboundDiff + inboundDiff
        
#         print "curPenalty", curPenalty
        totalEntityPenalty += curPenalty
        
        if curPenalty > maxEntityPenalty:
            maxEntityPenalty = curPenalty
            entityNodeToChange = entityNode
            changeEntityNodeOutboundEdges = curNodeOutboundEdges
            changeEntityNodeInboundEdges = curNodeInboundEdges
            
    # Figure out where to change the node, but DON'T change it yet
    
#     print "maxEntityPenalty ", maxEntityPenalty
#     print "entityNodeToChange ", entityNodeToChange
    
    newCommForEntityBoolean = False
    minEntityPenalty = sys.maxint
    newCommForEntity = -1
    for curComm in xrange(K_entity):
        curCommOutboundEdges = entityCommOutboundEdges[curComm]
        curCommInboundEdges = entityCommInboundEdges[curComm]
        numNodesInComm = np.sum(commEntityMatrix[curComm]) + 1
#         print "curComm: ", curComm
#         print "curCommOutboundEdges: ", curCommOutboundEdges
#         print "curCommInboundEdges: ", curCommInboundEdges
#         print "numNodesInComm: ", numNodesInComm
        
        # The penalty is the sum of squared differences between the mean outbound of this community and
        # current node outbound plus mean inbound minus current node inbound
        outboundDiff = np.sum(np.square(curCommOutboundEdges/numNodesInComm - changeEntityNodeOutboundEdges))
        inboundDiff = np.sum(np.square(curCommInboundEdges/numNodesInComm - changeEntityNodeInboundEdges))
#         print "outboundDiffMoving: ", outboundDiff
#         print "inboundDiffMoving: ", inboundDiff
#         print "curPenaltyMoving, ", (outboundDiff + inboundDiff)
        
        if (outboundDiff + inboundDiff) < minEntityPenalty:
            minEntityPenalty = outboundDiff + inboundDiff
            newCommForEntity = curComm
    # Check if it is actually a new community for this entity node
    if not newCommForEntity == commsByEntity[entityNodeToChange]:
        newCommForEntityBoolean = True
        
#     print "minEntityPenalty ", minEntityPenalty
#     print "newCommForEntity, ", newCommForEntity
#     print "newCommForEntityBoolean: ", newCommForEntityBoolean
    
#     print ""
#     print ""
#     print ""
    
    # calculate relation node penalties
    totalRelationPenalty = 0
    maxRelationPenalty = -sys.maxint - 1
    relationNodeToChange = -1
    changeRelationNodeOutboundEdges = np.zeros(K_entity)
    changeRelationNodeInboundEdges = np.zeros(K_entity)
    
    # Calculate outbound edges and inbound edges for this node
    for relationNode in xrange(numRelationNodes):     
#         print "curRelationNode: ", relationNode
        curNodeOutboundEdges = np.zeros(K_entity)
        curNodeInboundEdges = np.zeros(K_entity)
        
        # We calculate its outbound and inbound edges to all relation communities
        # Note that the outbound for a relation is the inbound for an entity
        outboundEdges = inboundAdjacencyMatrix.T[relationNode]
        for entityNode in xrange(numEntityNodes):
            outboundComm = commsByEntity[entityNode]
            if outboundEdges[entityNode] == 1:
                curNodeOutboundEdges[outboundComm] += 1
        
        inboundEdges = outboundAdjacencyMatrix.T[relationNode]
        for entityNode in xrange(numEntityNodes):
            inboundComm = commsByEntity[relationNode]
            if inboundEdges[entityNode] == 1:
                curNodeInboundEdges[inboundComm] += 1
        
#         print "outboundEdges: ", curNodeOutboundEdges
#         print "inboundEdges: ", curNodeInboundEdges
        
        # Calculate the penalty for this entity node
        # Its current community is stored in commsByEntity
        curComm = commsByRelation[relationNode]
        curCommOutboundEdges = relationCommOutboundEdges[curComm]
        curCommInboundEdges = relationCommInboundEdges[curComm]
        numNodesInComm = np.sum(commRelationMatrix[curComm]) + 1
#         print "curComm: ", curComm
#         print "curCommOutboundEdges: ", curCommOutboundEdges
#         print "curCommInboundEdges: ", curCommInboundEdges
#         print "numNodesInComm: ", numNodesInComm
        
        # The penalty is the sum of squared differences between the mean outbound of this community and
        # current node outbound plus mean inbound minus current node inbound
        outboundDiff = np.sum(np.square(curCommOutboundEdges/numNodesInComm - curNodeOutboundEdges))
        inboundDiff = np.sum(np.square(curCommInboundEdges/numNodesInComm - curNodeInboundEdges))
#         print "outboundDiff: ", outboundDiff
#         print "inboundDiff: ", inboundDiff
        curPenalty = outboundDiff + inboundDiff
#         print "curPenalty", curPenalty
        totalRelationPenalty += curPenalty
        
        if curPenalty > maxRelationPenalty:
            maxRelationPenalty = curPenalty
            relationNodeToChange = relationNode
            changeRelationNodeOutboundEdges = curNodeOutboundEdges
            changeRelationNodeInboundEdges = curNodeInboundEdges
          
#     print "maxRelationPenalty ", maxRelationPenalty
#     print "relationNodeToChange ", relationNodeToChange
    
    # Figure out where to move the worst relation node
    newCommForRelationBoolean = False
    minRelationPenalty = sys.maxint
    newCommForRelation = -1
    for curComm in xrange(K_relation):
        curCommOutboundEdges = relationCommOutboundEdges[curComm]
        currCommInboundEdges = relationCommInboundEdges[curComm]
        numNodesInComm = np.sum(commRelationMatrix[curComm]) + 1
#         print "curComm: ", curComm
#         print "curCommOutboundEdges: ", curCommOutboundEdges
#         print "curCommInboundEdges: ", curCommInboundEdges
#         print "numNodesInComm: ", numNodesInComm
        
        # The penalty is the sum of squared differences between the mean outbound of this community and
        # current node outbound plus mean inbound minus current node inbound
        outboundDiff = np.sum(np.square(curCommOutboundEdges/numNodesInComm - changeRelationNodeOutboundEdges))
        inboundDiff = np.sum(np.square(curCommInboundEdges/numNodesInComm - changeRelationNodeInboundEdges))
#         print "outboundDiffMoving: ", outboundDiff
#         print "inboundDiffMoving: ", inboundDiff
#         print "curPenaltyMoving, ", (outboundDiff + inboundDiff)
        if (outboundDiff + inboundDiff) < minRelationPenalty:
            minRelationPenalty = outboundDiff + inboundDiff
            newCommForRelation = curComm
    # Check if it is actually a new community for this entity node
    if not newCommForRelation == commsByRelation[relationNodeToChange]:
        newCommForRelationBoolean = True
    
#     print "minRelationPenalty ", minRelationPenalty
#     print "newCommForRelation, ", newCommForRelation
#     print "newCommForrelationBoolean: ", newCommForRelationBoolean
        
    if not newCommForEntityBoolean and not newCommForRelationBoolean:
        # We might get stuck in local minima, so move if our penalties are greater than the tolerance
        # Move to a random community
#         print "local minimum"
        tolerance = 0.1
        if maxEntityPenalty > tolerance:
            newCommForEntity = np.random.randint(0,K_entity)
            newCommForEntityBoolean = True
        if maxRelationPenalty > tolerance:
            newCommForRelation = np.random.randint(0, K_relation)
            newCommForRelationBoolean = True
            
#         print "newCommForEntity: ", newCommForEntity
#         print "newCommForEntityBoolean: ", newCommForEntityBoolean
#         print "newCommForRelation: ", newCommForRelation
#         print "newCommForRelationBoolean: ", newCommForRelationBoolean
            
        if not newCommForEntityBoolean and not newCommForRelationBoolean:
            return False
    
    
#     print ""
#     print ""
#     print ""
        
    # Move the entity node and relation node to their new communities
    if newCommForEntityBoolean:
        # Move entity node first
        entityPrevComm = commsByEntity[entityNodeToChange]
        # Update the inbound/outbound edges on the entity side by subtracting from current comm and adding to new comm
        entityCommOutboundEdges[entityPrevComm] -=  changeEntityNodeOutboundEdges
        entityCommInboundEdges[entityPrevComm] -= changeEntityNodeInboundEdges
        entityCommOutboundEdges[newCommForEntity] += changeEntityNodeOutboundEdges
        entityCommInboundEdges[newCommForEntity] += changeEntityNodeInboundEdges

        # Update the inbound/outbound edges on the relation side that are changed by moving this entity node
        outboundLinks = outboundAdjacencyMatrix[entityNodeToChange]
        inboundLinks = inboundAdjacencyMatrix[entityNodeToChange]
        for relationNode in xrange(numRelationNodes):
            relComm = commsByRelation[relationNode]

            # If there is an outbound edge from entity to relation, change relation inbound edges and vice versa
            if outboundLinks[relationNode] == 1:
                relationCommInboundEdges[relComm][entityPrevComm] -= 1
                relationCommInboundEdges[relComm][newCommForEntity] += 1
            if inboundLinks[relationNode] == 1:
                relationCommOutboundEdges[relComm][entityPrevComm] -= 1
                relationCommOutboundEdges[relComm][newCommForEntity] += 1
        # Update the actual community matrix
        commEntityMatrix[entityPrevComm][entityNodeToChange] = 0
        commEntityMatrix[newCommForEntity][entityNodeToChange] = 1
    
    if newCommForRelationBoolean:
        # Move relation node
        relationPrevComm = commsByRelation[relationNodeToChange]
        # Update the inbound/outbound edges on the relation side by subtracting from current comm and adding to new comm
        relationCommOutboundEdges[relationPrevComm] -= changeRelationNodeOutboundEdges
        relationCommInboundEdges[relationPrevComm] -= changeRelationNodeInboundEdges
        relationCommOutboundEdges[newCommForRelation] += changeRelationNodeOutboundEdges
        relationCommInboundEdges[newCommForRelation] += changeRelationNodeInboundEdges
        
        # Update the inbound/outbound edges on the entity side that are changed by moving this relation node
        outboundLinks = outboundAdjacencyMatrix.T[relationNodeToChange]
        inboundLinks = inboundAdjacencyMatrix.T[relationNodeToChange]
        for entityNode in xrange(numEntityNodes):
            entityComm = commsByEntity[entityNode]
            
            if outboundLinks[entityNode] == 1:
                entityCommInboundEdges[entityComm][relationPrevComm] -= 1
                entityCommInboundEdges[entityComm][newCommForRelation] += 1
            if inboundLinks[entityNode] == 1:
                entityCommOutboundEdges[entityComm][relationPrevComm] -= 1
                entityCommOutboundEdges[entityComm][newCommForRelation] += 1
        # Update the actual community matrix
        commRelationMatrix[relationPrevComm][relationNodeToChange] = 0
        commRelationMatrix[newCommForRelation][relationNodeToChange] = 1
    
    commsByEntity[entityNodeToChange] = newCommForEntity
    commsByRelation[relationNodeToChange] = newCommForRelation
#     print "NEW COMMS BY ENTITY: ", commsByEntity
#     print "NEW COMMS BY RELATION: ", commsByRelation
    
    totalPenalty = totalEntityPenalty - maxEntityPenalty + minEntityPenalty + totalRelationPenalty - maxRelationPenalty + minRelationPenalty
#     print "totalPenalty: ", totalPenalty
#     print "bestStateCommsByEntity: ", bestStateCommsByEntity
#     print "bestStateCommsByRelation: ", bestStateCommsByRelation

    if totalPenalty < bestStateTotalPenalty:
        print "***************"
        print "***************"
        print "UPDATING STUFF"
        bestStateCommsByEntity = copy.deepcopy(commsByEntity)
        bestStateCommsByRelation = copy.deepcopy(commsByRelation)
        bestStateTotalPenalty = totalPenalty
        print "new BestStateTotalPenalty: ", bestStateTotalPenalty
        print "new bestStateCommsByEntity: ", bestStateCommsByEntity
        print "new bestStateCommsByRelation: ", bestStateCommsByRelation
        print "***************"
        print "***************"
    
#     print "returned BestStateTotalPenalty: ", bestStateTotalPenalty
#     print "returned bestStateCommsByEntity: ", bestStateCommsByEntity
#     print "returned bestStateCommsByRelation: ", bestStateCommsByRelation
    return [bestStateCommsByEntity, bestStateCommsByRelation, bestStateTotalPenalty]

In [19]:
# if __name__ == '__main__':
filename = 'NELL165/NELL165/eventtest.txt'
[triples, entityToIntDict, relationToIntDict, intToEntityDict, intToRelationDict, 
     outboundAdjacencyMatrix, inboundAdjacencyMatrix] = readInput(filename)
print entityToIntDict
print intToEntityDict
print relationToIntDict
print intToRelationDict
print outboundAdjacencyMatrix
print inboundAdjacencyMatrix
print len(triples), len(entityToIntDict), len(outboundAdjacencyMatrix)

numEntityNodes = len(entityToIntDict)
numRelationNodes = len(relationToIntDict)

# print "bestStateTotalPenalty: ", bestStateTotalPenalty
# print "bestStateCommsByEntity: ", bestStateCommsByEntity
# print "bestStateCommsByRelation: ", bestStateCommsByRelation

# WE TEST WITH MANY INITIALIZATIONS AND BEST STATES
bestBestStateTotalPenalty = sys.maxint
bestBestStateCommsByEntity = np.zeros(commsByEntity.shape)
bestBestStateCommsByRelation = np.zeros(commsByRelation.shape)

for i in xrange(20):
    [numEntityNodes, numRelationNodes, numTotalNodes, 
           K_entity, K_relation, commEntityMatrix, commRelationMatrix, entityCommOutboundEdges,
           entityCommInboundEdges, relationCommOutboundEdges, relationCommInboundEdges,commsByEntity, 
           commsByRelation] = initialize(outboundAdjacencyMatrix, inboundAdjacencyMatrix, numEntityNodes, numRelationNodes)
    #     testOutbound = np.random.randint(0, 2, (numEntityNodes, numRelationNodes))
    #     testInbound = np.random.randint(0,2,(numEntityNodes, numRelationNodes))

    #     outboundAdjacencyMatrix = testOutbound
    #     # inbound is just the flipped of outbound
    #     flipBinary = np.vectorize(lambda a: 1-a)
    #     inboundAdjacencyMatrix = flipBinary(outboundAdjacencyMatrix)

#     print "numEntityNodes: ",numEntityNodes
#     print "numRelationNodes: ",numRelationNodes
#     print "numTotalNodes: ",numTotalNodes
#     print "outboundAdjacencyMatrix: ",outboundAdjacencyMatrix
#     print "inboundAdjacencyMatrix: ",inboundAdjacencyMatrix
#     print "K_entity: ",K_entity
#     print "K_relation: ",K_relation
#     print "commEntityMatrix: ",commEntityMatrix
#     print "commRelationMatrix: ",commRelationMatrix
#     print "entityCommOutboundEdges: ",entityCommOutboundEdges
#     print "entityCommInboundEdges: ",entityCommInboundEdges
#     print "relationCommOutboundEdges: ",relationCommOutboundEdges
#     print "relationCommInboundEdges: ",relationCommInboundEdges
#     print "commsByEntity: ",commsByEntity
#     print "commsByRelation: ",commsByRelation

    bestStateTotalPenalty = sys.maxint
    bestStateCommsByEntity = commsByEntity
    bestStateCommsByRelation = commsByRelation
    
    for i in xrange(50):
        [bestStateCommsByEntity, bestStateCommsByRelation, bestStateTotalPenalty] = algoIteration(numEntityNodes, numRelationNodes, numTotalNodes, outboundAdjacencyMatrix, inboundAdjacencyMatrix, 
                   K_entity, K_relation, commEntityMatrix, commRelationMatrix, entityCommOutboundEdges,
                   entityCommInboundEdges, relationCommOutboundEdges, relationCommInboundEdges,commsByEntity, 
                   commsByRelation, bestStateCommsByEntity, bestStateCommsByRelation, bestStateTotalPenalty)
#         print "commEntityMatrix: ",commEntityMatrix
#         print "commRelationMatrix: ",commRelationMatrix
#         print "entityCommOutboundEdges: ",entityCommOutboundEdges
#         print "entityCommInboundEdges: ",entityCommInboundEdges
#         print "relationCommOutboundEdges: ",relationCommOutboundEdges
#         print "relationCommInboundEdges: ",relationCommInboundEdges
#         print "commsByEntity: ",commsByEntity
#         print "commsByRelation: ",commsByRelation
#         print "new BestStateTotalPenalty: ", bestStateTotalPenalty
#         print "new bestStateCommsByEntity: ", bestStateCommsByEntity
#         print "new bestStateCommsByRelation: ", bestStateCommsByRelation

#         print ""
#         print ""

    print "END OF A ROUND OK END OF A ROUND"
    print "bestStateTotalPenalty: ", bestStateTotalPenalty
    print "bestStateCommsByEntity: ", bestStateCommsByEntity
    print "bestStateCommsByRelation: ", bestStateCommsByRelation
    
    if bestStateTotalPenalty < bestBestStateTotalPenalty:
        print "updating the bestBest"
        bestBestStateTotalPenalty = bestStateTotalPenalty
        bestBestStateCommsByEntity = bestStateCommsByEntity
        bestBestStateCommsByRelation = bestStateCommsByRelation
        print "bestBestStateTotalPenalty: ", bestBestStateTotalPenalty
        print "bestBestStateCommsByEntity: ", bestBestStateCommsByEntity
        print "bestBestStateCommsByRelation: ", bestBestStateCommsByRelation

print "bestBestStateTotalPenalty: ", bestBestStateTotalPenalty
print "bestBestStateCommsByEntity: ", bestBestStateCommsByEntity
print "bestBestStateCommsByRelation: ", bestBestStateCommsByRelation
        
# Make the final matrices the same as best states
commEntityMatrix = np.zeros((K_entity, numEntityNodes))
for i in xrange(len(bestBestStateCommsByEntity)):
    commEntityMatrix[bestBestStateCommsByEntity[i]][i] = 1
commRelationMatrix = np.zeros((K_relation, numRelationNodes))
for i in xrange(len(bestBestStateCommsByRelation)):
    commRelationMatrix[bestBestStateCommsByRelation[i]][i] = 1

print "****************"
print "Communities: "
for i in xrange(K_entity):
    print "Entity Community "+str(i) +": "
    for j in xrange(numEntityNodes):
        if commEntityMatrix[i][j] == 1:
            print intToEntityDict[j]
print "Relations: "
for i in xrange(K_relation):
    print "Relation Community "+str(i) +": "
    for j in xrange(numRelationNodes):
        if commRelationMatrix[i][j] == 1:
            print intToRelationDict[j]



{'troop_surge': 13, 'pearl_harbour': 6, 'june_1967': 9, 'n1812': 11, 'operation_iraqi_freedom': 2, 'world_war_ii': 12, 'operation_barbarossa': 4, 'revolutionary_war': 10, 'n2005': 3, 'june_1941': 5, 'n2007': 1, 'liquidity_crisis': 0, 'december_1941': 7, 'six_day_war': 8}
{0: 'liquidity_crisis', 1: 'n2007', 2: 'operation_iraqi_freedom', 3: 'n2005', 4: 'operation_barbarossa', 5: 'june_1941', 6: 'pearl_harbour', 7: 'december_1941', 8: 'six_day_war', 9: 'june_1967', 10: 'revolutionary_war', 11: 'n1812', 12: 'world_war_ii', 13: 'troop_surge'}
{'eventdate_operation_barbarossa_june_1941': 2.0, 'eventdate_world_war_ii_december_1941': 6.0, 'eventdate_troop_surge_n2007': 7.0, 'eventdate_six_day_war_june_1967': 4.0, 'eventdate_pearl_harbour_december_1941': 3.0, 'eventdate_revolutionary_war_n1812': 5.0, 'eventdate_operation_iraqi_freedom_n2005': 1.0, 'eventdate_liquidity_crisis_n2007': 0.0}
{0.0: 'eventdate_liquidity_crisis_n2007', 1.0: 'eventdate_operation_iraqi_freedom_n2005', 2.0: 'eventdate_op

