In [8]:
import numpy as np

In [9]:
# We have an entity-relation graph modelled after a set of entity-relation triplets (e_1, r, e_2).
# We have N nodes, N_e entity nodes and N_r relation nodes.
# Community membership is stored in two 2-D indicator matrices with one row per community:
# cell (i, j) is 1 when node j belongs to community i and 0 otherwise.
# One matrix is the K_e matrix for entity communities, and one is the K_r matrix for relation communities.

def initializeVariables():
    """Build the zero-initialised state for the entity/relation community search.

    Returns:
        list: 15 items, in this order:
            numEntityNodes, numRelationNodes, numTotalNodes,
            outboundAdjacencyMatrix, inboundAdjacencyMatrix,
            K_entity, K_relation,
            commEntityMatrix, commRelationMatrix,
            entityCommOutboundEdges, entityCommInboundEdges,
            relationCommOutboundEdges, relationCommInboundEdges,
            commsByEntity, commsByRelation.
    """
    numEntityNodes = 10
    numRelationNodes = 10
    numTotalNodes = numEntityNodes + numRelationNodes

    # cell (i, j) = 1 if there is an edge between entity i and relation j, 0 otherwise
    outboundAdjacencyMatrix = np.zeros((numEntityNodes, numRelationNodes))
    inboundAdjacencyMatrix = np.zeros((numEntityNodes, numRelationNodes))

    K_entity = 2
    K_relation = 2

    # cell (i, j) = 1 if node j belongs to community i, 0 otherwise
    commEntityMatrix = np.zeros((K_entity, numEntityNodes))  # entity communities
    commRelationMatrix = np.zeros((K_relation, numRelationNodes))  # relation communities

    # Index i stores the community that node i belongs to. These arrays need one
    # slot per *node* (not per community) and an integer dtype, because their
    # values are used to index the per-community arrays.
    commsByEntity = np.zeros(numEntityNodes, dtype=int)
    commsByRelation = np.zeros(numRelationNodes, dtype=int)

    # Per-community edge tallies: row = own community, column = community on the other side
    entityCommOutboundEdges = np.zeros((K_entity, K_relation))
    entityCommInboundEdges = np.zeros((K_entity, K_relation))
    relationCommOutboundEdges = np.zeros((K_relation, K_entity))
    relationCommInboundEdges = np.zeros((K_relation, K_entity))

    return [numEntityNodes, numRelationNodes, numTotalNodes, outboundAdjacencyMatrix, inboundAdjacencyMatrix,
            K_entity, K_relation, commEntityMatrix, commRelationMatrix, entityCommOutboundEdges,
            entityCommInboundEdges, relationCommOutboundEdges, relationCommInboundEdges,
            commsByEntity, commsByRelation]

# Unpack the freshly built state into notebook-level names. The order here must
# match the order of the list returned by initializeVariables().
(
    numEntityNodes, numRelationNodes, numTotalNodes,
    outboundAdjacencyMatrix, inboundAdjacencyMatrix,
    K_entity, K_relation,
    commEntityMatrix, commRelationMatrix,
    entityCommOutboundEdges, entityCommInboundEdges,
    relationCommOutboundEdges, relationCommInboundEdges,
    commsByEntity, commsByRelation,
) = initializeVariables()

In [None]:
# Randomize initial communities
def initializeAssignments(seed=None):
    """Randomly assign every node to a community and tally per-community edges.

    Works on the notebook-level state: rebinds ``commsByEntity`` and
    ``commsByRelation`` to fresh integer arrays with one label per node, then
    refreshes ``commEntityMatrix`` / ``commRelationMatrix`` and the four
    ``*Comm*Edges`` tally matrices in place.

    Args:
        seed: optional seed for the random generator, so that a run can be
            reproduced. ``None`` (the default) gives a different assignment
            on every call.
    """
    global commsByEntity, commsByRelation

    rng = np.random.RandomState(seed)

    # Rebind (rather than fill in place) so the label arrays always end up with
    # exactly one integer slot per node, whatever they held before.
    commsByEntity = rng.randint(0, K_entity, size=numEntityNodes)
    commsByRelation = rng.randint(0, K_relation, size=numRelationNodes)

    # Indicator matrices: cell (i, j) = 1 if node j belongs to community i
    commEntityMatrix[:] = 0
    commEntityMatrix[commsByEntity, np.arange(numEntityNodes)] = 1
    commRelationMatrix[:] = 0
    commRelationMatrix[commsByRelation, np.arange(numRelationNodes)] = 1

    # Store the inboundEdges and outboundEdges for each community.
    # (communities x nodes) . (nodes x nodes) . (nodes x communities) sums the
    # adjacency entries over every pair of member nodes.
    entityCommOutboundEdges[:] = commEntityMatrix.dot(outboundAdjacencyMatrix).dot(commRelationMatrix.T)
    entityCommInboundEdges[:] = commEntityMatrix.dot(inboundAdjacencyMatrix).dot(commRelationMatrix.T)
    # The relation-side tallies count the same edges from the other end.
    relationCommOutboundEdges[:] = entityCommOutboundEdges.T
    relationCommInboundEdges[:] = entityCommInboundEdges.T

In [10]:
# Define the algorithm for one iteration
def _countEdgesByCommunity(adjacencyRow, neighbourComms, numNeighbourComms):
    """Count one node's edges per community of the nodes on the other side.

    Args:
        adjacencyRow: 1-D array, non-zero where the node has an edge.
        neighbourComms: community label of every node on the other side.
        numNeighbourComms: number of communities on the other side.

    Returns:
        Float array of length ``numNeighbourComms`` (at least) with the counts.
    """
    # Walk the *positions* of the edges, not the 0/1 values stored in the row.
    neighbours = np.flatnonzero(adjacencyRow)
    labels = np.asarray(neighbourComms)[neighbours].astype(int)
    return np.bincount(labels, minlength=numNeighbourComms).astype(float)


def _communityPenalty(commOutbound, commInbound, nodeOutbound, nodeInbound):
    """Sum of squared differences between a community's mean tally and a node's counts."""
    # NOTE(review): np.mean collapses the community's tally vector to a single
    # scalar (an average across the communities on the other side). If the
    # intent is a per-member average, this should divide by the community size
    # instead - confirm before relying on the penalty values.
    outboundDiff = np.sum(np.square(np.mean(commOutbound) - nodeOutbound))
    inboundDiff = np.sum(np.square(np.mean(commInbound) - nodeInbound))
    return outboundDiff + inboundDiff


def _selectWorstNodeAndTarget(numNodes, numComms, numNeighbourComms, outboundRows, inboundRows,
                              nodeComms, neighbourComms, commOutboundEdges, commInboundEdges):
    """Pick the node with the largest penalty and the community that fits it best.

    Returns:
        ``(node, community)``; either is -1 when nothing could be selected
        (no nodes, no communities, or only NaN penalties). Ties keep the
        lowest index.
    """
    maxPenalty = -np.inf
    worstNode = -1
    worstOutbound = np.zeros(numNeighbourComms)
    worstInbound = np.zeros(numNeighbourComms)

    for node in range(numNodes):
        nodeOutbound = _countEdgesByCommunity(outboundRows[node], neighbourComms, numNeighbourComms)
        nodeInbound = _countEdgesByCommunity(inboundRows[node], neighbourComms, numNeighbourComms)
        # Labels may be stored as floats; they must be ints to be used as an index.
        curComm = int(nodeComms[node])
        penalty = _communityPenalty(commOutboundEdges[curComm], commInboundEdges[curComm],
                                    nodeOutbound, nodeInbound)
        if penalty > maxPenalty:
            maxPenalty = penalty
            worstNode = node
            worstOutbound = nodeOutbound
            worstInbound = nodeInbound

    minPenalty = np.inf
    bestComm = -1
    if worstNode >= 0:
        # Iterate over community *indices*, not over rows of the membership matrix.
        for comm in range(numComms):
            penalty = _communityPenalty(commOutboundEdges[comm], commInboundEdges[comm],
                                        worstOutbound, worstInbound)
            if penalty < minPenalty:
                minPenalty = penalty
                bestComm = comm

    return worstNode, bestComm


def _moveNode(node, newComm, nodeComms, commMatrix, nodeOutbound, nodeInbound,
              ownOutboundEdges, ownInboundEdges, otherOutboundEdges, otherInboundEdges):
    """Move ``node`` to ``newComm`` and shift its edge counts between the tallies.

    ``own*Edges`` are indexed [node-side community, other-side community];
    ``other*Edges`` hold the same edges seen from the other side, so the
    node-side community is their column. All arrays are updated in place.

    Returns:
        True if the node changed community, False if it was already there.
    """
    oldComm = int(nodeComms[node])
    if newComm == oldComm:
        return False

    for ownEdges, otherEdges, counts in ((ownOutboundEdges, otherOutboundEdges, nodeOutbound),
                                         (ownInboundEdges, otherInboundEdges, nodeInbound)):
        # Subtract from the current community and add to the new one.
        ownEdges[oldComm] -= counts
        ownEdges[newComm] += counts
        otherEdges[:, oldComm] -= counts
        otherEdges[:, newComm] += counts

    commMatrix[oldComm, node] = 0
    commMatrix[newComm, node] = 1
    nodeComms[node] = newComm
    return True


def algoIteration():
    """Run one iteration: relocate the worst-fitting entity node and relation node.

    Reads and updates the notebook-level state in place (``commsByEntity``,
    ``commsByRelation``, the two membership matrices and the four
    ``*Comm*Edges`` tallies). ``commsByEntity`` / ``commsByRelation`` must hold
    one community label per node.

    Both moves are chosen from the state as it is on entry; only then are they
    applied, entity first.

    Returns:
        ``(entityMoved, relationMoved)`` - booleans telling whether each node
        actually changed community.
    """
    # Figure out where to move each node, but DON'T move anything yet.
    entityNodeToChange, newCommForEntity = _selectWorstNodeAndTarget(
        numEntityNodes, K_entity, K_relation,
        outboundAdjacencyMatrix, inboundAdjacencyMatrix,
        commsByEntity, commsByRelation,
        entityCommOutboundEdges, entityCommInboundEdges)
    relationNodeToChange, newCommForRelation = _selectWorstNodeAndTarget(
        numRelationNodes, K_relation, K_entity,
        outboundAdjacencyMatrix.T, inboundAdjacencyMatrix.T,
        commsByRelation, commsByEntity,
        relationCommOutboundEdges, relationCommInboundEdges)

    # Move the entity node first.
    newCommForEntityBoolean = False
    if entityNodeToChange >= 0 and newCommForEntity >= 0:
        newCommForEntityBoolean = _moveNode(
            entityNodeToChange, newCommForEntity, commsByEntity, commEntityMatrix,
            _countEdgesByCommunity(outboundAdjacencyMatrix[entityNodeToChange], commsByRelation, K_relation),
            _countEdgesByCommunity(inboundAdjacencyMatrix[entityNodeToChange], commsByRelation, K_relation),
            entityCommOutboundEdges, entityCommInboundEdges,
            relationCommOutboundEdges, relationCommInboundEdges)

    # Then the relation node. Its counts are taken *after* the entity move so
    # they reflect the entity's new community if the two nodes share an edge.
    newCommForRelationBoolean = False
    if relationNodeToChange >= 0 and newCommForRelation >= 0:
        newCommForRelationBoolean = _moveNode(
            relationNodeToChange, newCommForRelation, commsByRelation, commRelationMatrix,
            _countEdgesByCommunity(outboundAdjacencyMatrix.T[relationNodeToChange], commsByEntity, K_entity),
            _countEdgesByCommunity(inboundAdjacencyMatrix.T[relationNodeToChange], commsByEntity, K_entity),
            relationCommOutboundEdges, relationCommInboundEdges,
            entityCommOutboundEdges, entityCommInboundEdges)

    return newCommForEntityBoolean, newCommForRelationBoolean


In [None]:
def moveNodes(entityNodeToChange, newCommForEntity, relationNodeToChange, newRelationForEntity):
    