In [1]:
import networkx as nx
import DBTool as DB

#import EventLog
from pm4py.objects.log.importer.xes import factory as xes_importer
# Read the XES event log from the working directory; `log` feeds DBTool below
log = xes_importer.import_log('dummy.xes')

# Create tables from the event log; `db` is what the later cells query
# (getEventVariantTable, getEventByID)
db = DB.DBTool(log)

def clusterDetection(G, treshold):
    """Split ``G`` into clusters by dropping its heavy edges.

    An edge is dropped when its ``'weight'`` is strictly greater than
    ``treshold``; edges whose weight is the sentinel ``-1`` are never dropped.
    The pruning happens on a copy, so ``G`` itself is left untouched and the
    function can be called again on the same graph (e.g. with another
    threshold, or when the cell is re-run).

    Parameters
    ----------
    G : networkx graph
        Every edge must carry a ``'weight'`` attribute (``KeyError`` otherwise).
    treshold : number
        Largest weight an edge may have and still be kept.

    Returns
    -------
    list of networkx.Graph
        One independent graph per cluster that remains after pruning.
    """
    # Prune a copy: removing edges from the caller's graph would make any
    # later call start from an already filtered graph.
    pruned = G.copy()
    heavyEdges = [(u, v) for u, v, d in G.edges(data=True)
                  if d['weight'] > treshold and d['weight'] != -1]
    pruned.remove_edges_from(heavyEdges)

    # For an undirected graph and k=1 these are the connected components
    # (nx.connected_components(pruned) would give the same node sets).
    return [nx.Graph(pruned.subgraph(c)) for c in nx.k_edge_subgraphs(pruned, k=1)]

def horizontalRefinement(candidateLabels, graphList):
    """Append the 1-based subgraph number to the labels of candidate nodes.

    For the i-th graph in ``graphList`` (counting from 1), every node whose
    ``'curLabel'`` is in ``candidateLabels`` gets ``str(i)`` appended to its
    ``'newLabel'``. Other nodes are left as they are.

    Parameters
    ----------
    candidateLabels : container of labels
        Labels that have to be refined.
    graphList : list of networkx graphs
        Nodes must carry the ``'curLabel'`` and ``'newLabel'`` attributes.

    Returns
    -------
    list
        The same ``graphList`` object; its graphs are modified in place.
    """
    for i, subgraph in enumerate(graphList, start=1):
        for cn, attrs in subgraph.nodes(data=True):
            if attrs['curLabel'] in candidateLabels:
                # G.nodes[n] is the supported accessor; the old G.node[n]
                # alias is no longer provided by current networkx releases.
                subgraph.nodes[cn]['newLabel'] += str(i)

    return graphList

#Dummy EventVariantTable: left as the last expression of the cell so the
#notebook renders the returned table (output below)
db.getEventVariantTable()

Unnamed: 0_level_0,VariantID,Position,Event
EventID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,a
1,0,1,b
2,0,2,c
3,1,0,a
4,1,1,b
5,1,2,b
6,2,0,a
7,2,1,c
8,2,2,c


In [2]:
#Dummy test graph: nodes 1-9, three per variant
G = nx.Graph()

# Both label attributes start out equal to the event label
G.add_nodes_from(
    (nodeID, {'curLabel': label, 'newLabel': label})
    for nodeID, label in enumerate(['a', 'b', 'c',
                                    'a', 'b', 'b',
                                    'a', 'c', 'c'], start=1)
)

# Weighted edges between nodes that share a label
G.add_weighted_edges_from([(1, 4, 0), (2, 5, 0), (3, 8, 0.7),
                           (1, 7, 0.7), (4, 7, 0.7)])

# Edges between consecutive nodes of a variant, marked with the sentinel weight -1
G.add_weighted_edges_from(
    (u, v, -1) for u, v in [(1, 2), (2, 3),
                            (4, 5), (5, 6),
                            (7, 8), (8, 9)]
)

#Cluster detection implementation
subgraphs = clusterDetection(G, 0.2)

#Horizontal refinement implementation
for g in horizontalRefinement(['a', 'b'], subgraphs):
    print(g.nodes(data=True), "\n")


[(1, {'curLabel': 'a', 'newLabel': 'a1'}), (2, {'curLabel': 'b', 'newLabel': 'b1'}), (3, {'curLabel': 'c', 'newLabel': 'c'}), (4, {'curLabel': 'a', 'newLabel': 'a1'}), (5, {'curLabel': 'b', 'newLabel': 'b1'}), (6, {'curLabel': 'b', 'newLabel': 'b1'})] 

[(8, {'curLabel': 'c', 'newLabel': 'c'}), (9, {'curLabel': 'c', 'newLabel': 'c'}), (7, {'curLabel': 'a', 'newLabel': 'a2'})] 



In [3]:
def connectedComponents(G, candidateLabels):
    """List, per candidate label, the connected components that contain it.

    Edges whose ``'weight'`` is ``-1`` are ignored when the components are
    formed. ``G`` itself is not modified: the edges are dropped on a copy.

    Parameters
    ----------
    G : undirected networkx graph
        Edges must carry ``'weight'``, nodes must carry ``'curLabel'``.
    candidateLabels : iterable of labels
        Labels to report components for.

    Returns
    -------
    dict
        ``{label: [component, ...]}`` where every component is a list of node
        IDs and holds at least one node whose ``'curLabel'`` equals ``label``.
        A label that matches no node maps to ``[]``.
    """
    # Drop the -1 edges on a copy so the caller's graph keeps them
    H = G.copy()
    H.remove_edges_from([(u, v) for u, v, d in G.edges(data=True) if d['weight'] == -1])

    # Every component is computed exactly once, so none can be listed twice.
    # (Collecting tuple(set) per node does not de-duplicate reliably: two equal
    # sets built in a different order may iterate in a different order.)
    components = [list(c) for c in nx.connected_components(H)]

    # A fresh list per label keeps the entries of different labels independent
    return {
        label: [list(comp) for comp in components
                if any(H.nodes[n]['curLabel'] == label for n in comp)]
        for label in candidateLabels
    }


def sizelargestComponent(connectedComponents):
    """Return, per label, the size of its largest component.

    ``connectedComponents`` maps each label to a list of components; the result
    maps the same labels to the length of the longest one, or 0 when a label
    has no components at all.
    """
    largest = {}
    for label, components in connectedComponents.items():
        largest[label] = max((len(c) for c in components), default=0)
    return largest

def averagePosition(Gi, db):
    """Return the mean event position of every component in ``Gi``.

    ``Gi`` is a sequence of components, each a sized collection of event IDs.
    Positions are looked up with ``getPosition(eID, db)``. The result is a flat
    list with one average per component, in the order of ``Gi``. An empty
    component raises ``ZeroDivisionError``.
    """
    averages = []
    for nodes in Gi:
        total = sum(getPosition(eID, db) for eID in nodes)
        averages.append(total / len(nodes))
    return averages

def getPosition(eID, db):
    """Return the ``Position`` attribute of the event ``db`` returns for ``eID``."""
    return db.getEventByID(eID).Position
    
def sortConectedComponents(connectedComponents, db):
    """Order the components of every label by ascending average position.

    ``connectedComponents`` has the form ``{label: [comp1, comp2, ...]}``. The
    result has the same keys and the same component objects; only the order
    inside each list changes. Averages come from ``averagePosition``;
    components with equal averages keep their relative order.
    """
    sortCC = {}
    for event, cc in connectedComponents.items():
        ranked = sorted(zip(cc, averagePosition(cc, db)), key=lambda pair: pair[1])
        sortCC[event] = [component for component, _ in ranked]
    return sortCC


def verticalRefinement(graphList, candidateLabels, db, threshold):
    """Refine the ``'newLabel'`` of candidate nodes per connected component.

    For every graph in ``graphList`` the components of each candidate label
    are computed (``connectedComponents``), ordered by average position
    (``sortConectedComponents``) and numbered from 1. Then, per label:

    * the first component, and every component whose size is at least
      ``threshold`` times the size of the label's largest component, gets
      ``'.<number>'`` appended to the ``'newLabel'`` of each of its nodes;
    * the nodes of any other (too small) component take over the most recent
      label assigned by the rule above.

    NOTE(review): the original comment says this follows "the paper"; the
    reference is not visible here, so the rules are described from the code.

    Parameters
    ----------
    graphList : list of networkx graphs
        Nodes must carry ``'curLabel'`` and ``'newLabel'``.
    candidateLabels : iterable of labels
        Labels to refine.
    db : object
        Passed through to ``sortConectedComponents`` for position lookups.
    threshold : number
        Fraction of the largest component's size a component needs in order
        to keep its own number.

    Returns
    -------
    list
        The same ``graphList`` object; node labels are modified in place.
    """
    for subgraph in graphList:
        cc = connectedComponents(subgraph, candidateLabels)
        cc = sortConectedComponents(cc, db)
        mSize = sizelargestComponent(cc)

        for event, nG in cc.items():
            # Start every label group from a clean state so the name is always bound
            prevLabel = ''
            for i, G in enumerate(nG, start=1):
                # Same answer for every node of the component: decide once
                keepsOwnNumber = i == 1 or len(G) >= threshold * mSize[event]
                for cn in G:
                    # G.nodes[n] is the supported accessor; the old G.node[n]
                    # alias is no longer provided by current networkx releases.
                    attrs = subgraph.nodes[cn]
                    if keepsOwnNumber:
                        attrs['newLabel'] += '.' + str(i)
                        prevLabel = attrs['newLabel']
                    else:
                        attrs['newLabel'] = prevLabel

    return graphList
        

# Vertical refinement of labels 'a' and 'b'; the subgraphs are relabelled in
# place and the same list is handed back
refinedSubgraphs = verticalRefinement(subgraphs, ['a','b'], db, 0.3)
for g in refinedSubgraphs:
    print(g.nodes(data=True), "\n")

[(1, {'curLabel': 'a', 'newLabel': 'a1.1'}), (2, {'curLabel': 'b', 'newLabel': 'b1.2'}), (3, {'curLabel': 'c', 'newLabel': 'c'}), (4, {'curLabel': 'a', 'newLabel': 'a1.1'}), (5, {'curLabel': 'b', 'newLabel': 'b1.2'}), (6, {'curLabel': 'b', 'newLabel': 'b1.1'})] 

[(8, {'curLabel': 'c', 'newLabel': 'c'}), (9, {'curLabel': 'c', 'newLabel': 'c'}), (7, {'curLabel': 'a', 'newLabel': 'a2.1'})] 

