In [186]:
# Setup + Helper Methods

import pandas as pd
import snap

# return list of graphs
def get_graph_database():
    """Load the four test graphs (A through D) from their edge-list files.

    Every file is read with snap.LoadEdgeList as a snap.TUNGraph, passing
    0 and 1 as the column arguments.

    Returns:
        list: the loaded graphs, in the order A, B, C, D.
    """
    edge_list_files = (
        "datasets/test-graphs/graph-A.txt",
        "datasets/test-graphs/graph-B.txt",
        "datasets/test-graphs/graph-C.txt",
        "datasets/test-graphs/graph-D.txt",
    )

    return [
        snap.LoadEdgeList(snap.TUNGraph, path, 0, 1)
        for path in edge_list_files
    ]


'''
NOTE: (get_all_node_supports) 
This method assumes that there are no label repetitions in any of the graphs.
That is, none of the graphs have more than one node with ID x.
'''
# return dict with all nodes in g and their support
def get_all_node_supports(graph_database):
    """Count how often each node ID occurs across the graph database.

    Args:
        graph_database: iterable of graphs exposing Nodes(), whose items
            expose GetId().

    Returns:
        dict: node ID -> number of times that ID was seen. When no graph
        repeats a node ID, this is the number of graphs containing it.
    """
    support_by_node = {}

    for member_graph in graph_database:
        for node in member_graph.Nodes():
            node_id = node.GetId()
            # First sighting starts from 0, later ones increment.
            support_by_node[node_id] = support_by_node.get(node_id, 0) + 1

    return support_by_node

    
'''
NOTE: (get_all_edge_supports)
This method takes duplication into account.
That is, the edges NodeX-NodeY and NodeY-NodeX are considered the same.
''' 
# return dict with all edges in g and their supports
def get_all_edge_supports(graph_database):
    """Count how often each undirected edge occurs across the graph database.

    The edges (x, y) and (y, x) are treated as the same edge. The key kept
    in the result is whichever orientation was encountered first.

    Args:
        graph_database: iterable of graphs exposing Edges(), whose items
            expose GetSrcNId() and GetDstNId().

    Returns:
        dict: (src_id, dst_id) tuple -> number of times the edge was seen.
    """
    ES = {}

    for graph in graph_database:
        for E in graph.Edges():
            src, dst = E.GetSrcNId(), E.GetDstNId()
            curr_edge = (src, dst)
            # Both IDs must be *called*; storing the bound method instead
            # of its result would make this tuple never match a key.
            curr_edge_flip = (dst, src)

            if curr_edge in ES:
                ES[curr_edge] += 1
            elif curr_edge_flip in ES:
                ES[curr_edge_flip] += 1
            else:
                ES[curr_edge] = 1

    return ES
    
    
# print snap TUNGraph
def print_graph(graph):
    """Print the contents of a graph, one item per line.

    A graph with no edges is printed as its node IDs. Any other graph is
    printed as its (source ID, destination ID) edge tuples.

    Args:
        graph: graph exposing GetEdges(), Nodes() and Edges().
    """
    if graph.GetEdges() == 0:
        for N in graph.Nodes():
            # Print the ID rather than the iterator object itself.
            print(N.GetId())
    else:
        for E in graph.Edges():
            curr_edge = (E.GetSrcNId(), E.GetDstNId())
            print(curr_edge)
            
            
# print subgraphs and their supports / print dict
def print_dict(D, opt):
    """Print a dictionary, either generically or as graph/support pairs.

    Args:
        D: dictionary to print. With opt == "graph" its keys must be graphs
            exposing GetEdges(), Nodes() and Edges().
        opt: "graph" to render each key as a list of node IDs (edgeless
            graph) or of (source ID, destination ID) tuples; any other
            value prints plain key/value lines.
    """
    if opt != "graph":
        for key, value in D.items():
            print("Key : {} , Value : {}".format(key, value))
        return

    for graph, support in D.items():
        if graph.GetEdges() == 0:
            # No edges: the node IDs are the only content to show.
            contents = [node.GetId() for node in graph.Nodes()]
        else:
            contents = [
                (edge.GetSrcNId(), edge.GetDstNId())
                for edge in graph.Edges()
            ]

        print("Graph: {}, Support: {}".format(contents, support))
            

In [183]:
# Node-Based Join Growth

'''
To Do:
def subgraph_match(Gq, G): Ullmann's algorithm
def check_downward_closure(candidate): 
''' 

# return dict with frequent singleton graphs and their supports
def get_frequent_singleton_graphs(NS, minsup):
    """Build a one-node graph for every node whose support reaches minsup.

    Args:
        NS: dict mapping node ID -> support.
        minsup: minimum support a node needs to be kept.

    Returns:
        dict: new single-node snap.TUNGraph -> support of its node.
    """
    frequent_singletons = {}

    for node_id, node_support in NS.items():
        if node_support < minsup:
            continue  # infrequent node, no graph is created for it

        singleton = snap.TUNGraph.New()
        singleton.AddNode(node_id)
        # A singleton graph is exactly as frequent as its only node.
        frequent_singletons[singleton] = node_support

    return frequent_singletons

# return candidate by joining singletons
def join_singletons(subgraph1, subgraph2):
    """Join two single-node graphs into a new two-node, one-edge candidate.

    Neither input is modified. The inputs are frequent subgraphs that are
    joined against several partners and stored as result keys, so the join
    is performed on a copy of subgraph1.

    Args:
        subgraph1: graph whose first node becomes one endpoint of the edge.
        subgraph2: graph whose first node becomes the other endpoint.

    Returns:
        A new graph containing subgraph1's contents, subgraph2's first node
        and an edge between the two first nodes.
    """
    first_id = subgraph1.BegNI().GetId()
    second_id = subgraph2.BegNI().GetId()

    # Copy first so the caller's subgraph1 keeps its original contents.
    candidate = snap.ConvertGraph(type(subgraph1), subgraph1)
    candidate.AddNode(second_id)
    candidate.AddEdge(first_id, second_id)

    return candidate
    
               
# return candidates by performing node-based joins    
def join_subgraphs(subgraph1, subgraph2):
    """Produce the two node-based-join candidates of two subgraphs.

    The destination node of subgraph2's last edge is the node being added:
      * candidate 1 = subgraph1 plus that node and subgraph2's last edge;
      * candidate 2 = candidate 1 plus an edge from the destination node of
        subgraph1's last edge to the added node.

    Neither input is modified. Both candidates are fresh graphs, because the
    inputs are frequent subgraphs that are joined against several partners
    and stored as result keys.

    Args:
        subgraph1: base graph; must contain at least one edge.
        subgraph2: graph contributing its last edge; must contain at least
            one edge.

    Returns:
        tuple: (candidate 1, candidate 2).

    Raises:
        ValueError: if either subgraph has no edges.
    """
    # Walk each edge iterator to its end to pick up the last edge.
    s1_last_E = None
    for E in subgraph1.Edges():
        s1_last_E = (E.GetSrcNId(), E.GetDstNId())

    s2_last_E = None
    for E in subgraph2.Edges():
        s2_last_E = (E.GetSrcNId(), E.GetDstNId())

    if s1_last_E is None or s2_last_E is None:
        raise ValueError("join_subgraphs requires both subgraphs to contain at least one edge")

    new_node = s2_last_E[1]

    # Candidate 1: work on a copy so subgraph1 itself is left untouched.
    c1 = snap.ConvertGraph(type(subgraph1), subgraph1)
    if not c1.IsNode(new_node):
        # Only add the node when absent; re-adding an existing ID is an error.
        c1.AddNode(new_node)
    c1.AddEdge(s2_last_E[0], new_node)

    # Candidate 2: candidate 1 plus the edge from the unmatched node.
    c2 = snap.ConvertGraph(type(c1), c1)
    c2.AddEdge(s1_last_E[1], new_node)

    return c1, c2

def generate_Fkplus1(C, g, minsup):
    """Keep the candidates that occur in at least minsup database graphs.

    A candidate counts as occurring in a graph when every one of its node
    IDs and every one of its edges is present in that graph.

    Args:
        C: iterable of candidate graphs exposing Nodes() and Edges().
        g: graph database; each graph exposes IsNode() and IsEdge().
        minsup: minimum number of containing graphs for a candidate to be kept.

    Returns:
        dict: candidate graph -> its support, for the frequent candidates only.
    """
    frequent = {}

    for candidate in C:
        occurrences = 0

        for graph in g:
            has_all_nodes = all(
                graph.IsNode(node.GetId()) for node in candidate.Nodes()
            )
            # Edges are only inspected once every node has been found.
            if has_all_nodes and all(
                graph.IsEdge(edge.GetSrcNId(), edge.GetDstNId())
                for edge in candidate.Edges()
            ):
                occurrences += 1

        if occurrences >= minsup:
            frequent[candidate] = occurrences

    return frequent

'''
NOTE: (generate_candidates)
Method is incomplete, i.e. no optimization/pruning of candidates.
'''
def generate_candidates(Fk, k):
    """Join every unordered pair of frequent subgraphs into candidates.

    Args:
        Fk: list of frequent subgraphs from the previous level.
        k: size of the candidates to generate. With k == 2 each pair is
            joined by join_singletons (one candidate per pair); otherwise
            by join_subgraphs (two candidates per pair).

    Returns:
        list: all generated candidates, unpruned.
    """
    candidates = []

    for position, first in enumerate(Fk):
        # Pair each subgraph only with the ones that come after it.
        for second in Fk[position + 1:]:
            if k == 2:
                candidates.append(join_singletons(first, second))
                continue

            # if subgraph_match(first, second, k):
            c1, c2 = join_subgraphs(first, second)
            # if downward_closure(c):
            candidates.extend((c1, c2))

    return candidates


# return dict with frequent subgraphs and support
def node_based_join_growth(g, minsup):
    """Mine frequent subgraphs level by level (Apriori-style).

    Starts from the frequent single-node graphs, then repeatedly generates
    candidates one size larger and keeps the frequent ones, until a level
    yields no frequent subgraph.

    Args:
        g: graph database (list of graphs).
        minsup: minimum support a subgraph needs to count as frequent.

    Returns:
        dict: frequent subgraph -> support, accumulated over all levels.
    """
    node_supports = get_all_node_supports(g)

    # Level 1: the frequent singleton graphs.
    current_level = get_frequent_singleton_graphs(node_supports, minsup)
    size = 1

    # The level-1 dict doubles as the accumulator for every later level.
    all_frequent = current_level

    while True:
        candidates = generate_candidates(list(current_level.keys()), size + 1)
        current_level = generate_Fkplus1(candidates, g, minsup)

        if not current_level:
            # Nothing frequent at this size, so no larger level is tried.
            break

        all_frequent.update(current_level)
        size += 1

    return all_frequent

In [None]:
# Main

# Minimum support handed to the mining routine below.
minsup = 2

# Graph database: the list of graphs returned by get_graph_database().
g = get_graph_database()

# Mine the frequent subgraphs of g; the result maps each subgraph to its support.
FsG = node_based_join_growth(g, minsup)

# Print one "Graph: ..., Support: ..." line per frequent subgraph.
print_dict(FsG, "graph")

In [None]:
# Trials and Testing

# Scratch graph 1: nodes 1 and 5 joined by the single edge 1-5.
subgraph1 = snap.TUNGraph.New()
subgraph1.AddNode(1)
subgraph1.AddNode(5)
subgraph1.AddEdge(1, 5)

# Scratch graph 2: nodes 1 and 12 joined by the single edge 1-12 (shares node 1 with graph 1).
subgraph2 = snap.TUNGraph.New()
subgraph2.AddNode(1)
subgraph2.AddNode(12)
subgraph2.AddEdge(1, 12)