In [205]:
# Setup + Helper Methods

import pandas as pd
import snap

# return list of graphs
def get_graph_database():
    G1 = snap.LoadEdgeList(snap.TUNGraph, "datasets/test-graphs/graph-A.txt", 0, 1)
    G2 = snap.LoadEdgeList(snap.TUNGraph, "datasets/test-graphs/graph-B.txt", 0, 1)
    G3 = snap.LoadEdgeList(snap.TUNGraph, "datasets/test-graphs/graph-C.txt", 0, 1)
    G4 = snap.LoadEdgeList(snap.TUNGraph, "datasets/test-graphs/graph-D.txt", 0, 1)
    graph_database = [G1, G2, G3, G4]   
    
    return graph_database


'''
NOTE: (get_all_node_supports) 
This method assumes that there are no label repitions in any of the graphs.
That is, none of the graphs have more than one node with ID x.
'''
# return dict with all nodes in g and their support
def get_all_node_supports(graph_database):
    NS = {}
    
    for graph in graph_database:
        for N in graph.Nodes():
            curr_node = N.GetId()
            if curr_node in NS:
                NS[curr_node] += 1
            else:
                NS[curr_node] = 1
    
    return NS

    
'''
NOTE: (get_all_edge_supports)
This methods takes duplication into account.
That is, the edges NodeX-NodeY and NodeY-NodeX are considered the same.
''' 
# return dict will all edges in g and their supports
def get_all_edge_supports(graph_database):
        ES = {}    
        
        for graph in graph_database:
            for E in graph.Edges():
                curr_edge = (E.GetSrcNId(), E.GetDstNId())
                curr_edge_flip = (E.GetDstNId(), E.GetSrcNId)
                if curr_edge in ES:
                    ES[curr_edge] += 1
                elif curr_edge_flip in ES:
                    ES[curr_edge_flip] += 1
                else:    
                    ES[curr_edge] = 1
        
        return ES
    
    
# print snap TUNGraph
def print_graph(graph):
    if graph.GetEdges() == 0:
        for N in graph.Nodes():
            curr_node = N.GetId()
            print(curr_node)
    else:
        for E in graph.Edges():
            curr_edge = (E.GetSrcNId(), E.GetDstNId())
            print(curr_edge)
            
            
# print subgraphs and their supports / print dict
def print_dict(D, opt):
    if opt == "graph":
        graph_res = []
        
        for graph in D.keys():
            if graph.GetEdges() == 0:
                for N in graph.Nodes():
                    graph_res.append(N.GetId())
            else:        
                for E in graph.Edges():
                    curr_edge = (E.GetSrcNId(), E.GetDstNId())
                    graph_res.append(curr_edge)
                    
            print("Graph: {}, Support: {}".format(graph_res, D[graph]))
            graph_res = []
        
    else:
        for key in D.keys():
            print("Key : {} , Value : {}".format(key, D[key]))
            

In [325]:
# Node-Based Join Growth

'''
To Do:
def check_downward_closure(candidate): 
''' 
def subgraph_match(Gq, G):
    nmE_f = False
    nmN_f = False
    nmE = None
    nmN = None
    res = False

    for E in Gq.Edges():
        if not G.IsEdge(E.GetSrcNId(), E.GetDstNId()):
            if nmE_f:
                nmE_f = False
                break
            else:
                nmE = (E.GetSrcNId(), E.GetDstNId())
                nmE_f = True
    
    if nmE_f:
        for N in Gq.Nodes():
            if not G.IsNode(N.GetId()):
                if nmN_f:
                    nmN_f = False
                    break
                else:
                    nmN = N.GetId()
                    if nmN in nmE:
                        nmN_f = True
                    else:
                        break
                        
    if nmN_f and nmE_f:
        res = True
    
    return res, nmE, nmN
                
        
# return dict with frequent singleton graphs and their supports
def get_frequent_singleton_graphs(NS, minsup):
    F1 = {}
    
    for N in NS:
        if NS[N] >= minsup:
            subgraph = snap.TUNGraph.New() # create new graph
            subgraph.AddNode(N) # add frequent node
            F1[subgraph] = NS[N] # graph support = node support
    
    return F1

# return candidate by joining singletons
def join_singletons(subgraph1, subgraph2):
    c = snap.TUNGraph.New()
    c = snap.ConvertGraph(type(subgraph1), subgraph1)
    
    c.AddNode(subgraph2.BegNI().GetId()) # add subgraph2 node to subgraph1
    c.AddEdge(subgraph1.BegNI().GetId(), subgraph2.BegNI().GetId()) # add edge between nodes
        
    return c
    
               
# return candidates by performing node-based joins    
def join_subgraphs(subgraph1, subgraph2, nmE, nmN):
    
#     print("Subgraph1")
#     print_graph(subgraph1)
#     print("Subgraph2")
#     print_graph(subgraph2)
    
    for N in subgraph2.Nodes():
        if not subgraph1.IsNode(N.GetId()):
            nmN_s2 = N.GetId()
    
    c1 = snap.TUNGraph.New()
    c1 = snap.ConvertGraph(type(subgraph2), subgraph2) # copy subgraph 1
    c1.AddNode(nmN)
    c1.AddEdge(nmE[0], nmE[1])
    
    c2 = snap.TUNGraph.New()
    c2 = snap.ConvertGraph(type(c1), c1)
    c2.AddEdge(nmN, nmN_s2)

    print("Joined Candidate 1:")
    print_graph(c1)
    print("Joined Candidate 2:")
    print_graph(c2)
    
    return c1, c2


'''
NOTE: (generate_Fkplus1)
There is something wrong in this method.
'''
def generate_Fkplus1(C, g, minsup):
    Fkplus1 = {}
    candidate_is_subgraph = True
    support = 0
    
    for candidate in C:
        
        for graph in g:
            
            for N in candidate.Nodes():
                if not graph.IsNode(N.GetId()):
                    candidate_is_subgraph = False
                    break
                    
            if candidate_is_subgraph:        
                for E in candidate.Edges():
                    if not graph.IsEdge(E.GetSrcNId(), E.GetDstNId()):
                        candidate_is_subgraph = False
                        break
                        
            if candidate_is_subgraph:
                support += 1
            
            candidate_is_subgraph = True
    
        if support >= minsup:
            Fkplus1[candidate] = support
        
        support = 0
        
    print("Fkplus1: ")
    print_dict(Fkplus1, "graph")
    return Fkplus1

'''
NOTE: (generate_candidates)
Method is incomplete i.e. no optimization/pruning of candidates'
'''
def generate_candidates(Fk, k):
    candidates = []
    
    for i in range(0, len(Fk)):
        sG1 = Fk[i]
        
        for j in range(i+1, len(Fk)):
            sG2 = Fk[j]
            if k == 2:
                c = join_singletons(sG1, sG2)
                candidates.append(c)

            else:
                match, nmE, nmN = subgraph_match(sG1, sG2)
                print(match)
                print("sG1: ")
                print_graph(sG1)
                print("sG2: ")
                print_graph(sG2)
                if match:
                    c1, c2 = join_subgraphs(sG1, sG2, nmE, nmN)
                    for _c in c1, c2:
                        candidates.append(_c)
                
    return candidates


# return dict with frequent subgraphs and support
def node_based_join_growth(g, minsup):
    
    # NS = { [(NodeId) : Support] }
    NS = get_all_node_supports(g)
    
    # F1 = { All frequent singleton graphs }
    Fk = get_frequent_singleton_graphs(NS, minsup) # frequent k subgraphs
    k = 1
    
    FsG = {} # all frequent subgraphs
    C = [] # candidates from Fk
    
    # Apriori Algorithm
    while(True):
        C = generate_candidates(list(Fk.keys()), k+1)
        Fk = generate_Fkplus1(C, g, minsup)
        
        # end if no more frequent subgraphs
        if not Fk: 
            break
            
        FsG.update(Fk) # append all frequent subgraphs
        
        k = k + 1
    
    return FsG

In [328]:
# Main

# minsup = minimum support
minsup = 2

# g = [G1, G2, G3 ... G10] 
g = get_graph_database()

# Skaramoosh
FsG = node_based_join_growth(g, minsup)

print("FsG: ")
print_dict(FsG, "graph")

for G in FsG.keys():
    if G.GetEdges() > 3:
        print_graph(G)
        print("Support: {}".format(FsG[G]))

Fkplus1: 
Graph: [(1, 5)], Support: 4
Graph: [(1, 12)], Support: 4
Graph: [(5, 6)], Support: 4
Graph: [(4, 12)], Support: 4
Graph: [(13, 19)], Support: 4
Graph: [(19, 20)], Support: 4
Graph: [(3, 8)], Support: 4
Graph: [(3, 9)], Support: 4
Graph: [(3, 11)], Support: 4
Graph: [(8, 9)], Support: 4
Graph: [(4, 6)], Support: 4
Graph: [(13, 18)], Support: 4
Graph: [(18, 20)], Support: 4
Graph: [(89, 100)], Support: 2
True
sG1: 
(1, 5)
sG2: 
(1, 12)
Joined Candidate 1:
(1, 5)
(1, 12)
Joined Candidate 2:
(1, 5)
(1, 12)
(5, 12)
True
sG1: 
(1, 5)
sG2: 
(5, 6)
Joined Candidate 1:
(5, 6)
(1, 5)
Joined Candidate 2:
(5, 6)
(1, 5)
(1, 6)
False
sG1: 
(1, 5)
sG2: 
(4, 12)
False
sG1: 
(1, 5)
sG2: 
(13, 19)
False
sG1: 
(1, 5)
sG2: 
(19, 20)
False
sG1: 
(1, 5)
sG2: 
(3, 8)
False
sG1: 
(1, 5)
sG2: 
(3, 9)
False
sG1: 
(1, 5)
sG2: 
(3, 11)
False
sG1: 
(1, 5)
sG2: 
(8, 9)
False
sG1: 
(1, 5)
sG2: 
(4, 6)
False
sG1: 
(1, 5)
sG2: 
(13, 18)
False
sG1: 
(1, 5)
sG2: 
(18, 20)
False
sG1: 
(1, 5)
sG2: 
(89, 100)
Fal

(3, 8)
(3, 9)
Joined Candidate 1:
(8, 9)
(3, 8)
(3, 9)
(3, 11)
Joined Candidate 2:
(8, 9)
(9, 11)
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(3, 8)
(3, 11)
sG2: 
(18, 20)
(13, 18)
False
sG1: 
(8, 9)
(3, 8)
sG2: 
(8, 9)
(3, 8)
(3, 9)
False
sG1: 
(8, 9)
(3, 8)
sG2: 
(3, 9)
(3, 11)
False
sG1: 
(8, 9)
(3, 8)
sG2: 
(8, 9)
(3, 9)
False
sG1: 
(8, 9)
(3, 8)
sG2: 
(8, 9)
(3, 8)
(3, 9)
False
sG1: 
(8, 9)
(3, 8)
sG2: 
(18, 20)
(13, 18)
False
sG1: 
(8, 9)
(3, 8)
(3, 9)
sG2: 
(3, 9)
(3, 11)
False
sG1: 
(8, 9)
(3, 8)
(3, 9)
sG2: 
(8, 9)
(3, 9)
False
sG1: 
(8, 9)
(3, 8)
(3, 9)
sG2: 
(8, 9)
(3, 8)
(3, 9)
False
sG1: 
(8, 9)
(3, 8)
(3, 9)
sG2: 
(18, 20)
(13, 18)
True
sG1: 
(3, 9)
(3, 11)
sG2: 
(8, 9)
(3, 9)
Joined Candidate 1:
(8, 9)
(3, 9)
(3, 11)
Joined Candidate 2:
(8, 9)
(8, 11)
(3, 9)
(3, 11)
True
sG1: 
(3, 9)
(3, 11)
sG2: 
(8, 9)
(3, 8)
(3, 9)
Joined Candidate 1:
(8, 9)
(3, 8)
(3, 9)
(3, 11)
Joined Candidate 2:
(8, 9)
(8, 11)
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(3, 9)
(3, 11)
sG2: 
(18, 20)
(13, 18)
False
sG

(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(8, 9)
(3, 8)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(8, 9)
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(3, 8)
(3, 9)
(3, 11)
(8, 9)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(8, 9)
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(8, 9)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 19)
sG2: 
(8, 9)
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(18, 20)
(13, 18)
(13, 19)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(18, 20)
(13, 18)
(13, 19)
(19, 20)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(18, 20)
(13, 18)
(19, 20)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(18, 20)
(13, 18)
(13, 19)
(19, 20)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(3, 8)
(3, 9)
(3, 11)
False
sG1: 
(18, 20)
(19, 20)
(13, 18)
(13, 19)
sG2: 
(3, 8)
(3

(1, 5)
(1, 12)
sG2: 
(4, 6)
(4, 12)
(5, 6)
(1, 12)
False
sG1: 
(4, 6)
(4, 12)
(5, 6)
(1, 5)
(1, 12)
sG2: 
(4, 6)
(4, 12)
(5, 6)
(1, 5)
(1, 12)
False
sG1: 
(4, 6)
(4, 12)
(5, 6)
(1, 12)
sG2: 
(4, 6)
(4, 12)
(5, 6)
(1, 5)
(1, 12)
Fkplus1: 
FsG: 
Graph: [(1, 5)], Support: 4
Graph: [(1, 12)], Support: 4
Graph: [(5, 6)], Support: 4
Graph: [(4, 12)], Support: 4
Graph: [(13, 19)], Support: 4
Graph: [(19, 20)], Support: 4
Graph: [(3, 8)], Support: 4
Graph: [(3, 9)], Support: 4
Graph: [(3, 11)], Support: 4
Graph: [(8, 9)], Support: 4
Graph: [(4, 6)], Support: 4
Graph: [(13, 18)], Support: 4
Graph: [(18, 20)], Support: 4
Graph: [(89, 100)], Support: 2
Graph: [(1, 5), (1, 12)], Support: 4
Graph: [(5, 6), (1, 5)], Support: 4
Graph: [(4, 12), (1, 12)], Support: 4
Graph: [(4, 6), (5, 6)], Support: 4
Graph: [(4, 6), (4, 12)], Support: 4
Graph: [(19, 20), (13, 19)], Support: 4
Graph: [(13, 18), (13, 19)], Support: 4
Graph: [(18, 20), (19, 20)], Support: 4
Graph: [(3, 8), (3, 9)], Support: 4
Graph: [(3

In [274]:
# Trials and Testing

subgraph1 = snap.TUNGraph.New()
subgraph1.AddNode(1)
subgraph1.AddNode(6)
subgraph1.AddNode(7)
subgraph1.AddEdge(1, 6)
subgraph1.AddEdge(1, 7)

subgraph2 = snap.TUNGraph.New()
subgraph2.AddNode(1)
subgraph2.AddNode(12)
subgraph2.AddNode(5)
subgraph2.AddEdge(1, 12)
subgraph2.AddEdge(1, 5)

res = subgraph_match(subgraph1, subgraph2)

print(res)

(False, (1, 6))


In [290]:
NS = get_all_node_supports(g)
F1 = get_frequent_singleton_graphs(NS, minsup)
C = generate_candidates(list(F1.keys()), 2)

print_dict(F1, "graph")
print(len(C))
for c in C:
    print_graph(c)

Graph: [1], Support: 4
Graph: [5], Support: 4
Graph: [12], Support: 4
Graph: [19], Support: 4
Graph: [3], Support: 4
Graph: [8], Support: 4
Graph: [9], Support: 4
Graph: [11], Support: 4
Graph: [4], Support: 4
Graph: [6], Support: 4
Graph: [15], Support: 2
Graph: [13], Support: 4
Graph: [48], Support: 4
Graph: [18], Support: 4
Graph: [20], Support: 4
Graph: [23], Support: 2
Graph: [100], Support: 2
Graph: [89], Support: 3
153
(1, 5)
(1, 12)
(1, 19)
(1, 3)
(1, 8)
(1, 9)
(1, 11)
(1, 4)
(1, 6)
(1, 15)
(1, 13)
(1, 48)
(1, 18)
(1, 20)
(1, 23)
(1, 100)
(1, 89)
(5, 12)
(5, 19)
(3, 5)
(5, 8)
(5, 9)
(5, 11)
(4, 5)
(5, 6)
(5, 15)
(5, 13)
(5, 48)
(5, 18)
(5, 20)
(5, 23)
(5, 100)
(5, 89)
(12, 19)
(3, 12)
(8, 12)
(9, 12)
(11, 12)
(4, 12)
(6, 12)
(12, 15)
(12, 13)
(12, 48)
(12, 18)
(12, 20)
(12, 23)
(12, 100)
(12, 89)
(3, 19)
(8, 19)
(9, 19)
(11, 19)
(4, 19)
(6, 19)
(15, 19)
(13, 19)
(19, 48)
(18, 19)
(19, 20)
(19, 23)
(19, 100)
(19, 89)
(3, 8)
(3, 9)
(3, 11)
(3, 4)
(3, 6)
(3, 15)
(3, 13)
(3, 48)
(3