In [327]:
from collections import defaultdict 

In [328]:
#This class represents a directed graph using adjacency list representation 
class Graph: 
   
    def __init__(self): 
        self.graph = defaultdict(set) # default dictionary to store graph 
        #self.T = Tree() #propagation tree
        
    def get_vertices(self):
        return(set(self.graph.keys()))
   
    # function to add an edge to graph 
    def addEdge(self,u,v, bidirection = True): 
        
        self.graph[u] |= {v}
        if bidirection:
            self.graph[v] |= {u} #bidirectional
   
    def fillOrder(self,v,visited, stack): 
        # Mark the current node as visited  
        visited[v]= True
        #Recur for all the vertices adjacent to this vertex 
        for i in self.graph[v]: 
            if visited[i]==False: 
                self.fillOrder(i, visited, stack) 
        stack = stack.append(v) 
      
   
    def Min_Selection_Step(self):
                
        
        stack = [] 
        visited ={x:False for x in self.get_vertices()}
        for i in self.get_vertices(): 
            if visited[i]==False: 
                self.fillOrder(i, visited, stack) 
          
        visited ={x:False for x in self.get_vertices()}
      
        H = Graph()
                
        while stack: 
            u = stack.pop() 
            if visited[u]==False: 
                NN_G_vertice = self.graph[u]
                v_min = min(NN_G_vertice | {u})
            
                for v in NN_G_vertice | {u}:

                    H.addEdge(v, v_min, bidirection = False)
                                         
        return(H)
    
    def Pruning_Step(self, tree): 
        stack = [] 
        visited ={x:False for x in self.get_vertices()}

        for i in self.get_vertices(): 
            if visited[i]==False: 
                self.fillOrder(i, visited, stack) 

          
        visited ={x:False for x in self.get_vertices()}
        G_next = Graph() 
        
        while stack: 
            u = stack.pop() 
            if visited[u]==False:
                
                NN_H_vertice = self.graph[u]
                v_min = min(NN_H_vertice)
                if len(NN_H_vertice) > 1:
                    
                    for v in (NN_H_vertice - {v_min}):
                        G_next.addEdge(v_min, v, bidirection = True)
                
                if u not in NN_H_vertice:
                    tree.addEdge(v_min, u)
                    tree.setSeed(v_min, u)
        
        return(G_next)
                        


In [407]:
class Tree:
    def __init__(self): 
        self.tree = defaultdict(set) 
        self.seed = set()
    def addEdge(self,u,v):    
        self.tree[u] |= {v}   
    def setSeed(self,u,v):
        self.seed |= {u}
        self.seed -= {v}   
    def getChildrenList(self):
        return(list(self.tree.values()))
    def getParentList(self):
        return(list(self.tree.keys()))
    def getSeed(self,u): #find the root of the leaf u
        if u in self.seed:

            return u
        
        parentFound = False
        n = 0
        while (not parentFound) and (n < len(self.getChildrenList())) :
            if u in self.getChildrenList()[n]:
                parent = self.getParentList()[n]
                parentFound = True
            else:
                n += 1
                
        if not parentFound:
            return(u)
        else:
            if parent in self.seed:
                return parent
            else:
                #print(parent)
                getSeed(self, parent)  

In [415]:
# Create a graph given in the above diagram 
g = Graph() 
g.addEdge(0, 1) 
g.addEdge(1, 2) 
g.addEdge(2, 5) 
g.addEdge(5, 8) 
g.addEdge(7, 8) 
g.addEdge(3, 7) 
g.addEdge(3, 4) 
g.addEdge(3, 6) 

In [416]:
g.graph #by default diretional

defaultdict(set,
            {0: {1},
             1: {0, 2},
             2: {1, 5},
             3: {4, 6, 7},
             4: {3},
             5: {2, 8},
             6: {3},
             7: {3, 8},
             8: {5, 7}})

In [417]:
def Cracker_1(G):
    tree = Tree()
    t = 0
    #print("G1:", G.graph)
    while len(G.graph) > 0:
        t += 1
        H = G.Min_Selection_Step()
        #print("H{t}:".format(t = t), H.graph)
        G = H.Pruning_Step(tree)
        #print("Tree seed:", tree.seed)
        #print("G{t}:".format(t = t+1), G.graph)
        
    #print(tree.tree)
    return(tree)

In [418]:
Cracker_1(g).tree

defaultdict(set, {0: {1, 2, 3, 5}, 2: {8}, 3: {4, 6, 7}})

In [419]:
import os

In [420]:
os.listdir('graph_datasets')

['.DS_Store',
 'simulated_blockmodel_graph_100_nodes.tsv',
 'soc-sign-bitcoinalpha.csv',
 'facebook',
 'as-skitter.txt.gz',
 'btc_01.txt',
 'btc_02.txt']

In [421]:
os.path.getsize('graph_datasets/simulated_blockmodel_graph_100_nodes.tsv')

6096

In [422]:
G_100 = Graph() 
for line in open('graph_datasets/simulated_blockmodel_graph_100_nodes.tsv',"r"):
    node_1, node_2, c = line.strip().split("\t")
    G_100.addEdge(int(node_1), int(node_2))
    G_100.addEdge(int(node_1) + 100, int(node_2) + 100)
    G_100.addEdge(int(node_1) + 200, int(node_2) + 200)
    G_100.addEdge(int(node_1) + 300, int(node_2) + 300)
    G_100.addEdge(int(node_1) + 400, int(node_2) + 400)
    G_100.addEdge(int(node_1) + 500, int(node_2) + 500)
    G_100.addEdge(int(node_1) + 600, int(node_2) + 600)

In [425]:
Cracker_1(G_100).tree

defaultdict(set,
            {1: {2,
              3,
              4,
              5,
              6,
              7,
              8,
              9,
              10,
              11,
              12,
              13,
              14,
              15,
              16,
              18,
              19,
              22,
              26,
              27,
              28,
              30,
              31,
              32,
              33,
              35,
              36,
              37,
              42,
              43,
              46,
              48,
              49,
              50,
              53,
              55,
              57,
              58,
              60,
              63,
              64,
              65,
              66,
              67,
              69,
              71,
              72,
              77,
              79,
              80,
              81,
              82,
              84,
              85,
              88

In [339]:
os.path.getsize('graph_datasets/facebook/107.edges')

523802

In [401]:
G_fb = Graph()
for line in open('graph_datasets/facebook/107.edges',"r"): 
    node_1, node_2 = line.strip().split(" ")
    G_fb.addEdge(int(node_1), int(node_2))
 

In [402]:
G_fb.graph

defaultdict(set,
            {953: {484,
              897,
              932,
              978,
              993,
              995,
              997,
              1029,
              1040,
              1047,
              1056,
              1059,
              1078,
              1083,
              1086,
              1107,
              1117,
              1124,
              1135,
              1156,
              1160,
              1163,
              1172,
              1173,
              1191,
              1199,
              1201,
              1205,
              1214,
              1222,
              1230,
              1238,
              1243,
              1256,
              1271,
              1280,
              1289,
              1323,
              1330,
              1331,
              1336,
              1359,
              1361,
              1370,
              1377,
              1389,
              1390,
              1391,
              1409,
     

In [341]:
Cracker_1(G_fb)

{0}


In [403]:
os.path.getsize('graph_datasets/soc-sign-bitcoinalpha.csv')

503131

In [404]:
G_btc = Graph()
for line in open('graph_datasets/soc-sign-bitcoinalpha.csv',"r"):
    edge = line.strip().split(",")
    #print(edge)
    G_btc.addEdge(int(edge[0]), int(edge[1]))

In [406]:
G_btc.graph

defaultdict(set,
            {7188: {1},
             1: {2,
              4,
              9,
              10,
              11,
              15,
              17,
              18,
              20,
              22,
              29,
              30,
              35,
              37,
              38,
              42,
              44,
              57,
              58,
              67,
              68,
              71,
              72,
              75,
              76,
              87,
              89,
              90,
              95,
              96,
              112,
              113,
              115,
              116,
              117,
              118,
              121,
              123,
              124,
              126,
              142,
              146,
              151,
              152,
              154,
              155,
              156,
              158,
              160,
              161,
              174,
              175,
 

In [405]:
Cracker_1(G_btc)

{1, 1870, 1389, 3228, 5837}


# Map Reduce Paradigm

In [381]:
G_btc_1 = Graph()
for line in open('graph_datasets/btc_01.txt',"r"):
    edge = line.strip().split(",")
    #print(edge)
    G_btc_1.addEdge(int(edge[0]), int(edge[1]))

In [382]:
#print(G_btc_1.graph)

In [383]:
G_btc_2 = Graph()
for line in open('graph_datasets/btc_02.txt',"r"):
    edge = line.strip().split(",")
    #print(edge)
    G_btc_2.addEdge(int(edge[0]), int(edge[1]))

In [384]:
#print(G_btc_2.graph)

In [385]:
def Cracker_Mapper(G):
    tree = Tree()
    t = 0
    #print("G1:", G.graph)
    while(len(G.graph)) > 0:
        t += 1
        H = G.Min_Selection_Step()
        #print("H{t}:".format(t = t), H.graph)
        G = H.Pruning_Step(tree)
        #print("Tree seed:", tree.seed)
        #print("G{t}:".format(t = t+1), G.graph)
        
    return(tree)

In [386]:
tree_big = Cracker_Mapper(G_btc)
tree_big.seed

{1, 1389, 1870, 3228, 5837}

In [387]:
tree_big.tree

defaultdict(set,
            {1: {2,
              3,
              4,
              5,
              6,
              7,
              8,
              9,
              10,
              11,
              12,
              13,
              14,
              15,
              16,
              17,
              18,
              19,
              20,
              21,
              22,
              23,
              24,
              25,
              26,
              27,
              28,
              29,
              30,
              31,
              32,
              33,
              34,
              35,
              36,
              37,
              38,
              39,
              40,
              41,
              42,
              43,
              44,
              45,
              46,
              47,
              48,
              49,
              50,
              51,
              52,
              53,
              54,
              55,
              56

In [388]:
tree_1 = Cracker_Mapper(G_btc_1)

In [389]:
tree_2 = Cracker_Mapper(G_btc_2)

In [390]:
tree_1.tree

defaultdict(set,
            {1: {2,
              3,
              4,
              5,
              6,
              7,
              8,
              9,
              10,
              11,
              12,
              13,
              14,
              15,
              16,
              17,
              18,
              19,
              20,
              21,
              22,
              23,
              24,
              25,
              26,
              27,
              28,
              29,
              30,
              31,
              32,
              33,
              34,
              35,
              36,
              37,
              38,
              39,
              40,
              41,
              42,
              43,
              44,
              45,
              46,
              47,
              48,
              49,
              50,
              51,
              52,
              53,
              54,
              55,
              56

In [391]:
tree_2.seed

{97,
 527,
 760,
 836,
 868,
 932,
 1010,
 1017,
 1034,
 1090,
 1175,
 1204,
 1232,
 1302,
 1345,
 1389,
 1476,
 1482,
 1490,
 1636,
 1704,
 1764,
 1866,
 1870,
 1873,
 1929,
 2078,
 2117,
 2151,
 2155,
 2234,
 2356,
 2794,
 3186,
 3228,
 5837}

In [399]:
def Cracker_reducer(T1, T2):
    allSeedT1 = T1.seed.copy()
    allSeedT2 = T2.seed.copy()
    #this function merges two trees and return the seeds
    for seedT2 in T2.seed:
        
        set2 = set()
        for seedT2_leaves in {seedT2} | T2.tree[seedT2]:
            set2 |= {T1.getSeed(seedT2_leaves)}
            print(set2)
        #print({min(set2)})
        allSeedT1 |= {min(set2)}
    e
    for seedT1 in T1.seed:
        set1 = set()
        for seedT1_leaves in {seedT1} | T1.tree[seedT2]:
            set1 |= {T2.getSeed(seedT1_leaves)}
        allSeedT2 |= {min(set1)}
    
    return(allSeedT1 & allSeedT2)

In [None]:
tree

In [400]:
Cracker_reducer(tree_1, tree_2)

{1}
{1}
{None}
{None}
{None}
{6157}
{6157, None}


TypeError: '<' not supported between instances of 'NoneType' and 'int'

In [394]:
tree_1.getSeed(836)

836

In [395]:
tree_big.getSeed(836)

1

In [396]:
tree_2.tree[836]

{3179}