In [1]:
class Queue():
    '''
    FIFO Queue
    '''
    def __init__(self, input_list=[]):
        self.list = input_list
        
    def push(self, el):
        self.list.append(el)
        
    def pop(self):
        a = self.list.pop(0)
        return a
    
    def is_empty(self):
        return len(self.list) == 0

In [2]:
class Stack():
    '''
    LIFO Stack
    '''
    def __init__(self, input_list=[]):
        self.list = input_list
        
    def push(self, el):
        self.list.append(el)
        
    def pop(self):
        a = self.list.pop()
        return a
    
    def is_empty(self):
        return len(self.list) == 0

In [3]:
import random
from copy import deepcopy

class Graph():
    
    def __init__(self, graph):
        self.graph = graph
        
    def list_of_nodes(self):
        return list(self.graph.keys())

    def choose_random_node(self):
        return random.choice(self.list_of_nodes())
        
    def choose_node_not_explored(self, graph, explored):
        nodes = self.list_of_nodes()
        nodes_copy = deepcopy(nodes)
        for node in explored:
            try:
                nodes_copy.remove(node) # Need try-remove statement
            except:
                pass
        if len(nodes_copy) == 0:
            return None
        return random.choice(nodes_copy)
        
    def bfs(self, start):
        '''
        Idea:
        Start at a node:
        Add it to a queue of nodes to be explored
        Look at all of its connections
        If they haven't been explored:
        1. Add them to an "explored" list
        2. Add them to the "to explore" queue
        Keep going until there is nothing left to explore!
        '''
        explored = []
        to_explore = Queue()
        
        to_explore.push(start)
        explored.append(start)
        
        while not to_explore.is_empty():
            el = to_explore.pop()
            new_nodes = self.graph.get(el, None)
            for node in new_nodes:
                if node not in explored:
                    explored.append(node)
                    to_explore.push(node)
        
        return explored
    
    def dfs(self, start, explored=[], new_explored=[]):
        '''
        Idea:
        Start at a node:
        Add it to a queue of nodes to be explored
        Look at all of its connections
        If they haven't been explored:
        1. Add them to an "explored" list
        2. Add them to the "to explore" queue
        Keep going until there is nothing left to explore!
        '''
        to_explore = Stack()
        to_explore.push(start)
        if start not in new_explored: 
            new_explored.append(start)
        new_nodes = self.graph.get(start, [])
        
        for node in new_nodes:
            if node not in new_explored + explored:
                self.dfs(node, explored, new_explored)

        return new_explored
    
    
                
    def shortest_path(self, node1, node2):
        '''
        Do the same thing as breadth-first search, except for each 
        '''
        explored = []
        to_explore = Queue()
        distances = {}
        
        depth = 0
        to_explore.push(node1)
        explored.append(node1)
        distances[node1] = depth
        
        while not to_explore.is_empty():
            el = to_explore.pop()
            new_nodes = self.graph.get(el, None)
            for node in new_nodes:
                distances[node] = distances[el] + 1
                if node == node2:
                    return distances[node]
                if node not in explored:
                    to_explore.push(node)
        
        return None
    
    def num_connections(self):
        '''
        TODO
        '''
        # Choose starting node
        start = self.choose_random_node()
        connections = 0
        
        nodes = self.bfs(start)
        connections += 1
        
        while self.choose_node_not_explored(graph, nodes):
            temp = self.choose_node_not_explored(graph, nodes)
            new_explored = self.bfs(temp)
            nodes = nodes + new_explored
            connections += 1
            
        return connections
    
    def reverse_graph(self):
        new_graph = {}
        graph_keys = list(self.graph.keys())
        for node in graph_keys:
            new_graph[node] = []
            for node2 in graph_keys: 
                if node in self.graph[node2]:
                    new_graph[node].append(node2)
        return new_graph
            
    def finishing_time(self):
        '''
        TODO
        '''
        # Choose starting node
        time = 0
        finishing_time = {}
        num_nodes = len(self.list_of_nodes())
        explored = []
        
        for i in list(range(num_nodes, 0, -1)):
            if i not in explored:
                explored_temp = self.dfs(i, explored=explored, new_explored=[])
                for node in explored_temp[::-1]:
                    time += 1
                    finishing_time[node] = time
                explored = explored + explored_temp
        return finishing_time


    def directed_graph_sccs(self):
        '''
        TODO
        '''
        finishing_time = self.finishing_time()
        reverse_graph = self.reverse_graph()
        new_graph = {}
        for key, value in reverse_graph.items():
            new_graph[finishing_time[key]] = [finishing_time[val] for val in value]
        new_graph = Graph(new_graph)

        num_nodes = len(self.list_of_nodes())
        leaders = {}
        explored = []
        for i in list(range(num_nodes, 0, -1)):
            if i not in explored:
                leader = i
                leaders[i] = []
                explored_temp = new_graph.dfs(i, explored=explored, new_explored=[])
                for node in explored_temp:
                    leaders[i].append(node)
                explored = explored + explored_temp
        return leaders
    
            
            


In [None]:
graph = {}
graph['s'] = ['a', 'b']
graph['a'] = ['s', 'c']
graph['b'] = ['s', 'c', 'd']
graph['c'] = ['a', 'b', 'd', 'e']
graph['d'] = ['b', 'c', 'e']
graph['e'] = ['c', 'd']

g = Graph(graph)
print(g.dfs('a'))
print(g.shortest_path('s', 'e'))
print(g.num_connections())

In [None]:
graph2 = {}
graph2['s'] = ['a', 'b']
graph2['a'] = ['s']
graph2['b'] = ['s']
graph2['c'] = ['d', 'e']
graph2['d'] = ['c']
graph2['e'] = ['c']
g2 = Graph(graph2)
g2.list_of_nodes()
print(g2.dfs('a'))
# print(g2.shortest_path('s', 'e'))
# print(g2.num_connections())

# Counting connected components

In [8]:
graph3 = {}
graph3[1] = [7]
graph3[2] = [5]
graph3[3] = [9]
graph3[4] = [1]
graph3[5] = [8]
graph3[6] = [8, 3]
graph3[7] = [4, 9]
graph3[8] = [2]
graph3[9] = [6]

In [10]:
g3 = Graph(graph3)
print(g3.finishing_time())
g3.directed_graph_sccs()

{3: 1, 5: 2, 2: 3, 8: 4, 6: 5, 9: 6, 1: 7, 4: 8, 7: 9}


{4: [4, 2, 3], 6: [6, 1, 5], 9: [9, 7, 8]}

# Text file

In [4]:
with open("SCC.txt", "r") as f:
    l = [x.strip().split(' ') for x in f.read().split('\n') if x != '']

In [5]:
from collections import defaultdict

big_dict = defaultdict(list)
for line in l:
    big_dict[int(line[0])].append(int(line[1]))

In [6]:
a = sorted(list(range(1, 875714 + 1)))
b = sorted([int(x) for x in dict(big_dict).keys()])
print(len(a))
print(len(b))

875714
739454


In [7]:
g4 = Graph(dict(big_dict))
g4.graph

{1: [1, 2, 5, 6, 7, 3, 8, 4],
 2: [47646, 47647, 13019, 47648, 47649, 47650, 7700, 47651, 47652],
 3: [511596],
 5: [1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 7, 8],
 6: [1, 20, 12, 13, 21, 22, 23, 24, 25, 7, 8],
 7: [1, 5, 26, 27, 23, 28, 29, 30, 18, 31, 32, 8],
 8: [1, 5, 33, 34, 35, 21, 36, 37, 32, 7],
 9: [71107, 71108, 71109, 21, 71110, 71111, 39350, 71112, 71113],
 10: [1,
  30555,
  5,
  26,
  12,
  13,
  27,
  104816,
  171168,
  23,
  28,
  24,
  17,
  171169,
  29,
  171170,
  171171,
  104784,
  30,
  18,
  19,
  32,
  7,
  8],
 11: [5,
  115323,
  9,
  10,
  171172,
  115331,
  23113,
  76269,
  171173,
  16456,
  21,
  14,
  22,
  131692,
  100299,
  24,
  15,
  73869,
  171174,
  171169,
  115355,
  171175,
  171170,
  171176,
  100619,
  32,
  100308,
  7,
  8,
  171177],
 12: [1, 5, 158265, 171178, 171179, 7, 8],
 13: [1,
  5,
  10,
  171180,
  53195,
  171181,
  104807,
  16,
  171182,
  158254,
  53205,
  171183,
  171184,
  19,
  7,
  8],
 14: [171185],
 15: [104

# Solution

In [5]:
def main():

    # make a graph and its reverse from the given data
    graph = {}
    graphR = {}
    f = open("SCC.txt", 'r')
    line = f.readline()
    while line:
        edge = [int(e) for e in line.split()]
        if edge[0] in graph:
            graph[edge[0]].append(edge[1])
        else:
            graph[edge[0]] = [edge[1]]
        if edge[1] in graphR:
            graphR[edge[1]].append(edge[0])
        else:
            graphR[edge[1]] = [edge[0]]
        line = f.readline()

    # run depth-first search on graph reverse
    node_post = {}
    post_node = {}
    post = 1
    for node in graphR:
        if not (node in node_post):
            prev_dict, prev_index, prev_set = {}, 1, set([node])
            prev_dict[prev_index] = node
            try:
                nodes = graphR[node][:]
            except:
                nodes = []
            next_dict, next_index, next_set = {}, 0, set()
            for n in nodes:
                if not ((n in prev_set) or (n in next_set) or
                        (n in node_post)):
                    next_index += 1
                    next_dict[next_index] = n
                    next_set.add(n)
            while next_dict:
                if next_dict[next_index] in prev_set:
                    next_set.remove(next_dict[next_index])
                    del next_dict[next_index]
                    next_index = len(next_dict)
                else:
                    prev_set.add(next_dict[next_index])
                    prev_index += 1
                    prev_dict[prev_index] = next_dict[next_index]
                    try:
                        nodes = graphR[next_dict[next_index]][:]
                    except:
                        nodes = []
                    next_set.remove(next_dict[next_index])
                    del next_dict[next_index]
                    next_index = len(next_dict)
                    for n in nodes:
                        if not ((n in prev_set) or (n in next_set) or
                                (n in node_post)):
                            next_index += 1
                            next_dict[next_index] = n
                            next_set.add(n)
                    next_index = len(next_dict)
            post = post + len(prev_dict)
            for element in prev_dict:
                if not prev_dict[element] in node_post:
                    node_post[prev_dict[element]] = post - element
                    post_node[post - element] = prev_dict[element]

    # run DFS on graph (by reverse postorder)
    reverse_postorder = list(post_node.values())[::-1]
    marked = set()
    results = []
    order = 0
    lenght = len(reverse_postorder)
    while order < lenght:
        node = reverse_postorder[order]
        if (not (node in marked)):
            prev_dict, prev_index, prev_set = {}, 1, set([node])
            prev_dict[prev_index] = node
            try:
                nodes = graph[node][:]
            except:
                nodes = []
            next_dict, next_index, next_set = {}, 0, set()
            for n in nodes:
                if not ((n in prev_set) or (n in next_set) or (n in marked)):
                    next_index += 1
                    next_dict[next_index] = n
                    next_set.add(n)
            while next_dict:
                if next_dict[next_index] in prev_set:
                    next_set.remove(next_dict[next_index])
                    del next_dict[next_index]
                    next_index = len(next_dict)
                else:
                    prev_set.add(next_dict[next_index])
                    prev_index += 1
                    prev_dict[prev_index] = next_dict[next_index]
                    try:
                        nodes = graph[next_dict[next_index]][:]
                    except:
                        nodes = []
                    next_set.remove(next_dict[next_index])
                    del next_dict[next_index]
                    next_index = len(next_dict)
                    for n in nodes:
                        if not ((n in prev_set) or (n in next_set) or
                                (n in marked)):
                            next_index += 1
                            next_dict[next_index] = n
                            next_set.add(n)
                    next_index = len(next_dict)
            # add the size of the previous strongly connected component
            results.append(len(prev_dict))
        for element in prev_dict:
            marked.add(prev_dict[element])
        order += 1

    # output the sizes of the 5 largest SCCs in the given graph
    solution = ','.join(str(x) for x in sorted(results + [0] * 5,
                                               reverse=True)[:5])
    print(solution)
    return solution


In [6]:
main()

434821,968,459,314,211


'434821,968,459,314,211'