In [1]:
class Queue():
    '''
    FIFO Queue
    '''
    def __init__(self, input_list=[]):
        self.list = input_list
        
    def push(self, el):
        self.list.append(el)
        
    def pop(self):
        a = self.list.pop(0)
        return a
    
    def is_empty(self):
        return len(self.list) == 0

In [2]:
class Stack():
    '''
    LIFO Stack
    '''
    def __init__(self, input_list=[]):
        self.list = input_list
        
    def push(self, el):
        self.list.append(el)
        
    def pop(self):
        a = self.list.pop()
        return a
    
    def is_empty(self):
        return len(self.list) == 0

In [3]:
import random
from copy import deepcopy

class Graph():
    
    def __init__(self, graph):
        self.graph = graph
        
    def list_of_nodes(self):
        return list(self.graph.keys())

    def choose_random_node(self):
        return random.choice(self.list_of_nodes())
        
    def choose_node_not_explored(self, graph, explored):
        nodes = self.list_of_nodes()
        nodes_copy = deepcopy(nodes)
        for node in explored:
            try:
                nodes_copy.remove(node) # Need try-remove statement
            except:
                pass
        if len(nodes_copy) == 0:
            return None
        return random.choice(nodes_copy)
        
    def bfs(self, start):
        '''
        Idea:
        Start at a node:
        Add it to a queue of nodes to be explored
        Look at all of its connections
        If they haven't been explored:
        1. Add them to an "explored" list
        2. Add them to the "to explore" queue
        Keep going until there is nothing left to explore!
        '''
        explored = []
        to_explore = Queue()
        
        to_explore.push(start)
        explored.append(start)
        
        while not to_explore.is_empty():
            el = to_explore.pop()
            new_nodes = self.graph.get(el, None)
            for node in new_nodes:
                if node not in explored:
                    explored.append(node)
                    to_explore.push(node)
        
        return explored
    
    def dfs(self, start, explored=[], new_explored=[]):
        '''
        Idea:
        Start at a node:
        Add it to a queue of nodes to be explored
        Look at all of its connections
        If they haven't been explored:
        1. Add them to an "explored" list
        2. Add them to the "to explore" queue
        Keep going until there is nothing left to explore!
        '''
        to_explore = Stack()
        to_explore.push(start)
        if start not in new_explored: 
            new_explored.append(start)
        new_nodes = self.graph.get(start, [])
        
        for node in new_nodes:
            if node not in new_explored + explored:
                self.dfs(node, explored, new_explored)

        return new_explored
    
    
                
    def shortest_path(self, node1, node2):
        '''
        Do the same thing as breadth-first search, except for each 
        '''
        explored = []
        to_explore = Queue()
        distances = {}
        
        depth = 0
        to_explore.push(node1)
        explored.append(node1)
        distances[node1] = depth
        
        while not to_explore.is_empty():
            el = to_explore.pop()
            new_nodes = self.graph.get(el, None)
            for node in new_nodes:
                distances[node] = distances[el] + 1
                if node == node2:
                    return distances[node]
                if node not in explored:
                    to_explore.push(node)
        
        return None
    
    def num_connections(self):
        '''
        TODO
        '''
        # Choose starting node
        start = self.choose_random_node()
        connections = 0
        
        nodes = self.bfs(start)
        connections += 1
        
        while self.choose_node_not_explored(graph, nodes):
            temp = self.choose_node_not_explored(graph, nodes)
            new_explored = self.bfs(temp)
            nodes = nodes + new_explored
            connections += 1
            
        return connections
    
    def reverse_graph(self):
        new_graph = {}
        graph_keys = list(self.graph.keys())
        for node in graph_keys:
            new_graph[node] = []
            for node2 in graph_keys: 
                if node in self.graph[node2]:
                    new_graph[node].append(node2)
        return new_graph
            
    def finishing_time(self):
        '''
        TODO
        '''
        # Choose starting node
        time = 0
        finishing_time = {}
        num_nodes = len(self.list_of_nodes())
        explored = []
        
        for i in list(range(num_nodes, 0, -1)):
            if i not in explored:
                explored_temp = self.dfs(i, explored=explored, new_explored=[])
                for node in explored_temp[::-1]:
                    time += 1
                    finishing_time[node] = time
                explored = explored + explored_temp
        return finishing_time


    def directed_graph_sccs(self):
        '''
        TODO
        '''
        finishing_time = self.finishing_time()
        reverse_graph = self.reverse_graph()
        new_graph = {}
        for key, value in reverse_graph.items():
            new_graph[finishing_time[key]] = [finishing_time[val] for val in value]
        new_graph = Graph(new_graph)

        num_nodes = len(self.list_of_nodes())
        leaders = {}
        explored = []
        for i in list(range(num_nodes, 0, -1)):
            if i not in explored:
                leader = i
                leaders[i] = []
                explored_temp = new_graph.dfs(i, explored=explored, new_explored=[])
                for node in explored_temp:
                    leaders[i].append(node)
                explored = explored + explored_temp
        return leaders
    
            
            


In [None]:
graph = {}
graph['s'] = ['a', 'b']
graph['a'] = ['s', 'c']
graph['b'] = ['s', 'c', 'd']
graph['c'] = ['a', 'b', 'd', 'e']
graph['d'] = ['b', 'c', 'e']
graph['e'] = ['c', 'd']

g = Graph(graph)
print(g.dfs('a'))
print(g.shortest_path('s', 'e'))
print(g.num_connections())

In [None]:
graph2 = {}
graph2['s'] = ['a', 'b']
graph2['a'] = ['s']
graph2['b'] = ['s']
graph2['c'] = ['d', 'e']
graph2['d'] = ['c']
graph2['e'] = ['c']
g2 = Graph(graph2)
g2.list_of_nodes()
print(g2.dfs('a'))
# print(g2.shortest_path('s', 'e'))
# print(g2.num_connections())

# Counting connected components

In [8]:
graph3 = {}
graph3[1] = [7]
graph3[2] = [5]
graph3[3] = [9]
graph3[4] = [1]
graph3[5] = [8]
graph3[6] = [8, 3]
graph3[7] = [4, 9]
graph3[8] = [2]
graph3[9] = [6]

In [10]:
g3 = Graph(graph3)
print(g3.finishing_time())
g3.directed_graph_sccs()

{3: 1, 5: 2, 2: 3, 8: 4, 6: 5, 9: 6, 1: 7, 4: 8, 7: 9}


{4: [4, 2, 3], 6: [6, 1, 5], 9: [9, 7, 8]}

# Text file

In [4]:
with open("SCC.txt", "r") as f:
    l = [x.strip().split(' ') for x in f.read().split('\n') if x != '']

In [5]:
from collections import defaultdict

big_dict = defaultdict(list)
for line in l:
    big_dict[int(line[0])].append(int(line[1]))

In [6]:
a = sorted(list(range(1, 875714 + 1)))
b = sorted([int(x) for x in dict(big_dict).keys()])
print(len(a))
print(len(b))

875714
739454


In [7]:
g4 = Graph(dict(big_dict))
g4.graph

{1: [1, 2, 5, 6, 7, 3, 8, 4],
 2: [47646, 47647, 13019, 47648, 47649, 47650, 7700, 47651, 47652],
 3: [511596],
 5: [1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 7, 8],
 6: [1, 20, 12, 13, 21, 22, 23, 24, 25, 7, 8],
 7: [1, 5, 26, 27, 23, 28, 29, 30, 18, 31, 32, 8],
 8: [1, 5, 33, 34, 35, 21, 36, 37, 32, 7],
 9: [71107, 71108, 71109, 21, 71110, 71111, 39350, 71112, 71113],
 10: [1,
  30555,
  5,
  26,
  12,
  13,
  27,
  104816,
  171168,
  23,
  28,
  24,
  17,
  171169,
  29,
  171170,
  171171,
  104784,
  30,
  18,
  19,
  32,
  7,
  8],
 11: [5,
  115323,
  9,
  10,
  171172,
  115331,
  23113,
  76269,
  171173,
  16456,
  21,
  14,
  22,
  131692,
  100299,
  24,
  15,
  73869,
  171174,
  171169,
  115355,
  171175,
  171170,
  171176,
  100619,
  32,
  100308,
  7,
  8,
  171177],
 12: [1, 5, 158265, 171178, 171179, 7, 8],
 13: [1,
  5,
  10,
  171180,
  53195,
  171181,
  104807,
  16,
  171182,
  158254,
  53205,
  171183,
  171184,
  19,
  7,
  8],
 14: [171185],
 15: [104

# Solution

In [13]:
class SccFinder(object):
    def __init__(self, input_file):
        self.scc_list = []
        with open(input_file) as file:
            self.finish_order = []
            self._graph = {}
            for line in file:
                (from_v, to_v) = tuple(number for number in line.split())
                self._add_edge_to_graph(int(from_v), int(to_v))

    def _add_edge_to_graph(self, from_v, to_v):
        if from_v in self._graph:
            self._graph[from_v].append(to_v)
        else:
            self._graph[from_v] = [to_v]
        if to_v in self._graph:
            self._graph[to_v].append(-from_v)
        else:
            self._graph[to_v] = [-from_v]

    def compute_finish_times(self):
        visited_nodes, finished_nodes = set(), set()
        for vertex in self._graph.keys():
            if vertex in visited_nodes:
                continue
            nodes_stack = [vertex]
            while nodes_stack:
                node = nodes_stack.pop()
                if node not in visited_nodes:
                    visited_nodes.add(node)
                    nodes_stack.append(node)
                    neighbors = (-edge for edge in self._graph[node] if edge < 0)
                    for neighbor in neighbors:
                        if neighbor not in visited_nodes:
                            nodes_stack.append(neighbor)
                else:
                    if node not in finished_nodes:
                        self.finish_order.append(node)
                        finished_nodes.add(node)

    def compute_sccs(self):
        visited_nodes = set()
        assert (len(self.finish_order) == len(self._graph))
        for i in reversed(self.finish_order):
            if i in visited_nodes:
                continue
            nodes_stack = [i]
            size = 0
            while nodes_stack:
                node = nodes_stack.pop()
                if node not in visited_nodes:
                    size += 1
                    visited_nodes.add(node)
                    nodes_stack.append(node)
                    neighbors = (edge for edge in self._graph[node] if edge > 0)
                    for neighbor in neighbors:
                        if neighbor not in visited_nodes:
                            nodes_stack.append(neighbor)
            self.scc_list.append(size)
        self.scc_list.sort(reverse=True)
        print(self.scc_list[:5])

scc_finder = SccFinder("SCC.txt")
scc_finder.compute_finish_times()
scc_finder.compute_sccs()
# expected_sccs = [434821, 968, 459, 313, 211]
# print(scc_finder.scc_list[:5])


[434821, 968, 459, 313, 211]


In [5]:
with open('SCC.txt') as file:
    finish_order = []
    _graph = {}
    for line in file:
        (from_v, to_v) = tuple(number for number in line.split())
        print(tuple(number for number in line.split()))

def _add_edge_to_graph(_graph, from_v, to_v):
    if from_v in _graph:
        _graph[from_v].append(to_v)
    else:
        _graph[from_v] = [to_v]
    if to_v in _graph:
        _graph[to_v].append(-from_v)
    else:
        _graph[to_v] = [-from_v]

('1', '1')
('1', '2')
('1', '5')
('1', '6')
('1', '7')
('1', '3')
('1', '8')
('1', '4')
('2', '47646')
('2', '47647')
('2', '13019')
('2', '47648')
('2', '47649')
('2', '47650')
('2', '7700')
('2', '47651')
('2', '47652')
('3', '511596')
('5', '1')
('5', '9')
('5', '10')
('5', '11')
('5', '12')
('5', '13')
('5', '14')
('5', '15')
('5', '16')
('5', '17')
('5', '18')
('5', '19')
('5', '7')
('5', '8')
('6', '1')
('6', '20')
('6', '12')
('6', '13')
('6', '21')
('6', '22')
('6', '23')
('6', '24')
('6', '25')
('6', '7')
('6', '8')
('7', '1')
('7', '5')
('7', '26')
('7', '27')
('7', '23')
('7', '28')
('7', '29')
('7', '30')
('7', '18')
('7', '31')
('7', '32')
('7', '8')
('8', '1')
('8', '5')
('8', '33')
('8', '34')
('8', '35')
('8', '21')
('8', '36')
('8', '37')
('8', '32')
('8', '7')
('9', '71107')
('9', '71108')
('9', '71109')
('9', '21')
('9', '71110')
('9', '71111')
('9', '39350')
('9', '71112')
('9', '71113')
('10', '1')
('10', '30555')
('10', '5')
('10', '26')
('10', '12')
('10', '13')


('157', '112')
('157', '113')
('157', '114')
('157', '115')
('157', '116')
('157', '117')
('157', '118')
('157', '119')
('157', '120')
('157', '121')
('158', '110')
('158', '111')
('158', '112')
('158', '113')
('158', '114')
('158', '115')
('158', '116')
('158', '117')
('158', '118')
('158', '119')
('158', '120')
('158', '121')
('159', '110')
('159', '111')
('159', '112')
('159', '113')
('159', '114')
('159', '115')
('159', '116')
('159', '117')
('159', '118')
('159', '119')
('159', '120')
('159', '121')
('160', '110')
('160', '111')
('160', '112')
('160', '113')
('160', '114')
('160', '115')
('160', '116')
('160', '117')
('160', '118')
('160', '119')
('160', '120')
('160', '121')
('161', '110')
('161', '111')
('161', '112')
('161', '113')
('161', '114')
('161', '115')
('161', '116')
('161', '117')
('161', '118')
('161', '119')
('161', '120')
('161', '121')
('162', '163')
('162', '164')
('162', '165')
('162', '166')
('162', '167')
('162', '168')
('162', '169')
('163', '162')
('163', '1

('284', '269')
('284', '271')
('284', '275')
('285', '260')
('285', '261')
('285', '266')
('285', '268')
('285', '269')
('285', '271')
('285', '275')
('286', '260')
('286', '261')
('286', '266')
('286', '268')
('286', '269')
('286', '271')
('286', '275')
('287', '207263')
('287', '173531')
('287', '266')
('287', '207264')
('287', '207265')
('287', '207266')
('288', '289')
('288', '290')
('288', '291')
('288', '292')
('288', '293')
('288', '294')
('288', '296')
('288', '298')
('288', '17906')
('288', '266')
('288', '300')
('288', '17910')
('288', '301')
('288', '130929')
('288', '302')
('288', '91382')
('288', '130940')
('288', '14832')
('288', '17886')
('288', '303')
('288', '304')
('288', '305')
('288', '306')
('288', '130931')
('288', '308')
('288', '309')
('288', '32701')
('288', '311')
('288', '312')
('288', '314')
('288', '315')
('289', '288')
('289', '192204')
('289', '121293')
('289', '36511')
('289', '192205')
('289', '290')
('289', '291')
('289', '292')
('289', '293')
('289', 

('424', '398')
('424', '424362')
('424', '399')
('424', '366943')
('424', '400')
('424', '558519')
('424', '401')
('424', '402')
('424', '685018')
('424', '503604')
('424', '404')
('424', '405')
('424', '732876')
('424', '406')
('424', '732877')
('424', '732878')
('424', '732879')
('425', '398')
('425', '399')
('425', '400')
('425', '401')
('425', '402')
('425', '404')
('425', '19406')
('425', '405')
('425', '88762')
('425', '406')
('426', '398')
('426', '399')
('426', '400')
('426', '401')
('426', '402')
('426', '404')
('426', '405')
('426', '406')
('427', '398')
('427', '399')
('427', '400')
('427', '401')
('427', '402')
('427', '404')
('427', '405')
('427', '406')
('427', '749995')
('428', '398')
('428', '399')
('428', '400')
('428', '401')
('428', '402')
('428', '404')
('428', '405')
('428', '406')
('428', '465506')
('429', '398')
('429', '399')
('429', '400')
('429', '401')
('429', '402')
('429', '404')
('429', '405')
('429', '406')
('430', '398')
('430', '399')
('430', '400')
('4

('581', '541569')
('581', '541570')
('581', '567')
('581', '568')
('581', '541571')
('582', '553')
('582', '554')
('582', '555')
('582', '633')
('582', '606')
('582', '541572')
('582', '541573')
('582', '559')
('582', '560')
('582', '561')
('582', '541574')
('582', '562')
('582', '563')
('582', '564')
('582', '566')
('582', '567')
('582', '568')
('583', '553')
('583', '435246')
('583', '554')
('583', '555')
('583', '633')
('583', '606')
('583', '435247')
('583', '575')
('583', '435248')
('583', '435249')
('583', '559')
('583', '560')
('583', '561')
('583', '562')
('583', '563')
('583', '564')
('583', '566')
('583', '567')
('583', '568')
('584', '560')
('585', '555')
('585', '596080')
('585', '647222')
('585', '45987')
('585', '101697')
('585', '560')
('585', '597')
('585', '45989')
('585', '333865')
('586', '42649')
('586', '624366')
('586', '17090')
('586', '578')
('586', '580')
('586', '279632')
('586', '77465')
('586', '560')
('586', '190572')
('586', '581')
('586', '170324')
('586'

('742', '741')
('742', '743')
('742', '744')
('742', '481290')
('742', '367927')
('743', '733')
('743', '398344')
('743', '735')
('743', '157669')
('743', '481287')
('743', '736')
('743', '481288')
('743', '166')
('743', '481281')
('743', '737')
('743', '438417')
('743', '157670')
('743', '481275')
('743', '481276')
('743', '481291')
('743', '481277')
('743', '481292')
('743', '481282')
('743', '481283')
('743', '481293')
('743', '481278')
('743', '738')
('743', '481294')
('743', '481295')
('743', '481296')
('743', '739')
('743', '188')
('743', '481284')
('743', '481289')
('743', '438418')
('743', '481297')
('743', '438419')
('743', '481285')
('743', '481298')
('743', '157671')
('743', '481299')
('743', '740')
('743', '481300')
('743', '741')
('743', '481301')
('743', '742')
('743', '218661')
('743', '744')
('743', '481286')
('743', '481290')
('743', '481279')
('743', '367927')
('743', '455445')
('744', '735')
('744', '736')
('744', '166')
('744', '737')
('744', '738')
('744', '739')
(

('913', '914')
('913', '915')
('913', '21203')
('913', '21186')
('913', '21204')
('913', '21205')
('913', '916')
('913', '918')
('913', '21206')
('913', '919')
('913', '920')
('913', '921')
('913', '922')
('913', '923')
('913', '924')
('914', '910')
('914', '911')
('914', '21207')
('914', '912')
('914', '913')
('914', '21208')
('914', '915')
('914', '21209')
('914', '21186')
('914', '916')
('914', '918')
('914', '919')
('914', '920')
('914', '921')
('914', '21210')
('914', '922')
('914', '923')
('914', '924')
('915', '21185')
('915', '21211')
('915', '21212')
('915', '21213')
('915', '21187')
('915', '21188')
('915', '21195')
('915', '21190')
('915', '904')
('915', '910')
('915', '21214')
('915', '21196')
('915', '911')
('915', '21215')
('915', '21216')
('915', '21217')
('915', '21191')
('915', '912')
('915', '21218')
('915', '21219')
('915', '21220')
('915', '21221')
('915', '913')
('915', '21222')
('915', '21223')
('915', '914')
('915', '21224')
('915', '21208')
('915', '21225')
('91

('1083', '391629')
('1083', '391630')
('1083', '391631')
('1083', '254498')
('1083', '391632')
('1083', '254499')
('1083', '254500')
('1083', '254501')
('1083', '254502')
('1083', '254503')
('1083', '391633')
('1083', '391634')
('1083', '254504')
('1083', '391635')
('1083', '254505')
('1084', '986')
('1084', '21194')
('1084', '426236')
('1084', '907')
('1084', '426237')
('1084', '426238')
('1084', '426239')
('1085', '21194')
('1085', '907')
('1086', '30027')
('1086', '30075')
('1086', '30076')
('1086', '30036')
('1086', '30077')
('1086', '30028')
('1086', '907')
('1086', '30029')
('1086', '30030')
('1086', '30031')
('1086', '30078')
('1086', '30033')
('1086', '30034')
('1086', '30035')
('1087', '188486')
('1087', '985')
('1087', '188487')
('1087', '188488')
('1087', '188489')
('1087', '30282')
('1087', '188491')
('1087', '907')
('1087', '188494')
('1087', '188495')
('1087', '188496')
('1087', '188497')
('1087', '188500')
('1087', '188498')
('1090', '639247')
('1091', '261900')
('1091',

('1239', '1219')
('1239', '1220')
('1239', '1221')
('1239', '1222')
('1239', '1223')
('1239', '1224')
('1239', '1225')
('1239', '1226')
('1239', '1240')
('1239', '1241')
('1239', '1227')
('1239', '1228')
('1239', '1242')
('1239', '1229')
('1240', '1239')
('1240', '1219')
('1240', '1220')
('1240', '1221')
('1240', '1222')
('1240', '1223')
('1240', '1224')
('1240', '1225')
('1240', '1226')
('1240', '1241')
('1240', '1227')
('1240', '1228')
('1240', '1242')
('1240', '1229')
('1241', '1239')
('1241', '1219')
('1241', '1220')
('1241', '1221')
('1241', '1222')
('1241', '1223')
('1241', '1224')
('1241', '1225')
('1241', '1226')
('1241', '1240')
('1241', '1227')
('1241', '1228')
('1241', '1242')
('1241', '1229')
('1242', '1239')
('1242', '1219')
('1242', '1220')
('1242', '1221')
('1242', '1222')
('1242', '1223')
('1242', '1224')
('1242', '1225')
('1242', '1226')
('1242', '1240')
('1242', '1231')
('1242', '1241')
('1242', '1232')
('1242', '1227')
('1242', '1228')
('1242', '1229')
('1243', '1219

KeyboardInterrupt: 

In [12]:
def _add_edge_to_graph(_graph, from_v, to_v):
    if from_v in _graph:
        _graph[from_v].append(to_v)
    else:
        _graph[from_v] = [to_v]
    if to_v in _graph:
        _graph[to_v].append(-from_v)
    else:
        _graph[to_v] = [-from_v]
        
    return _graph
        
graph = {}
_add_edge_to_graph(graph, 1, 1)
_add_edge_to_graph(graph, 1, 2)
_add_edge_to_graph(graph, 1, 3)
_add_edge_to_graph(graph, 5, 1)

{1: [1, -1, 2, 3, -5], 2: [-1], 3: [-1], 5: [1]}

In [6]:
main()

434821,968,459,314,211


'434821,968,459,314,211'