### Kosaraju's two-pass algorithm

1. rescurse all arcs in the graph
2. run DFS -loop on $G^{rev}$ -- compute 'magical ordering' of the notes \
let f(v) = 'finishing time' of each v $\in$ G^{rev}
3. run DFS -loop on G -- discover the SCCs

## read in key, value pair

In [6]:
from collections import defaultdict
adj_lst = defaultdict(list)
with open('scc_short.txt') as f:
    lines = f.readlines()
    
for line in lines:
    ll = line.strip().split()
    adj_lst[int(ll[0])].append(int(ll[1]))

## form adjacent list

In [182]:
adj_lst

defaultdict(list,
            {7: [1],
             4: [7],
             1: [4],
             9: [7, 3],
             6: [9],
             8: [6, 5],
             2: [8],
             5: [2],
             3: [6]})

## reverse the graph    

In [7]:
class Graph:
    
    def __init__(self, adj_lst):        
        self.graph = adj_lst  
    
    def addEdge(self, vertex1, vertex2): # add an edge from vertex1 to vertex2
        self.graph[vertex1].append(vertex2)
    
    def reverseGraph(self):        
        inverseG = Graph(defaultdict(list))
        
        for i in self.graph:
            for j in self.graph[i]:
                inverseG.addEdge(j, i)
        return inverseG
## first DFS to computing finishing time


    def finishingTimeStack(self, vertex, visited, stack):
        '''
        visited: a list to record visited vertices
        vertex: current vertex 
        stack: push vertex to stack as DFS proceeding
        '''
        
        visited[vertex] = True
        for i in self.graph[vertex]:
            if visited[i] == False:
                self.finishingTimeStack(i, visited, stack)
        stack.append(vertex)
        # print(stack)
        
    def getOneSCC(self, vertex, visited, scc):
        scc.append(vertex)
        visited[vertex] = True
        
        for v in self.graph[vertex]:
            if visited[v] == False:
                self.getOneSCC(v, visited, scc)
       
    def computeSCCs(self):
        
        stack = [] # order of stack is the finishing time
        
        ### First DFS: compute the finishing time
        visited = defaultdict(bool) # initialized all notes as unvisited
        
        for i in self.graph.keys(): # use outer loop to ganrantee every note will be visited
            if visited[i] == False:
                self.finishingTimeStack(i, visited, stack)
        
        ### Compute a inverse graph
        inverG = self.reverseGraph()
        
        ### Second DFS: compute the SCCs
        SCC_lst = []
        visited =  defaultdict(bool) # initialized all notes as unvisited 
        while stack:
            i = stack.pop()
            if visited[i] == False:
                scc = []
                inverG.getOneSCC(i, visited, scc)
                SCC_lst.append(scc)
        return SCC_lst
    

In [8]:
oriG = Graph(adj_lst)
invG = oriG.reverseGraph()

In [9]:
scclist = invG.computeSCCs()

## the large exercise

In [19]:
from collections import defaultdict
adj_lst = defaultdict(list)
with open('SCC.txt') as f:
    lines = f.readlines()
    
for line in lines:
    ll = line.strip().split()
    adj_lst[int(ll[0])].append(int(ll[1]))

In [10]:
    
oriG = Graph(adj_lst)
invG = oriG.reverseGraph()

In [10]:
import sys
import threading
sys.setrecursionlimit(300000)
threading.stack_size(4096*110000)

450560000

In [None]:
scclist = invG.computeSCCs()

In [7]:
countNote = []
for sublist in scclist:
    countNote.append(len(sublist))

In [9]:
with open('sccount.txt', 'w') as f:
    for item in countNote:
        f.write("%s\n" % item)

In [11]:
with open('sccount.txt') as f:
    lines = f.readlines()
countlist = []    
for line in lines:
    num = int(line.strip())
    countlist.append(num)

In [13]:
len(countlist)

371762

In [16]:
countlist.sort()

In [18]:
for i in range(7):
    print(countlist[-i])

1
434821
968
459
313
211
205


In [20]:
len(adj_lst)

739454