# <center>Graph Search</center>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Graph Search And Connectivity
<img src="Images/BFS_vs_DFS.png" width="600"/>

## Breadth-First Search (BFS)

### Algorithm description
<img src="Images/BFS_Code.png" width="600"/>

In [2]:
from BFS import *

# Example graph as an adjacency list (vertex -> list of neighbours).
# Every edge is listed from both of its endpoints, and each neighbour list
# keeps its original order since traversal output may depend on it.
graph_example = dict(
    s=['a', 'b'],
    a=['s', 'c'],
    b=['s', 'c', 'd'],
    c=['a', 'b', 'd', 'e'],
    d=['b', 'c', 'e'],
    e=['c', 'd'],
)

# Example as in the course: run the BFS display once per starting vertex.
for start_vertex in ('s', 'a', 'e'):
    BFS_display(graph_example, start_vertex)

Start exploration with the node s
- Exploring Order: ['s', 'a', 'b', 'c', 'd', 'e']
- Layers: {0: ['s'], 1: ['a', 'b'], 2: ['c', 'd'], 3: ['e']} 

Start exploration with the node a
- Exploring Order: ['a', 's', 'c', 'b', 'd', 'e']
- Layers: {0: ['a'], 1: ['s', 'c'], 2: ['b', 'd', 'e']} 

Start exploration with the node e
- Exploring Order: ['e', 'c', 'd', 'a', 'b', 's']
- Layers: {0: ['e'], 1: ['c', 'd'], 2: ['a', 'b'], 3: ['s']} 



### Shortest Paths    
<img src="Images/BFS_ShortestPath.png" width="600"/>

In [3]:
# A target vertex is passed as third argument; the recorded output then also
# reports the distance and the shortest path from start to target.
start_vertex, target_vertex = 's', 'e'
BFS_display(graph_example, start_vertex, target_vertex)

Start exploration with the node s
- Exploring Order: ['s', 'a', 'b', 'c', 'd', 'e']
- Layers: {0: ['s'], 1: ['a', 'b'], 2: ['c', 'd'], 3: ['e']} 

BFS shortest path: start vertex s and target vertex e
Distance path: 3 and shortest path: ['s', 'b', 'd', 'e']


### Connected Components via BFS
<img src="Images/BFS_Connected_Components.png" width="600"/>

In [4]:
# Adjacency list with integer vertex labels. The vertices fall into three
# mutually unreachable groups: {1, 3, 5, 7, 9}, {2, 4} and {6, 8, 10}.
graph_SCC_example = dict([
    (1, [3, 5]),
    (2, [4]),
    (3, [1, 5]),
    (4, [2]),
    (5, [1, 3, 7, 9]),
    (6, [8, 10]),
    (7, [5]),
    (8, [6, 10]),
    (9, [5]),
    (10, [6, 8]),
])

# BFS_SCC yields a pair: the number of components and the components themselves.
number_SCCs, SCCs = BFS_SCC(graph_SCC_example)

# Build the whole report first (header, then one starred line per component)
# and emit it with a single print call.
report_lines = [f'There is {number_SCCs} connected components in the graph:']
report_lines.extend(f'*{SCC}' for SCC in SCCs)
print('\n'.join(report_lines))

There is 3 connected components in the graph:
*[2, 4]
*[1, 3, 5, 7, 9]
*[6, 8, 10]


## Depth-First Search (DFS)

### Algorithm description
<img src="Images/DFS_overview.png" width="600"/>
<img src="Images/DFS_Code.png" width="600"/>

In [5]:
from DFS import *

# Same undirected graph as in the BFS section, with c's neighbours reordered
# so that 'e' comes before 'd'.
# Every edge must appear in the lists of both endpoints: 'b' lists 'c', so
# 'c' has to list 'b' as well (it was missing). 'b' is appended at the end of
# c's list so the relative order of the existing neighbours is untouched.
# NOTE(review): with a standard DFS started from 's', 'b' is already reached
# through 'd' before c's list gets to it, so the visiting order should stay
# the same -- re-run the cell to confirm.
graph_example = {'s': ['a', 'b'],
                 'a': ['s', 'c'],
                 'b': ['s', 'c', 'd'],
                 'c': ['a', 'e', 'd', 'b'],
                 'd': ['b', 'c', 'e'],
                 'e': ['c', 'd']}

# Run one DFS exploration from 's' and print the visiting order that the
# returned object exposes through its `explored` attribute.
start_vertex = 's'
exploring_params = DFS_simple_exploration(graph_example, start_vertex)
print('The exploring order with DFS is:', exploring_params.explored)

The exploring order with DFS is: ['s', 'a', 'c', 'e', 'd', 'b']


In [6]:
# Run DFS over a graph made of several connected components
# (graph_SCC_example is the graph defined in the BFS section).
params = DFS_loop(graph_SCC_example)
# `explored` is the overall visiting order; `SCCs` groups the vertices per
# component (in the recorded output each group is keyed by its first vertex).
print('Exploring order:', params.explored)
print('SCCs:', params.SCCs)

Exploring order: [10, 6, 8, 9, 5, 1, 3, 7, 4, 2]
SCCs: {10: [10, 6, 8], 9: [9, 5, 1, 3, 7], 4: [4, 2]}


<img src="Images/DFS_properties.png" width="600"/>

### Strongly Connected Components (SCC)
<img src="Images/DFS_SCC.png" width="600"/>
<img src="Images/DFS_Kosaraju.png" width="600"/>
<img src="Images/DFS_SCC_Code.png" width="600"/>

The file contains the edges of a directed graph. Vertices are labeled as positive integers from 1 to 875714. Every row indicates an edge, the vertex label in first column is the tail and the vertex label in second column is the head (recall the graph is directed, and the edges are directed from the first column vertex to the second column vertex). So for example, the 11th row looks like: "2 47646". This just means that the vertex with label 2 has an outgoing edge to the vertex with label 47646.

Your task is to code up the algorithm from the video lectures for computing strongly connected components (SCCs), and to run this algorithm on the given graph.

Output Format: You should output the sizes of the 5 largest SCCs in the given graph, in decreasing order of sizes, separated by commas (avoid any spaces). So if your algorithm computes the sizes of the five largest SCCs to be 500, 400, 300, 200 and 100, then your answer should be "500,400,300,200,100" (without the quotes). If your algorithm finds less than 5 SCCs, then write 0 for the remaining terms. Thus, if your algorithm computes only 3 SCCs whose sizes are 400, 300, and 100, then your answer should be "400,300,100,0,0" (without the quotes).  (Note also that your answer should not have any spaces in it.)

WARNING: This is the most challenging programming assignment of the course. Because of the size of the graph you may have to manage memory carefully. The best way to do this depends on your programming language and environment, and we strongly suggest that you exchange tips for doing this on the discussion forums.

### Course example
<img src="Images/korasaju_example1.png" width="500"/>
<img src="Images/korasaju_example2.png" width="500"/>

In [7]:
from korasaju import *

# Directed course example: each key is a tail vertex and its list holds the
# heads of its outgoing edges (vertices 1..9, in increasing key order).
G = {
    1: [4], 2: [8], 3: [6],
    4: [7], 5: [2], 6: [9],
    7: [1], 8: [5, 6], 9: [3, 7],
}

# verbose=True: the recorded output shows the graph at each step (input,
# steps 1-3) followed by the leaders and the resulting SCCs.
korasaju(G, verbose=True)

---------Graphs---------
Input  : {1: [4], 2: [8], 3: [6], 4: [7], 5: [2], 6: [9], 7: [1], 8: [5, 6], 9: [3, 7]}
Step 1 : {1: [7], 2: [5], 3: [9], 4: [1], 5: [8], 6: [3, 8], 7: [4, 9], 8: [2], 9: [6]}
Step 2 : {1: 7, 2: 3, 3: 1, 4: 8, 5: 2, 6: 5, 7: 9, 8: 4, 9: 6}
Step 3 : {1: [5], 2: [3], 3: [4], 4: [2, 5], 5: [6], 6: [1, 9], 7: [8], 8: [9], 9: [7]}
---------Results--------
Leader :  {9: 9, 7: 9, 8: 9, 6: 6, 1: 6, 5: 6, 4: 4, 2: 4, 3: 4}
Leaders :  {9, 4, 6}
SCCs : {9: [9, 7, 8], 6: [6, 1, 5], 4: [4, 2, 3]}


In [None]:
import os

# Edge-list file of the assignment, resolved against the current working
# directory (so the notebook must be started from its own folder).
SCC_file = '../Data/SCC.txt'
filepath = os.path.join(os.getcwd(), SCC_file)
# load_data returns two objects; judging by the names they are the graph and
# its reversed version -- TODO confirm against the helper's definition.
graph, graph_rev = load_data(filepath)
nbr_nodes = len(graph.keys())
print(f'The number of nodes in the graph is: {nbr_nodes}')

# NOTE(review): graph_rev is passed as the second positional argument here,
# while the course example calls korasaju(G, verbose=True) by keyword; confirm
# that the second positional parameter is the reversed graph and not `verbose`.
results = korasaju(graph, graph_rev)

The number of nodes in the graph is: 875714


In [None]:
# Expected answer (sizes of the five largest SCCs, in decreasing order): 434821,968,459,313,211