### Copyright 2022 Edward Späth, Frankfurt University of Applied Sciences, FB2, Computer Science
### No liability or warranty; only for educational and non-commercial purposes
### See some basic hints for working with Jupyter notebooks in README.md

## Breadth-First Search (BFS) and Depth-First Search (DFS) without visualization

In [1]:
class Node:
    """A graph vertex together with the bookkeeping fields BFS and DFS fill in.

    Creating a node also records ``name -> id`` in the module-level ``my_dict``
    and advances the class-wide id counter, so ids follow creation order.
    """

    node_id = 0  # id that the next created node will receive

    def __init__(self, name_input):
        self.name = name_input
        # Traversal state; the strings are the "not reached yet" placeholders
        self.color = 'WHITE'
        self.distance = 'Infinity'
        self.predecessor = 'Cannot be accessed'
        # DFS timestamps
        self.t_discovered = self.t_finished = 'NIL'
        # Outgoing edges of this node
        self.adjacencylist = []
        # Register under the current counter value, then advance the counter
        my_dict[name_input] = Node.node_id
        Node.node_id += 1

## Global variables for storing information

In [2]:
# Lookup table: node name -> index of that node inside `nodes`
my_dict = dict()
# All Node objects of the current graph, in insertion order
nodes = list()
# Names of the nodes in the order the current traversal visited them
visited_nodes = list()

## Functions for resetting results or the entire graph

In [3]:
def reset_results():
    """Clear per-run traversal state so another BFS/DFS can be started.

    Rebinds the global result lists and the DFS clock, then puts every node
    in ``nodes`` back to its untouched defaults. Edges are left alone.
    """
    global visited_nodes, discovered_nodes, time
    visited_nodes = []
    discovered_nodes = []
    time = 0  # DFS clock
    defaults = (
        ('predecessor', 'Cannot be accessed'),
        ('distance', 'Infinity'),
        ('color', 'WHITE'),
        # DFS timestamps
        ('t_discovered', 'NIL'),
        ('t_finished', 'NIL'),
    )
    for vertex in nodes:
        for field, value in defaults:
            setattr(vertex, field, value)

In [4]:
def reset_graph():
    """Discard the whole graph: all nodes, the name lookup and the id counter."""
    global nodes, my_dict
    nodes, my_dict = [], {}
    # Start numbering from zero again for the next graph
    Node.node_id = 0

## Functions for creating a graph

In [5]:
def create_graph(nodearray, edgearray, adjacencylists, use_adjacencylist):
    """Build the graph from node names plus one of the two edge descriptions.

    When ``use_adjacencylist`` equals True the edges are derived from
    ``adjacencylists``; otherwise ``edgearray`` is used as given.
    """
    add_nodes(nodearray)
    edges = (
        convert_from_adjacencylist(adjacencylists)
        if use_adjacencylist == True
        else edgearray
    )
    add_edges(edges)

def add_nodes(nodearray):
    """Create a Node for every name in ``nodearray`` that is not registered yet.

    New nodes are appended to the global ``nodes`` list; names that already
    have an entry in ``my_dict`` are skipped, so repeats are harmless.
    """
    for label in nodearray:
        already_known = my_dict.get(label) is not None
        if already_known:
            continue
        nodes.append(Node(label))

def convert_from_adjacencylist(adjacencylists):
    """Turn the flat "node, neighbours, node, neighbours, ..." input into edges.

    Args:
        adjacencylists: Flat sequence in which every even position holds a
            node name and the following odd position holds the names that
            node has edges to (tuple/list), or a single name given as a
            plain string.

    Returns:
        A list of ``(start, destination)`` tuples in input order.
    """
    converted_array = []
    curr_node = None
    for index, element in enumerate(adjacencylists):
        if index % 2 == 0:
            # Even position: the node whose neighbours come next
            curr_node = element
            continue
        # Odd position: its adjacency list.
        # ``(('C'))`` is NOT a tuple but the string 'C'. Iterating a string
        # yields single characters, which would split a name such as 'Node1'
        # into 'N', 'o', 'd', ... - so treat a bare string as one neighbour.
        # An empty string still means "no neighbours", as before.
        if isinstance(element, str):
            element = (element,) if element else ()
        converted_array.extend((curr_node, adjacency) for adjacency in element)
    return converted_array

def add_edges(edgearray):
    """Insert directed edges into the adjacency lists of the global graph.

    Args:
        edgearray: Iterable of ``(start, destination)`` node-name pairs.

    Edges that mention a name missing from ``my_dict`` are ignored, and an
    edge that already exists is not added a second time. Every adjacency list
    is kept sorted by node id (insertion order of the nodes), which fixes the
    order in which the traversals pick between several neighbours.
    """
    for start, dest in edgearray:
        # my_dict maps a name to the node's index; None means "no such node"
        start_index = my_dict.get(start)
        dest_index = my_dict.get(dest)
        if start_index is None or dest_index is None:
            continue
        adjacency = nodes[start_index].adjacencylist
        # The list stores Node objects, so the duplicate check has to compare
        # names; testing `dest in adjacency` with the name string never matches.
        if any(adjacent.name == dest for adjacent in adjacency):
            continue
        # Find the first neighbour with a larger id and insert in front of it;
        # with nodearray = ['A', 'B', 'C', ...] this guarantees 'A' is chosen
        # over 'B', and 'B' over 'C', whenever there is a choice.
        at_index = len(adjacency)
        for index, adjacent in enumerate(adjacency):
            if my_dict[adjacent.name] > dest_index:
                at_index = index
                break
        adjacency.insert(at_index, nodes[dest_index])

## Enqueue and dequeue operations (for BFS)

In [6]:
def enqueue(Q, element):
    """Place ``element`` at the back of the queue ``Q`` (modified in place)."""
    Q.extend((element,))

def dequeue(Q):
    """Remove and return the front element of ``Q``.

    Taking a node out of the queue is what counts as visiting it, so its name
    is appended to the global ``visited_nodes`` list on the way.
    """
    front = Q[0]
    visited_nodes.append(front.name)
    Q.pop(0)
    return front

## BFS algorithm

In [7]:
def BFS(s_name):
    """Run a breadth-first search from the node named ``s_name`` and print it.

    Every step is narrated on stdout. Reached nodes get their ``distance``
    (number of edges from the start) and ``predecessor`` set. At the end
    ``print_results('BFS')`` is called. If ``s_name`` is not a known node,
    an error message is printed and nothing else happens.
    """
    # A missing entry in my_dict means the start node does not exist
    s_index = my_dict.get(s_name)
    if s_index is None:
        print("\nERROR: The Starting Node", s_name, "does not exist. Please make sure you have given the correct name to the start variable")
        return
    source = nodes[s_index]
    print("The BFS algorithm has started\n")
    source.color, source.distance, source.predecessor = 'GRAY', 0, 'NIL'
    pending = []  # FIFO queue of discovered but not yet visited nodes
    enqueue(pending, source)
    while pending:
        current = dequeue(pending)
        print("Currently visiting ", current.name, ':', sep='')
        for neighbour in current.adjacencylist:
            # Only nodes that are still WHITE are new discoveries
            if neighbour.color != 'WHITE':
                continue
            neighbour.color = 'GRAY'
            print("\tBy visiting ", current.name, ", the node ", neighbour.name, " has been discovered", sep='')
            # Reached through `current`, hence one edge further from the start
            neighbour.predecessor = current
            neighbour.distance = current.distance + 1
            enqueue(pending, neighbour)
        # All neighbours handled: the node is finished
        current.color = 'BLACK'
        print("\tThe node", current.name, "has been visited and finished\n")
    print_results('BFS')

## DFS algorithm

In [8]:
time = 0  # Global DFS clock; dfs_visit advances it on every discovery and finish
def DFS(s_name):
    """Run a depth-first search from the node named ``s_name`` and print it.

    Only nodes reachable from the start node are explored. At the end
    ``print_results('DFS')`` is called. If ``s_name`` is not a known node,
    an error message is printed and nothing else happens.
    """
    # A missing entry in my_dict means the start node does not exist
    s_index = my_dict.get(s_name)
    if s_index is None:
        print("\nERROR: The Starting Node", s_name, "does not exist. Please make sure you have given the correct name to the start variable")
        return
    print("The DFS algortihm has started\n")
    root = nodes[s_index]
    # The start node is the root of the DFS tree
    root.distance, root.predecessor = 0, 'NIL'
    dfs_visit(root)
    print_results('DFS')

def dfs_visit(u):
    """Recursively explore everything reachable from ``u`` through WHITE nodes.

    Stamps ``u`` with its discovery and finishing time taken from the global
    clock ``time``, and gives every newly discovered neighbour its
    predecessor and its depth below the start node (stored in ``distance``).
    """
    global time
    time += 1
    u.color = 'GRAY'
    u.t_discovered = time
    for neighbour in u.adjacencylist:
        # Anything that is no longer WHITE was already discovered elsewhere
        if neighbour.color != 'WHITE':
            continue
        neighbour.predecessor = u
        neighbour.distance = u.distance + 1
        print("The node", u.name, "has discovered node", neighbour.name)
        dfs_visit(neighbour)
    # Every neighbour is done, so u itself is finished now
    time += 1
    u.t_finished = time
    u.color = 'BLACK'
    print("The node", u.name, "has been visited")

## Function to print results

In [9]:
def print_results(traversal_type):
    """Print the visiting order and per-node data, then reset the run state.

    Args:
        traversal_type: 'DFS' enables the DFS-specific parts (visiting order
            derived from discovery times, timestamps in the per-node dump);
            any other value prints the plain (BFS) form.

    For BFS the global ``visited_nodes`` list was already filled while
    dequeuing. For DFS it is filled here from the discovery timestamps.
    Finally ``reset_results()`` is called so the graph can be traversed again.
    """
    is_dfs = traversal_type == 'DFS'
    if is_dfs:
        print('') # For new line
    print("The algorithm is over")
    if is_dfs:
        # Order of visiting in DFS = ascending time of discovery. Nodes that
        # were never reached still carry the string 'NIL' and are left out.
        # (Sorting is required here: placing names with list.insert() at
        # index t_discovered shifts earlier entries and can scramble the order.)
        discovered = [node for node in nodes if isinstance(node.t_discovered, int)]
        discovered.sort(key=lambda node: node.t_discovered)
        visited_nodes.extend(node.name for node in discovered)
    print("The nodes were visited in the following order:\n", visited_nodes)
    print("Here is each node's data:\n")
    for node in nodes:
        print("\tName:", node.name)
        # The predecessor is either a placeholder string ('NIL' or
        # 'Cannot be accessed') or a node, of which only the name is shown
        if isinstance(node.predecessor, str):
            print("\tPredecessor:", node.predecessor)
        else:
            print("\tPredecessor:", node.predecessor.name)
        print("\tDistance:", node.distance)
        if is_dfs:
            print("\tTime of discovery:", node.t_discovered)
            print("\tTime visisted:", node.t_finished)
        print('') # For new line
    reset_results()

## Example

In [10]:
# Input the names of the nodes here, regardless of the edge input method!
# If several nodes could be visited next, the one listed earlier in nodearray is chosen first.

nodearray = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

# All edges have length = 1 and are directed. If you want it to be more customizable, see the visualized version "BFS_DFS.ipynb" in the same GitHub folder as this file

# edgearray is a list (array) of 2-tuples. Both entries are strings referring to node names.
# The left entry is the start of an edge and the right entry is its destination.
# Syntax: edgearray = [('START', 'DESTINATION'), ('STARTx', 'DESTINATIONy'), ...]

edgearray = [('A', 'B'), ('A', 'C'), ('B', 'C'), ('D', 'B'), ('C', 'D'), ('E', 'D'), ('E', 'F'), ('C', 'G'), ('A', 'H'), ('G', 'H')]

# Alternatively you can input the data in the form of adjacency lists. The string on the left is the node and the tuple to its right holds the nodes to which it has edges.
# If the node has no adjacent nodes, you can leave the parentheses empty (like with 'F' below) or leave that line out entirely (like with 'H' being left out)
# NOTE: a tuple with a single element needs a trailing comma, e.g. ('C',). Without the comma, ('C') is just the string 'C'.

adjacencylists = ['A', ('B', 'C', 'H'),
                  'B', ('C',),
                  'C', ('D', 'G'),
                  'D', ('B',),
                  'E', ('D', 'F'),
                  'F', (),
                  'G', ('H',),
                 ]

# Here you have to select which input method you want to use.
# Setting "use_adjacencylist = True" uses the adjacencylists input.
# Setting "use_adjacencylist = False" uses the edgearray representation at the top.
use_adjacencylist = True

# Select the start node here. Input its name as a string.
start_node = 'A'

create_graph(nodearray, edgearray, adjacencylists, use_adjacencylist)
# Change the algorithm being called here. Either "BFS(start_node)" or "DFS(start_node)"
BFS(start_node)
reset_graph()

The BFS algorithm has started

Currently visiting A:
	By visiting A, the node B has been discovered
	By visiting A, the node C has been discovered
	By visiting A, the node H has been discovered
	The node A has been visited and finished

Currently visiting B:
	The node B has been visited and finished

Currently visiting C:
	By visiting C, the node D has been discovered
	By visiting C, the node G has been discovered
	The node C has been visited and finished

Currently visiting H:
	The node H has been visited and finished

Currently visiting D:
	The node D has been visited and finished

Currently visiting G:
	The node G has been visited and finished

The algorithm is over
The nodes were visited in the following order:
 ['A', 'B', 'C', 'H', 'D', 'G']
Here is each node's data:

	Name: A
	Predecessor: NIL
	Distance: 0

	Name: B
	Predecessor: A
	Distance: 1

	Name: C
	Predecessor: A
	Distance: 1

	Name: D
	Predecessor: C
	Distance: 2

	Name: E
	Predecessor: Cannot be accessed
	Distance: Infinity

## Yet another example

In [11]:
nodearray = ['A', 'B', 'C', 'D', 'E']

edgearray = [('A', 'E'), ('B', 'A'), ('B', 'C'), ('B', 'D'), ('B', 'E'), ('C', 'B'), ('D', 'B'), ('E', 'A'), ('E', 'B'), ('E', 'D')]

# Same edges as in edgearray, written as "node, tuple of neighbours" pairs.
# A single neighbour needs the trailing comma: ('E',) is a tuple, ('E') is only the string 'E'.
adjacencylists = ['A', ('E',),
                  'B', ('A', 'C', 'D', 'E'),
                  'C', ('B',),
                  'D', ('B',),
                  'E', ('A', 'B', 'D'),
                 ]

use_adjacencylist = True

start_node = 'A'

create_graph(nodearray, edgearray, adjacencylists, use_adjacencylist)
DFS(start_node)
reset_graph()

The DFS algortihm has started

The node A has discovered node E
The node E has discovered node B
The node B has discovered node C
The node C has been visited
The node B has discovered node D
The node D has been visited
The node B has been visited
The node E has been visited
The node A has been visited

The algorithm is over
The nodes were visited in the following order:
 ['A', 'E', 'B', 'C', 'D']
Here is each node's data:

	Name: A
	Predecessor: NIL
	Distance: 0
	Time of discovery: 1
	Time visisted: 10

	Name: B
	Predecessor: E
	Distance: 2
	Time of discovery: 3
	Time visisted: 8

	Name: C
	Predecessor: B
	Distance: 3
	Time of discovery: 4
	Time visisted: 5

	Name: D
	Predecessor: B
	Distance: 3
	Time of discovery: 6
	Time visisted: 7

	Name: E
	Predecessor: A
	Distance: 1
	Time of discovery: 2
	Time visisted: 9



## Your tests go here...

In [12]:
nodearray = ['A', 'B', 'C', 'D']

edgearray = [('A', 'B'), ('A', 'C'), ('C', 'D')]

# Same edges as in edgearray, written as "node, tuple of neighbours" pairs.
# A single neighbour needs the trailing comma: ('D',) is a tuple, ('D') is only the string 'D'.
adjacencylists = ['A', ('B', 'C'),
                  'C', ('D',),
                 ]

use_adjacencylist = True

start_node = 'A'

create_graph(nodearray, edgearray, adjacencylists, use_adjacencylist)
BFS(start_node)
reset_graph()

The BFS algorithm has started

Currently visiting A:
	By visiting A, the node B has been discovered
	By visiting A, the node C has been discovered
	The node A has been visited and finished

Currently visiting B:
	The node B has been visited and finished

Currently visiting C:
	By visiting C, the node D has been discovered
	The node C has been visited and finished

Currently visiting D:
	The node D has been visited and finished

The algorithm is over
The nodes were visited in the following order:
 ['A', 'B', 'C', 'D']
Here is each node's data:

	Name: A
	Predecessor: NIL
	Distance: 0

	Name: B
	Predecessor: A
	Distance: 1

	Name: C
	Predecessor: A
	Distance: 1

	Name: D
	Predecessor: C
	Distance: 2

