In [None]:
# Initialize Otter
# Creates the `grader` object used by the grader.check(...) and grader.export(...)
# cells later in this notebook.
import otter
grader = otter.Notebook("paths_on_graphs.ipynb")

### Please restart the kernel after running the next cell

In [None]:
# version shenanigans
!pip install -r requirements.txt --quiet
import otter
grader = otter.Notebook("paths_on_graphs.ipynb")
assert otter.__version__ >= "4.2.0", "Please restart your kernel."

In [None]:
import networkx as nx
import tqdm
from heapq import heappush, heappop
import numpy as np

### Representing graphs in code

There are multiple ways to represent graphs in code. In class we covered adjacency matrices (https://people.eecs.berkeley.edu/~vazirani/algorithms/chap3.pdf#page=2) and adjacency lists (https://people.eecs.berkeley.edu/~vazirani/algorithms/chap3.pdf#page=3). There is also the edge list representation, in which you store the edges in a single one-dimensional list. In general for 170, we choose to use the adjacency list representation since it lets us quickly determine all of a given node's neighbors.

In many programming problems, vertices are typically labelled $0$ through $n-1$ for convenience (recall that arrays and lists in most languages begin at index 0). This allows us to represent an adjacency list using a list of lists. Given an edge list, the following code will create an adjacency list for an __unweighted undirected graph__.

In [None]:
def make_adj_list(n, edge_list):
    """
    Build the adjacency list of an unweighted, undirected graph from its edge list.

    args:
        n:int = number of nodes in the graph. The nodes are labelled with integers 0 through n-1
        edge_list:List[Tuple(int,int)] = edge list where each tuple (u,v) represents the undirected
            edge (u,v) in the graph
    return:
        A List[List[int]] in which entry u holds the neighbors of node u, in the order
        the edges appear in edge_list
    """
    # one independent (initially empty) neighbor list per node
    neighbors = [[] for _ in range(n)]
    for edge in edge_list:
        u, v = edge[0], edge[1]
        # the edge is undirected, so each endpoint is recorded as a neighbor of the other
        neighbors[u].append(v)
        neighbors[v].append(u)
    return neighbors


### Q1) DFS

In class we showed how to use DFS to check if there exists a path between two nodes, topologically sort nodes, and find SCCs. In those algorithms, pre and post numbers were used.

Here you'll implement a variation of DFS that returns the path between two nodes. In many problems, we want to be able to find the actual path between two nodes, not just determine if it exists. Return the path as a list of nodes on that path. For example, the path `s -> a -> b -> c -> t` corresponds to the list `[s, a, b, c, t]`. Note you will not need to implement calculating pre and post numbers for this exercise.

*Hint:*
1) It may be helpful to define a helper function which performs DFS.
2) It may be helpful to maintain an extra data structure which for each node $u$, stores which node $v$ will be the previous node on some $s$ to $u$ path. See the description of `parent` in Q3 for more details.

In [None]:
def dfs_path(adj_list, s, t):
    """
    Find some path from s to t using depth-first search.

    args:
        adj_list:List[List] = an adjacency list; adj_list[u] holds the neighbors of node u
            (the format produced by make_adj_list)
        s:int = an int representing the starting node
        t:int = an int representing the destination node

    return: 
        a list of nodes starting with s and ending with t representing an s to t path if it exists. 
        The debugging cell in this notebook also requires the path to be simple
        (no repeated nodes).
        Returns an empty list otherwise
    """
    # TODO (student): replace the placeholder below with your implementation.
    ...

### Debugging

You can create sample tests in the following cells to help debug your solution. We provide a few small tests as an example, but they might not be comprehensive.

To add a new graph to the test, append a new edge list to `edge_lists` as shown in the next cell.  
__Remember that these edges are undirected, so do not add both directions of an edge, both (u,v) and (v,u) to the edge list.__

In [None]:
# One entry per test graph. Each entry is that graph's undirected edge list;
# make_adj_list records both directions, so list every edge (u, v) only once.
edge_lists = []
# first graph: triangles 0-1-2 and 3-4-5 joined by the edge (2,3)
edge_lists.append([(0,1), (0,2), (1,2), (2,3), (3,4), (3,5), (4,5)])   # edge list of first graph
# second graph: the same two triangles without the edge (2,3)
edge_lists.append([(0,1), (0,2), (1,2), (3,4), (3,5), (4,5)])          # edge list of second graph
# add any additional tests here

In [None]:
# Parallel lists: the i-th entries of s_list, t_list, n_list and edge_lists together
# describe test i. The debugging loop zips them, so add one entry to every list
# for each new test.

# start node of each test
s_list = []
s_list.append(0)  # s for first graph 
s_list.append(1)  # s for second graph 
# add any additional tests here

# destination node of each test
t_list = []
t_list.append(3)  # t for first graph
t_list.append(4)  # t for second graph
# add any additional tests here

# number of nodes of each test graph (nodes are labelled 0 through n-1)
n_list = []
n_list.append(6)  # n = 6 for first graph
n_list.append(6)  # n = 6 for second graph
# add any additional tests here

The following is a simplified version of the autograder. You may want to add more print statements or other debugging statements to check your function.

In [None]:
import matplotlib.pyplot as plt
# Simplified local check for Q1: runs dfs_path on every sample graph and validates
# the returned path against a networkx reference graph.

# zip() silently stops at the shortest list, which would skip tests without warning.
assert len(s_list) == len(t_list) == len(n_list) == len(edge_lists), \
    "s_list, t_list, n_list and edge_lists must each have exactly one entry per test graph"

for index, (s, t, n, edge_list) in enumerate(zip(s_list, t_list, n_list, edge_lists), start=1):
    print("Testing graph:", index)

    adj_list_graph = make_adj_list(n, edge_list) # function defined earlier

    path = dfs_path(adj_list_graph, s, t)
    assert path is not None, "your dfs_path returned None; it should return a list (empty if there is no s-t path)"

    nx_graph = nx.Graph(edge_list)
    # Register all n nodes explicitly: a node with no incident edge is absent from
    # edge_list, and nx.has_path raises NodeNotFound for nodes missing from the graph.
    nx_graph.add_nodes_from(range(n))

    # uncomment the following to plot each graph
    # nx.draw(nx_graph, with_labels=True)
    # plt.title(f"Graph with {n} vertices and start node {s} and destination {t}")
    # plt.show()

    if not nx.has_path(nx_graph, s, t):
        assert len(path) == 0, "your dfs_path found an s-t path when there isn't one."
    else:
        # checks that the path returned is a real path in the graph and that it starts and ends
        # at the right vertices
        assert nx.is_simple_path(nx_graph, path), "your dfs_path did not return a valid simple path"
        assert path[0] == s, "your dfs_path returned a valid simple path, but it does not start at node s"
        assert path[-1] == t, "your dfs_path returned a valid simple path, but it does not end at node t"

print("Success")

In [None]:
# Run the Otter check named "q1" using the `grader` object created at the top of the notebook.
grader.check("q1")

### Q2) BFS

Similar to the above implementation of DFS, here you'll implement BFS such that BFS returns the s-t path. Return the path as a list of nodes on that path. For example, the path `s -> a -> b -> c -> t` corresponds to the list `[s, a, b, c, t]`.

*Hint:*
1) It may be helpful to maintain an extra data structure which for each node $u$, stores which node $v$ will be the previous node on some $s$ to $u$ path. See the description of `parent` in Q3 for more details.

In [None]:
import queue

def bfs_path(adj_list, s, t):
    """
    Find a shortest path (fewest edges) from s to t using breadth-first search.

    args:
        adj_list:List[List] = an adjacency list; adj_list[u] holds the neighbors of node u
            (the format produced by make_adj_list)
        s:int = an int representing the starting node
        t:int = an int representing the destination node

    return: 
        a list of nodes starting with s and ending with t representing the shortest s to t path if it exists. 
        A path that uses d edges therefore has d + 1 entries, which is what the
        debugging cell in this notebook checks.
        Returns an empty list otherwise
    """
    # TODO (student): replace the placeholder below with your implementation.
    ...

### Debugging

You can create sample tests in the following cells to help debug your solution. We provide a few small tests as an example, but they might not be comprehensive.

To add a new graph to the test, append a new edge list to `edge_lists` as shown in the next cell.  
__Remember that these edges are undirected, so do not add both directions of an edge, both (u,v) and (v,u) to the edge list.__

In [None]:
# One entry per Q2 test graph (this replaces the Q1 edge_lists). Each entry is an
# undirected edge list; list every edge (u, v) only once.
edge_lists = []
# first graph: triangles 0-1-2 and 3-4-5 joined by the edge (2,3)
edge_lists.append([(0,1), (0,2), (1,2), (2,3), (3,4), (3,5), (4,5)])   # edge list of first graph
# second graph: the same two triangles without the edge (2,3)
edge_lists.append([(0,1), (0,2), (1,2), (3,4), (3,5), (4,5)])          # edge list of second graph
# add any additional tests here

In [None]:
# Parallel lists for the Q2 tests: entry i of each list (and of edge_lists and
# expected_distance) describes test i. Add one entry to every list per new test.

# start node of each test
s_list = []
s_list.append(0)  # s for first graph 
s_list.append(1)  # s for second graph 
# add any additional tests here

# destination node of each test
t_list = []
t_list.append(3)  # t for first graph
t_list.append(4)  # t for second graph
# add any additional tests here

# number of nodes of each test graph (nodes are labelled 0 through n-1)
n_list = []
n_list.append(6)  # n = 6 for first graph
n_list.append(6)  # n = 6 for second graph
# add any additional tests here

For each test case you also need to add the expected distance from $s$ to $t$. Add -1 if there is no $s-t$ path.

In [None]:
# Expected number of edges on a shortest s-t path for each test. The debugging loop
# asserts dist + 1 == len(path) when a path exists; when no path exists it only
# checks that the returned path is empty, so the -1 entry is just a placeholder.
expected_distance = []
expected_distance.append(2)   # 0-2-3 is the shortest path in the first graph
expected_distance.append(-1)  # there is no path from 1 to 4 in the second graph
# add any additional tests here


The following is a simplified version of the autograder. You may want to add more print statements or other debugging statements to check your function.

In [None]:
import matplotlib.pyplot as plt
# Simplified local check for Q2: runs bfs_path on every sample graph, validates the
# returned path against a networkx reference graph, and compares its length with
# the hand-entered expected distance.

# zip() silently stops at the shortest list, which would skip tests without warning.
assert len(s_list) == len(t_list) == len(n_list) == len(edge_lists) == len(expected_distance), \
    "s_list, t_list, n_list, edge_lists and expected_distance must each have exactly one entry per test graph"

for index, (s, t, n, edge_list, dist) in enumerate(
        zip(s_list, t_list, n_list, edge_lists, expected_distance), start=1):
    print("Testing graph:", index)
    adj_list_graph = make_adj_list(n, edge_list) # function defined earlier

    path = bfs_path(adj_list_graph, s, t)
    assert path is not None, "your bfs_path returned None; it should return a list (empty if there is no s-t path)"

    nx_graph = nx.Graph(edge_list)
    # Register all n nodes explicitly: a node with no incident edge is absent from
    # edge_list, and nx.has_path raises NodeNotFound for nodes missing from the graph.
    nx_graph.add_nodes_from(range(n))

    # uncomment the following to plot each graph
    # nx.draw(nx_graph, with_labels=True)
    # plt.title(f"Graph with {n} vertices and start node {s} and destination {t}")
    # plt.show()

    if not nx.has_path(nx_graph, s, t):
        assert len(path) == 0, "your bfs_path found an s-t path when there isn't one."
    else:
        # checks that the path returned is a real path in the graph, that it starts and ends
        # at the right vertices, and that it is the shortest s-t path
        assert nx.is_simple_path(nx_graph, path), "your bfs_path did not return a valid simple path"
        assert path[0] == s, "your bfs_path returned a valid simple path, but it does not start at node s"
        assert path[-1] == t, "your bfs_path returned a valid simple path, but it does not end at node t"
        # a path with `dist` edges visits dist + 1 nodes
        assert dist + 1 == len(path), "your bfs_path did not return the shortest path"

print("Success")

In [None]:
# Run the Otter check named "q2" using the `grader` object created at the top of the notebook.
grader.check("q2")

### Q3) Dijkstra's Algorithm
If you need a refresher on how the algorithm works, check out pp.120-121 from DPV: https://people.eecs.berkeley.edu/~vazirani/algorithms/chap4.pdf#page=7. 

Here you'll implement Dijkstra's algorithm as outlined in the textbook. This function computes and returns the `distance` and `prev` lists. 

*Hints:*
1) To implement Dijkstra's algorithm, we need to use a priority queue. The `heapq` library is the most commonly used for this purpose. See https://docs.python.org/3/library/heapq.html
2) `heapq` maintains a min priority queue, which means that the top element is the smallest element.
3) If tuples are pushed into a `heapq`, they will be sorted by the first element.
4) `heapq` does not have a `decreaseKey` function. Think about how to get around this limitation using hint 3 and the fact that once `distance[v]` has been set, it will never decrease (i.e., any future s-v paths found will have length `distance[v]` or longer).

In [None]:
def shortest_path(adj_list, s):
    """
    Compute single-source shortest paths from s with Dijkstra's algorithm.

    args:
        adj_list:List[List[Tuple(int,int)]] =  an adjacency list representation of the undirected graph.
                adj_list[v] consists of tuples (u, d) such that (v, u) is an edge of weight d.
        s:int = an int representing the start vertex from which we need to find the shortest distances.
    
    return:
        distance:List[int] = a list d, indexed by vertex, such that d[u] is the length of the
                shortest path from s to u. By definition, d[s] = 0. The debugging cell in this
                notebook expects d[u] == float('inf') when u is unreachable from s.
        parent:List[int] = a list p, indexed by vertex, such that p[u] is the parent of u on the
                shortest path from s to u. In other words, if the shortest path from s to u is
                (s, x, y, z, u), then p[u] = z, p[z] = y, ..., p[x] = s. We define p[s] to be None.
    """
    # TODO (student): replace the placeholder below with your implementation.
    ...

### Debugging

You can create sample tests in the following cells to help debug your solution. We provide a few small tests as an example, but they might not be comprehensive.

To add a new graph to the test, append a new edge list to `edge_lists` as shown in the next cell.  
__Remember that these edges are undirected, so do not add both directions of an edge, both (u,v) and (v,u) to the edge list. Since the edges here are weighted, the third element in the tuple represents the edge's weight. Note we only use integer weights in our tests.__

In [None]:
# One entry per Q3 test graph (this replaces the earlier edge_lists). Each entry is a
# list of (u, v, weight) triples for undirected edges; list every edge only once.
edge_lists = []
edge_lists.append([(0,1,1), (0,2,5), (1,2,2), (2,3,3), (3,4,6), (3,5,1), (4,5,1)])   # edge list of first graph
# second graph: the first graph without the edge (2,3,3)
edge_lists.append([(0,1,1), (0,2,5), (1,2,2), (3,4,6), (3,5,1), (4,5,1)])            # edge list of second graph
# add any additional tests here

In [None]:
# Parallel lists for the Q3 tests: entry i of each list (and of edge_lists and
# expected_distance) describes test i. Add one entry to every list per new test.

# source vertex passed to shortest_path for each test
s_list = []
s_list.append(0)  # s for first graph 
s_list.append(1)  # s for second graph 
# add any additional tests here

# vertex whose distance from s is checked in each test
t_list = []
t_list.append(3)  # t for first graph
t_list.append(4)  # t for second graph
# add any additional tests here

# number of nodes of each test graph (nodes are labelled 0 through n-1)
n_list = []
n_list.append(6)  # n = 6 for first graph
n_list.append(6)  # n = 6 for second graph
# add any additional tests here

For each test case you also need to add the expected distance from $s$ to $t$. Add -1 if there is no $s-t$ path.

In [None]:
# Expected weighted shortest-path distance from s to t for each test. When t is
# unreachable the debugging loop checks distance[t] == float('inf') instead, so the
# -1 entry is just a placeholder.
expected_distance = []
expected_distance.append(6)   # 0-1-2-3 (1 + 2 + 3 = 6) is the shortest path in the first graph; 0-2-3 costs 5 + 3 = 8
expected_distance.append(-1)  # there is no path from 1 to 4 in the second graph
# add any additional tests here


The following is a simplified version of the autograder. You may want to add more print statements or other debugging statements to check your function.

In [None]:
def make_weighted_adj_list(n, edge_list):
    """
    Build the adjacency list of a weighted, undirected graph from its edge list.

    args:
        n:int = number of nodes in the graph. The nodes are labelled with integers 0 through n-1
        edge_list:List[Tuple(int,int,int)] = edge list where each tuple (u,v,d) represents the
            undirected edge (u,v) of weight d in the graph
    return:
        A List[List[Tuple(int,int)]] in which entry u holds a (neighbor, weight) tuple for
        every edge incident to node u, in the order the edges appear in edge_list
    """
    # one independent (initially empty) neighbor list per node
    weighted_neighbors = [[] for _ in range(n)]
    for edge in edge_list:
        u, v, weight = edge[0], edge[1], edge[2]
        # the edge is undirected, so it is stored under both endpoints with the same weight
        weighted_neighbors[u].append((v, weight))
        weighted_neighbors[v].append((u, weight))
    return weighted_neighbors

import matplotlib.pyplot as plt
# Simplified local check for Q3: runs shortest_path on every sample graph and
# compares distance[t] with the hand-entered expected distance. Only the distance
# to t is checked here; the returned `prev` list is not validated by this cell.

# zip() silently stops at the shortest list, which would skip tests without warning.
assert len(s_list) == len(t_list) == len(n_list) == len(edge_lists) == len(expected_distance), \
    "s_list, t_list, n_list, edge_lists and expected_distance must each have exactly one entry per test graph"

# numbering starts at 1, matching the Q1 and Q2 debugging loops
for index, (s, t, n, edge_list, dist) in enumerate(
        zip(s_list, t_list, n_list, edge_lists, expected_distance), start=1):
    print("Testing graph:", index)
    adj_list_graph = make_weighted_adj_list(n, edge_list) # function defined earlier

    result = shortest_path(adj_list_graph, s)
    assert result is not None, "your shortest_path returned None; it should return the distance and parent lists"
    distance, prev = result

    nx_graph = nx.Graph()
    # Register all n nodes explicitly: a node with no incident edge is absent from
    # edge_list, and nx.has_path raises NodeNotFound for nodes missing from the graph.
    nx_graph.add_nodes_from(range(n))
    for u, v, d in edge_list:
        nx_graph.add_edge(u, v, weight=d)

    # uncomment the following to plot each graph
    # nx.draw(nx_graph, with_labels=True)
    # plt.title(f"Graph with {n} vertices and start node {s} and destination {t}")
    # plt.show()

    # check the distance arrays are correct
    if not nx.has_path(nx_graph, s, t):
        # unreachable vertices must keep an infinite distance
        assert distance[t] == float('inf'), "your shortest_path found an s-t path when there isn't one."
    else:
        # checks that the computed distance to t equals the expected shortest s-t distance
        assert dist == distance[t], "your shortest_path did not return the shortest path"

print("Success")

__Note: we check distances using the `distance` list and shortest paths using the `parent` list.__

In [None]:
# Run the Otter check named "q3" using the `grader` object created at the top of the notebook.
grader.check("q3")

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit.

In [None]:
# Generate the submission zip described in the note above this cell.
# NOTE(review): the flags presumably skip PDF generation, save the notebook first, and
# run the tests before exporting — confirm against the Otter documentation.
grader.export(pdf=False, force_save=True, run_tests=True)