In [None]:
# Initialize Otter
# Creates the `grader` object that later cells call (`grader.check`, `grader.export`).
import otter
grader = otter.Notebook("Kruskals.ipynb")

### Please restart the kernel after running the next cell

In [None]:
# version shenanigans
!pip install -r requirements.txt --quiet
import otter
grader = otter.Notebook("Kruskals.ipynb")
assert otter.__version__ >= "4.2.0", "Please restart your kernel."

In [None]:
from heapq import heappush, heappop
import numpy as np
import networkx as nx
import tqdm
import time

### Union Find Data Structure
Below is an implementation of the union find data structure. You may find it useful for your implementation of Kruskal's algorithm later on.


In [None]:
class UnionFind:
    '''
    Disjoint-set (union find) data structure over the nodes 0, 1, ..., n-1.

    find() compresses paths and union() links by rank, so the trees stay shallow.
    '''
    def __init__(self, n):
        '''
        args:
            n:int = number of nodes in the union find data structure. Nodes are indexed
                by integers between 0 and n-1
        '''
        self.n = n
        # Every node starts out as the root of its own singleton set.
        self.parents = list(range(n))
        # rank[r] is only meaningful while r is a root; it bounds the height of r's tree.
        self.rank = [1]*n
    
    def find(self, i):
        '''
        args:
            i:int = index of some node
        returns:
            an integer representing the root of the set in which node i belongs
        raises:
            AssertionError if i is not between 0 and n-1
        '''
        assert i >= 0 and i <= self.n-1, f"Node {i} is not in the data structure. Only nodes {0} through {self.n-1} exist."
        # First pass: walk up to the root. Done iteratively so that a long parent
        # chain cannot hit Python's recursion limit.
        root = i
        while root != self.parents[root]:
            root = self.parents[root]
        # Second pass: path compression -- point every node on the path at the root.
        while self.parents[i] != root:
            next_node = self.parents[i]
            self.parents[i] = root
            i = next_node
        return root
    
    def union(self, i, j):
        '''
        args:
            i:int = index of some node
            j:int = index of some node
            
        joins the sets containing nodes i and j (no-op if they are already joined)

        raises:
            AssertionError if i or j is not between 0 and n-1
        '''
        assert i >= 0 and i <= self.n-1, f"Node {i} is not in the data structure. Only nodes {0} through {self.n-1} exist."
        assert j >= 0 and j <= self.n-1, f"Node {j} is not in the data structure. Only nodes {0} through {self.n-1} exist."
        
        pi, pj = self.find(i), self.find(j)
        if pi != pj:
            # Union by rank: always hang the shallower tree under the deeper one.
            if self.rank[pi] < self.rank[pj]:
                self.parents[pi] = pj
            elif self.rank[pi] > self.rank[pj]:
                self.parents[pj] = pi
            else:
                # Equal ranks: pj becomes the root, and its tree grows by one level,
                # so it is the new root's rank that must be bumped.
                self.parents[pi] = pj
                self.rank[pj] += 1
                

### Q1. Kruskal's MST Algorithm

Now it is time to implement Kruskal's algorithm! Implement the following function which returns a list of edges representing a minimum spanning tree. If the graph is not connected, you should return `None`.

Note that nodes in the graph are labelled with integers between 0 and n-1.

You may not use any built in libraries or modules to compute the MST.

_Hints:_
1) You're given the adjacency list representation of the graph; however, Kruskal's does not use adjacency lists. Convert your adjacency list to an edge list in linear time.
2) By default, the `sort()` and `sorted()` functions sort lists of tuples in ascending order based on their first element (ties are broken by comparing the following elements in order).

__Note: your solution should not take inordinate amounts of time to run. If it takes more than 60 seconds to run, it is too slow__

In [None]:
def kruskal(G):
    """
    Compute a minimum spanning tree of G using Kruskal's algorithm.

    args:
        G:List[List[Tuple[int,int]]] = adjacency list of the input graph, given as a list of
            n lists. Each element of G[u] (where u is an integer between 0 and n-1) is a
            tuple with 2 elements: (v, w) means there's an edge $(u,v)$ of weight $w$.
            
            You can assume that only simple graphs G are fed to your implementation.
            G is undirected, so if an edge (u,v) exists it appears in both G[u] and G[v].
    
    returns:
        List[Tuple[int,int]] = a list of edges belonging to an MST, or None if the graph is
            not connected. Do not return the edge weights: for an edge (u,v) with weight w,
            only return (u,v).
    """
    ...
            

In [None]:
# Run the otter tests for question "q1" (they exercise the `kruskal` function).
grader.check("q1")

### Debugging
The otter tests are pasted here for your convenience. Feel free to add whatever print statements or assertions you'd like when debugging.

In [None]:
def test_q1(nx, tqdm, np, kruskal):
    for n in tqdm.tqdm(range(10,1000,15)):
        random_graph = nx.gnp_random_graph(n,0.08)
        edge_list = random_graph.edges
        G = [[] for _ in range(n)]
        for e in edge_list:
            w = np.random.randint(-1000, 1000)
            
            # create adjacency list
            G[e[0]].append((e[1], w))
            G[e[1]].append((e[0], w))
            
            # add weights to networkx graph
            random_graph[e[0]][e[1]]['weight'] = w
            random_graph[e[1]][e[0]]['weight'] = w
            
        # check that the correct number of edges are returned
        T1 = kruskal(G)
        if not nx.is_connected(random_graph):
            assert T1 is None, f"kruskal did not return None for a unconnected graph!"
            continue
        assert T1 is not None, f"kruskal returned None for a connected graph!"
        assert len(T1) == n-1, f"kruskal returned {len(T1)} edges rather than n-1 edges on a connected graph!"
        for e in T1:
            assert random_graph.has_edge(e[0], e[1]), f"kruskal returned an edge that's not in the graph!"
        
        # Check that the edge lists forms a tree. Since we already checked that it contains n-1 edges, 
        # this condition ensures that the tree is a spanning tree.
        TG = nx.Graph()
        TG.add_edges_from(T1)
        nx.is_tree(TG)
        
        # check that the weight of the spanning tree is correct
        w1 = 0
        for e in T1:
            w1 += random_graph[e[0]][e[1]]['weight']
        w2 = 0
        mst = list(nx.minimum_spanning_edges(random_graph))
        for e in mst:
            w2 += random_graph[e[0]][e[1]]['weight']
        assert w1 == w2, f"kruskal returned a spanning tree of weight {w1} but the true MST as weight {w2}"
        

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit.

In [None]:
# Generate the submission zip. The flags presumably skip the PDF, force-save the
# notebook first, and run the tests as part of the export -- confirm against the otter docs.
grader.export(pdf=False, force_save=True, run_tests=True)