    import networkx as nx
    
https://networkx.github.io/documentation/stable/tutorial.html

## Graphs
#### Creating a graph
    # Start from an empty graph object; nodes and edges are added to it afterwards.
    G = nx.Graph()
#### Creating nodes, edges from dataframe (source, target, weight)
    # Add one edge per dataframe row. Graph.add_edge() takes edge attributes as
    # keyword arguments only, so the weight must be passed as weight=...;
    # passing it as a third positional argument raises a TypeError.
    for source, target, weight in zip(df.source, df.target, df.weight):
        G.add_edge(source, target, weight=weight)
#### Visualize a graph
    # Draw G with labelled nodes. node_color must already be defined before this
    # runs (it is built in the "Adding nodes (custom)" snippet);
    # NOTE(review): presumably it needs one entry per node in G — confirm.
    nx.draw(G, with_labels=True,node_size=500, node_color = node_color, alpha = .7, font_weight='bold', font_size=6);

## Nodes
#### Adding nodes (custom)
    # Maps each raw dataset value to the display name given to its node.
    names = {}
    # One color per added node, in insertion order (used as node_color when drawing).
    node_color = []
    # Add one green node for every unique value in the first dataset column.
    for n, person in enumerate(df.column1.unique()):
        name = "optional text{}".format(n)
        names[person] = name
        G.add_node(name)
        node_color.append("green")
    # Same again for another dataset column, in blue (repeat for as many
    # columns as needed). This loop must sit at the same indentation level as
    # the one above, otherwise Python raises an IndentationError.
    for n, biz in enumerate(df.column2_id.unique()):
        name = "Optional text{}".format(n)
        names[biz] = name
        G.add_node(name)
        node_color.append("blue")

## Edges
#### adding edges
    # Connect every pair of nodes that are related in the dataframe, one edge
    # per row. The weight has to be given as a keyword attribute: add_edge()
    # rejects a third positional argument with a TypeError.
    for source, target, weight in zip(df.source, df.target, df.weight):
        G.add_edge(source, target, weight=weight)

## Dijkstras Algorithm
#### Dijkstra's algorithm is essentially a best-first search, closely related to breadth-first search. It commences at the starting node, spanning out to neighboring nodes and in turn visiting their neighbors (always expanding the closest unvisited node next) in search of the destination. The function should take in three parameters: the graph G, the starting node u, and the destination node v. From there, it returns the minimum distance between nodes u and v. In the case that there is no path connecting u and v, it returns a null value. It also creates a list of nodes outlining the directions for the shortest path from the starting node to the destination node, as well as the number of edges. 

### built in functions for node path and length
    nx.dijkstra_path(graph, 'start_node', 'finish_node') #returns the list of nodes along the shortest path
    nx.dijkstra_path_length(graph, 'start_node', 'finish_node') #returns the length of that shortest path (sum of its edge weights)

### Coded version
    def dijkstra(G, u, v, return_path_directions=True):
        """Find the shortest path from u to v, counting every edge as one step.

        G is the graph in question (must provide ``G.nodes`` and ``G[node]``
        for neighbor lookup).
        u is the starting node
        v is the destination node
        return_path_directions: when True, the node list of the path is
        returned together with the distance.

        Returns (path, distance) when return_path_directions is True, otherwise
        only the distance. If v cannot be reached from u (including when v is
        not in the graph), prints a message and returns NaN.
        """
        infinity = float("inf")
        unvisited = set(G.nodes)
        # Every node starts out unreachable except the start node itself.
        distances = {node: infinity for node in unvisited}
        distances[u] = 0
        shortest_paths = {u: [u]}
        weight = 1  # default weight for non-weighted graphs; edge weights are not read
        cur_node = u
        while cur_node != v:
            # Relax every neighbor: keep the shorter of the known distance and
            # the distance obtained by going through the current node.
            for node in G[cur_node]:
                candidate = distances[cur_node] + weight
                if candidate < distances[node]:
                    distances[node] = candidate
                    shortest_paths[node] = shortest_paths[cur_node] + [node]
            # Mark the current node as visited.
            unvisited.remove(cur_node)
            # The closest unvisited node becomes the current node. default=None
            # covers the case where every node has been visited already.
            cur_node = min(unvisited, key=distances.get, default=None)
            if cur_node is None or distances[cur_node] == infinity:
                print("There is no path between u and v.")
                return float("nan")
        if return_path_directions:
            return shortest_paths[v], distances[v]
        return distances[v]
        

## Dijkstras Process Visualization
    def dijkstra_visual(G, u, v, return_path_directions=True, show_plots=True):
        """Run Dijkstra's algorithm from u to v, optionally plotting every step.

        G is the graph in question.
        u is the starting node
        v is the destination node
        return_path_directions: when True, return the path along with the distance.
        show_plots: when True, draw one subplot per step on a 5x6 grid (this
        forces return_path_directions to True).

        Returns (path, distance), or only the distance when
        return_path_directions is False. Returns NaN (after printing a message)
        when v cannot be reached from u. When plotting, returns None as soon
        as the subplot grid is (nearly) full, after showing the figure.
        Every edge counts as one step; edge weights are not read.
        """
        if show_plots:
            return_path_directions = True  # must have path directions to generate plots
        n_rows, n_cols = 5, 6
        infinity = float("inf")
        visited_edges = []
        unvisited = set(G.nodes)
        distances = {node: infinity for node in unvisited}
        distances[u] = 0
        shortest_paths = {u: [u]}
        cur_node = u
        weight = 1  # default weight for non-weighted graphs
        plot_n = 1
        fig = None
        pos = None

        def draw_step(current_edge=None):
            """Draw the next panel; return False when the grid has no room left."""
            nonlocal plot_n
            if plot_n > n_rows * n_cols:
                return False
            ax = fig.add_subplot(n_rows, n_cols, plot_n)
            # Base plot
            nx.draw(G, pos=pos, with_labels=True, node_color="#1cf0c7",
                    node_size=500, font_weight="bold", width=2, alpha=.8, ax=ax)
            # Recolor paths to visited nodes
            if visited_edges:
                nx.draw_networkx_edges(G, edgelist=visited_edges, pos=pos,
                                       width=3, edge_color="#00b3e6", ax=ax)
            # Recolor the edge currently being explored
            if current_edge is not None:
                nx.draw_networkx_edges(G, edgelist=[current_edge], pos=pos,
                                       width=3, edge_color="#ffd43d", ax=ax)
            ax.set_title('Step {}'.format(plot_n))
            plot_n += 1
            return True

        # Create the initial plot
        if show_plots:
            fig = plt.figure(figsize=(20,15))
            # The layout is seeded, so computing it once gives the same
            # positions in every panel.
            pos = nx.random_layout(G, seed=9)
            draw_step()

        while cur_node != v:
            for node in G[cur_node]:
                # Plot the neighbor being explored; stop cleanly instead of
                # asking matplotlib for a subplot beyond the grid.
                if show_plots and not draw_step(current_edge=(cur_node, node)):
                    plt.show()
                    return None
                # Set either the distance through the current node or a previous shorter path
                candidate = distances[cur_node] + weight
                if candidate < distances[node]:
                    distances[node] = candidate
                    shortest_paths[node] = shortest_paths[cur_node] + [node]
            # Mark current node as visited
            unvisited.remove(cur_node)
            # The start node has no incoming path edge, so only longer paths
            # contribute a visited edge.
            path_to_cur = shortest_paths[cur_node]
            if len(path_to_cur) >= 2:
                visited_edges.append((path_to_cur[-2], cur_node))
            # Update the plot for the visited node
            if show_plots:
                drawn = draw_step()
                if not drawn or plot_n >= n_rows * n_cols - 1:
                    plt.show()
                    return None
            # Set the node with the minimum distance as the current node
            cur_node = min(unvisited, key=distances.get, default=None)
            if cur_node is None or distances[cur_node] == infinity:
                print("There is no path between u and v.")
                return float("nan")
        if return_path_directions:
            return shortest_paths[v], distances[v]
        return distances[v]
        
    # Example call with the defaults (show_plots=True). Note that when plotting,
    # the function returns None once the step counter reaches 29 panels.
    dijkstra_visual(graph, 'start_node', 'end_node')

## Centrality Metrics
#### Degree Centrality 
    # Rank nodes by degree centrality and chart the ten highest as horizontal bars.
    degree_scores = pd.DataFrame.from_dict(nx.degree_centrality(G), orient='index')
    top_degree = degree_scores.sort_values(by=0, ascending=False).head(10)
    top_degree.plot(kind='barh', color="#1cf0c7")
    plt.title('Top 10 Characters by Degree Centrality');
#### Closeness Centrality
    # Rank nodes by closeness centrality and chart the ten highest as horizontal bars.
    closeness_scores = pd.DataFrame.from_dict(nx.closeness_centrality(G), orient='index')
    top_closeness = closeness_scores.sort_values(by=0, ascending=False).head(10)
    top_closeness.plot(kind='barh', color="#1cf0c7")
    plt.title('Top 10 Characters by Closeness Centrality');
#### Betweenness Centrality
    # Rank nodes by betweenness centrality and chart the ten highest as horizontal bars.
    betweenness_scores = pd.DataFrame.from_dict(nx.betweenness_centrality(G), orient='index')
    top_betweenness = betweenness_scores.sort_values(by=0, ascending=False).head(10)
    top_betweenness.plot(kind='barh', color="#1cf0c7")
    plt.title('Top 10 Characters by Betweeness Centrality');
#### Table with degree, closeness, betweenness, eigenvector and bridge proxy 
    # Per-node centrality scores from networkx.
    degrees = nx.degree_centrality(G)
    closeness = nx.closeness_centrality(G)
    betweeness = nx.betweenness_centrality(G)
    eigs = nx.eigenvector_centrality(G)
    # One row per node, one column per metric.
    centrality = pd.DataFrame({"degrees": degrees,
                               "closeness": closeness,
                               "betweeness": betweeness,
                               "eigs": eigs})
    # Bridge proxy = betweenness relative to degree. Two plain dicts cannot be
    # divided (TypeError), so the ratio is computed on the dataframe columns,
    # which divides node by node.
    bridge_proxy = centrality["betweeness"] / centrality["degrees"]
    centrality["bridge_proxy"] = bridge_proxy
    centrality = centrality.sort_values(by='bridge_proxy', ascending=False)
    centrality.head()

## K-Clique Clustering
#### cluster impact from using different k values   
    # Report how many k-clique communities form for each k from 2 through 5.
    for k in (2, 3, 4, 5):
        communities = nx.algorithms.community.k_clique_communities(G, k=k)
        kc_clusters = list(communities)
        print("With k={}, {} clusters form.".format(k, len(kc_clusters)))
#### visualize k clique clusters
    # Palette as (name, hex code) pairs; color_dict gives lookup by name.
    colors = [("teal","#1cf0c7"),
         ("workzone_yellow","#ffd43d"),
         ("light-blue","#00b3e6"),
         ("medium-blue","#32cefe"),
         ("gray","#efefef"),
         ("dark-blue", "#1443ff")]
    color_dict = dict(colors)
    kc_clusters = list(nx.algorithms.community.k_clique_communities(G, k=2)) #create clusters
    fig = plt.figure(figsize=(35,20))
    # The layout is seeded and does not depend on the cluster, so compute it
    # once and reuse it for every cluster.
    pos = nx.spring_layout(G, k=2, seed=10)
    for n, ci in enumerate(kc_clusters):
        ci = G.subgraph(ci)
        # Cycle through the palette (hex code is the second tuple element) so
        # any number of clusters gets a color.
        nx.draw(ci, pos=pos, with_labels=True, node_color=colors[n%len(colors)][1],
                alpha=.8, node_size=10000, font_weight="bold", font_size=18)

## Girvan-Newman Clustering
#### Gives you a list of cluster lists corresponding to the clusters that form from removing the top n edges according to some metric, typically edge betweenness.
    # Exhaust the Girvan-Newman generator and keep every partition it yields.
    gn_clusters = list(nx.algorithms.community.centrality.girvan_newman(G))
    # n is the 0-based position of the partition in gn_clusters.
    # NOTE(review): presumably one step can remove more than one edge, so the
    # "removing n edges" wording may be approximate — confirm against the networkx docs.
    for n, clusters in enumerate(gn_clusters):
        print("After removing {} edges, there are {} clusters.".format(n, len(clusters)))

## Visualizing Clustering Metrics to select optimal # of clusters
#### girvan-newman used, could use k-clique as well
    import numpy as np

    fig = plt.figure(figsize=(12,10))

    # x-axis: position of each partition in gn_clusters (x was never defined before).
    x = list(range(len(gn_clusters)))
    # colors holds (name, hex code) pairs, so the hex code at index 1 is what
    # matplotlib needs; there are only six entries (indices 0-5).

    max_cluster_size = [max([len(c) for c in cluster]) for cluster in gn_clusters]
    plt.plot(x,max_cluster_size, color=colors[0][1], label='Max Cluster Size')

    min_cluster_size = [min([len(c) for c in cluster]) for cluster in gn_clusters]
    plt.plot(x,min_cluster_size, color=colors[1][1], label='Minimum Cluster Size')

    mean_cluster_size = [np.mean([len(c) for c in cluster]) for cluster in gn_clusters]
    plt.plot(x,mean_cluster_size, color=colors[2][1], label='Mean Cluster Size')

    median_cluster_size = [np.median([len(c) for c in cluster]) for cluster in gn_clusters]
    plt.plot(x,median_cluster_size, color=colors[3][1], label='Median Cluster Size')

    single_node_clusters = [sum([1 if len(c)==1 else 0 for c in cluster]) for cluster in gn_clusters]
    # colors[6] does not exist; index 4 is the one palette entry not used elsewhere here.
    plt.plot(x,single_node_clusters, color=colors[4][1], label='Number of Single Node Clusters')

    small_clusters = [sum([1 if len(c)<=5 else 0 for c in cluster ]) for cluster in gn_clusters]
    plt.plot(x,small_clusters, color=colors[5][1], label='Number of Small Clusters (5 or less nodes)')

    plt.legend(loc=(1.01,.75), fontsize=14)
    plt.title('Cluster Size Metrics versus Number of Edges Removed', fontsize=14)
    plt.xlabel('Number of Edges Removed', fontsize=14)
    plt.ylabel('Cluster Metric')
    plt.ylim(0,80)
    plt.yticks(ticks=list(range(0,80,5)))
    plt.show()
#### choosing a cluster: look to where max and median cluster sizes stop dropping dramatically and where small or single node clusters are not rapidly spawning.

## Converting Graph to Dataframe to organize dense networks
    import numpy as np
    # Take the partition stored at index 20 of gn_clusters and load its
    # clusters into a dataframe, then label the rows Cluster1, Cluster2, ...
    selected_partition = gn_clusters[20]
    clusters = pd.DataFrame(list(selected_partition))
    clusters.index = ["Cluster{}".format(i + 1) for i in range(len(clusters.index))]
    clusters