In [1]:
import numpy as np
import networkx as nx
from bokeh.io import output_notebook
from bokeh.io import show
from bokeh.plotting import figure, from_networkx
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import collections
from taska import parseWikiData
import random

# Load Graphs

In [2]:
# Build one graph per dataset with the project-local parseWikiData helper
# (imported from taska). The file names suggest small / medium / large
# datasets -- TODO confirm.
# NOTE(review): inside a notebook kernel __name__ is "__main__", so this guard
# always passes; graph1, graph2 and graph3 end up as notebook-level globals
# used by the cells below.
if __name__ == "__main__":
    filepath = 'datasets/PROPERTIES_FOR_DELETION_SML.csv'
    filepath2 = "datasets/WIKIPROJECTS_MED.csv"
    filepath3 = "datasets/USERS_LRG.csv"

    graph1 = parseWikiData(filepath=filepath)
    graph2 = parseWikiData(filepath=filepath2)
    graph3 = parseWikiData(filepath=filepath3)

### Plot using Pyplot

In [None]:
nx.draw(graph1, with_labels = False)
plt.show()

In [None]:
nx.draw(graph2, with_labels = False)
plt.show()

In [None]:
nx.draw(graph3, with_labels = False)
plt.show()

### Plot using Bokeh

In [None]:
 # If we want to plot the graph = subgraph(largest_component)
def bokeh_plot_simple(graph:nx.Graph, title:str, scale=2, crop_factors = None):
    """Draw ``graph`` in a toolbar-less Bokeh figure using a spring layout.

    Parameters
    ----------
    graph : nx.Graph
        Graph to draw. Its nodes are relabelled to consecutive integers
        (via ``nx.relabel_nodes``) before being handed to Bokeh.
    title : str
        Title of the figure.
    scale : number, optional
        ``scale`` argument forwarded to ``nx.spring_layout``.
    crop_factors : dict, optional
        Extra keyword arguments for ``figure`` (typically ``x_range`` and
        ``y_range``). Defaults to a (-1.1, 1.1) window on both axes.
    """
    if crop_factors is None:
        crop_factors = {'x_range': (-1.1, 1.1), 'y_range': (-1.1, 1.1)}

    canvas = figure(title=title, tools="", toolbar_location=None, **crop_factors)

    # Replace node labels by their position in the node iteration order.
    node_to_index = {node: index for index, node in enumerate(graph.nodes)}
    indexed_graph = nx.relabel_nodes(graph, node_to_index)

    renderer = from_networkx(
        indexed_graph, nx.spring_layout, scale=scale, center=(0,0))
    canvas.renderers.append(renderer)

    show(canvas)

output_notebook()

In [None]:
bokeh_plot_simple(graph1, 'Graph 1', 4)

In [None]:
bokeh_plot_simple(graph2, 'Graph 2', 1)

In [None]:
bokeh_plot_simple(graph3, 'Graph 3', 1)

# i) Characteristics

### 1) graph statistics
- Number of Nodes
- Number of Edges
- Number of Connected Components
- Average Degree
- Node(s) with max degree and that degree
- Number of isolated nodes
- Diameter (impossible if more than 1 Connected Component)
- Degree Distribution

In [4]:
def get_nodes_with_highest_degree(graph):
    """Return the largest degree in ``graph`` and every node that attains it.

    Parameters
    ----------
    graph
        Any object whose ``degree()`` yields ``(node, degree)`` pairs.

    Returns
    -------
    tuple
        ``(max_degree, nodes_with_max_degree)``. For a graph without nodes the
        result is ``(0, [])`` instead of raising ``ValueError`` from ``max``.
    """
    # Materialise once so the degree view is not walked twice.
    degree_pairs = list(graph.degree())
    if not degree_pairs:
        return (0, [])

    max_degree = max(d for _, d in degree_pairs)
    nodes_with_max_degree = [n for n, d in degree_pairs if d == max_degree]

    return (max_degree, nodes_with_max_degree)

In [3]:
def get_isolated_nodes(graph):
    """Count the nodes of ``graph`` whose degree is 0.

    Despite the name, the return value is the *number* of isolated nodes,
    not the nodes themselves.
    """
    return sum(1 for _, degree in graph.degree() if degree == 0)

In [5]:
# i)1) Graph Statistics
def print_graph_statistics(graph:nx.Graph, amount_of_max_degree_nodes:bool=False):
    """Print summary statistics for ``graph``.

    Printed, in order: node and edge counts, number of connected components,
    average degree, average clustering coefficient, maximum degree (with the
    nodes attaining it), number of isolated nodes and the approximate diameter.

    Parameters
    ----------
    graph : nx.Graph
        Graph to summarise.
    amount_of_max_degree_nodes : bool, optional
        When True, print how many nodes have the maximum degree instead of
        listing them (useful when every node shares the same degree).
    """
    print("Number of nodes: {}\nNumber of edges: {}".format(
        graph.number_of_nodes(), graph.number_of_edges()
    ))
    print("Number of connected components: {}".format(
        nx.algorithms.components.number_connected_components(graph),
    ))
    print("Average degree: {}\nClustering coefficient: {}".format(
        np.mean([deg for _, deg in graph.degree]),
        nx.algorithms.cluster.average_clustering(graph)
    ))
    max_degree, nodes_with_max_degree = get_nodes_with_highest_degree(graph)
    print(f'Max degree: {max_degree}')
    if amount_of_max_degree_nodes:
        print(f'Amount of nodes with max degree:{len(nodes_with_max_degree)}')
    else:
        print(f'Nodes with max degree:{nodes_with_max_degree}')

    isolated_count = get_isolated_nodes(graph)
    print(f'Number of Isolated nodes:  {(isolated_count)}')

    # The approximate diameter raises NetworkXError for an empty or
    # disconnected graph. Only that case is reported as "could not compute";
    # any other exception (or a keyboard interrupt) propagates.
    try:
        diam = nx.algorithms.approximation.distance_measures.diameter(graph)
    except nx.NetworkXError:
        print("\nERROR: Could not compute the diameter of the graph.")
    else:
        print("Graph diameter: {}".format(diam))

In [6]:
print_graph_statistics(graph1)

Number of nodes: 739
Number of edges: 13530
Number of connected components: 4
Average degree: 36.617050067659
Clustering coefficient: 0.7913288282706287
Max degree: 478
Nodes with max degree:['Jura1']
Number of Isolated nodes:  2

ERROR: Could not compute the diameter of the graph.


In [None]:
print_graph_statistics(graph2)

In [None]:
print_graph_statistics(graph3)

### Degree Count Distribution
A histogram representing the degree distribution of the graph

In [None]:
def get_degree_count_distribution(graph):
    """Plot a bar chart of how many nodes of ``graph`` have each degree.

    Despite the ``get_`` prefix nothing is returned; the figure is shown with
    ``plt.show()``. A graph without nodes yields an empty chart rather than
    an unpacking error.
    """
    degree_count = collections.Counter(d for _, d in graph.degree())
    degrees = list(degree_count.keys())
    counts = list(degree_count.values())

    _, ax = plt.subplots()
    ax.bar(degrees, counts, width=0.8)
    ax.set_title('Degree Histogram')
    ax.set_ylabel('Count')
    ax.set_xlabel('Degree')
    # The original referenced plt.show without calling it, which is a no-op.
    plt.show()


In [None]:
get_degree_count_distribution(graph1)

In [None]:
get_degree_count_distribution(graph2)

In [None]:
get_degree_count_distribution(graph3)

### 2) Largest Component Statistics
- On top of above statistics add average shortest path
- Kevin Bacon node and its average shortest-path length to all other nodes

In [10]:
def print_connected_statistics_with_average_shortest_path(component:nx.Graph, amount_of_max_degree_nodes:bool=False):
    """Print summary statistics for a connected graph, including path length.

    Printed, in order: node and edge counts, average shortest-path length,
    number of connected components, average degree, average clustering
    coefficient, number of isolated nodes, maximum degree (with the nodes
    attaining it) and the approximate diameter.

    Parameters
    ----------
    component : nx.Graph
        Graph to summarise. ``nx.average_shortest_path_length`` is called on
        it without a guard, so it is expected to be connected.
    amount_of_max_degree_nodes : bool, optional
        When True, print how many nodes have the maximum degree instead of
        listing them.
    """
    print("Number of nodes: {}\nNumber of edges: {}".format(
        component.number_of_nodes(), component.number_of_edges()
    ))
    print("Average path length: {}".format(
        nx.average_shortest_path_length(component)
    ))
    print("Number of connected components: {}".format(
        nx.algorithms.components.number_connected_components(component),
    ))
    print("Average degree: {}\nClustering coefficient: {}".format(
        np.mean([deg for _, deg in component.degree]),
        nx.algorithms.cluster.average_clustering(component)
    ))

    isolated_count = get_isolated_nodes(component)
    print(f'Number of Isolated Nodes:  {isolated_count}')

    max_degree, nodes_with_max_degree = get_nodes_with_highest_degree(component)
    print(f'Max degree: {max_degree}')
    if amount_of_max_degree_nodes:
        print(f'Amount of nodes with max degree:{len(nodes_with_max_degree)}')
    else:
        print(f'Nodes with max degree:{nodes_with_max_degree}')

    # Only the "empty / not connected" failure of the approximate diameter is
    # reported; other exceptions are no longer silently swallowed.
    try:
        diam = nx.algorithms.approximation.distance_measures.diameter(component)
    except nx.NetworkXError:
        print("\nERROR: Could not compute the diameter of the graph.")
    else:
        print("Graph diameter: {}".format(diam))

In [8]:
def print_statistics_for_largest_component(graph:nx.Graph, amount_of_max_degree_nodes:bool=False):
    """Print connected-graph statistics for the largest component of ``graph``.

    The largest connected component (by node count) is taken as a subgraph
    and passed to ``print_connected_statistics_with_average_shortest_path``
    together with ``amount_of_max_degree_nodes``.
    """
    biggest_node_set = max(nx.connected_components(graph), key=len)
    print_connected_statistics_with_average_shortest_path(
        graph.subgraph(biggest_node_set),
        amount_of_max_degree_nodes,
    )

In [11]:
print_statistics_for_largest_component(graph1)

Number of nodes: 735
Number of edges: 13529
Average path length: 2.1468090233368553
Number of connected components: 1
Average degree: 36.81360544217687
Clustering coefficient: 0.7956353797169995
Number of Isolated Nodes:  0
Max degree: 478
Nodes with max degree:['Jura1']
Graph diameter: 4


In [None]:
print_statistics_for_largest_component(graph2)

In [None]:
print_statistics_for_largest_component(graph3)

In [None]:
def get_degree_count_distribution_largest_component(graph):
    """Plot the degree histogram of the largest connected component of ``graph``.

    The original body never selected the largest component and therefore
    plotted the whole graph. The component is now extracted first; a graph
    that is already connected gives the same chart as before. Nothing is
    returned; the figure is shown with ``plt.show()``.
    """
    components = list(nx.connected_components(graph))
    if components:
        graph = graph.subgraph(max(components, key=len))

    degree_count = collections.Counter(d for _, d in graph.degree())
    degrees = list(degree_count.keys())
    counts = list(degree_count.values())

    _, ax = plt.subplots()
    ax.bar(degrees, counts, width=0.8)
    ax.set_title('Degree Histogram')
    ax.set_ylabel('Count')
    ax.set_xlabel('Degree')
    # The original referenced plt.show without calling it, which is a no-op.
    plt.show()

In [None]:
get_degree_count_distribution_largest_component(graph1)

In [None]:
get_degree_count_distribution_largest_component(graph2)

In [None]:
get_degree_count_distribution_largest_component(graph3)

Get the Kevin Bacon node by finding the node with the lowest average path from all other nodes to itself

In [None]:
def get_kevin_bacon_node_for_largest_component(graph:nx.Graph):
    """Find the node with the lowest average distance to the rest of its component.

    Works on the largest connected component of ``graph``. For every node the
    shortest-path lengths to all nodes of the component are computed and
    averaged over the *other* nodes; the first node with the smallest average
    wins.

    Returns
    -------
    tuple
        ``(kevin_bacon_node, min_average_length)``. The same values are also
        printed.

    Raises
    ------
    ValueError
        If ``graph`` has no nodes (there is no component to pick).
    """
    largest_component = max(nx.connected_components(graph), key=len)
    largest_component_graph = graph.subgraph(largest_component)
    other_node_count = largest_component_graph.number_of_nodes() - 1

    kevin_bacon_node = None
    min_average_length = float('inf')
    for node in largest_component_graph.nodes():
        path_lengths = nx.single_source_shortest_path_length(largest_component_graph, node)
        # path_lengths contains the node itself at distance 0, so the average
        # over "all other nodes" divides by n - 1, not n. A single-node
        # component has no other nodes; its average is defined as 0.0.
        if other_node_count:
            average_length = sum(path_lengths.values()) / other_node_count
        else:
            average_length = 0.0

        if average_length < min_average_length:
            kevin_bacon_node = node
            min_average_length = average_length

    print(f'Kevin Bacon node: {kevin_bacon_node}\nWith average length: {min_average_length}')
    return kevin_bacon_node, min_average_length

In [None]:
get_kevin_bacon_node_for_largest_component(graph1)

In [None]:
get_kevin_bacon_node_for_largest_component(graph2)

In [None]:
get_kevin_bacon_node_for_largest_component(graph3)

### 3) Node level statistics
Analyse three different graphs by analysing its properties at the node level
- Degrees
- Clustering Coefficients
- Closeness Centrality

In [None]:
def get_node_level_descriptors(graph:nx.Graph):
    """Collect per-node degree, clustering coefficient and closeness centrality.

    Returns a dict of three lists keyed by 'degrees',
    'clustering coefficients' and 'closenes centrality'. The last key is
    spelled that way on purpose: the plotting helpers look it up verbatim.
    """
    degree_values = [degree for _, degree in graph.degree()]
    clustering_values = list(nx.algorithms.cluster.clustering(graph).values())
    closeness_values = list(nx.closeness_centrality(graph).values())

    return {
        'degrees': degree_values,
        'clustering coefficients': clustering_values,
        'closenes centrality': closeness_values,
    }

In [None]:
def plot_helper_node_level_descriptors(descriptors, titles, key):
    """Plot the distribution of one descriptor for several graphs at once.

    Parameters
    ----------
    descriptors : sequence of dict
        One descriptor dict per graph; ``descriptors[i]`` belongs to ``titles[i]``.
    titles : sequence of str
        Legend label for each graph.
    key : str
        Which descriptor to plot (a key of every dict in ``descriptors``).
    """
    series_by_title = {}
    for position, label in enumerate(titles):
        series_by_title[label] = descriptors[position][key]

    sns.displot(series_by_title, height=4, aspect=2, kde=True)
    plt.title(f'Distribution of {key} (Count)')
    plt.show()
    

In [None]:
def show_node_level_descriptors_degrees(graphs:list[nx.Graph], titles:list[str]):
    """Plot the node-degree distribution of each graph in ``graphs``.

    Only the degrees are computed. The previous version built every
    descriptor (including the costly closeness centrality) for every graph
    and then used the degrees alone.

    Parameters
    ----------
    graphs : list of nx.Graph
        Graphs to compare.
    titles : list of str
        Legend label for each graph, in the same order as ``graphs``.
    """
    descriptors = [
        {'degrees': [degree for _, degree in graph.degree()]}
        for graph in graphs
    ]
    plot_helper_node_level_descriptors(descriptors, titles, 'degrees')

def show_node_level_descriptors_clustering_centrality(graphs:list[nx.Graph], titles:list[str]):
    """Plot the clustering-coefficient distribution of each graph in ``graphs``.

    Note the function name says "centrality", but the quantity plotted is the
    per-node clustering coefficient. Only that descriptor is computed; the
    previous version also computed degrees and closeness centrality for every
    graph and discarded them.

    Parameters
    ----------
    graphs : list of nx.Graph
        Graphs to compare.
    titles : list of str
        Legend label for each graph, in the same order as ``graphs``.
    """
    descriptors = [
        {'clustering coefficients': list(nx.algorithms.cluster.clustering(graph).values())}
        for graph in graphs
    ]
    plot_helper_node_level_descriptors(descriptors, titles, 'clustering coefficients')

def show_node_level_descriptors_closeness_centrality(graphs:list[nx.Graph], titles:list[str]):
    """Plot the closeness-centrality distribution of each graph in ``graphs``.

    Only closeness centrality is computed; the previous version also computed
    degrees and clustering coefficients for every graph and discarded them.
    The key 'closenes centrality' keeps the spelling used elsewhere in this
    notebook, so the plot title is unchanged.

    Parameters
    ----------
    graphs : list of nx.Graph
        Graphs to compare.
    titles : list of str
        Legend label for each graph, in the same order as ``graphs``.
    """
    descriptors = [
        {'closenes centrality': list(nx.closeness_centrality(graph).values())}
        for graph in graphs
    ]
    plot_helper_node_level_descriptors(descriptors, titles, 'closenes centrality')

## ii) Shortest Paths
Find the shortest paths between the two nodes which are farthest apart

Use the 2-Sweep method for finding the nodes which are furthest apart, i.e. have the same shortest path length as the diameter of the graph.

This has to be done on a single connected component, so first check that the largest component indeed contains the longest shortest path.

Methods for finding shortest path:

- Dijkstra
- Bellman Ford

In [None]:
# Djikstra
def get_shortest_path_largest_component_Dijkstra(graph:nx.Graph, farthest_nodes:tuple):
    """Print the Dijkstra shortest path between a pair of nodes and its length.

    Parameters
    ----------
    graph : nx.Graph
        Graph to search; the approximate diameter is also computed on it.
    farthest_nodes : tuple
        ``(start_node, end_node)`` pair to connect.

    The last printed line shows the approximate diameter of ``graph`` so it
    can be compared with the path length by eye. Nothing is returned.
    """
    source, target = farthest_nodes

    route = nx.algorithms.dijkstra_path(graph, source, target)
    print("\nShortest path: " + " -> ".join(str(step) for step in route))

    # Edges on the path = nodes on the path minus the starting node.
    hop_count = len(route) - 1
    print("How long is the path among these farthest nodes? {}".format(hop_count))

    approx_diameter = nx.algorithms.approximation.distance_measures.diameter(graph)
    print(f'Should be the same as the diameter of the graph!!!: {approx_diameter}')

In [None]:
# BF
def get_shortest_path_largest_component_BF(graph:nx.Graph, farthest_nodes):
    """Print the Bellman-Ford shortest path between a pair of nodes and its length.

    Parameters
    ----------
    graph : nx.Graph
        Graph to search; the approximate diameter is also computed on it.
    farthest_nodes : tuple
        ``(start_node, end_node)`` pair to connect.

    The two endpoints are printed first, then the path, its length in edges,
    and the approximate diameter of ``graph`` for comparison. Nothing is
    returned.
    """
    source, target = farthest_nodes

    print(f"Start node: {source}\nEnd node: {target}") 

    route = nx.algorithms.bellman_ford_path(graph, source, target)
    print("\nShortest path: " + " -> ".join(str(step) for step in route))

    # Edges on the path = nodes on the path minus the starting node.
    hop_count = len(route) - 1
    print("How long is the path among these farthest nodes? {}".format(hop_count))

    approx_diameter = nx.algorithms.approximation.distance_measures.diameter(graph)
    print(f'Should be the same as the diameter of the graph!!!: {approx_diameter}')

In [12]:
# For each graph, keep only its largest connected component (by node count)
# as a subgraph, so the diameter / shortest-path cells below work on a
# connected graph.
graph1_largest_component = graph1.subgraph(max(nx.connected_components(graph1), key=len))

graph2_largest_component = graph2.subgraph(max(nx.connected_components(graph2), key=len))

graph3_largest_component = graph3.subgraph(max(nx.connected_components(graph3), key=len))


In [None]:
# leveraging the property of the diameter's endpoints being part of the longest shortest path from any node
def get_farthest_nodes_2_sweep(graph:nx.Graph):
    """Find two nodes whose distance matches the (approximate) graph diameter.

    Repeats a 2-sweep from random starting nodes: take the node farthest from
    the starting node, then the node farthest from that one. The sweep stops
    as soon as the pair is at least as far apart as the approximate diameter
    reported by networkx.

    Fixes over the previous version:
    * ``random.randint(0, n)`` includes ``n`` and could index past the end of
      the node list; starting nodes are now drawn from a shuffled node list.
    * The node list is built once instead of on every iteration.
    * The search is bounded (each node is tried at most once) and falls back
      to the farthest pair seen, instead of looping forever when the estimate
      is never reproduced exactly.
    * A pair that is *farther* apart than the estimate is accepted rather
      than discarded.

    Parameters
    ----------
    graph : nx.Graph
        Connected, non-empty graph.

    Returns
    -------
    tuple
        ``(start_node, end_node)``.

    Raises
    ------
    nx.NetworkXError
        Propagated from the diameter approximation when ``graph`` is empty or
        not connected.
    """
    target_length = nx.algorithms.approximation.distance_measures.diameter(graph)

    candidates = list(graph.nodes())
    random.shuffle(candidates)

    best_pair = None
    best_length = -1
    for node in candidates:
        distances_from_node = nx.single_source_shortest_path_length(graph, node)
        start_node = max(distances_from_node, key=distances_from_node.get)

        distances_from_start_node = nx.single_source_shortest_path_length(graph, start_node)
        end_node = max(distances_from_start_node, key=distances_from_start_node.get)

        # The second sweep already holds the start -> end distance.
        sweep_length = distances_from_start_node[end_node]
        if sweep_length > best_length:
            best_pair = (start_node, end_node)
            best_length = sweep_length

        if sweep_length >= target_length:
            break

    return best_pair

In [None]:
def check_diam_is_in_component(graph:nx.Graph, largest_component:nx.Graph):
    """Check that no other component of ``graph`` out-spans ``largest_component``.

    The previous loop called the diameter routine on the whole (possibly
    disconnected) graph and on a plain node list; both calls raised and were
    swallowed by a bare ``except``, so the function effectively always
    returned True. Each connected component is now evaluated as a subgraph.

    All diameters come from networkx's approximate routine, so the result is
    a heuristic, consistent with how the rest of this notebook measures
    diameters.

    Parameters
    ----------
    graph : nx.Graph
        Full graph, possibly made of several connected components.
    largest_component : nx.Graph
        The component expected to contain the longest shortest path.

    Returns
    -------
    bool
        False if ``largest_component`` has no diameter (empty or not
        connected) or if another component has a larger approximate
        diameter; True otherwise.
    """
    approx_diameter = nx.algorithms.approximation.distance_measures.diameter

    try:
        largest_comp_diam = approx_diameter(largest_component)
    except nx.NetworkXError:
        print('This component has no diameter')
        return False

    largest_nodes = set(largest_component.nodes())
    for component_nodes in nx.connected_components(graph):
        # Do not compare the reference component against a second random
        # estimate of itself.
        if component_nodes == largest_nodes:
            continue
        # A component with k nodes cannot have a diameter above k - 1, so
        # small components are ruled out without any path computation.
        if len(component_nodes) - 1 <= largest_comp_diam:
            continue
        if approx_diameter(graph.subgraph(component_nodes)) > largest_comp_diam:
            return False

    return True



In [None]:
# Check to see if largest path is in largest component
print(f'Graph 1: {check_diam_is_in_component(graph1, graph1_largest_component)}')
print(f'Graph 2: {check_diam_is_in_component(graph2, graph2_largest_component)}')
print(f'Graph 3: {check_diam_is_in_component(graph3, graph3_largest_component)}')

In [None]:
graph1_farthest_nodes = get_farthest_nodes_2_sweep(graph1_largest_component) # diam = 4

In [None]:
graph2_farthest_nodes  = get_farthest_nodes_2_sweep(graph2_largest_component) # diam = 8

In [None]:
# we have assumed that the largest path will be in the largest component
graph3_farthest_nodes = get_farthest_nodes_2_sweep(graph3_largest_component) # diam = 10

In [None]:
get_shortest_path_largest_component_Dijkstra(graph1_largest_component, graph1_farthest_nodes)

In [None]:
get_shortest_path_largest_component_Dijkstra(graph2_largest_component, graph2_farthest_nodes)

In [None]:
get_shortest_path_largest_component_Dijkstra(graph3_largest_component, graph3_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(graph1_largest_component, graph1_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(graph2_largest_component, graph2_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(graph3_largest_component, graph3_farthest_nodes)

## iii) Where is it on random <-> small world <-> regular

- Build equivalent Random graph, using Erdos-Renyi
- Build equivalent Regular graph


To analyse the differences
- get statistics for the random graph
- get statistics for the largest component of the random graph
- get statistics for the regular graph
- get the degree distribution for the random graph
- get the degree distribution for the regular graph
- Calculate the Graph Edit Distance (GED)

note: to use this GED implementation, we must preserve node names to allow for comparison.


In [13]:
def get_equivalent_random_graph_preserve_nodes(graph:nx.Graph, draw=True, seed=None):
    """Build an Erdos-Renyi style random graph on the same node names as ``graph``.

    Every unordered pair of nodes is connected independently with
    probability ``p = number_of_edges / max_edges``, where
    ``max_edges = n * (n - 1) / 2``. The expected edge count therefore
    matches the edge count of ``graph``.

    Parameters
    ----------
    graph : nx.Graph
        Graph whose nodes and edge density are copied.
    draw : bool, optional
        When True, draw the generated graph with ``nx.draw``.
    seed : optional
        Seed for the private ``random.Random`` generator. The default
        ``None`` keeps the previous behaviour (a different graph on each
        call); pass a value to make the result reproducible.

    Returns
    -------
    nx.Graph
        The generated random graph. ``n`` and ``p`` are also printed.
    """
    nodes = list(graph.nodes())
    n = len(nodes)
    number_of_edges = graph.number_of_edges()
    max_edges = n*(n-1)/2
    # With fewer than two nodes there is no possible edge; avoid dividing by 0.
    p = number_of_edges/max_edges if max_edges else 0.0

    equivalent_random = nx.Graph()
    equivalent_random.add_nodes_from(nodes)

    rng = random.Random(seed)

    for i in range(n):
        for j in range(i+1, n):
            if rng.random() < p:
                equivalent_random.add_edge(nodes[i], nodes[j])

    if draw:
        nx.draw(equivalent_random, with_labels=False)
    print(f'n: {n}\n p:{p}')
    return equivalent_random

In [14]:
def get_equivalent_regular_graph_preserve_nodes(graph:nx.Graph, draw=True, half_degree=2):
    """Build a ring lattice on the same node names as ``graph``.

    Nodes are placed on a ring in iteration order and each one is linked to
    the next ``half_degree`` nodes along the ring, so every node ends up with
    degree ``2 * half_degree`` when the ring is large enough.

    Parameters
    ----------
    graph : nx.Graph
        Graph whose nodes are copied (its edges are ignored).
    draw : bool, optional
        When True, draw the lattice on a 10x10 figure with a circular layout.
    half_degree : int, optional
        Number of forward neighbours per node. The default 2 reproduces the
        previous hard-coded "next node + node after next" wiring.

    Returns
    -------
    nx.Graph
        The generated lattice.
    """
    regular_graph = nx.Graph()

    nodes = list(graph.nodes())

    regular_graph.add_nodes_from(nodes)

    n = len(nodes)

    for i, node in enumerate(nodes):
        for offset in range(1, half_degree + 1):
            neighbour_index = (i + offset) % n
            # On very small rings the offset wraps back onto the node itself;
            # skip it rather than adding a self-loop.
            if neighbour_index != i:
                regular_graph.add_edge(node, nodes[neighbour_index])

    if draw:
        _, ax = plt.subplots(figsize=(10,10))
        nx.draw(regular_graph, pos=nx.circular_layout(regular_graph), ax=ax, with_labels=False)
    return regular_graph

In [15]:
# Random counterparts of the three graphs (same nodes, matching edge
# probability); the second argument False disables drawing.
graph1_equivalent_random = get_equivalent_random_graph_preserve_nodes(graph1, False)
graph2_equivalent_random = get_equivalent_random_graph_preserve_nodes(graph2, False)
graph3_equivalent_random = get_equivalent_random_graph_preserve_nodes(graph3, False)

# Regular (ring-lattice) counterparts on the same nodes, also without drawing.
graph1_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(graph1, False)
graph2_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(graph2, False)
graph3_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(graph3, False)

n: 739
 p:0.04961659900766802
n: 1620
 p:0.005171611801218555
n: 11387
 p:0.00038089543160109467


In [16]:
print_graph_statistics(graph1_equivalent_random)
print('\n')
print_statistics_for_largest_component(graph1_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(graph1_equivalent_regular, True)

Number of nodes: 739
Number of edges: 13583
Number of connected components: 1
Average degree: 36.760487144790254
Clustering coefficient: 0.049744658240079634
Max degree: 59
Nodes with max degree:['Gstupp']
Number of Isolated nodes:  0
Graph diameter: 3


Number of nodes: 739
Number of edges: 13583
Average path length: 2.1018588805644485
Number of connected components: 1
Average degree: 36.760487144790254
Clustering coefficient: 0.049744658240079634
Number of Isolated Nodes:  0
Max degree: 59
Nodes with max degree:['Gstupp']
Graph diameter: 3





Number of nodes: 739
Number of edges: 1478
Average path length: 92.75067750677506
Number of connected components: 1
Average degree: 4.0
Clustering coefficient: 0.5
Number of Isolated Nodes:  0
Max degree: 4
Amount of nodes with max degree:739
Graph diameter: 185


In [None]:
print_graph_statistics(graph2_equivalent_random)
print('\n')
print_statistics_for_largest_component(graph2_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(graph2_equivalent_regular, True)

In [None]:
print_graph_statistics(graph3_equivalent_random)
print('\n')
print_statistics_for_largest_component(graph3_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(graph3_equivalent_regular, True)

In [None]:
get_degree_count_distribution(graph1_equivalent_random)

In [None]:
get_degree_count_distribution(graph1_equivalent_regular)

In [None]:
get_degree_count_distribution(graph2_equivalent_random)

In [None]:
get_degree_count_distribution(graph2_equivalent_regular)

In [None]:
get_degree_count_distribution(graph3_equivalent_random)

In [None]:
get_degree_count_distribution(graph3_equivalent_regular)

In [None]:

def print_node_level_comparison(graph:nx.Graph, equivalent_random:nx.Graph, equivalent_regular:nx.Graph):
    """Plot node-level distributions of a graph against its random and regular counterparts.

    Three plots are produced in order (degrees, clustering coefficients,
    closeness centrality), separated by four printed newlines. The series are
    labelled 'graph', 'random' and 'regular'.
    """
    compared_graphs = [graph, equivalent_random, equivalent_regular]
    labels = ['graph', 'random', 'regular']
    gap = '\n' * 4

    show_node_level_descriptors_degrees(compared_graphs, labels)
    print(gap)
    show_node_level_descriptors_clustering_centrality(compared_graphs, labels)
    print(gap)
    show_node_level_descriptors_closeness_centrality(compared_graphs, labels)


In [None]:
print_node_level_comparison(graph1, graph1_equivalent_random, graph1_equivalent_regular)

In [None]:
print_node_level_comparison(graph2, graph2_equivalent_random, graph2_equivalent_regular)

In [None]:
print_node_level_comparison(graph3, graph3_equivalent_random, graph3_equivalent_regular)

In [None]:
def calculate_edge_edit_distance_approximation(graph1:nx.Graph, graph2:nx.Graph):
    """Print an edge-based approximation of the graph edit distance.

    The approximation is the size of the symmetric difference of the two edge
    sets: edges present in exactly one of the graphs. Both graphs are treated
    as undirected and are expected to share node names.

    Edges are compared as unordered pairs. The previous version compared the
    raw ``(u, v)`` tuples, so the same undirected edge reported as ``(a, b)``
    by one graph and ``(b, a)`` by the other was counted as two differences.

    Parameters
    ----------
    graph1, graph2 : nx.Graph
        Graphs to compare (these parameter names shadow the notebook-level
        ``graph1`` / ``graph2`` inside the function only).

    Nothing is returned; the counts are printed.
    """
    graph1_edges = {frozenset(edge) for edge in graph1.edges()}
    graph2_edges = {frozenset(edge) for edge in graph2.edges()}

    unique_edges_to_graph1 = graph1_edges - graph2_edges
    unique_edges_to_graph2 = graph2_edges - graph1_edges

    edit_distance = len(unique_edges_to_graph1) + len(unique_edges_to_graph2)
    print(f'Unique edges to graph1: {len(unique_edges_to_graph1)}/{len(graph1_edges)}\nUnique edges to graph2: {len(unique_edges_to_graph2)}/{len(graph2_edges)}\nApproximation of GED: {edit_distance}')

In [None]:
calculate_edge_edit_distance_approximation(graph1, graph1_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(graph1, graph1_equivalent_regular)

In [None]:
calculate_edge_edit_distance_approximation(graph2, graph2_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(graph2, graph2_equivalent_regular)

In [None]:
calculate_edge_edit_distance_approximation(graph3, graph3_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(graph3, graph3_equivalent_regular)

## v) Two editors are connected iff they have both contributed to any thread in the same page, but not necessarily to the same thread? ( I.e. we would have more connections in the network)

i.e. Redefine the graphs, rename them as net1, net2, and net3

Analyse the resulting graphs with the methods identified above
i.e.
- plot the graphs (bokeh and matplotlib.pyplot)
- graph statistics
- largest component of the graphs statistics
- Shortest paths using Dijkstra and Bellman Ford
- Where is it on the random <-> small world <-> regular continuum
    - statistics (as outlined above)
    - degree distributions for the three graphs
    - Node level comparisons (Degrees, Clustering Coefficients, Closeness Centrality)
    - Graph Edit Distance

In [None]:
def create_graph_connected_by_thread_in_same_page(network_data:str):
    """Build a graph linking every pair of editors who contributed to the same page.

    Parameters
    ----------
    network_data : str
        Path to a CSV file with (at least) the columns ``page_name`` and
        ``username``.

    Returns
    -------
    nx.Graph
        One node per distinct username and one edge per pair of usernames
        that appear under the same ``page_name``. Editors who are the only
        contributor to their page(s) are kept as isolated nodes; the previous
        version dropped them because it only ever added edges.
    """
    net = pd.read_csv(network_data)

    graph = nx.Graph()

    for _, page_rows in net.groupby('page_name'):
        editors = page_rows['username'].unique()
        # Register every editor so that a sole contributor still becomes a node.
        graph.add_nodes_from(editors)
        for i in range(len(editors)):
            for j in range(i + 1, len(editors)):
                graph.add_edge(editors[i], editors[j])

    return graph



In [None]:
# Rebuild the three networks from the same CSV files used for graph1-3, this
# time linking editors who contributed to the same page.
net1 = create_graph_connected_by_thread_in_same_page('datasets/PROPERTIES_FOR_DELETION_SML.csv')
net2 = create_graph_connected_by_thread_in_same_page("datasets/WIKIPROJECTS_MED.csv")
net3 = create_graph_connected_by_thread_in_same_page("datasets/USERS_LRG.csv")

### Plot the graphs

In [None]:
nx.draw(net1, with_labels = False)
plt.show()

In [None]:
nx.draw(net2, with_labels = False)
plt.show()

In [None]:
nx.draw(net3, with_labels = False)
plt.show()

In [None]:
bokeh_plot_simple(net1, 'Net 1', 4)

In [None]:
bokeh_plot_simple(net2, 'Net 2', 1)

In [None]:
bokeh_plot_simple(net3, 'Net 3', 2)

### Graph Statistics

In [None]:
print_graph_statistics(net1)

In [None]:
print_graph_statistics(net2)

In [None]:
print_graph_statistics(net3)

### Largest Component Graph Statistics

In [None]:
print_statistics_for_largest_component(net1)

In [None]:
print_statistics_for_largest_component(net2)

In [None]:
print_statistics_for_largest_component(net3)

In [None]:
get_kevin_bacon_node_for_largest_component(net1)

In [None]:
get_kevin_bacon_node_for_largest_component(net2)

In [None]:
get_kevin_bacon_node_for_largest_component(net3)

### Shortest Paths


In [None]:

# For each page-level network, keep only its largest connected component
# (by node count) as a subgraph for the shortest-path cells below.
net1_largest_component = net1.subgraph(max(nx.connected_components(net1), key=len))

net2_largest_component = net2.subgraph(max(nx.connected_components(net2), key=len))

net3_largest_component = net3.subgraph(max(nx.connected_components(net3), key=len))

In [None]:
print(f'Net 1: {check_diam_is_in_component(net1, net1_largest_component)}')
print(f'Net 2: {check_diam_is_in_component(net2, net2_largest_component)}')
print(f'Net 3: {check_diam_is_in_component(net3, net3_largest_component)}')


In [None]:
net1_farthest_nodes  = get_farthest_nodes_2_sweep(net1_largest_component)

In [None]:
net2_farthest_nodes  = get_farthest_nodes_2_sweep(net2_largest_component)

In [None]:
net3_farthest_nodes  = get_farthest_nodes_2_sweep(net3_largest_component)

In [None]:
get_shortest_path_largest_component_Dijkstra(net1_largest_component, net1_farthest_nodes)

In [None]:
get_shortest_path_largest_component_Dijkstra(net2_largest_component, net2_farthest_nodes)

In [None]:
get_shortest_path_largest_component_Dijkstra(net3_largest_component, net3_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(net1_largest_component, net1_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(net2_largest_component, net2_farthest_nodes)

In [None]:
get_shortest_path_largest_component_BF(net3_largest_component, net3_farthest_nodes)

### where on the random <-> small world <-> regular continuum

In [None]:
# Random counterparts of the page-level networks (same nodes, matching edge
# probability); the second argument False disables drawing.
net1_equivalent_random = get_equivalent_random_graph_preserve_nodes(net1, False)
net2_equivalent_random = get_equivalent_random_graph_preserve_nodes(net2, False)
net3_equivalent_random = get_equivalent_random_graph_preserve_nodes(net3, False)

# Regular (ring-lattice) counterparts on the same nodes, also without drawing.
net1_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(net1, False)
net2_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(net2, False)
net3_equivalent_regular = get_equivalent_regular_graph_preserve_nodes(net3, False)

#### Graph Statistics (For Comparison)

In [None]:
print_graph_statistics(net1_equivalent_random)
print('\n')
print_statistics_for_largest_component(net1_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(net1_equivalent_regular, True)

In [None]:
print_graph_statistics(net2_equivalent_random)
print('\n')
print_statistics_for_largest_component(net2_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(net2_equivalent_regular, True)

In [None]:
print_graph_statistics(net3_equivalent_random)
print('\n')
print_statistics_for_largest_component(net3_equivalent_random)
print('\n'*4)
print_statistics_for_largest_component(net3_equivalent_regular, True)

#### Degree Count Distributions

In [None]:
get_degree_count_distribution(net1)

In [None]:
get_degree_count_distribution(net1_equivalent_random)

In [None]:
get_degree_count_distribution(net1_equivalent_regular)

In [None]:
get_degree_count_distribution(net2)

In [None]:
get_degree_count_distribution(net2_equivalent_random)

In [None]:
get_degree_count_distribution(net2_equivalent_regular)

In [None]:
get_degree_count_distribution(net3)

In [None]:
get_degree_count_distribution(net3_equivalent_random)

In [None]:
get_degree_count_distribution(net3_equivalent_regular)

#### Node Level Comparisons

In [None]:
print_node_level_comparison(net1, net1_equivalent_random, net1_equivalent_regular)

In [None]:
print_node_level_comparison(net2, net2_equivalent_random, net2_equivalent_regular)

In [None]:
print_node_level_comparison(net3, net3_equivalent_random, net3_equivalent_regular)

#### Graph Edit Distance

In [None]:
calculate_edge_edit_distance_approximation(net1, net1_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(net1, net1_equivalent_regular)

In [None]:
calculate_edge_edit_distance_approximation(net2, net2_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(net2, net2_equivalent_regular)

In [None]:
calculate_edge_edit_distance_approximation(net3, net3_equivalent_random)
print('\n\n\n\n')
calculate_edge_edit_distance_approximation(net3, net3_equivalent_regular)