In [3]:
import pandas as pd
import seaborn as sns
import networkx as nx
import numpy as np
import re
import matplotlib.pyplot as plt
import itertools
import sklearn.datasets
from sklearn.metrics.pairwise import euclidean_distances
import math
import random

In [4]:
def createRandomNetwork(n, p):
    #indices = list(range(0,n))
    edges = []
    for i in range(0, n):
        for j in range(i + 1, n):
            r = random.uniform(0,1)
            if r <= p: # Random chance
                # Now check for conditions
                #if j in edges.keys() might not be neccessary
                edges.append((i,j))
    return edges

In [5]:
n = 550
indices = list(range(0,n))
G1 = nx.Graph()
G1.add_nodes_from(indices)
edges = createRandomNetwork(n, 0.001)
G1.add_edges_from(edges)

In [6]:
edges

[(2, 242),
 (3, 181),
 (5, 25),
 (5, 420),
 (5, 476),
 (7, 432),
 (11, 77),
 (11, 173),
 (12, 201),
 (14, 16),
 (16, 463),
 (17, 363),
 (18, 440),
 (20, 537),
 (23, 246),
 (24, 83),
 (24, 148),
 (24, 506),
 (29, 69),
 (30, 410),
 (31, 473),
 (42, 158),
 (44, 256),
 (50, 539),
 (52, 57),
 (55, 427),
 (61, 85),
 (61, 463),
 (61, 522),
 (68, 383),
 (69, 373),
 (70, 495),
 (75, 201),
 (81, 318),
 (83, 196),
 (83, 221),
 (84, 466),
 (85, 398),
 (89, 266),
 (90, 347),
 (90, 368),
 (93, 395),
 (96, 173),
 (96, 224),
 (98, 393),
 (104, 380),
 (108, 508),
 (115, 405),
 (115, 501),
 (116, 423),
 (117, 265),
 (119, 193),
 (120, 457),
 (120, 520),
 (127, 529),
 (134, 421),
 (137, 387),
 (138, 266),
 (144, 481),
 (147, 350),
 (149, 274),
 (150, 165),
 (151, 226),
 (152, 460),
 (153, 277),
 (155, 448),
 (156, 307),
 (160, 502),
 (161, 468),
 (162, 237),
 (164, 382),
 (168, 274),
 (171, 417),
 (175, 397),
 (178, 476),
 (179, 485),
 (182, 331),
 (183, 297),
 (188, 238),
 (191, 494),
 (194, 344),
 (195

In [7]:
indices = list(range(0,n))
G2 = nx.Graph()
G2.add_nodes_from(indices)
edges = createRandomNetwork(n, 0.0059)
G2.add_edges_from(edges)

In [8]:
indices = list(range(0,n))
G3 = nx.Graph()
G3.add_nodes_from(indices)
edges = createRandomNetwork(n, 0.01)
G3.add_edges_from(edges)

In [9]:
df1 = nx.to_pandas_edgelist(G1)
df1.to_csv("Random1.csv")

In [10]:
df2 = nx.to_pandas_edgelist(G2)
df2.to_csv("Random2.csv")

In [11]:
df3 = nx.to_pandas_edgelist(G3)
df3.to_csv("Random3.csv")

In [12]:
nodes = list(range(0,n))
dfNodes = pd.DataFrame(data = nodes, columns=["Node"])
dfNodes.to_csv("RandomNodes.csv")

In [18]:
def save_graph_as_table(list_of_graphs):
    """
    Create a table with various graph properties for each graph in the list.
    """
    values = []
    for (G, p) in list_of_graphs:
        # Number of nodes and edges
        num_nodes = G.number_of_nodes()
        num_edges = G.number_of_edges()
        
        # Average degree
        avg_degree = 2 * num_edges / num_nodes if num_nodes > 0 else 0
        
        # Density
        density = nx.density(G)
        
        # Connected components
        components = [len(c) for c in nx.connected_components(G)]
        num_components = len(components)
        largest_component_size = max(components) if components else 0
        
        # Diameter of the largest component (if applicable)
        if num_components > 1:
            largest_component = max(nx.connected_components(G), key=len)
            subgraph = G.subgraph(largest_component)
            diameter = nx.diameter(subgraph) if nx.is_connected(subgraph) else float('inf')
        else:
            diameter = nx.diameter(G) if nx.is_connected(G) else float('inf')
        
        # Average path length (computed per component)
        try:
            avg_path_length = nx.average_shortest_path_length(G)
        except nx.NetworkXError:
            avg_path_length = float('inf')
        
        # Clustering coefficient
        clustering_coefficient = nx.average_clustering(G)
        
        # Degree distribution (list of degrees of all nodes)
        degree_distribution = [d for _, d in G.degree()]
        
        # Community structure
        communities = list(nx.community.greedy_modularity_communities(G))
        modularity = nx.algorithms.community.modularity(G, communities)
        
        # Centralities
        degree_centrality = nx.degree_centrality(G)
        betweenness_centrality = nx.betweenness_centrality(G)
        
        # Adding all data to a dictionary for the table row
        values.append({
            #"Nodes": num_nodes,
            #"Edges": num_edges,
            "p": p,
            "Average Degree": avg_degree,
            "Density": density,
            "Components": num_components,
            "Component Sizes": components,
            "Largest Component Size": largest_component_size,
            "Diameter": diameter,
            "Average Path Length": avg_path_length,
            "Clustering Coefficient": clustering_coefficient,
            "Degree Distribution": degree_distribution,
            "Modularity": modularity,
            "Degree Centrality": degree_centrality,
            "Betweenness Centrality": betweenness_centrality
        })
    
    # Convert the list of dictionaries into a pandas DataFrame for tabular representation
    table = pd.DataFrame(values)
    display(table)
    
    table.to_csv("graph_metrics.csv", index=False)

In [19]:
save_graph_as_table([(G1, 0.001),(G2, 0.0059),(G3, 0.01)])

Unnamed: 0,p,Average Degree,Density,Components,Component Sizes,Largest Component Size,Diameter,Average Path Length,Clustering Coefficient,Degree Distribution,Modularity,Degree Centrality,Betweenness Centrality
0,0.001,0.509091,0.000927,410,"[1, 1, 3, 2, 1, 5, 1, 2, 1, 1, 1, 5, 3, 1, 7, ...",7,5,inf,0.0,"[0, 0, 1, 1, 0, 3, 0, 1, 0, 0, 0, 2, 1, 0, 1, ...",0.983367,"{0: 0.0, 1: 0.0, 2: 0.0018214936247723133, 3: ...","{0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 3...."
1,0.0059,3.123636,0.00569,27,"[524, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...",524,13,inf,0.006128,"[1, 3, 3, 3, 6, 2, 2, 4, 3, 0, 0, 4, 3, 3, 5, ...",0.603175,"{0: 0.0018214936247723133, 1: 0.00546448087431...","{0: 0.0, 1: 0.003544015929124829, 2: 0.0100819..."
2,0.01,5.647273,0.010286,3,"[548, 1, 1]",548,7,inf,0.008605,"[1, 7, 5, 4, 8, 5, 6, 7, 6, 6, 9, 5, 5, 6, 5, ...",0.397378,"{0: 0.0018214936247723133, 1: 0.01275045537340...","{0: 0.0, 1: 0.006629498502852745, 2: 0.0021636..."


In [None]:
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import numpy as np

def getStats(G):
    # Example graph (replace with your own)

    # Calculate graph properties
    graph_properties = {}

    components = [len(c) for c in nx.connected_components(G)]
    num_components = len(components)
    largest_component_size = max(components) if components else 0
    # Počet komponent souvislosti (Number of connected components)
    graph_properties['num_components'] = len(components)

    # Distribuce velikosti komponent souvislosti (Size distribution of connected components)
    graph_properties['component_sizes'] = components

    # Velikost největší komponenty souvislosti (Size of the largest connected component)
    graph_properties['largest_component_size'] = largest_component_size

    # Průměr (Diameter) - per component
    if nx.is_connected(G):
        graph_properties['diameter'] = nx.diameter(G)
    else:
        graph_properties['diameters'] = {
            f"component_{i}": nx.diameter(G.subgraph(c))
            for i, c in enumerate(nx.connected_components(G))
        }

    # Průměrná vzdálenost (Average distance)
    if nx.is_connected(G):
        graph_properties['avg_distance'] = nx.average_shortest_path_length(G)
    else:
        graph_properties['avg_distances'] = {
            f"component_{i}": nx.average_shortest_path_length(G.subgraph(c))
            for i, c in enumerate(nx.connected_components(G))
        }

    # Shlukovací koeficient (Clustering coefficient)
    graph_properties['clustering_coefficient'] = nx.average_clustering(G)

    # Distribuce stupňů (Degree distribution)
    graph_properties['degree_distribution'] = [d for n, d in G.degree()]

    # Komunitní struktura (Community structure)
    communities = list(greedy_modularity_communities(G))
    graph_properties['num_communities'] = len(communities)
    graph_properties['community_sizes'] = [len(c) for c in communities]

    # Centrality measures
    graph_properties['centralities'] = {
        'degree_centrality': nx.degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
    }

    # Display the dictionary
    for key, value in graph_properties.items():
        print(f"{key}: {value}")
    
    return graph_properties


In [28]:
stats = []
stats.append(getStats(G1))
stats.append(getStats(G2))
stats.append(getStats(G3))
dfStats = pd.DataFrame(stats)
display(dfStats)

NetworkXError: Graph is not connected.