In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [34]:
import networkx as nx
import math
import json
import matplotlib.pyplot as plt
import time


class BaselineCoverTree:
    def __init__(self, nodes, distance_func):
        """
        Initialize the baseline cover tree.
        :param nodes: List of nodes in the graph or metric space.
        :param distance_func: A function that computes distances between nodes.
        """
        self.nodes = nodes
        self.distance_func = distance_func
        self.parent_child_map = {}  # To track parent-child relationships

    def build_cover_tree(self, nodes, radius_step=1, start_node=None):
        """
        Build the hierarchical cover tree using cluster-based optimization.
        :param nodes: List of nodes in the graph.
        :param radius_step: Step size to reduce the radius at each level (default: 1).
        :param start_node: The starting node for center selection.
        :return: A dictionary representing levels of the tree.
        """
        levels = {}  # Dictionary to store levels of the tree
        radius = max(self.distance_func(u, v) for u in nodes for v in nodes if u != v)  # Initial radius
        level = 0

        while radius >= 1:  # Stop at the smallest meaningful radius
            uncovered_nodes = set(nodes)
            centers = []

            # Start with the specified start_node if provided
            if start_node in uncovered_nodes:
                centers.append(start_node)
                uncovered_nodes.remove(start_node)
                # Remove all nodes within the current radius of this start_node
                uncovered_nodes -= {n for n in uncovered_nodes if self.distance_func(start_node, n) <= radius}

            # Cluster nodes based on the radius
            while uncovered_nodes:
                # Select any uncovered node as a new center
                center = uncovered_nodes.pop()
                centers.append(center)

                # Remove all nodes within the current radius of this center
                uncovered_nodes -= {n for n in uncovered_nodes if self.distance_func(center, n) <= radius}

            levels[level] = centers

            # Establish parent-child connections
            if level > 0:  # Skip the first level (no parents at level -1)
                for child in centers:
                    # Find all potential parents from the previous level
                    parents = [
                        parent for parent in levels[level - 1]
                        if self.distance_func(parent, child) <= radius * 2
                    ]
                    self.parent_child_map[child] = parents

            radius /= 2  # Halve the radius
            level += 1

        return levels

    def get_edges(self):
        """
        Retrieve edges to plot the cover tree.
        Each parent node connects to its children at the next level.
        """
        edges = []
        for child, parents in self.parent_child_map.items():
            for parent in parents:
                edges.append((parent, child))
        return edges


def compute_doubling_dimension(graph):
    """
    Compute the doubling dimension of a graph using an optimized cover tree.
    :param graph: The input graph (NetworkX graph).
    :return: Doubling dimension for all nodes as starting points.
    """
    # Shortest-path metric
    lengths = dict(nx.all_pairs_dijkstra_path_length(graph))

    def graph_distance(u, v):
        return lengths[u][v]

    # Nodes of the graph
    nodes = list(graph.nodes)
    best_doubling_dimension = float("inf")
    best_start_node = None

    for start_node in nodes:
        cover_tree = BaselineCoverTree(nodes, graph_distance)
        cover_tree.tree = cover_tree.build_cover_tree(nodes, start_node=start_node)

        # Calculate doubling constant and doubling dimension
        doubling_constant = max(
            sum(
                1
                for child in cover_tree.tree.get(level + 1, [])
                if graph_distance(parent, child) <= 2 ** level
            )
            for level, parents in cover_tree.tree.items()
            for parent in parents
        )
        doubling_dimension = math.log2(doubling_constant)

        if doubling_dimension < best_doubling_dimension:
            best_doubling_dimension = doubling_dimension
            best_start_node = start_node

    return best_start_node, best_doubling_dimension

In [35]:
def read_graph_from_json(filename):
    G = nx.Graph()
    with open(filename, "r") as f:
        data = json.load(f)

    inList = data[0]['inList']

    # Iterate through inList to create edges
    for target_node, sources in enumerate(inList):
        for source_node in sources:
            G.add_edge(source_node, target_node, weight=1)

    return G

# File containing graph data for Twitter Dataset
json_file = "drive/MyDrive/Colab Notebooks/congress_network_data.json"

# Build the graph
G = read_graph_from_json(json_file)

# Uncomment to Use Synthetic Data
# edges = [
#     ("A", "B", 1),
#     ("A", "C", 1),
#     ("A", "D", 1),
#     ("B", "E", 1),
#     ("B", "F", 1),
#     ("B", "G", 1),
#     ("C", "H", 1),
#     ("C", "I", 1),
#     ("D", "J", 1),
#     ("D", "K", 1),
#     ("E", "L", 1),
#     ("E", "M", 1),
#     ("F", "N", 1),
#     ("F", "O", 1),
#     ("G", "P", 1),
#     ("G", "Q", 1),
#     ("H", "R", 1),
#     ("H", "S", 1),
#     ("I", "T", 1),
#     ("I", "U", 1),
#     ("J", "V", 1),
#     ("J", "W", 1),
#     ("K", "X", 1),
#     ("K", "Y", 1),
#     ("L", "Z", 1),
#     ("M", "N", 1),
#     ("M", "P", 1),
#     ("N", "Q", 1),
#     ("O", "R", 1),
#     ("O", "T", 1),
#     ("P", "U", 1),
#     ("Q", "V", 1),
#     ("R", "W", 1),
#     ("S", "X", 1),
#     ("T", "Y", 1),
#     ("U", "Z", 1),
#     ("V", "M", 1),
#     ("W", "O", 1),
#     ("X", "P", 1),
#     ("Y", "R", 1),
#     ("Z", "T", 1)
# ]

# Build the graph for Synthetic Data
# G = nx.Graph()
# G.add_weighted_edges_from(edges)

In [37]:
start_time = time.time()

# Compute the doubling dimension using the optimized cover tree
start_node, best_dimension = compute_doubling_dimension(G)

end_time = time.time()


print(f"Best starting node: {start_node}")
print(f"Best doubling dimension: {best_dimension}")


execution_time = end_time - start_time
print(f"\nExecution Time: {execution_time:.6f} seconds")

Best starting node: 28
Best doubling dimension: 5.426264754702098

Execution Time: 42.965515 seconds
