# Experiement 1 - Parallel Processing using Dask alone

1. Graph Building

2. Querying

3. Subgraph Visualization

4. Updation, Deletion and Creation of Node Atrributes, Edge Attributes, Nodes and Edges

In [1]:
#!pip install dask[complete] graphviz

In [18]:
#!pip install plotly

Collecting plotly
  Downloading plotly-5.24.1-py3-none-any.whl.metadata (7.3 kB)
Collecting tenacity>=6.2.0 (from plotly)
  Downloading tenacity-9.0.0-py3-none-any.whl.metadata (1.2 kB)
Downloading plotly-5.24.1-py3-none-any.whl (19.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading tenacity-9.0.0-py3-none-any.whl (28 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.24.1 tenacity-9.0.0


In [1]:
# import dask.dataframe as dd
import pandas as pd 

df = pd.read_csv('levels1mill.csv')


In [16]:
import networkx as nx
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from typing import Dict, List, Set, Tuple
from collections import defaultdict
import numpy as np
from functools import partial

class LevelBasedGraph:
    def __init__(self):
        self.G = nx.DiGraph()
        self.level_index = defaultdict(set)  # Quick access to nodes by level
        self.node_level_map = {}  # Quick lookup for node levels
        self._lock = mp.Lock()
        
    def bulk_create_from_df(self, df: pd.DataFrame, num_workers: int = mp.cpu_count()):
        """Parallel graph construction using level-based partitioning"""
        # Group data by level
        level_groups = dict(tuple(df.groupby('levelno')))
        
        # Create level-specific subgraphs in parallel
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = []
            for level, level_df in level_groups.items():
                futures.append(
                    executor.submit(self._create_level_subgraph, level_df)
                )
            
            # Combine results
            for future in futures:
                subgraph, level_nodes = future.result()
                with self._lock:
                    self.G.update(subgraph)
                    level = level_nodes[0][1]  # Get level from first node
                    self.level_index[level].update(n[0] for n in level_nodes)
                    self.node_level_map.update(dict(level_nodes))

    @staticmethod
    def _create_level_subgraph(df: pd.DataFrame) -> Tuple[nx.DiGraph, List[Tuple]]:
        """Create a subgraph for a specific level"""
        subG = nx.DiGraph()
        
        # Prepare node and edge data
        nodes_data = [(row['nodeid'], row['levelno']) for _, row in df.iterrows()]
        edges_data = [(row['nodeid'], row['foreignkey']) for _, row in df.iterrows()]
        
        # Add nodes with attributes
        node_attrs = {
            row['nodeid']: {
                'node_type': row['node_type'],
                'node_weight': row['node_weight'],
                'node_expiry': row['node_expiry']
            }
            for _, row in df.iterrows()
        }
        
        # Add edges with attributes
        edge_attrs = {
            (row['nodeid'], row['foreignkey']): {
                'edge_cost': row['edge_cost'],
                'edge_number': row['edge_number'],
                'time_stamp': row['time_stamp']
            }
            for _, row in df.iterrows()
        }
        
        subG.add_nodes_from(n[0] for n in nodes_data)
        nx.set_node_attributes(subG, node_attrs)
        subG.add_edges_from(edges_data)
        nx.set_edge_attributes(subG, edge_attrs)
        
        return subG, nodes_data

    def get_subgraph_by_node(self, node_id: str, levels_up: int = 1, levels_down: int = 1) -> nx.DiGraph:
        """Efficiently extract subgraph around a node using level-based traversal"""
        if node_id not in self.node_level_map:
            return nx.DiGraph()
            
        current_level = self.node_level_map[node_id]
        relevant_levels = range(
            max(1, current_level - levels_up),
            min(max(self.level_index.keys()) + 1, current_level + levels_down + 1)
        )
        
        # Get nodes in relevant levels that are connected to our target node
        nodes_to_check = {node_id}
        for level in relevant_levels:
            level_nodes = self.level_index[level]
            connected_nodes = set()
            
            for node in nodes_to_check:
                # Get predecessors and successors in the current level
                predecessors = set(self.G.predecessors(node)) & level_nodes
                successors = set(self.G.successors(node)) & level_nodes
                connected_nodes.update(predecessors | successors)
            
            nodes_to_check.update(connected_nodes)
        
        # Extract the subgraph
        return self.G.subgraph(nodes_to_check).copy()

    # def parallel_update_attributes(self, updates: List[Tuple[str, Dict]], attr_type: str = 'node'):
    #     """Parallel attribute updates for nodes or edges"""
    #     def update_chunk(chunk):
    #         with self._lock:
    #             if attr_type == 'node':
    #                 nx.set_node_attributes(self.G, dict(chunk))
    #             else:
    #                 nx.set_edge_attributes(self.G, dict(chunk))
        
    #     # Split updates into chunks for parallel processing
    #     chunk_size = len(updates) // mp.cpu_count()
    #     chunks = [updates[i:i + chunk_size] for i in range(0, len(updates), chunk_size)]
        
    #     with ProcessPoolExecutor() as executor:
    #         executor.map(update_chunk, chunks)

    # def delete_nodes_and_update(self, nodes_to_delete: Set[str]):
    #     """Efficiently delete nodes and update indexes"""
    #     with self._lock:
    #         # Update level index
    #         for level, nodes in self.level_index.items():
    #             self.level_index[level] = nodes - nodes_to_delete
            
    #         # Update node level map
    #         for node in nodes_to_delete:
    #             self.node_level_map.pop(node, None)
            
    #         # Remove from main graph
    #         self.G.remove_nodes_from(nodes_to_delete)

# Usage example
def create_and_query_graph(csv_file: str):
    # Read CSV
    df = pd.read_csv("levels1mill.csv")
    
    # Create graph
    graph = LevelBasedGraph()
    graph.bulk_create_from_df(df)
    
    # Get subgraph example
    subgraph = graph.get_subgraph_by_node('versyskiyo', levels_up=1, levels_down=2)
    print(subgraph)
    
    return graph, subgraph

# # Example parallel attribute update
# def update_node_attributes(graph: LevelBasedGraph, updates: List[Tuple[str, Dict]]):
#     graph.parallel_update_attributes(updates, attr_type='node')



In [3]:
import networkx as nx
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from typing import Dict, List, Set, Tuple, Any
from collections import defaultdict, deque
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import plotly.graph_objects as go
import numpy as np
from dataclasses import dataclass
import time

@dataclass
class Message:
    source: str
    target: str
    data: Any

class PregelGraphProcessor:
    def __init__(self):
        self.G = nx.DiGraph()
        self.level_index = defaultdict(set)
        self.node_level_map = {}
        self.message_queues = defaultdict(deque)
        self.visualization_queue = mp.Queue()
        self._lock = mp.Lock()

    def bulk_create_from_df(self, df: pd.DataFrame, num_workers: int = mp.cpu_count()):
        """Parallel graph construction using level-based partitioning"""
        # Group data by level
        level_groups = dict(tuple(df.groupby('levelno')))
        
        # Create level-specific subgraphs in parallel
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = []
            for level, level_df in level_groups.items():
                futures.append(
                    executor.submit(self._create_level_subgraph, level_df)
                )
            
            # Combine results
            for future in futures:
                subgraph, level_nodes = future.result()
                with self._lock:
                    self.G.update(subgraph)
                    level = level_nodes[0][1]  # Get level from first node
                    self.level_index[level].update(n[0] for n in level_nodes)
                    self.node_level_map.update(dict(level_nodes))
    @staticmethod
    def _create_level_subgraph(df: pd.DataFrame) -> Tuple[nx.DiGraph, List[Tuple]]:
        """Create a subgraph for a specific level"""
        subG = nx.DiGraph()
        
        # Prepare node and edge data
        nodes_data = [(row['nodeid'], row['levelno']) for _, row in df.iterrows()]
        edges_data = [(row['nodeid'], row['foreignkey']) for _, row in df.iterrows()]
        
        # Add nodes with attributes
        node_attrs = {
            row['nodeid']: {
                'node_type': row['node_type'],
                'node_weight': row['node_weight'],
                'node_expiry': row['node_expiry']
            }
            for _, row in df.iterrows()
        }
        
        # Add edges with attributes
        edge_attrs = {
            (row['nodeid'], row['foreignkey']): {
                'edge_cost': row['edge_cost'],
                'edge_number': row['edge_number'],
                'time_stamp': row['time_stamp']
            }
            for _, row in df.iterrows()
        }
        
        subG.add_nodes_from(n[0] for n in nodes_data)
        nx.set_node_attributes(subG, node_attrs)
        subG.add_edges_from(edges_data)
        nx.set_edge_attributes(subG, edge_attrs)
        
        return subG, nodes_data
    
    def get_subgraph_by_node(self, node_id: str, levels_up: int = 1, levels_down: int = 1) -> nx.DiGraph:
        """Efficiently extract subgraph around a node using level-based traversal"""
        if node_id not in self.node_level_map:
            return nx.DiGraph()
            
        current_level = self.node_level_map[node_id]
        relevant_levels = range(
            max(1, current_level - levels_up),
            min(max(self.level_index.keys()) + 1, current_level + levels_down + 1)
        )
        
        # Get nodes in relevant levels that are connected to our target node
        nodes_to_check = {node_id}
        for level in relevant_levels:
            level_nodes = self.level_index[level]
            connected_nodes = set()
            
            for node in nodes_to_check:
                # Get predecessors and successors in the current level
                predecessors = set(self.G.predecessors(node)) & level_nodes
                successors = set(self.G.successors(node)) & level_nodes
                connected_nodes.update(predecessors | successors)
            
            nodes_to_check.update(connected_nodes)
        
        # Extract the subgraph
        return self.G.subgraph(nodes_to_check).copy()

        
    def process_messages(self, vertex: str, messages: List[Message]) -> List[Message]:
        """Process messages for a vertex in Pregel style"""
        with self._lock:
            vertex_data = self.G.nodes[vertex]
            level = self.node_level_map[vertex]
            
            # Process incoming messages and generate outgoing messages
            outgoing_messages = []
            for msg in messages:
                if msg.data['type'] == 'update':
                    # Update vertex attributes
                    self.G.nodes[vertex].update(msg.data['attributes'])
                elif msg.data['type'] == 'delete':
                    # Mark for deletion
                    self.level_index[level].remove(vertex)
                    neighbors = list(self.G.predecessors(vertex)) + list(self.G.successors(vertex))
                    self.G.remove_node(vertex)
                    # Notify neighbors
                    for neighbor in neighbors:
                        outgoing_messages.append(Message(
                            source=vertex,
                            target=neighbor,
                            data={'type': 'neighbor_deleted'}
                        ))
            
            return outgoing_messages

    def visualize_subgraph(self, center_node: str, levels_up: int = 1, levels_down: int = 1) -> go.Figure:
        """Create interactive visualization of subgraph using Plotly"""
        subgraph = self.get_subgraph_by_node(center_node, levels_up, levels_down)
        
        # Create layout using Kamada-Kawai algorithm for better visualization
        pos = nx.kamada_kawai_layout(subgraph)
        
        # Prepare node traces with different colors for different levels
        node_traces = []
        edge_traces = []
        
        # Create color map for levels
        unique_levels = set(self.node_level_map[n] for n in subgraph.nodes())
        color_map = plt.cm.get_cmap('viridis')(np.linspace(0, 1, len(unique_levels)))
        level_colors = dict(zip(unique_levels, color_map))
        
        # Create nodes
        for node in subgraph.nodes():
            level = self.node_level_map[node]
            color = f'rgb({",".join(map(str, level_colors[level][:3]*255))})'
            
            node_trace = go.Scatter(
                x=[pos[node][0]],
                y=[pos[node][1]],
                mode='markers+text',
                name=f'Level {level}',
                marker=dict(
                    size=20,
                    color=color,
                    line=dict(width=2)
                ),
                text=[node],
                hoverinfo='text',
                showlegend=False
            )
            node_traces.append(node_trace)
        
        # Create edges
        edge_x = []
        edge_y = []
        for edge in subgraph.edges():
            x0, y0 = pos[edge[0]]
            x1, y1 = pos[edge[1]]
            edge_x.extend([x0, x1, None])
            edge_y.extend([y0, y1, None])
            
        edge_trace = go.Scatter(
            x=edge_x,
            y=edge_y,
            mode='lines',
            line=dict(width=1, color='#888'),
            hoverinfo='none',
            showlegend=False
        )
        
        # Create figure
        fig = go.Figure(data=[edge_trace] + node_traces,
                     layout=go.Layout(
                         showlegend=False,
                         hovermode='closest',
                         margin=dict(b=20,l=5,r=5,t=40),
                         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                         title=f'Subgraph centered on {center_node}'
                     ))
        
        return fig

    def run_parallel_operations(self, operations: List[Dict]):
        """Run parallel operations using Pregel-like processing"""
        with ProcessPoolExecutor() as executor:
            futures = []
            for op in operations:
                if op['type'] == 'update':
                    futures.append(executor.submit(
                        self.send_message,
                        op['target'],
                        Message(source='system', target=op['target'], data=op)
                    ))
                elif op['type'] == 'delete':
                    futures.append(executor.submit(
                        self.process_deletion,
                        op['target']
                    ))
            
            # Wait for all operations to complete
            for future in futures:
                future.result()

    def process_deletion(self, node: str):
        """Process node deletion with cascading updates"""
        with self._lock:
            if node in self.G:
                level = self.node_level_map[node]
                # Get affected subgraph
                subgraph = self.extract_subgraph(node, levels_up=1, levels_down=1)
                # Remove node and update indexes
                self.level_index[level].remove(node)
                del self.node_level_map[node]
                self.G.remove_node(node)
                # Visualize affected subgraph
                self.visualize_subgraph(list(subgraph.nodes())[0] if subgraph.nodes() else None)

def demo_usage():
    """Demonstrate usage of the enhanced graph processor"""
    # Initialize processor
    processor = PregelGraphProcessor()
    
    # Load data
    df = pd.read_csv('levels1mill.csv')
    
    # Create initial graph
    start_time = time.time()
    processor.bulk_create_from_df(df)
    print(f"Graph creation time: {time.time() - start_time:.2f} seconds")
    
    # Example: Visualize subgraph
    start_time = time.time()
    fig = processor.visualize_subgraph('versyskiyo')
    print(f"Visualization time: {time.time() - start_time:.2f} seconds")
    
    # Save or display the visualization
    fig.write_html("subgraph_visualization.html")
    
    # Example: Run parallel operations
    operations = [
        {'type': 'update', 'target': 'node1', 'attributes': {'weight': 1.5}},
        {'type': 'delete', 'target': 'node2'},
        # Add more operations as needed
    ]
    
    start_time = time.time()
    processor.run_parallel_operations(operations)
    print(f"Parallel operations time: {time.time() - start_time:.2f} seconds")

if __name__ == "__main__":
    demo_usage()

Graph creation time: 31.79 seconds


MemoryError: Unable to allocate 298. GiB for an array with shape (200009, 200009) and data type float64

In [5]:
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from typing import Dict, Set, Optional
from collections import defaultdict
import time

class MemoryEfficientVisualizer:
    def __init__(self):
        self.G = nx.DiGraph()
        self.level_index = defaultdict(set)
        self.node_level_map = {}
        self.layout_cache = {}
        
    def extract_bounded_subgraph(self, center_node: str, max_nodes: int = 1000) -> nx.DiGraph:
        """Extract a bounded subgraph using BFS with node limit"""
        if center_node not in self.G:
            return nx.DiGraph()
            
        subgraph_nodes = {center_node}
        queue = [(center_node, 0)]
        current_level = self.node_level_map[center_node]
        level_range = 1
        
        while queue and len(subgraph_nodes) < max_nodes:
            node, depth = queue.pop(0)
            
            # Get neighbors within level range
            neighbors = set()
            for neighbor in self.G.predecessors(node):
                if self.node_level_map[neighbor] >= current_level - level_range:
                    neighbors.add(neighbor)
            for neighbor in self.G.successors(node):
                if self.node_level_map[neighbor] <= current_level + level_range:
                    neighbors.add(neighbor)
            
            # Add neighbors to subgraph
            for neighbor in neighbors:
                if neighbor not in subgraph_nodes and len(subgraph_nodes) < max_nodes:
                    subgraph_nodes.add(neighbor)
                    queue.append((neighbor, depth + 1))
        
        return self.G.subgraph(subgraph_nodes).copy()

    def calculate_efficient_layout(self, subgraph: nx.DiGraph) -> Dict:
        """Calculate layout using a memory-efficient approach"""
        # Use spring_layout with optimized parameters for large graphs
        pos = nx.spring_layout(
            subgraph,
            k=1/np.sqrt(len(subgraph)),  # Optimal distance between nodes
            iterations=50,  # Reduce iterations for speed
            seed=42  # For consistency
        )
        return pos

    def create_interactive_visualization(self, center_node: str, max_nodes: int = 1000) -> go.Figure:
        """Create memory-efficient interactive visualization"""
        start_time = time.time()
        
        # Extract bounded subgraph
        subgraph = self.extract_bounded_subgraph(center_node, max_nodes)
        print(f"Subgraph extraction time: {time.time() - start_time:.2f}s")
        print(f"Subgraph size: {len(subgraph)} nodes")
        
        # Calculate layout
        layout_time = time.time()
        pos = self.calculate_efficient_layout(subgraph)
        print(f"Layout calculation time: {time.time() - layout_time:.2f}s")
        
        # Prepare visualization data
        node_traces = []
        
        # Create color map for levels
        unique_levels = set(self.node_level_map[n] for n in subgraph.nodes())
        colors = plt.cm.get_cmap('viridis')(np.linspace(0, 1, len(unique_levels)))
        level_colors = dict(zip(unique_levels, colors))
        
        # Create edges (batch processing)
        edge_x = []
        edge_y = []
        for edge in subgraph.edges():
            x0, y0 = pos[edge[0]]
            x1, y1 = pos[edge[1]]
            edge_x.extend([x0, x1, None])
            edge_y.extend([y0, y1, None])
        
        edge_trace = go.Scatter(
            x=edge_x,
            y=edge_y,
            mode='lines',
            line=dict(width=0.5, color='#888'),
            hoverinfo='none'
        )
        
        # Create nodes (batch processing)
        for level in unique_levels:
            level_nodes = [n for n in subgraph.nodes() if self.node_level_map[n] == level]
            if not level_nodes:
                continue
                
            node_x = [pos[node][0] for node in level_nodes]
            node_y = [pos[node][1] for node in level_nodes]
            
            node_trace = go.Scatter(
                x=node_x,
                y=node_y,
                mode='markers+text',
                name=f'Level {level}',
                marker=dict(
                    size=10,
                    color=f'rgb({",".join(map(str, level_colors[level][:3]*255))})',
                    line=dict(width=1)
                ),
                text=level_nodes,
                textposition="top center",
                hoverinfo='text'
            )
            node_traces.append(node_trace)
        
        # Create figure with minimal layout options
        fig = go.Figure(
            data=[edge_trace] + node_traces,
            layout=go.Layout(
                showlegend=False,
                margin=dict(b=20,l=5,r=5,t=40),
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                title=f'Subgraph centered on {center_node} (showing {len(subgraph)} nodes)'
            )
        )
        
        print(f"Total visualization time: {time.time() - start_time:.2f}s")
        return fig

    def bulk_create_from_df(self, df: pd.DataFrame):
        """Efficiently create graph from DataFrame"""
        # Create nodes and edges in bulk
        nodes = df['nodeid'].unique()
        edges = df[['nodeid', 'foreignkey']].values
        
        # Add nodes and edges to graph
        self.G.add_nodes_from(nodes)
        self.G.add_edges_from(edges)
        
        # Build level index
        for _, row in df.iterrows():
            node_id = row['nodeid']
            level = row['levelno']
            self.level_index[level].add(node_id)
            self.node_level_map[node_id] = level

def demo_usage():
    """Demonstrate usage with memory monitoring"""
    import psutil
    process = psutil.Process()
    
    # Initialize visualizer
    visualizer = MemoryEfficientVisualizer()
    
    # Load data
    print(f"Initial memory usage: {process.memory_info().rss / 1024 / 1024:.2f} MB")
    df = pd.read_csv('levels1mill.csv')
    
    # Create graph
    start_time = time.time()
    visualizer.bulk_create_from_df(df)
    print(f"Graph creation time: {time.time() - start_time:.2f}s")
    print(f"Memory usage after graph creation: {process.memory_info().rss / 1024 / 1024:.2f} MB")
    
    # Create visualization
    start_time = time.time()
    fig = visualizer.create_interactive_visualization('versyskiyo', max_nodes=1000)
    print(f"Visualization time: {time.time() - start_time:.2f}s")
    print(f"Final memory usage: {process.memory_info().rss / 1024 / 1024:.2f} MB")
    
    # Save visualization
    fig.write_html("memory_efficient_visualization.html")

if __name__ == "__main__":
    demo_usage()

Initial memory usage: 7104.22 MB
Graph creation time: 37.44s
Memory usage after graph creation: 2471.36 MB
Subgraph extraction time: 0.22s
Subgraph size: 1000 nodes
Layout calculation time: 2.84s
Total visualization time: 3.12s
Visualization time: 3.12s
Final memory usage: 2487.48 MB


  colors = plt.cm.get_cmap('viridis')(np.linspace(0, 1, len(unique_levels)))


In [9]:
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from typing import Dict, Set, Optional
from collections import defaultdict
import time
import json

class AttributeVisualizer:
    def __init__(self):
        self.G = nx.DiGraph()
        self.level_index = defaultdict(set)
        self.node_level_map = {}
        
    def bulk_create_from_df(self, df: pd.DataFrame):
        """Create graph with all attributes from DataFrame"""
        # Create nodes with attributes
        for _, row in df.iterrows():
            node_id = row['nodeid']
            self.G.add_node(
                node_id,
                level=row['levelno'],
                node_type=row['node_type'],
                node_weight=row['node_weight'],
                node_expiry=row['node_expiry']
            )
            self.level_index[row['levelno']].add(node_id)
            self.node_level_map[node_id] = row['levelno']
            
            # Add edge with attributes if foreignkey exists
            if pd.notna(row['foreignkey']):
                self.G.add_edge(
                    node_id,
                    row['foreignkey'],
                    edge_cost=row['edge_cost'],
                    edge_number=row['edge_number'],
                    time_stamp=row['time_stamp']
                )

    def create_interactive_visualization(self, center_node: str, max_nodes: int = 1000) -> go.Figure:
        """Create visualization with detailed hover information"""
        start_time = time.time()
        
        # Extract bounded subgraph
        subgraph = self.extract_bounded_subgraph(center_node, max_nodes)
        print(f"Subgraph extraction time: {time.time() - start_time:.2f}s")
        
        # Calculate layout
        pos = nx.spring_layout(
            subgraph,
            k=1/np.sqrt(len(subgraph)),
            iterations=50,
            seed=42
        )
        
        # Prepare node traces with hover information
        node_traces = []
        unique_levels = set(self.node_level_map[n] for n in subgraph.nodes())
        colors = plt.cm.get_cmap('viridis')(np.linspace(0, 1, len(unique_levels)))
        level_colors = dict(zip(unique_levels, colors))
        
        # Create node traces by level with hover text
        for level in unique_levels:
            level_nodes = [n for n in subgraph.nodes() if self.node_level_map[n] == level]
            if not level_nodes:
                continue
                
            node_x = []
            node_y = []
            hover_texts = []
            
            for node in level_nodes:
                node_x.append(pos[node][0])
                node_y.append(pos[node][1])
                
                # Create detailed hover text with all node attributes
                attrs = subgraph.nodes[node]
                hover_text = f"""
                Node ID: {node}
                Level: {attrs.get('level', 'N/A')}
                Type: {attrs.get('node_type', 'N/A')}
                Weight: {attrs.get('node_weight', 'N/A')}
                Expiry: {attrs.get('node_expiry', 'N/A')}
                """
                hover_texts.append(hover_text)
            
            node_trace = go.Scatter(
                x=node_x,
                y=node_y,
                mode='markers+text',
                name=f'Level {level}',
                marker=dict(
                    size=15,
                    color=f'rgb({",".join(map(str, level_colors[level][:3]*255))})',
                    line=dict(width=1)
                ),
                text=[n[:10] + '...' if len(n) > 10 else n for n in level_nodes],
                textposition="top center",
                hovertext=hover_texts,
                hoverinfo='text'
            )
            node_traces.append(node_trace)
        
        # Create edge trace with hover information
        edge_x = []
        edge_y = []
        edge_hover_texts = []
        
        for edge in subgraph.edges():
            x0, y0 = pos[edge[0]]
            x1, y1 = pos[edge[1]]
            edge_x.extend([x0, x1, None])
            edge_y.extend([y0, y1, None])
            
            # Create hover text for edge with all edge attributes
            edge_attrs = subgraph.edges[edge]
            hover_text = f"""
            From: {edge[0]}
            To: {edge[1]}
            Cost: {edge_attrs.get('edge_cost', 'N/A')}
            Number: {edge_attrs.get('edge_number', 'N/A')}
            Timestamp: {edge_attrs.get('time_stamp', 'N/A')}
            """
            edge_hover_texts.extend([hover_text, hover_text, None])
        
        edge_trace = go.Scatter(
            x=edge_x,
            y=edge_y,
            mode='lines',
            line=dict(width=1, color='#888'),
            hovertext=edge_hover_texts,
            hoverinfo='text'
        )
        
        # Create figure with custom hover layout
        fig = go.Figure(
            data=[edge_trace] + node_traces,
            layout=go.Layout(
                title=f'Subgraph centered on {center_node} (showing {len(subgraph)} nodes)',
                showlegend=True,
                hovermode='closest',
                margin=dict(b=20, l=5, r=5, t=40),
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                # Customize hover label style
                hoverlabel=dict(
                    bgcolor="white",
                    font_size=12,
                    font_family="Arial"
                )
            )
        )
        
        print(f"Total visualization time: {time.time() - start_time:.2f}s")
        return fig

    def extract_bounded_subgraph(self, center_node: str, max_nodes: int = 1000) -> nx.DiGraph:
        """Extract bounded subgraph with all attributes"""
        if center_node not in self.G:
            return nx.DiGraph()
            
        subgraph_nodes = {center_node}
        queue = [(center_node, 0)]
        current_level = self.node_level_map[center_node]
        level_range = 1
        
        while queue and len(subgraph_nodes) < max_nodes:
            node, depth = queue.pop(0)
            
            for neighbor in self.G.predecessors(node):
                if (neighbor not in subgraph_nodes and 
                    len(subgraph_nodes) < max_nodes and 
                    self.node_level_map[neighbor] >= current_level - level_range):
                    subgraph_nodes.add(neighbor)
                    queue.append((neighbor, depth + 1))
                    
            for neighbor in self.G.successors(node):
                if (neighbor not in subgraph_nodes and 
                    len(subgraph_nodes) < max_nodes and 
                    self.node_level_map[neighbor] <= current_level + level_range):
                    subgraph_nodes.add(neighbor)
                    queue.append((neighbor, depth + 1))
        
        return self.G.subgraph(subgraph_nodes).copy()

def demo_usage():
    """Demonstrate usage with hover information"""
    # Initialize visualizer
    visualizer = AttributeVisualizer()
    
    # Load data
    df = pd.read_csv('levels1mill.csv')
    print(f"Loaded {len(df)} rows of data")
    
    # Create graph
    start_time = time.time()
    visualizer.bulk_create_from_df(df)
    print(f"Graph creation time: {time.time() - start_time:.2f}s")
    
    # Create and save visualization
    fig = visualizer.create_interactive_visualization('versyskiyo', max_nodes=10000)
    fig.write_html("interactive_visualization.html")
    print("Visualization saved to 'interactive_visualization.html'")

if __name__ == "__main__":
    demo_usage()

Loaded 1700013 rows of data
Graph creation time: 55.47s
Subgraph extraction time: 0.06s



The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.



Total visualization time: 163.33s
Visualization saved to 'interactive_visualization10k.html'
