In [1]:
import random
import string
import networkx as nx
from typing import Dict, Any, List, Tuple, Optional, Set
import json
from collections import defaultdict
import uuid


class NetworkGenerator:
    """Build a list of "create" operations for a synthetic network.

    The generator reads a schema dict with two sections:

    * ``schema["nodes"][node_type]`` holds ``"usage"`` (``"core"`` or
      ``"supplement"``) and ``"features"`` (feature name -> feature type).
    * ``schema["edges"][edge_type]`` holds ``"source"`` and ``"target"``
      (node type names) and ``"features"``.

    Core node types form a hierarchy and receive hierarchical IDs ("1",
    "1-1", "1-1-1", ...). Supplement node types receive UUID4 IDs and are
    linked to randomly chosen target nodes.
    """

    def __init__(self, schema_json: Dict[str, Any]):
        """Store the schema and split its node types by ``usage``."""
        self.schema = schema_json
        # node type -> node id -> the "create" operation that produced the node
        self.node_instances: Dict[str, Dict[str, Dict[str, Any]]] = {}
        # edge type -> set of (source_id, target_id, edge_type) already emitted
        self.edge_instances: Dict[str, set] = defaultdict(set)
        # Counter stamped on every operation. create_network() does not reset
        # it, so repeated calls on one instance keep counting upwards.
        self.timestamp = 0
        self.operations = []

        # Separate core and supplement nodes
        self.core_nodes = {
            node_type: info
            for node_type, info in schema_json["nodes"].items()
            if info["usage"] == "core"
        }
        self.supplement_nodes = {
            node_type: info
            for node_type, info in schema_json["nodes"].items()
            if info["usage"] == "supplement"
        }

    def generate_random_value(self, feature_type: str) -> Any:
        """Return a random value for ``feature_type``.

        ``"string"`` -> 8 random ASCII letters, ``"float"`` -> a float in
        [1, 1000] rounded to 2 decimals, ``"integer"`` -> an int in [1, 100].
        Any other feature type yields ``None``.
        """
        if feature_type == "string":
            return "".join(random.choices(string.ascii_letters, k=8))
        elif feature_type == "float":
            return round(random.uniform(1, 1000), 2)
        elif feature_type == "integer":
            return random.randint(1, 100)
        else:
            return None

    def _get_core_topology(self) -> List[str]:
        """Return the core node types ordered from root to leaf.

        Raises:
            ValueError: if the edges between core node types form a cycle.
        """
        graph = nx.DiGraph()

        # Add every core type first so types without core edges still appear.
        for node_type in self.core_nodes:
            graph.add_node(node_type)

        # Only edges whose both endpoints are core types shape the hierarchy.
        for edge_info in self.schema["edges"].values():
            source_type = edge_info["source"]
            target_type = edge_info["target"]
            if source_type in self.core_nodes and target_type in self.core_nodes:
                graph.add_edge(source_type, target_type)

        try:
            # A topological sort lists an edge's source before its target,
            # i.e. parents before children. create_network() needs the parent
            # instances to exist before it creates their children, so the
            # order must NOT be reversed.
            return list(nx.topological_sort(graph))
        except nx.NetworkXUnfeasible as exc:
            raise ValueError("Core node hierarchy contains cycles") from exc

    def _node_exists(self, node_type: str, node_id: str) -> bool:
        """Check if a node with the given type and ID already exists."""
        return (
            node_type in self.node_instances
            and node_id in self.node_instances[node_type]
        )

    def _get_parent_type_and_edge(self, node_type: str) -> Optional[Tuple[str, str]]:
        """Return ``(parent_type, edge_type)`` for a core node type.

        The first schema edge that targets ``node_type`` from a core source
        type wins. Returns ``None`` when ``node_type`` is not a core type or
        no such edge exists (i.e. it is a root of the core hierarchy).
        """
        for edge_type, edge_info in self.schema["edges"].items():
            if (
                edge_info["target"] == node_type
                and edge_info["source"] in self.core_nodes
                and node_type in self.core_nodes
            ):
                return edge_info["source"], edge_type
        return None

    def _create_core_node(self, node_type: str, node_id: str) -> str:
        """Emit a "create" operation for one node and register the node.

        Does nothing when a node with this type and ID already exists.
        The ``"id"`` feature is set to ``node_id``; every other feature gets
        a random value. Returns ``node_id``.
        """
        if self._node_exists(node_type, node_id):
            return node_id

        # Generate node properties
        properties = {}
        for feature_name, feature_type in self.schema["nodes"][node_type][
            "features"
        ].items():
            if feature_name == "id":
                properties[feature_name] = node_id
            else:
                properties[feature_name] = self.generate_random_value(feature_type)

        # Create node operation
        operation = {
            "action": "create",
            "type": "schema",
            "payload": {
                "node_id": node_id,
                "node_type": node_type,
                "properties": properties,
            },
            "timestamp": self.timestamp,
        }
        self.timestamp += 1
        self.operations.append(operation)

        # Store node instance
        if node_type not in self.node_instances:
            self.node_instances[node_type] = {}
        self.node_instances[node_type][node_id] = operation

        return node_id

    def _create_supplement_node(self, node_type: str) -> str:
        """Create a supplement node with a fresh UUID4 ID and return the ID.

        Node creation itself is shared with core nodes.
        """
        node_id = str(uuid.uuid4())
        return self._create_core_node(node_type, node_id)

    def _create_edge(self, source_id: str, target_id: str, edge_type: str) -> None:
        """Emit a "create" operation for one edge unless it already exists.

        Edge features are filled with random values based on the schema.
        """
        # Check if edge already exists
        edge_key = (source_id, target_id, edge_type)
        if edge_key in self.edge_instances[edge_type]:
            return

        # Generate edge properties
        properties = {}
        for feature_name, feature_type in self.schema["edges"][edge_type][
            "features"
        ].items():
            properties[feature_name] = self.generate_random_value(feature_type)

        operation = {
            "action": "create",
            "type": "schema",
            "payload": {
                "source_id": source_id,
                "target_id": target_id,
                "edge_type": edge_type,
                "properties": properties,
            },
            "timestamp": self.timestamp,
        }
        self.timestamp += 1
        self.operations.append(operation)

        # Store edge instance
        self.edge_instances[edge_type].add(edge_key)

    @staticmethod
    def _allocate_children(count: int, parent_count: int) -> List[int]:
        """Split ``count`` children across ``parent_count`` parents.

        Returns one child count per parent, in parent order. Every parent
        receives at least one child; when ``count`` is at least
        ``parent_count`` the counts sum to exactly ``count``, with the
        remainder handed out one by one to the first parents.
        ``parent_count`` must be positive.
        """
        base, remainder = divmod(count, parent_count)
        if base == 0:
            # Fewer children requested than parents: keep one per parent.
            return [1] * parent_count
        return [base + 1 if index < remainder else base for index in range(parent_count)]

    def create_network(self, nodes_per_type: Dict[str, int]) -> List[Dict[str, Any]]:
        """Generate the network and return its list of "create" operations.

        Args:
            nodes_per_type: requested number of nodes per node type; missing
                or non-positive entries create no nodes of that type.

        Returns:
            ``self.operations``: node and edge operations in creation order.

        Raises:
            ValueError: if the core node hierarchy contains a cycle.

        Core types are processed parents-first. Root types get IDs "1".."n";
        child types get "<parent_id>-<i>" plus an edge from the parent. A
        child type whose parent type has no instances is skipped. Each
        supplement node is then linked, per outgoing edge type, to 1-3
        randomly chosen existing nodes of the edge's target type.
        """
        self.operations = []
        self.node_instances = {}
        self.edge_instances = defaultdict(set)

        for node_type in self._get_core_topology():
            count = nodes_per_type.get(node_type, 0)
            if count <= 0:
                continue

            parent_info = self._get_parent_type_and_edge(node_type)
            if parent_info is None:
                # Root nodes - simple numbering
                for i in range(1, count + 1):
                    self._create_core_node(node_type, str(i))
                continue

            parent_type, edge_type = parent_info
            # Snapshot the parent IDs; no parents means nothing to attach to.
            parent_ids = list(self.node_instances.get(parent_type, {}))
            if not parent_ids:
                continue

            allocation = self._allocate_children(count, len(parent_ids))
            for parent_id, child_count in zip(parent_ids, allocation):
                for i in range(1, child_count + 1):
                    child_id = f"{parent_id}-{i}"
                    self._create_core_node(node_type, child_id)
                    self._create_edge(parent_id, child_id, edge_type)

        # Create all supplement nodes before linking them, so that
        # supplement-to-supplement edges can see every candidate target.
        supplement_node_ids: Dict[str, List[str]] = {}
        for node_type in self.supplement_nodes:
            count = nodes_per_type.get(node_type, 0)
            supplement_node_ids[node_type] = [
                self._create_supplement_node(node_type) for _ in range(count)
            ]

        for source_type, source_ids in supplement_node_ids.items():
            # Edge types that start at this supplement node type
            outgoing_edges = {
                edge_type: edge_info
                for edge_type, edge_info in self.schema["edges"].items()
                if edge_info["source"] == source_type
            }

            for source_id in source_ids:
                for edge_type, edge_info in outgoing_edges.items():
                    # node_instances holds core AND supplement nodes, so one
                    # lookup covers both; excluding the source prevents
                    # self-loops and each candidate is listed exactly once.
                    candidates = [
                        node_id
                        for node_id in self.node_instances.get(edge_info["target"], {})
                        if node_id != source_id
                    ]
                    if not candidates:
                        continue

                    # Connect to random target nodes (1-3 connections)
                    target_ids = random.sample(
                        candidates,
                        min(random.randint(1, 3), len(candidates)),
                    )
                    for target_id in target_ids:
                        self._create_edge(source_id, target_id, edge_type)

        return self.operations

In [2]:
import random
import string
import networkx as nx
from typing import Dict, Any, List, Tuple, Optional, Set
import json
from collections import defaultdict
import uuid


class NetworkGenerator:
    """Build a list of "create" operations for a synthetic network.

    The generator reads a schema dict with two sections:

    * ``schema["nodes"][node_type]`` holds ``"usage"`` (``"core"`` or
      ``"supplement"``) and ``"features"`` (feature name -> feature type).
    * ``schema["edges"][edge_type]`` holds ``"source"`` and ``"target"``
      (node type names) and ``"features"``.

    Core node types form a hierarchy and receive hierarchical IDs ("1",
    "1-1", "1-1-1", ...). Supplement node types receive UUID4 IDs and are
    linked to randomly chosen target nodes.
    """

    def __init__(self, schema_json: Dict[str, Any]):
        """Store the schema and split its node types by ``usage``."""
        self.schema = schema_json
        # node type -> node id -> the "create" operation that produced the node
        self.node_instances: Dict[str, Dict[str, Dict[str, Any]]] = {}
        # edge type -> set of (source_id, target_id, edge_type) already emitted
        self.edge_instances: Dict[str, set] = defaultdict(set)
        # Counter stamped on every operation. create_network() does not reset
        # it, so repeated calls on one instance keep counting upwards.
        self.timestamp = 0
        self.operations = []

        # Separate core and supplement nodes
        self.core_nodes = {
            node_type: info
            for node_type, info in schema_json["nodes"].items()
            if info["usage"] == "core"
        }
        self.supplement_nodes = {
            node_type: info
            for node_type, info in schema_json["nodes"].items()
            if info["usage"] == "supplement"
        }

    def generate_random_value(self, feature_type: str) -> Any:
        """Return a random value for ``feature_type``.

        ``"string"`` -> 8 random ASCII letters, ``"float"`` -> a float in
        [1, 1000] rounded to 2 decimals, ``"integer"`` -> an int in [1, 100].
        Any other feature type yields ``None``.
        """
        if feature_type == "string":
            return "".join(random.choices(string.ascii_letters, k=8))
        elif feature_type == "float":
            return round(random.uniform(1, 1000), 2)
        elif feature_type == "integer":
            return random.randint(1, 100)
        else:
            return None

    def _get_core_topology(self) -> List[str]:
        """Return the core node types ordered from root to leaf.

        Raises:
            ValueError: if the edges between core node types form a cycle.
        """
        graph = nx.DiGraph()

        # Add every core type first so types without core edges still appear.
        for node_type in self.core_nodes:
            graph.add_node(node_type)

        # Only edges whose both endpoints are core types shape the hierarchy.
        for edge_info in self.schema["edges"].values():
            source_type = edge_info["source"]
            target_type = edge_info["target"]
            if source_type in self.core_nodes and target_type in self.core_nodes:
                graph.add_edge(source_type, target_type)

        try:
            # A topological sort lists an edge's source before its target,
            # i.e. parents before children (root to leaf). create_network()
            # needs the parent instances to exist before their children.
            return list(nx.topological_sort(graph))
        except nx.NetworkXUnfeasible as exc:
            raise ValueError("Core node hierarchy contains cycles") from exc

    def _node_exists(self, node_type: str, node_id: str) -> bool:
        """Check if a node with the given type and ID already exists."""
        return (
            node_type in self.node_instances
            and node_id in self.node_instances[node_type]
        )

    def _get_parent_type_and_edge(self, node_type: str) -> Optional[Tuple[str, str]]:
        """Return ``(parent_type, edge_type)`` for a core node type.

        The first schema edge that targets ``node_type`` from a core source
        type wins. Returns ``None`` when ``node_type`` is not a core type or
        no such edge exists (i.e. it is a root of the core hierarchy).
        """
        for edge_type, edge_info in self.schema["edges"].items():
            if (
                edge_info["target"] == node_type
                and edge_info["source"] in self.core_nodes
                and node_type in self.core_nodes
            ):
                return edge_info["source"], edge_type
        return None

    def _create_core_node(self, node_type: str, node_id: str) -> str:
        """Emit a "create" operation for one node and register the node.

        Does nothing when a node with this type and ID already exists.
        The ``"id"`` feature is set to ``node_id``; every other feature gets
        a random value. Returns ``node_id``.
        """
        if self._node_exists(node_type, node_id):
            return node_id

        # Generate node properties
        properties = {}
        for feature_name, feature_type in self.schema["nodes"][node_type][
            "features"
        ].items():
            if feature_name == "id":
                properties[feature_name] = node_id
            else:
                properties[feature_name] = self.generate_random_value(feature_type)

        # Create node operation
        operation = {
            "action": "create",
            "type": "schema",
            "payload": {
                "node_id": node_id,
                "node_type": node_type,
                "properties": properties,
            },
            "timestamp": self.timestamp,
        }
        self.timestamp += 1
        self.operations.append(operation)

        # Store node instance
        if node_type not in self.node_instances:
            self.node_instances[node_type] = {}
        self.node_instances[node_type][node_id] = operation

        return node_id

    def _create_supplement_node(self, node_type: str) -> str:
        """Create a supplement node with a fresh UUID4 ID and return the ID.

        Node creation itself is shared with core nodes.
        """
        node_id = str(uuid.uuid4())
        return self._create_core_node(node_type, node_id)

    def _create_edge(self, source_id: str, target_id: str, edge_type: str) -> None:
        """Emit a "create" operation for one edge unless it already exists.

        Edge features are filled with random values based on the schema.
        """
        # Check if edge already exists
        edge_key = (source_id, target_id, edge_type)
        if edge_key in self.edge_instances[edge_type]:
            return

        # Generate edge properties
        properties = {}
        for feature_name, feature_type in self.schema["edges"][edge_type][
            "features"
        ].items():
            properties[feature_name] = self.generate_random_value(feature_type)

        operation = {
            "action": "create",
            "type": "schema",
            "payload": {
                "source_id": source_id,
                "target_id": target_id,
                "edge_type": edge_type,
                "properties": properties,
            },
            "timestamp": self.timestamp,
        }
        self.timestamp += 1
        self.operations.append(operation)

        # Store edge instance
        self.edge_instances[edge_type].add(edge_key)

    @staticmethod
    def _allocate_children(count: int, parent_count: int) -> List[int]:
        """Split ``count`` children across ``parent_count`` parents.

        Returns one child count per parent, in parent order. Every parent
        receives at least one child; when ``count`` is at least
        ``parent_count`` the counts sum to exactly ``count``, with the
        remainder handed out one by one to the first parents.
        ``parent_count`` must be positive.
        """
        base, remainder = divmod(count, parent_count)
        if base == 0:
            # Fewer children requested than parents: keep one per parent.
            return [1] * parent_count
        return [base + 1 if index < remainder else base for index in range(parent_count)]

    def create_network(self, nodes_per_type: Dict[str, int]) -> List[Dict[str, Any]]:
        """Generate the network and return its list of "create" operations.

        Args:
            nodes_per_type: requested number of nodes per node type; missing
                or non-positive entries create no nodes of that type.

        Returns:
            ``self.operations``: node and edge operations in creation order.

        Raises:
            ValueError: if the core node hierarchy contains a cycle.

        Core types are processed parents-first. Root types get IDs "1".."n";
        child types get "<parent_id>-<i>" plus an edge from the parent. A
        child type whose parent type has no instances is skipped. Each
        supplement node is then linked, per outgoing edge type, to 1-3
        randomly chosen existing nodes of the edge's target type.
        """
        self.operations = []
        self.node_instances = {}
        self.edge_instances = defaultdict(set)

        for node_type in self._get_core_topology():
            count = nodes_per_type.get(node_type, 0)
            if count <= 0:
                continue

            parent_info = self._get_parent_type_and_edge(node_type)
            if parent_info is None:
                # Root nodes - simple numbering
                for i in range(1, count + 1):
                    self._create_core_node(node_type, str(i))
                continue

            parent_type, edge_type = parent_info
            # Snapshot the parent IDs; no parents means nothing to attach to.
            parent_ids = list(self.node_instances.get(parent_type, {}))
            if not parent_ids:
                continue

            allocation = self._allocate_children(count, len(parent_ids))
            for parent_id, child_count in zip(parent_ids, allocation):
                for i in range(1, child_count + 1):
                    child_id = f"{parent_id}-{i}"
                    self._create_core_node(node_type, child_id)
                    self._create_edge(parent_id, child_id, edge_type)

        # Create all supplement nodes before linking them, so that
        # supplement-to-supplement edges can see every candidate target.
        supplement_node_ids: Dict[str, List[str]] = {}
        for node_type in self.supplement_nodes:
            count = nodes_per_type.get(node_type, 0)
            supplement_node_ids[node_type] = [
                self._create_supplement_node(node_type) for _ in range(count)
            ]

        for source_type, source_ids in supplement_node_ids.items():
            # Edge types that start at this supplement node type
            outgoing_edges = {
                edge_type: edge_info
                for edge_type, edge_info in self.schema["edges"].items()
                if edge_info["source"] == source_type
            }

            for source_id in source_ids:
                for edge_type, edge_info in outgoing_edges.items():
                    # node_instances holds core AND supplement nodes, so one
                    # lookup covers both; excluding the source prevents
                    # self-loops and each candidate is listed exactly once.
                    candidates = [
                        node_id
                        for node_id in self.node_instances.get(edge_info["target"], {})
                        if node_id != source_id
                    ]
                    if not candidates:
                        continue

                    # Connect to random target nodes (1-3 connections)
                    target_ids = random.sample(
                        candidates,
                        min(random.randint(1, 3), len(candidates)),
                    )
                    for target_id in target_ids:
                        self._create_edge(source_id, target_id, edge_type)

        return self.operations

In [3]:
# Load the schema (node and edge type definitions) from the JSON metadata
# file. The context manager closes the file handle as soon as parsing is
# done, and the explicit encoding avoids depending on the platform locale.
with open("../metadata/relations.json", "r", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)
generator = NetworkGenerator(schema)

# Requested number of nodes for each node type named in the schema.
custom_counts = {
    "BusinessUnit": 1,
    "ProductFamily": 1,
    "ProductOffering": 1,
    "Facility": 2,
    "Parts": 2,
    "Warehouse": 2,
    "Supplier": 2,
}
create_ops = generator.create_network(custom_counts)

print(f"Generated {len(create_ops)} operations")

Generated 74 operations


In [4]:
# Settings for replaying the generated operations.
version = "v5"  # written onto every operation as its "version" field
timestamp = 0  # running count of operations processed by the replay loop

# Base URL for the POST to /schema/live/update (that call is commented out).
API_URL = "http://localhost:8000"

In [5]:
import requests
import time

# Stamp each generated operation with the schema version and log it.
# The POST to the live-update endpoint and the throttling sleep are left
# commented out, so running this cell does not call the API.
for op in create_ops:
    op.update(version=version)
    # requests.post(f"{API_URL}/schema/live/update", json=op)
    # time.sleep(0.4)
    timestamp = timestamp + 1
    print("Completed operation", timestamp, op["action"], op["type"], op["payload"])

Completed operation 1 create schema {'node_id': '1', 'node_type': 'Parts', 'properties': {'id': '1', 'name': 'aABCCrVi', 'description': 'loGChDNJ', 'type': 'AdyaEcxI', 'cost': 634.31, 'importance': 46, 'expected_life': 45, 'units_in_chain': 30, 'expiry': 60}}
Completed operation 2 create schema {'node_id': '2', 'node_type': 'Parts', 'properties': {'id': '2', 'name': 'WVxNJJvR', 'description': 'XXSNeRWS', 'type': 'bonAuQKE', 'cost': 483.07, 'importance': 46, 'expected_life': 84, 'units_in_chain': 7, 'expiry': 38}}
Completed operation 3 create schema {'node_id': '1-1', 'node_type': 'Facility', 'properties': {'id': '1-1', 'name': 'tQYTBfKD', 'type': 'fCfJRcOp', 'location': 'VNfZknkN', 'max_capacity': 27, 'operating_cost': 568.32}}
Completed operation 4 create schema {'source_id': '1', 'target_id': '1-1', 'edge_type': 'PartsToFacility', 'properties': {'quantity': 8, 'distance_from_warehouse': 939.72, 'transport_cost': 248.11, 'lead_time': 10}}
Completed operation 5 create schema {'node_id'