In [35]:
# Import Libraries
from arango import ArangoClient
import networkx as nx

In [39]:
def sanitize_key(key, counter):
    """
    Sanitize a key to make it valid for ArangoDB and ensure uniqueness.

    Args:
        key: Original node identifier. A value that is not a string, or the
            string "nan" in any letter case, is replaced by the placeholder
            "unknown_<counter>".
        counter: Value appended as "_<counter>". The caller is expected to
            pass a different value for every key so results stay distinct.

    Returns:
        str: The sanitized key. Every character other than an ASCII letter,
        an ASCII digit, "_" or "-" is replaced by "_", and the result is at
        most 254 characters long as long as the "_<counter>" suffix itself
        fits within that limit.
    """
    max_length = 254  # ArangoDB's documented upper bound for a document key
    suffix = f"_{counter}"

    if not isinstance(key, str) or key.lower() == "nan":
        base = f"unknown_{counter}"
    else:
        # Whitelist instead of blacklist: anything outside the safe ASCII set
        # (punctuation, whitespace, control and non-ASCII characters) becomes "_".
        base = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "_-" else "_"
            for ch in key
        )

    # Shorten only the base so the uniqueness suffix is never cut off.
    room = max(max_length - len(suffix), 0)
    return base[:room] + suffix

In [52]:
from arango import ArangoClient
import networkx as nx

def sanitize_key(key, counter):
    """
    Sanitize a key to make it valid for ArangoDB and ensure uniqueness.

    Args:
        key: Original node identifier. A value that is not a string, or the
            string "nan" in any letter case, is replaced by the placeholder
            "unknown_<counter>".
        counter: Value appended as "_<counter>". The caller is expected to
            pass a different value for every key so results stay distinct.

    Returns:
        str: The sanitized key. Every character other than an ASCII letter,
        an ASCII digit, "_" or "-" is replaced by "_", and the result is at
        most 254 characters long as long as the "_<counter>" suffix itself
        fits within that limit.
    """
    max_length = 254
    # The suffix is what makes keys unique, so it must survive truncation.
    suffix = f"_{counter}"

    if not isinstance(key, str) or key.lower() == "nan":
        # Replace invalid keys with a placeholder
        base = f"unknown_{counter}"
    else:
        # Keep only a safe ASCII subset; everything else (punctuation,
        # whitespace, control and non-ASCII characters) becomes "_". With an
        # ASCII-only base the character count also equals the byte count.
        base = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "_-" else "_"
            for ch in key
        )

    # Truncate the base, not the finished key: cutting the finished key would
    # drop the counter and let two long names with a shared prefix collide.
    room = max(max_length - len(suffix), 0)
    return base[:room] + suffix

def load_graph_to_arangodb(
    graph,
    db_name="graph_db",
    graph_name="ai_incidents_graph",
    hosts="http://localhost:8529",
    username="root",
    password="passwd",
):
    """
    Load a NetworkX graph into ArangoDB.

    Creates the database, the named graph, the "nodes" vertex collection and
    the "edges" edge definition when they are missing, then inserts one
    document per node and one per edge.

    Args:
        graph: Graph to upload; must provide ``nodes()`` and
            ``edges(data=True)``.
        db_name: Name of the target database.
        graph_name: Name of the ArangoDB graph.
        hosts: ArangoDB endpoint URL.
        username: User for both the "_system" and the target database.
        password: Password for that user. The default only mirrors the value
            that used to be hard-coded here; pass real credentials (for
            example read from an environment variable) outside local testing.

    Returns:
        None. Progress is reported with ``print``. Any exception raised while
        loading is caught and printed instead of propagating, so callers
        cannot detect failure from the return value.

    Note:
        Node keys are built from the node name and its position in
        ``graph.nodes()``, so a second run produces the same keys.
        Presumably ArangoDB rejects the repeated insert and the load stops
        at the first node - TODO confirm and decide whether to overwrite.
    """
    try:
        # Initialize the ArangoDB client
        client = ArangoClient(hosts=hosts)

        # The "_system" database is the one used to check for / create others
        sys_db = client.db("_system", username=username, password=password)
        if not sys_db.has_database(db_name):
            sys_db.create_database(db_name)
            print(f"Database '{db_name}' created.")

        # Connect to the target database
        db = client.db(db_name, username=username, password=password)

        # Create the graph if it doesn't exist, then fetch its handle
        if not db.has_graph(graph_name):
            db.create_graph(graph_name)
            print(f"Graph '{graph_name}' created.")
        arango_graph = db.graph(graph_name)

        if not arango_graph.has_vertex_collection("nodes"):
            arango_graph.create_vertex_collection("nodes")
            print("Collection 'nodes' created.")

        if not arango_graph.has_edge_collection("edges"):
            arango_graph.create_edge_definition(
                edge_collection="edges",
                from_vertex_collections=["nodes"],
                to_vertex_collections=["nodes"]
            )
            print("Collection 'edges' created.")

        # Add nodes; remember which sanitized key each original node received
        # so edges can be wired to the stored documents afterwards.
        nodes_collection = db.collection("nodes")
        node_mapping = {}
        for node_counter, node in enumerate(graph.nodes()):
            sanitized_key = sanitize_key(node, node_counter)
            nodes_collection.insert({"_key": sanitized_key})
            node_mapping[node] = sanitized_key

        # Add edges; endpoints are referenced as "<collection>/<key>"
        edges_collection = db.collection("edges")
        for source_node, target_node, attributes in graph.edges(data=True):
            edges_collection.insert({
                "_from": f"nodes/{node_mapping[source_node]}",
                "_to": f"nodes/{node_mapping[target_node]}",
                # Edges without an explicit weight get weight 1
                "weight": attributes.get("weight", 1)
            })

        print(f"Graph '{graph_name}' loaded into ArangoDB.")

    except Exception as e:
        # Best-effort by design: report the failure and return normally
        print(f"Error loading graph into ArangoDB: {e}")

# Read the previously saved GraphML export back into a NetworkX graph
G = nx.read_graphml("graph.graphml")

# Diagnostic listing: show every node identifier together with its Python
# type so that problematic identifiers are visible before the upload
print("Checking node keys for invalid values:")
for node in G:
    print(f"Node: {node}, Type: {type(node)}")

# Upload the graph using the default database and graph names
load_graph_to_arangodb(G)

Checking node keys for invalid values:
Node: Content Recommendation, Content Search, Hate Speech Detection, NSFW Content Detection, Type: <class 'str'>
Node: Content-based Filtering, Collaborative Filtering, Type: <class 'str'>
Node: Hate Speech Detection, Type: <class 'str'>
Node: Character NGrams, Type: <class 'str'>
Node: Autonomous Driving, Type: <class 'str'>
Node: nan, Type: <class 'str'>
Node: AI Voice Assistant, Type: <class 'str'>
Node: Automatic Speech Recognition, Language Modeling, Acoustic Fingerprint, Type: <class 'str'>
Node: Face Recognition, Type: <class 'str'>
Node: Deepfake Video Generation, Type: <class 'str'>
Node: Neural Network, Face Detection, Recurrent Neural Network, Generative Adversarial Network, Type: <class 'str'>
Node: Substance Detection, Smart Devices, Type: <class 'str'>
Node: Gesture Recognition, Type: <class 'str'>
Node: Chatbot, Type: <class 'str'>
Node: Autoencoder, Distributional Learning, Type: <class 'str'>
Node: Automatic Skill Assessment, Type