# Importing the library Neo4j:

In [3]:
from neo4j import GraphDatabase

# Define the class Neo4jEmbeddingGenerator

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import numpy as np
from torch_geometric.data import Data
from neo4j import GraphDatabase  # Import GraphDatabase

# Neo4j Embedding Generator
class Neo4jEmbeddingGenerator:
    def __init__(self, uri, user, password, database):
        self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database)

    def close(self):
        self.driver.close()

    def run_query(self, query):
        with self.driver.session() as session:
            result = session.run(query)
            return list(result)  # Return the results as a list

    def generate_fastrp_embeddings(self):
        # Step 1: Drop the graph if it already exists
        drop_graph_query = """
        CALL gds.graph.drop('myGraph', false) YIELD graphName;
        """
        try:
            self.run_query(drop_graph_query)
            print("Existing graph 'myGraph' dropped.")
        except Exception as e:
            print(f"No existing graph to drop or another issue: {str(e)}")

        # Step 2: Project the graph
        project_graph_query = """
        CALL gds.graph.project('myGraph', 'Concept', '*')
        YIELD graphName;
        """
        self.run_query(project_graph_query)

        # Step 3: Run FastRP on the projected graph
        fastrp_query = """
        CALL gds.fastRP.write('myGraph', {
            writeProperty: 'embedding',
            embeddingDimension: 128
        })
        YIELD nodePropertiesWritten;
        """
        result = self.run_query(fastrp_query)

        # Fetch the number of node properties written
        node_properties_written = [record['nodePropertiesWritten'] for record in result]
        return node_properties_written[0] if node_properties_written else 0

    def get_node_embeddings(self):
        query = """
        MATCH (n:Concept)
        RETURN n.name AS name, n.embedding AS embedding
        """
        result = self.run_query(query)

        # Convert results to a list of dictionaries
        return [{"name": record["name"], "embedding": record["embedding"]} for record in result]


# Generate embeddings using FastRp

In [5]:
# Initialize connection to Neo4j
uri = "bolt://localhost:7687"
user = "neo4j"
password = "abdou12345;"
database = "graph2"

embedding_generator = Neo4jEmbeddingGenerator(uri, user, password, database)

# Generate embeddings
try:
    nodes_written = embedding_generator.generate_fastrp_embeddings()
    print(f"Embeddings generated for {nodes_written} nodes.")

    # Retrieve and print node embeddings
    embeddings = embedding_generator.get_node_embeddings()
    for emb in embeddings:
        print(f"Name: {emb['name']}, Embedding: {emb['embedding']}")
finally:
    embedding_generator.close()


Embeddings generated for 548 nodes.
Name: guide, Embedding: [0.1557779312133789, 0.019695114344358444, 0.0, 0.0, 0.273948609828949, 0.019695114344358444, 0.03939022868871689, -0.0017829835414886475, 0.03939022868871689, 0.05908534303307533, -0.273948609828949, -0.05908534675836563, -0.060868337750434875, 0.1772560179233551, 0.09669258445501328, -0.02147810161113739, 0.019695114344358444, 0.2936437129974365, -0.05908534675836563, 0.0179121270775795, 0.02147810161113739, 0.1378657966852188, 0.0179121270775795, -0.11638769507408142, 0.03939022868871689, 0.07878045737743378, -0.03939022868871689, -0.273948609828949, 0.019695114344358444, 0.09847557544708252, 0.11817068606615067, -0.13608281314373016, 0.1772560328245163, -0.09669258445501328, 0.21486327052116394, 0.0, 0.019695114344358444, -0.019695114344358444, -0.273948609828949, 0.3330339789390564, 0.273948609828949, -0.1557779312133789, 0.11638769507408142, 0.2166462540626526, -0.03939022868871689, -0.04117320477962494, 0.13608281314373

# GNN Model

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

  from .autonotebook import tqdm as notebook_tqdm


In [30]:
# Cosine similarity function to compare embeddings
def cosine_similarity(a, b):
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)

    # Check if either vector is zero, if so return 0 as similarity
    if norm_a == 0 or norm_b == 0:
        return 0.0
    
    return np.dot(a, b) / (norm_a * norm_b)

# Search for similar nodes based on cosine similarity
def search_node(embeddings, query_embedding, top_k=5):
    similarities = []
    for emb in embeddings:
        name = emb["name"]
        embedding = emb["embedding"]
        similarity = cosine_similarity(query_embedding, embedding)
        similarities.append((name, similarity))
    
    # Sort by similarity in descending order and return top_k results
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]

In [31]:
class GNNModel(nn.Module):
    def __init__(self, num_features, hidden_dim):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Exemple d'utilisation
model = GNNModel(num_features=128, hidden_dim=64)

In [None]:
# Initialize Neo4j connection
uri = "bolt://localhost:7687"
user = "neo4j"
password = "abdou12345;"
database = "graph2"

embedding_generator = Neo4jEmbeddingGenerator(uri, user, password, database)

In [32]:


# Generate embeddings and search for similar nodes
try:
    nodes_written = embedding_generator.generate_fastrp_embeddings()
    print(f"Embeddings generated for {nodes_written} nodes.")

    # Retrieve node embeddings
    embeddings = embedding_generator.get_node_embeddings()

    # Afficher chaque embedding avec le nom du noeud
    print("\n--- Node Embeddings ---")
    for emb in embeddings:
        name = emb["name"]
        vector = emb["embedding"]
        print(f"Node: {name}, Embedding Vector: {vector}")
    
    # Example search query: Let's assume you are searching for nodes similar to "risk"
    query_node_name = "risk"
    query_embedding = next(emb["embedding"] for emb in embeddings if emb["name"] == query_node_name)

    # Perform search for similar nodes
    top_similar_nodes = search_node(embeddings, query_embedding)
    print(f"\nTop similar nodes to {query_node_name}:")
    for name, score in top_similar_nodes:
        print(f"Node: {name}, Similarity: {score:.4f}")

finally:
    embedding_generator.close()

# Example GNN usage (no training for now, just a basic run)
# Convert the embeddings and graph data to torch tensors for GNN
node_embeddings = torch.tensor([emb['embedding'] for emb in embeddings], dtype=torch.float)

# Placeholder for edge_index (fetch actual edges from Neo4j)
# The edge index is typically a 2D tensor where each column represents an edge (from node i to node j)
edge_index = torch.tensor([[0, 1], [1, 2]], dtype=torch.long)  # Replace with actual edges

# Create GNN model
model = GNNModel(num_features=128, hidden_dim=64)

# Forward pass through the GNN
output = model(node_embeddings, edge_index)
print("\nGNN Output:")
print(output)


Existing graph 'myGraph' dropped.
Embeddings generated for 548 nodes.

--- Node Embeddings ---
Node: guide, Embedding Vector: [0.14285719394683838, -0.01731109619140625, -0.16378153860569, 0.10100849717855453, -0.08369739353656769, 0.3311763405799866, 0.0, -0.020924348384141922, 0.06277304887771606, -0.18470588326454163, -0.020924348384141922, 0.10462174564599991, 0.020924348384141922, -0.020924348384141922, 0.14285719394683838, -0.10100849717855453, 0.020924348384141922, -0.06277304887771606, 0.0, 0.12193284928798676, 0.14285719394683838, -0.020924348384141922, 0.0, -0.31025198101997375, 0.0, 0.01731109619140625, -0.06277304887771606, 0.10462174564599991, 0.0, 0.041848696768283844, 0.12554609775543213, -0.16739478707313538, -0.16378153860569, -0.16378153860569, 0.14285719394683838, -0.020924348384141922, 0.041848696768283844, -0.12193284928798676, -0.020924348384141922, 0.041848696768283844, -0.12554609775543213, -0.06277304887771606, -0.14647044241428375, -0.041848696768283844, 0.0, 