In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import numpy as np
from torch_geometric.data import Data
from neo4j import GraphDatabase

# Neo4j Embedding Generator: This class manages the connection to Neo4j and generates node embeddings using the FastRP algorithm.

Key principles of FastRP:

Utilization of Neighbors: FastRP considers a node's neighbors to calculate its embedding, including direct neighbors (1-hop) and more distant ones (2-hop, 3-hop, etc.). The further a neighbor, the lesser its impact on the embedding.

Combination of Neighbors: FastRP aggregates information from a node's neighbors at various distances to compute its embedding, capturing the local graph structure around the node.

Random Projection: FastRP projects node features into a low-dimensional space using a random projection technique, reducing computational costs while preserving node similarity relations.

In [7]:


# --- Neo4j Embedding Generator ---
class Neo4jEmbeddingGenerator:
    def __init__(self, uri, user, password, database):
        self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database)

    def close(self):
        self.driver.close()

    def run_query(self, query):
        with self.driver.session() as session:
            result = session.run(query)
            return list(result)

    def generate_fastrp_embeddings(self):
        # Drop existing graph if any
        drop_graph_query = """
        CALL gds.graph.drop('myGraph', false) YIELD graphName;
        """
        try:
            self.run_query(drop_graph_query)
            print("Existing graph 'myGraph' dropped.")
        except Exception as e:
            print(f"No existing graph to drop or another issue: {str(e)}")

        # Project the graph
        project_graph_query = """
        CALL gds.graph.project('myGraph', 'Concept', '*')
        YIELD graphName;
        """
        self.run_query(project_graph_query)

        # Run FastRP
        fastrp_query = """
        CALL gds.fastRP.write('myGraph', {
            writeProperty: 'embedding',
            embeddingDimension: 128
        })
        YIELD nodePropertiesWritten;
        """
        result = self.run_query(fastrp_query)
        node_properties_written = [record['nodePropertiesWritten'] for record in result]
        return node_properties_written[0] if node_properties_written else 0

    def get_node_embeddings(self):
        query = """
        MATCH (n:Concept)
        RETURN n.name AS name, n.embedding AS embedding
        """
        result = self.run_query(query)
        return [{"name": record["name"], "embedding": record["embedding"]} for record in result]




# 1. Cosine Similarity Function
The function cosine_similarity calculates the cosine similarity between two vectors, a and b. It computes the dot product of the two vectors and divides it by the product of their magnitudes (norms). If either vector has zero magnitude, it returns a similarity score of 0.0. Cosine similarity measures the angle between two vectors in an embedding space, often used to quantify similarity between them.

# 2. Search for Similar Nodes
The search_node function takes a list of node embeddings and a query embedding, calculating the cosine similarity between the query embedding and each node embedding. It stores the results as tuples of node names and similarity scores, sorts them in descending order of similarity, and returns the top k most similar nodes. This function is used to find nodes in a graph that are most similar to a given query.

# 3. GNN Model Definition
The GNNModel class defines a simple Graph Neural Network (GNN) using two layers of Graph Convolutional Networks (GCN). The model takes in node features (x) and an edge index (which defines the graph structure) and passes them through two graph convolution layers:

The first layer applies graph convolution to the input features and uses ReLU as the activation function.
The second layer performs another graph convolution without any activation.
This model captures the structural and feature-based relationships between nodes in a graph. It can be used for node classification, link prediction, or other tasks involving graph-structured data.

In [4]:
# --- Cosine Similarity Function ---
def cosine_similarity(a, b):
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return np.dot(a, b) / (norm_a * norm_b)


# --- Search for Similar Nodes ---
def search_node(embeddings, query_embedding, top_k=5):
    similarities = []
    for emb in embeddings:
        name = emb["name"]
        embedding = emb["embedding"]
        similarity = cosine_similarity(query_embedding, embedding)
        similarities.append((name, similarity))
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]


# --- GNN Model Definition ---
class GNNModel(nn.Module):
    def __init__(self, num_features, hidden_dim):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x


# GNN Model : Un modèle de réseau neuronal graphique (GNN) basé sur deux couches de convolution de graphes (GCNConv).
# Main Execution : La génération des embeddings, la recherche de similarités, et l'utilisation d'un modèle GNN pour un passage en avant sont effectuées dans cette section.

In [11]:
# --- Main Execution ---
if __name__ == "__main__":
    # Initialize Neo4j connection
    uri = "bolt://localhost:7687"
    user = "neo4j"
    password = "abdou12345;"
    database = "graph2"

    embedding_generator = Neo4jEmbeddingGenerator(uri, user, password, database)

    try:
        # Generate embeddings
        nodes_written = embedding_generator.generate_fastrp_embeddings()
        print(f"Embeddings generated for {nodes_written} nodes.")

        # Retrieve node embeddings
        embeddings = embedding_generator.get_node_embeddings()

        # Print each embedding with the node name
        print("\n--- Node Embeddings ---")
        for emb in embeddings:
            name = emb["name"]
            vector = emb["embedding"]
            print(f"Node: {name}, Embedding Vector: {vector}")

        # Search for similar nodes based on a query
        query_node_name = "risk"
        query_embedding = next(emb["embedding"] for emb in embeddings if emb["name"] == query_node_name)

        top_similar_nodes = search_node(embeddings, query_embedding)
        print(f"\nTop similar nodes to {query_node_name}:")
        for name, score in top_similar_nodes:
            print(f"Node: {name}, Similarity: {score:.4f}")

        # --- Example GNN usage ---
        # Convert the embeddings to torch tensors
        node_embeddings = torch.tensor([emb['embedding'] for emb in embeddings], dtype=torch.float)

        # Placeholder for edge_index (replace with actual edges from Neo4j)
        edge_index = torch.tensor([[0, 1], [1, 2]], dtype=torch.long)  # Replace with actual edges

        # Create GNN model
        model = GNNModel(num_features=128, hidden_dim=64)

        # Forward pass through the GNN
        output = model(node_embeddings, edge_index)
        print("\nGNN Output:")
        print(output)

    finally:
        embedding_generator.close()


Existing graph 'myGraph' dropped.
Embeddings generated for 548 nodes.

--- Node Embeddings ---
Node: guide, Embedding Vector: [0.14433759450912476, 0.12371794134378433, -0.061858970671892166, 0.0, -0.041239310055971146, -0.041239310055971146, 0.12371793389320374, 0.1855769008398056, -0.16495724022388458, -0.061858970671892166, -0.14433759450912476, 0.020619655027985573, -0.16495725512504578, -0.020619655027985573, 0.041239313781261444, -0.16495725512504578, 0.14433759450912476, 0.020619655027985573, 0.0, 0.041239310055971146, 0.12371793389320374, 0.12371793389320374, -0.020619655027985573, -0.02061966061592102, -0.020619655027985573, -0.32991448044776917, 0.020619655027985573, -0.14433759450912476, 0.2886751890182495, 0.1855768859386444, -0.041239310055971146, -0.041239313781261444, 0.12371794134378433, 0.020619645714759827, -0.14433757960796356, 0.020619655027985573, -0.10309828072786331, -0.16495724022388458, -0.020619655027985573, 0.020619655027985573, -0.16495724022388458, -0.12371