In [13]:
# First-attempt, zero-shot prompt: embeds a sample GraphEmbedding module and asks
# the model to describe it as a graph in JSON.
# NOTE(review): the opening sentence asks for "nodes and edges", but the closing
# instruction allows only a single "Adjacency_matrix" key, so the two requests
# conflict with each other.
prompt = """
Extract entities and relationships from the following code. Return the nodes and edges in JSON format.

Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

Format the output as JSON with the following keys and nothing else:
Adjacency_matrix
"""

In [4]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Dict
from llama_index.llms.langchain import LangChainLLM

# Read settings from a local .env file into the environment before the client
# is built (presumably this is where the Google API key lives — confirm).
load_dotenv()

# Shared Gemini chat client used by the prompt experiments below.
# temperature=0 keeps the extraction output as repeatable as the service allows.
# The request timeout is 15 seconds; the original spelled the keyword "tiemout"
# while also passing timeout=None, so the intended limit was never applied.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=15,
    max_retries=2,
)

In [14]:
# Send the first-attempt prompt to the Gemini client; the reply text is read
# from `response.content` in the next cell.
response = llm.invoke(prompt)

In [15]:
# Print the raw reply text so its JSON layout can be inspected by eye.
print(response.content)

```json
{
"Adjacency_matrix": {
"GraphEmbedding": {
"model": "SentenceTransformer",
"__init__": "self.model",
"embed_node": "self.model.encode(node)",
"embed_graph": {
"embeddings": "{}",
"node": "graph.nodes()",
"embeddings[node]": "self.embed_node(node)"
}
},
"SentenceTransformer": {
"model_name": "all-MiniLM-L6-v2"
},
"ge": "GraphEmbedding()",
"node_embeddings": "ge.embed_graph(kg.graph)",
"kg": "KnowledgeGraph",
"kg.graph": "graph",
"graph": "graph.nodes()",
"node": "node",
"embeddings": "{}",
"self.model": "SentenceTransformer(model_name)",
"self.model.encode(node)": "np.array",
"np.array": "embeddings[node]"
}
}
```


In [8]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Top-level key holding the graph's nodes.
nodes_schema = ResponseSchema(
    name="nodes",
    description="List of nodes",
    type="List",
)

# Top-level key holding the graph's edges.
edges_schema = ResponseSchema(
    name="edges",
    description="List of edges",
    type="List",
)

# Order matters only for how the format instructions are rendered.
response_schemas = [nodes_schema, edges_schema]

In [9]:
# Build a parser from the two schemas above; later cells use it both to produce
# format instructions and to parse the model reply into a dict.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas=response_schemas)

In [15]:
# Text telling the model which JSON layout the parser expects.
# NOTE(review): none of the prompts visible in this notebook interpolate
# `format_inst`; the few-shot prompt relies on its worked example instead.
format_inst = output_parser.get_format_instructions()

In [16]:
# Display the generated format instructions.
print(format_inst)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"nodes": List  // List of nodes
	"edges": List  // List of edges
}
```


In [2]:
# Second-attempt, one-shot prompt: shows the GraphEmbedding module together with
# a hand-written {"nodes": [...], "edges": [...]} answer, then supplies the
# KnowledgeGraph module as the code to convert.
# NOTE(review): in the worked example, edge 8 -> 12 labels "embed_node(node)" as
# what embed_graph "returns", but the example code returns `embeddings` (node 10,
# currently labelled "has attribute"). The model may copy this labelling.
new_prompt = """
Extract the details from the following Python code and format them as a JSON object. Each class should be a node with its methods as nodes connected by edges. Each method should have nodes for its attributes and return values. Each edge should describe the relationship between nodes.

For Example:

Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

Output:
```json
{
  "nodes": [
    {"id": "1", "label": "GraphEmbedding"},
    {"id": "2", "label": "__init__"},
    {"id": "3", "label": "model_name"},
    {"id": "4", "label": "model"},
    {"id": "5", "label": "embed_node"},
    {"id": "6", "label": "node"},
    {"id": "7", "label": "model.encode(node)"},
    {"id": "8", "label": "embed_graph"},
    {"id": "9", "label": "graph"},
    {"id": "10", "label": "embeddings"},
    {"id": "11", "label": "graph.nodes()"},
    {"id": "12", "label": "embed_node(node)"}
  ],
  "edges": [
    {"source": "1", "target": "2", "label": "has method"},
    {"source": "2", "target": "3", "label": "has parameter"},
    {"source": "2", "target": "4", "label": "has attribute"},
    {"source": "1", "target": "5", "label": "has method"},
    {"source": "5", "target": "6", "label": "has parameter"},
    {"source": "5", "target": "7", "label": "returns"},
    {"source": "1", "target": "8", "label": "has method"},
    {"source": "8", "target": "9", "label": "has parameter"},
    {"source": "8", "target": "10", "label": "has attribute"},
    {"source": "8", "target": "11", "label": "has attribute"},
    {"source": "8", "target": "12", "label": "returns"}
  ]
}
```

Given Code:

```
import networkx as nx
from pyvis.network import Network
import json

class KnowledgeGraph:
    def __init__(self):
        self.graph = nx.Graph()

    def add_entity(self, entity):
        self.graph.add_node(entity)

    def add_relationship(self, entity1, entity2, relationship):
        self.graph.add_edge(entity1, entity2, relationship=relationship)

    def visualize(self, filename='graph.html'):
        net = Network(notebook=False)
        net.from_nx(self.graph)
        net.show(filename, notebook=False)

    def save(self, filename='knowledge_graph.json'):
        data = nx.node_link_data(self.graph)
        with open(filename, 'w') as f:
            json.dump(data, f)

    def load(self, filename='knowledge_graph.json'):
        with open(filename, 'r') as f:
            data = json.load(f)
        self.graph = nx.node_link_graph(data)


```

"""

In [5]:
# Run the one-shot prompt through the same Gemini client.
new_res = llm.invoke(new_prompt)

In [None]:
# Print the raw reply text before it is parsed.
print(new_res.content)

In [10]:
# Parse the reply text into a Python dict keyed by "nodes" and "edges"
# (the two keys declared in the response schemas).
res_as_dict = output_parser.parse(new_res.content)

In [13]:
# Inspect the full parsed structure.
print(res_as_dict)

{'nodes': [{'id': '1', 'label': 'KnowledgeGraph'}, {'id': '2', 'label': '__init__'}, {'id': '3', 'label': 'graph'}, {'id': '4', 'label': 'nx.Graph()'}, {'id': '5', 'label': 'add_entity'}, {'id': '6', 'label': 'entity'}, {'id': '7', 'label': 'self.graph.add_node(entity)'}, {'id': '8', 'label': 'add_relationship'}, {'id': '9', 'label': 'entity1'}, {'id': '10', 'label': 'entity2'}, {'id': '11', 'label': 'relationship'}, {'id': '12', 'label': 'self.graph.add_edge(entity1, entity2, relationship=relationship)'}, {'id': '13', 'label': 'visualize'}, {'id': '14', 'label': 'filename'}, {'id': '15', 'label': "'graph.html'"}, {'id': '16', 'label': 'net'}, {'id': '17', 'label': 'Network(notebook=False)'}, {'id': '18', 'label': 'net.from_nx(self.graph)'}, {'id': '19', 'label': 'net.show(filename, notebook=False)'}, {'id': '20', 'label': 'save'}, {'id': '21', 'label': 'data'}, {'id': '22', 'label': 'nx.node_link_data(self.graph)'}, {'id': '23', 'label': 'f'}, {'id': '24', 'label': 'json.dump(data, f)

In [21]:
# Check what type the parser returned (the saved output shows a plain dict).
print(type(res_as_dict))

<class 'dict'>


In [22]:
# Show only the node list.
# NOTE(review): the saved output of this cell lists Entity / Relation /
# ExtractedData nodes, not the KnowledgeGraph nodes printed two cells earlier,
# so `res_as_dict` apparently came from a different run. Re-execute the
# notebook top to bottom before trusting the saved outputs.
print(res_as_dict['nodes'])

[{'id': '1', 'label': 'Entity'}, {'id': '2', 'label': 'name'}, {'id': '3', 'label': 'Relation'}, {'id': '4', 'label': 'entity1'}, {'id': '5', 'label': 'relationship'}, {'id': '6', 'label': 'entity2'}, {'id': '7', 'label': 'ExtractedData'}, {'id': '8', 'label': 'entities'}, {'id': '9', 'label': 'relations'}]


In [1]:
from typing import Any

import networkx as nx
import numpy as np
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_core.retrievers import BaseRetriever
from langchain_google_genai import ChatGoogleGenerativeAI
from sentence_transformers import SentenceTransformer

# Load settings from a local .env file before any client is constructed
# (presumably the API key read by ChatGoogleGenerativeAI — confirm).
load_dotenv()

class GraphRetriever(BaseRetriever):
    """Retriever that returns text renderings of graph neighbourhoods.

    For a query, every node is rendered as ``"<label> (ID: <id>)"``, embedded,
    and ranked by cosine similarity to the query embedding. For each of the
    ``top_k`` best nodes, the ego graph of radius ``depth`` is rendered to text
    and wrapped in a ``Document`` whose metadata records the start node.

    ``BaseRetriever`` is a pydantic model, so the configuration is declared as
    model fields and passed through ``super().__init__``; assigning undeclared
    attributes in ``__init__`` is rejected by pydantic. Retrieval logic lives in
    the ``_get_relevant_documents`` hook, which the base class calls from its
    public entry points (``invoke`` and the inherited
    ``get_relevant_documents``).

    Attributes:
        graph: networkx graph to search; nodes are expected to carry a
            ``label`` attribute (the node id is used when it is missing).
        embedder: object with an ``encode`` method accepting a string or a
            list of strings (a ``SentenceTransformer`` in this notebook).
        depth: radius of the ego graph extracted around each matching node.
        top_k: number of best-matching nodes turned into documents.
    """

    graph: Any
    embedder: Any
    depth: int = 3
    top_k: int = 3

    def __init__(self, graph, embedder, depth=3, top_k=3, **kwargs):
        """Keep the positional ``(graph, embedder, depth)`` constructor while
        routing the values through pydantic field initialisation."""
        super().__init__(graph=graph, embedder=embedder, depth=depth, top_k=top_k, **kwargs)

    @staticmethod
    def _node_label(node, data):
        """Return the node's ``label`` attribute, or the node id if unset."""
        return data.get('label', node)

    def _get_relevant_documents(self, query, *, run_manager=None):
        """Return one ``Document`` per top-ranked node for ``query``.

        Args:
            query: Free-text question to match against node texts.
            run_manager: Callback manager supplied by the base class; unused.

        Returns:
            A list of at most ``top_k`` documents, best match first. Empty when
            the graph has no nodes.
        """
        node_ids = list(self.graph.nodes)
        if not node_ids:
            return []

        node_texts = [
            f"{self._node_label(node, self.graph.nodes[node])} (ID: {node})"
            for node in node_ids
        ]

        # Encode all node texts in one batch instead of one call per node.
        query_vec = np.asarray(self.embedder.encode(query), dtype=float)
        node_vecs = np.asarray(self.embedder.encode(node_texts), dtype=float)

        # Cosine similarity; zero-norm vectors score 0 instead of producing NaN.
        norms = np.linalg.norm(node_vecs, axis=1) * np.linalg.norm(query_vec)
        scores = np.divide(
            node_vecs @ query_vec,
            norms,
            out=np.zeros(len(node_ids)),
            where=norms > 0,
        )

        # Stable sort so that ties keep graph insertion order.
        ranked = np.argsort(-scores, kind="stable")[: max(self.top_k, 0)]

        documents = []
        for index in ranked:
            start_node = node_ids[index]
            neighbourhood = nx.ego_graph(self.graph, start_node, radius=self.depth)
            documents.append(
                Document(
                    page_content=self.subgraph_to_text(neighbourhood),
                    metadata={"start_node": start_node},
                )
            )
        return documents

    def subgraph_to_text(self, subgraph):
        """Render ``subgraph`` as text: one ``Node:`` line per node followed by
        one indented ``- <edge label> -> <neighbour>`` line per neighbour.

        Missing node labels fall back to the node id; a missing edge label is
        rendered as ``related to``.
        """
        lines = []
        for node, data in subgraph.nodes(data=True):
            lines.append(f"Node: {self._node_label(node, data)} (ID: {node})")
            for neighbor in subgraph.neighbors(node):
                edge_label = subgraph[node][neighbor].get('label', 'related to')
                neighbor_label = self._node_label(neighbor, subgraph.nodes[neighbor])
                lines.append(f"  - {edge_label} -> {neighbor_label} (ID: {neighbor})")
        # Every line is newline-terminated, matching the original output format.
        return "".join(f"{line}\n" for line in lines)

class GraphRAG:
    """Question answering over a small code graph.

    Builds a networkx graph from ``{"nodes": [...], "edges": [...]}`` data,
    embeds it with a sentence-transformers model, and answers questions through
    a ``RetrievalQA`` chain backed by ``GraphRetriever``.

    Args:
        graph_data: Mapping with a ``"nodes"`` list of ``{"id", "label"}`` dicts
            and an ``"edges"`` list of ``{"source", "target", "label"}`` dicts.
        model_name: Sentence-transformers model used for both the graph
            retriever and the FAISS vector store.
    """

    def __init__(self, graph_data, model_name='all-MiniLM-L6-v2'):
        # Stored so the vector store embeds with the same model as the retriever.
        self.model_name = model_name
        self.graph = self.create_graph(graph_data)
        self.embedder = SentenceTransformer(model_name)
        self.vector_store = None
        self.qa_chain = None
        self.graph_retriever = GraphRetriever(self.graph, self.embedder)

    def create_graph(self, graph_data):
        """Return an undirected graph whose nodes and edges all carry ``label``."""
        G = nx.Graph()
        for node in graph_data['nodes']:
            G.add_node(node['id'], label=node['label'])
        for edge in graph_data['edges']:
            G.add_edge(edge['source'], edge['target'], label=edge['label'])
        # add_edge creates endpoints that were not listed under "nodes"; give
        # those a label too so later ``data['label']`` lookups cannot fail.
        for node, data in G.nodes(data=True):
            data.setdefault('label', str(node))
        return G

    def create_embeddings(self):
        """Embed one text per node and one per edge.

        Returns:
            ``(texts, embeddings)`` where ``texts`` lists node texts followed by
            edge texts and ``embeddings`` is what ``embedder.encode`` returns
            for that list.
        """
        node_texts = [
            f"{data['label']} (ID: {node})"
            for node, data in self.graph.nodes(data=True)
        ]
        edge_texts = [
            f"{self.graph.nodes[source]['label']} - {data['label']} - {self.graph.nodes[target]['label']}"
            for source, target, data in self.graph.edges(data=True)
        ]
        all_texts = node_texts + edge_texts
        embeddings = self.embedder.encode(all_texts)
        return all_texts, embeddings

    def create_vector_store(self):
        """Build a FAISS store over the node and edge texts.

        Raises:
            ValueError: If the graph has no nodes or edges to index.
        """
        texts, embeddings = self.create_embeddings()
        if not texts:
            raise ValueError("Cannot build a vector store from an empty graph.")
        self.vector_store = FAISS.from_embeddings(
            text_embeddings=list(zip(texts, embeddings)),
            embedding=HuggingFaceEmbeddings(model_name=self.model_name)
        )

    def setup_qa_chain(self):
        """Create the RetrievalQA chain.

        The chain retrieves through ``self.graph_retriever``; the FAISS store
        built by ``create_vector_store`` is not consulted here.
        """
        llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash",
            temperature=0.3,
            max_tokens=None,
            max_retries=2,
            # Was misspelled "tiemout", so the 15-second limit never applied.
            timeout=15,
        )
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.graph_retriever
        )

    def query(self, question):
        """Answer ``question`` with the QA chain.

        Raises:
            ValueError: If ``setup_qa_chain`` has not been called yet.
        """
        if not self.qa_chain:
            raise ValueError("QA chain not set up. Call setup_qa_chain() first.")
        return self.qa_chain.run(question)

    def get_subgraph(self, center_node, depth=3):
        """Return the subgraph of ``self.graph`` within ``depth`` hops of ``center_node``."""
        neighbourhood = nx.ego_graph(self.graph, center_node, radius=depth)
        return self.graph.subgraph(neighbourhood.nodes)


  from tqdm.autonotebook import tqdm, trange


In [None]:
# Usage: build the GraphRAG pipeline from the dict parsed out of the model reply.
# `res_as_dict` must already exist in the kernel (it is set by the
# output_parser.parse cell above).
graph_data = res_as_dict
graph_rag = GraphRAG(graph_data)
graph_rag.create_vector_store()
# Must run before query(); query() raises ValueError if the chain is missing.
graph_rag.setup_qa_chain()

# Example query answered through the graph-backed QA chain.
question = "What is the use of KnowledgeGraph Class and how does it functions?"
answer = graph_rag.query(question)
print(f"Q: {question}\nA: {answer}")

# Get the neighbourhood within two hops of the node whose id is '1'
# (node ids are strings in the parsed data).
subgraph = graph_rag.get_subgraph('1', depth=2)
print("Subgraph nodes:", subgraph.nodes(data=True))
print("Subgraph edges:", subgraph.edges(data=True))