In [13]:
# First-attempt prompt: asks the model to describe the sample code below as an
# "adjacency matrix" and to answer as JSON with a single Adjacency_matrix key.
# The text is sent to the model verbatim, so whitespace inside it matters.
prompt = """
Extract entities and relationships from the following code. Return the entities and relationships in a adjacency matrix format, where nodes can be functions, objects or variables and edge is the value it is returning to other nodes.

Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

Format the output as JSON with the following keys and nothing else:
Adjacency_matrix
"""

In [49]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Dict
from llama_index.llms.langchain import LangChainLLM

# Load environment variables from a local .env file before building the client
# (presumably this is where the Google API key comes from — confirm).
load_dotenv()

# Deterministic (temperature=0) Gemini chat model used by every cell below.
# The original call passed `timeout=None` plus a misspelled `tiemout = 15`, so
# the intended 15-second request timeout was never applied to the real
# `timeout` parameter. It is now passed once, correctly spelled.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,  # no explicit cap on the response length
    max_retries=2,
    timeout=15,
)

In [14]:
# Send the first prompt to the chat model and keep the whole response object.
# NOTE(review): this cell's execution count (14) is lower than that of the cell
# that builds `llm` (49); re-run the notebook top to bottom to confirm it still works.
response = llm.invoke(prompt)

In [15]:
# Show the raw text of the model's reply (printed so newlines render as-is).
print(response.content)

```json
{
"Adjacency_matrix": {
"GraphEmbedding": {
"model": "SentenceTransformer",
"__init__": "self.model",
"embed_node": "self.model.encode(node)",
"embed_graph": {
"embeddings": "{}",
"node": "graph.nodes()",
"embeddings[node]": "self.embed_node(node)"
}
},
"SentenceTransformer": {
"model_name": "all-MiniLM-L6-v2"
},
"ge": "GraphEmbedding()",
"node_embeddings": "ge.embed_graph(kg.graph)",
"kg": "KnowledgeGraph",
"kg.graph": "graph",
"graph": "graph.nodes()",
"node": "node",
"embeddings": "{}",
"self.model": "SentenceTransformer(model_name)",
"self.model.encode(node)": "np.array",
"np.array": "embeddings[node]"
}
}
```


In [28]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# The single field the model is asked to return: the adjacency matrix,
# described to the model as a "List".
adj_matrix_schema = ResponseSchema(
    name="Adjacency_matrix",
    description="Adjacency matrix of the code",
    type="List",
)

# Schemas handed to the structured output parser (only one field here).
response_schemas = [adj_matrix_schema]

In [29]:
# Build a parser for the fields declared in `response_schemas`; it is used
# later both to produce format instructions and to parse the model's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas=response_schemas)

In [30]:
# Text that tells the model how to format its answer so `output_parser` can
# read it (a fenced ```json block containing the Adjacency_matrix key).
format_inst = output_parser.get_format_instructions()

In [31]:
# Inspect the generated format instructions before putting them in a prompt.
print(format_inst)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"Adjacency_matrix": List  // Adjacency matrix of the code
}
```


In [50]:
# Second prompt: asks for a directed-graph adjacency matrix with explicit rules
# for rows, columns and cell values, followed by the parser's format instructions.
# The instructions are taken from `format_inst` rather than pasted in by hand,
# so the prompt cannot drift from `response_schemas`; for the current schema the
# resulting string is the same as the previously hard-coded text.
new_prompt = """
Extract entities and relationships from the following code. Return the entities and relationships in a adjacency matrix format. Make sure the adjacency matrix should represent a Directed Graph.

In the adjacency matrix where:
- Rows and columns represent the entities (nodes).
- Matrix elements represent the relationships between entities.
- Use 0 if there's no direct relationship.
- Use a word or short phrase to describe the relationship if one exists.


Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

""" + format_inst + "\n\n"

In [51]:
# Send the directed-graph prompt to the same chat model.
new_res = llm.invoke(new_prompt)

In [52]:
# Show the raw reply text.
# NOTE(review): the saved output of this cell is empty — re-run and confirm the
# model actually returned content before relying on the parsed result.
print(new_res.content)




In [41]:
# Parse the model's reply into a Python object using the structured parser.
# NOTE(review): this cell's execution count (41) is lower than that of the cell
# that last produced `new_res` (51), so the saved outputs below may come from
# an earlier response — re-run in order to confirm.
res_as_dict = output_parser.parse(new_res.content)

In [42]:
# Dump the whole parsed result to check its structure.
print(res_as_dict)

{'Adjacency_matrix': [['', 'model_name', 'model', 'embed_node', 'embed_graph', 'ge', 'node_embeddings', 'kg', 'graph', 'node', 'embeddings'], ['model_name', 0, 'parameter', 0, 0, 0, 0, 0, 0, 0, 0], ['model', 0, 0, 'attribute', 0, 0, 0, 0, 0, 0, 0], ['embed_node', 0, 0, 0, 0, 0, 0, 0, 0, 'parameter', 0], ['embed_graph', 0, 0, 0, 0, 0, 0, 0, 'parameter', 0, 'return'], ['ge', 0, 0, 0, 0, 0, 'attribute', 0, 0, 0, 0], ['node_embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'return'], ['kg', 0, 0, 0, 0, 0, 0, 0, 'attribute', 0, 0], ['graph', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'], ['node', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'], ['embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}


In [43]:
# Confirm what kind of object the parser produced (the saved output shows a dict).
print(type(res_as_dict))

<class 'dict'>


In [44]:
# Pull out just the matrix: a list of rows whose first row/column are node labels.
print(res_as_dict['Adjacency_matrix'])

[['', 'model_name', 'model', 'embed_node', 'embed_graph', 'ge', 'node_embeddings', 'kg', 'graph', 'node', 'embeddings'], ['model_name', 0, 'parameter', 0, 0, 0, 0, 0, 0, 0, 0], ['model', 0, 0, 'attribute', 0, 0, 0, 0, 0, 0, 0], ['embed_node', 0, 0, 0, 0, 0, 0, 0, 0, 'parameter', 0], ['embed_graph', 0, 0, 0, 0, 0, 0, 0, 'parameter', 0, 'return'], ['ge', 0, 0, 0, 0, 0, 'attribute', 0, 0, 0, 0], ['node_embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'return'], ['kg', 0, 0, 0, 0, 0, 0, 0, 'attribute', 0, 0], ['graph', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'], ['node', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'], ['embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


In [None]:
# Hand-formatted copy of the parsed adjacency matrix, one row per line.
# The first row and the first column hold the node labels; a cell holds 0 for
# "no direct relationship" or a short word naming the relationship.
[
    ['', 'model_name', 'model', 'embed_node', 'embed_graph', 'ge', 'node_embeddings', 'kg', 'graph', 'node', 'embeddings'],
    ['model_name', 0, 'parameter', 0, 0, 0, 0, 0, 0, 0, 0],
    ['model', 0, 0, 'attribute', 0, 0, 0, 0, 0, 0, 0],
    ['embed_node', 0, 0, 0, 0, 0, 0, 0, 0, 'parameter', 0],
    ['embed_graph', 0, 0, 0, 0, 0, 0, 0, 'parameter', 0, 'return'],
    ['ge', 0, 0, 0, 0, 0, 'attribute', 0, 0, 0, 0],
    ['node_embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'return'],
    ['kg', 0, 0, 0, 0, 0, 0, 0, 'attribute', 0, 0],
    ['graph', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'],
    ['node', 0, 0, 0, 0, 0, 0, 0, 0, 0, 'parameter'],
    ['embeddings', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
