In [13]:
# Zero-shot extraction prompt: embeds a sample GraphEmbedding snippet and asks
# the model to answer with JSON containing a single "Adjacency_matrix" key.
# NOTE(review): the opening instruction asks for "nodes and edges", but the
# format section only allows the "Adjacency_matrix" key — confirm which output
# shape is actually wanted.
prompt = """
Extract entities and relationships from the following code. Return the nodes and edges in JSON format.

Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

Format the output as JSON with the following keys and nothing else:
Adjacency_matrix
"""

In [6]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Dict
from llama_index.llms.langchain import LangChainLLM

# Load environment variables from a local .env file before building the client.
# Presumably this supplies the Google API key the client reads — verify.
load_dotenv()

# Shared chat model used by every `llm.invoke(...)` call in this notebook.
# temperature=0 keeps the extraction output as repeatable as the API allows.
# timeout=15 is passed under the documented keyword so the request limit is
# actually applied; max_retries bounds how often a failed call is retried.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=15,
    max_retries=2,
)

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Send the zero-shot `prompt` to the Gemini chat model; the reply text is read
# from `response.content` in the following cell.
response = llm.invoke(prompt)

In [15]:
# print() so the multi-line reply renders with real line breaks.
print(response.content)

```json
{
"Adjacency_matrix": {
"GraphEmbedding": {
"model": "SentenceTransformer",
"__init__": "self.model",
"embed_node": "self.model.encode(node)",
"embed_graph": {
"embeddings": "{}",
"node": "graph.nodes()",
"embeddings[node]": "self.embed_node(node)"
}
},
"SentenceTransformer": {
"model_name": "all-MiniLM-L6-v2"
},
"ge": "GraphEmbedding()",
"node_embeddings": "ge.embed_graph(kg.graph)",
"kg": "KnowledgeGraph",
"kg.graph": "graph",
"graph": "graph.nodes()",
"node": "node",
"embeddings": "{}",
"self.model": "SentenceTransformer(model_name)",
"self.model.encode(node)": "np.array",
"np.array": "embeddings[node]"
}
}
```


In [13]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Declare the two top-level keys expected in the model's JSON reply. Both
# schemas share the same shape, so they are built from their key names.
nodes_schema, edges_schema = (
    ResponseSchema(name=key, description=f"List of {key}", type="List")
    for key in ("nodes", "edges")
)

# Order here is the order the keys appear in the generated format instructions.
response_schemas = [nodes_schema, edges_schema]

In [14]:
# Build the parser from the declared schemas; it is used later to turn the
# model's reply text into a Python object via `output_parser.parse(...)`.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas=response_schemas)

In [15]:
# Text describing the reply format the parser expects.
# NOTE(review): `format_inst` is printed but never interpolated into
# `new_prompt` in the cells visible here; the prompt relies on its own worked
# example to get a ```json fenced reply — confirm this is intentional.
format_inst = output_parser.get_format_instructions()

In [16]:
# Inspect the generated format instructions.
print(format_inst)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"nodes": List  // List of nodes
	"edges": List  // List of edges
}
```


In [9]:
# One-shot extraction prompt: shows the GraphEmbedding snippet together with a
# hand-written nodes/edges JSON answer (inside a ```json fence), then appends
# the pydantic model code ("Given Code") that the model should convert.
# NOTE(review): in the worked example, edge 8 -> 12 labels `embed_node(node)`
# as what `embed_graph` returns, but the snippet's `embed_graph` returns
# `embeddings` (node 10) — confirm whether the example should be corrected.
new_prompt = """
Extract the details from the following Python code and format them as a JSON object. Each class should be a node with its methods as nodes connected by edges. Each method should have nodes for its attributes and return values. Each edge should describe the relationship between nodes.

For Example:

Code :
``` 
from sentence_transformers import SentenceTransformer
import numpy as np
from KnowledgeGraph import kg

class GraphEmbedding:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_node(self, node):
        return self.model.encode(node)

    def embed_graph(self, graph):
        embeddings = {}
        for node in graph.nodes():
            embeddings[node] = self.embed_node(node)
        return embeddings

# Usage
ge = GraphEmbedding()
node_embeddings = ge.embed_graph(kg.graph)
```

Output:
```json
{
  "nodes": [
    {"id": "1", "label": "GraphEmbedding"},
    {"id": "2", "label": "__init__"},
    {"id": "3", "label": "model_name"},
    {"id": "4", "label": "model"},
    {"id": "5", "label": "embed_node"},
    {"id": "6", "label": "node"},
    {"id": "7", "label": "model.encode(node)"},
    {"id": "8", "label": "embed_graph"},
    {"id": "9", "label": "graph"},
    {"id": "10", "label": "embeddings"},
    {"id": "11", "label": "graph.nodes()"},
    {"id": "12", "label": "embed_node(node)"}
  ],
  "edges": [
    {"source": "1", "target": "2", "label": "has method"},
    {"source": "2", "target": "3", "label": "has parameter"},
    {"source": "2", "target": "4", "label": "has attribute"},
    {"source": "1", "target": "5", "label": "has method"},
    {"source": "5", "target": "6", "label": "has parameter"},
    {"source": "5", "target": "7", "label": "returns"},
    {"source": "1", "target": "8", "label": "has method"},
    {"source": "8", "target": "9", "label": "has parameter"},
    {"source": "8", "target": "10", "label": "has attribute"},
    {"source": "8", "target": "11", "label": "has attribute"},
    {"source": "8", "target": "12", "label": "returns"}
  ]
}
```

Given Code:

```
from typing import List, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain import OpenAI

# Define the entity and relation data models
class Entity(BaseModel):
    name: str = Field(description="The name of the entity")

class Relation(BaseModel):
    entity1: str = Field(description="The first entity in the relationship")
    relationship: str = Field(description="The type of relationship")
    entity2: str = Field(description="The second entity in the relationship")

class ExtractedData(BaseModel):
    entities: List[Entity] = Field(description="List of entities")
    relations: List[Relation] = Field(description="List of relations")

```

"""

In [17]:
# Run the one-shot prompt through the same chat model; the reply text is on
# `new_res.content`.
new_res = llm.invoke(new_prompt)

In [18]:
# Show the raw reply text before parsing it.
print(new_res.content)

```json
{
  "nodes": [
    {
      "id": "1",
      "label": "Entity"
    },
    {
      "id": "2",
      "label": "name"
    },
    {
      "id": "3",
      "label": "Relation"
    },
    {
      "id": "4",
      "label": "entity1"
    },
    {
      "id": "5",
      "label": "relationship"
    },
    {
      "id": "6",
      "label": "entity2"
    },
    {
      "id": "7",
      "label": "ExtractedData"
    },
    {
      "id": "8",
      "label": "entities"
    },
    {
      "id": "9",
      "label": "relations"
    }
  ],
  "edges": [
    {
      "source": "1",
      "target": "2",
      "label": "has attribute"
    },
    {
      "source": "3",
      "target": "4",
      "label": "has attribute"
    },
    {
      "source": "3",
      "target": "5",
      "label": "has attribute"
    },
    {
      "source": "3",
      "target": "6",
      "label": "has attribute"
    },
    {
      "source": "7",
      "target": "8",
      "label": "has attribute"
    },
    {
      "source": "7

In [19]:
# Convert the model's reply text into a Python object; the cells below show it
# is a dict whose 'nodes' and 'edges' keys hold lists of dicts.
res_as_dict = output_parser.parse(new_res.content)

In [20]:
# Inspect the full parsed result.
print(res_as_dict)

{'nodes': [{'id': '1', 'label': 'Entity'}, {'id': '2', 'label': 'name'}, {'id': '3', 'label': 'Relation'}, {'id': '4', 'label': 'entity1'}, {'id': '5', 'label': 'relationship'}, {'id': '6', 'label': 'entity2'}, {'id': '7', 'label': 'ExtractedData'}, {'id': '8', 'label': 'entities'}, {'id': '9', 'label': 'relations'}], 'edges': [{'source': '1', 'target': '2', 'label': 'has attribute'}, {'source': '3', 'target': '4', 'label': 'has attribute'}, {'source': '3', 'target': '5', 'label': 'has attribute'}, {'source': '3', 'target': '6', 'label': 'has attribute'}, {'source': '7', 'target': '8', 'label': 'has attribute'}, {'source': '7', 'target': '9', 'label': 'has attribute'}]}


In [21]:
# Confirm the parser returned a plain dict rather than a string.
print(type(res_as_dict))

<class 'dict'>


In [22]:
# Inspect only the extracted node list.
print(res_as_dict['nodes'])

[{'id': '1', 'label': 'Entity'}, {'id': '2', 'label': 'name'}, {'id': '3', 'label': 'Relation'}, {'id': '4', 'label': 'entity1'}, {'id': '5', 'label': 'relationship'}, {'id': '6', 'label': 'entity2'}, {'id': '7', 'label': 'ExtractedData'}, {'id': '8', 'label': 'entities'}, {'id': '9', 'label': 'relations'}]
