# Creating Knowledge Graphs from PDF Files

In [1]:
%pip install -qU langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [2]:
# =============================
# **1. Import Necessary Libraries**
# =============================

import gc
import logging
import os
import re
from typing import Any, Dict, List

import torch
from dotenv import load_dotenv
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains import GraphCypherQAChain
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.question_answering.stuff_prompt import CHAT_PROMPT
from langchain.chat_models import ChatOpenAI
from langchain.graphs import Neo4jGraph
from langchain.llms import HuggingFacePipeline
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.graphs.graph_document import Node, Relationship, GraphDocument
from neo4j import GraphDatabase
from pydantic import BaseModel, Field
from torch import cuda, bfloat16
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    StoppingCriteria,
    StoppingCriteriaList,
    pipeline,
)

# =============================
# **2. Setup Logging and Environment Variables**
# =============================

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from .env file
load_dotenv()

# Define Neo4j connection parameters
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
# Other cells of this notebook set/read the user name as NEO4J_USER, while this
# cell originally only looked at NEO4J_USERNAME. Accept both spellings so a user
# name configured for either one is honoured; NEO4J_USERNAME keeps priority here.
NEO4J_USER = os.getenv("NEO4J_USERNAME") or os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")  # Update as needed

# =============================
# **3. Initialize HuggingFace Model and Tokenizer**
# =============================

# NOTE: google/gemma-7b is a gated repository; the Hugging Face account used to
# download it must have accepted the licence and be authenticated, otherwise
# the download fails with a 401 "gated repo" error.
model_id = 'google/gemma-7b'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Initialize model configuration
# AutoConfig only fetches the configuration; the previous code called
# AutoModelForSeq2SeqLM.from_pretrained here, which tries to load a whole model
# just to obtain a "config" object.
model_config = AutoConfig.from_pretrained(
    model_id,
    return_dict=True
)

# BnB Configuration
# 4-bit NF4 quantization with double quantization; matmuls run in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# Load the model with quantization
# Gemma is a decoder-only (causal) language model, so it must be loaded through
# AutoModelForCausalLM; the seq2seq auto-class does not accept its config and
# the "text-generation" pipeline used later expects a causal LM.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',
    quantization_config=bnb_config,
    low_cpu_mem_usage=True
)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# =============================
# **4. Define Utility Functions**
# =============================

def bytes_to_giga_bytes(bytes_val):
    """Convert a byte count to gibibytes (three successive divisions by 1024).

    Args:
        bytes_val: Size in bytes (any numeric value).

    Returns:
        The size expressed in units of 1024**3 bytes, as a float.
    """
    size = bytes_val
    # bytes -> KiB -> MiB -> GiB
    for _ in range(3):
        size = size / 1024
    return size

def flush():
    """Release unreferenced Python objects and, when a GPU is present, CUDA caches.

    Runs the garbage collector first so tensors that are no longer referenced
    are actually freed, then empties the CUDA caching allocator and resets the
    peak-memory counters. The CUDA calls are skipped on machines without a GPU
    so the helper is safe to call in the CPU fallback this notebook supports.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

flush()

# Define stopping criteria
stop_list = ['\nHuman:', '\n```\n']
# Tokenize each stop sequence WITHOUT special tokens: with the default settings
# the tokenizer may prepend a beginning-of-sequence token, and a stop sequence
# that starts with that token can never equal the tail of the generated ids.
stop_token_ids = [
    torch.LongTensor(tokenizer(x, add_special_tokens=False)['input_ids']).to(device)
    for x in stop_list
]

class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the output ends with a stop sequence.

    The stop sequences are read from the module-level ``stop_token_ids`` list
    of token-id tensors.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Only the first sequence of the batch is inspected.
        generated = input_ids[0]
        # Compare the last len(stop_ids) generated ids against each stop sequence.
        return any(
            torch.equal(generated[-len(stop_ids):], stop_ids)
            for stop_ids in stop_token_ids
        )

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

# =============================
# **5. Setup Text Generation Pipeline**
# =============================

# The model was loaded with device_map='auto', i.e. its placement is already
# managed by accelerate. Passing an explicit `device=` to pipeline() in that
# situation is rejected (or ignored, depending on the transformers version),
# so the argument is intentionally not given here.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # `temperature` only has an effect when sampling is enabled.
    do_sample=True,
    temperature=0.3,
    max_new_tokens=512,
    repetition_penalty=1.1,
    stopping_criteria=stopping_criteria,
)

# Smoke test of the generation pipeline.
result = generate_text("What are the primary mechanisms underlying antibiotic resistance, and how can we develop strategies to combat it?")
print(result)

from langchain.chains import GraphCypherQAChain

# =============================
# **6. Initialize LLM**
# =============================

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# =============================
# **7. Load and Process Documents**
# =============================

from langchain_community.document_loaders import DirectoryLoader

# The dataset location can be overridden through BIOMEDICAL_DATA_DIR so the
# notebook also runs outside Colab; the default is the original Drive path.
DATA_DIR = os.getenv("BIOMEDICAL_DATA_DIR", '/content/drive/MyDrive/BioMedical-Dataset')

loader = DirectoryLoader(DATA_DIR, glob="**/*.pdf")
documents = loader.load()

print(f"Number of documents loaded: {len(documents)}")

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Separators are tried in order, from the coarsest (markdown headings, code
# fences, horizontal rules) down to single characters. Several of them are
# regular expressions ("#{1,6}", "\*\*\*+", "---+", "___+").
MARKDOWN_SEPARATORS = [
    "\n#{1,6} ",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",
    "",
]
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=30,
    add_start_index=True,
    separators=MARKDOWN_SEPARATORS,
    # Without this flag the separators are matched literally, so the regex
    # patterns above would never match any text.
    is_separator_regex=True,
)

processed_text_splits = text_splitter.split_documents(documents)

print(f"Number of text splits: {len(processed_text_splits)}")

# =============================
# **8. Generate Embeddings**
# =============================

model_name = "BAAI/bge-base-en-v1.5"
# Follow the same GPU/CPU fallback used when choosing `device` for the LLM
# instead of hard-coding "cuda", which fails on machines without a GPU.
model_kwargs = {"device": "cuda" if cuda.is_available() else "cpu"}
# Normalized vectors, as configured for the embedding encoder.
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# =============================
# **9. Initialize Neo4j Graph**
# =============================

# LangChain graph wrapper used by the chains below; connects with the
# NEO4J_* settings resolved at the top of this cell.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD
)

# =============================
# **10. Create Vector Index in Neo4j**
# =============================

# Assuming the vector index is already created in Neo4j
# If not, uncomment and run the following code once
# NOTE(review): the statement below is kept as an inert string literal, it is
# never executed. It indexes label 'text_splits' / property 'embeddings',
# whereas the insert helper in section 12 writes (:Chunk).embedding -- the two
# do not line up and should be reconciled before the index is created.
# The dimension 768 presumably matches the embedding model configured in
# section 8 -- TODO confirm.
'''
graph.query("""
CALL db.index.vector.createNodeIndex(
  'KG-Enhanced-QnA-Biomedical',
  'text_splits',
  'embeddings',
   768,
   'cosine'
)
""")
'''

# =============================
# **11. Display Created Vector Indexes**
# =============================

from neo4j import GraphDatabase

def show_vector_indexes(uri, username, password):
    """Print every record returned by ``SHOW VECTOR INDEXES``.

    Args:
        uri: Neo4j connection URI.
        username: Neo4j user name.
        password: Neo4j password.

    The driver is closed in a ``finally`` block so the connection is released
    even when opening the session or running the query raises.
    """
    driver = GraphDatabase.driver(uri, auth=(username, password))
    try:
        with driver.session() as session:
            for record in session.run("SHOW VECTOR INDEXES"):
                print(record)
    finally:
        driver.close()

show_vector_indexes(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# =============================
# **12. Insert Embeddings into Neo4j**
# =============================

# Define the data to be inserted
# Very short splits (50 characters or fewer) are skipped.
chunk_texts = [
    document.page_content
    for document in processed_text_splits
    if len(document.page_content) > 50
]
# Passages are embedded with embed_documents (one batched call) rather than
# embed_query: embed_query is meant for search queries and applies the model's
# query instruction, which should not be baked into stored passage vectors.
chunks = [
    {'text': text, 'embedding': embedding}
    for text, embedding in zip(chunk_texts, embeddings.embed_documents(chunk_texts))
]

# Insert data into Neo4j
def insert_embeddings_into_neo4j(chunks, graph):
    """Create one ``Chunk`` node per entry of ``chunks`` and attach its vector.

    Args:
        chunks: List of dicts with the keys ``text`` and ``embedding``.
        graph: Object exposing ``query(cypher, params)``; the whole list is
            sent as the ``$data`` parameter of a single statement.
    """
    cypher = """
    UNWIND $data AS row
    CREATE (c:Chunk {text: row.text})
    WITH c, row
    CALL db.create.setVectorProperty(c, 'embedding', row.embedding)
    YIELD node
    RETURN distinct 'done'
    """
    parameters = {'data': chunks}
    graph.query(cypher, parameters)

# Uncomment to insert embeddings
# insert_embeddings_into_neo4j(chunks, graph)

# =============================
# **13. Initialize Neo4jVector**
# =============================

# Build the vector store from every text split (no length filter is applied
# here, unlike the `chunks` list prepared in section 12).
# NOTE(review): this presumably embeds and writes all splits to Neo4j each time
# the cell runs, so re-running it may create duplicate nodes -- confirm against
# the Neo4jVector.from_documents documentation.
neo4j_vector = Neo4jVector.from_documents(
    processed_text_splits,
    embeddings,
    index_name='KG-Enhanced-QnA-Biomedical',
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD
)

# =============================
# **14. Perform Similarity Search**
# =============================

query = "How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?"
# Retrieve the two stored chunks closest to the question.
vector_results = neo4j_vector.similarity_search(query, k=2)

for i, res in enumerate(vector_results):
    print(f"Result {i+1}:")
    print(res.page_content)
    # Blank line between results, but not after the last one.
    if i != len(vector_results) - 1:
        print()

# Keep the best hit for later use; an empty index yields no results, in which
# case indexing [0] would raise IndexError, so fall back to None instead.
vector_result = vector_results[0].page_content if vector_results else None

# =============================
# **15. Define Custom Neo4jVectorChain**
# =============================

class Neo4jVectorChain(Chain):
    """Question-answering chain that retrieves context through a Neo4j vector query.

    NOTE(review): this class is not instantiated anywhere in the visible
    notebook; ``Neo4jVectorChainCustom`` below is the one that is used.
    """
    graph: Neo4jGraph = Field(exclude=True)
    input_key: str = "query"
    output_key: str = "result"
    # The two defaults below are evaluated once, when this class body executes:
    # an embedding object with default settings is constructed immediately and
    # an LLMChain is built from the module-level `llm`.
    embeddings: HuggingFaceBgeEmbeddings = HuggingFaceBgeEmbeddings()
    qa_chain: LLMChain = LLMChain(llm=llm, prompt=CHAT_PROMPT)

    @property
    def input_keys(self) -> List[str]:
        """Name of the single input key expected by the chain."""
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Name of the single output key produced by the chain."""
        return [self.output_key]

    def _call(self, inputs: Dict[str, str], run_manager, k=3) -> Dict[str, Any]:
        """Embed the question, fetch ``k`` chunks from Neo4j and ask the QA chain.

        ``run_manager`` is accepted but not used. NOTE(review): ``k`` can only
        be changed by calling ``_call`` directly -- whether the base class ever
        forwards it is not visible here.
        """
        question = inputs[self.input_key]
        embedding = self.embeddings.embed_query(question)

        # `vector_search` is a module-level Cypher string defined after this
        # class; it is looked up when _call runs, not when the class is defined.
        context = self.graph.query(vector_search, {'embedding': embedding, 'k': k})
        context = [el['result'] for el in context]

        # The context is handed to the prompt as a Python list of strings.
        result = self.qa_chain({"question": question, "context": context})
        final_result = result[self.qa_chain.output_key]
        return {self.output_key: final_result}

# Define the Cypher query for vector search
# Parameters: $embedding (query vector) and $k (number of neighbours).
# Returns the `text` property of each matching node as `result`, best score
# first. The index name must match the one the vector store was created with.
vector_search = """
WITH $embedding AS e
CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical', $k, e) YIELD node, score
RETURN node.text AS result
ORDER BY score DESC
LIMIT $k
"""

# Initialize the custom Neo4jVectorChain
class Neo4jVectorChainCustom(Chain):
    """Retrieval QA chain: embed the question, fetch similar chunks, answer.

    All collaborators are injected explicitly, so nothing heavy is built when
    the class is defined.

    Attributes:
        graph: Neo4j graph wrapper used to run the ``vector_search`` query.
        embeddings: Embedding model used to embed the incoming question.
        qa_chain: LLM chain that receives ``question`` and ``context``.
        input_key: Key under which the question is passed in.
        output_key: Key under which the answer is returned.
        top_k: Number of chunks retrieved from the vector index (default 3,
            the value that used to be hard-coded).
    """
    graph: Neo4jGraph
    embeddings: HuggingFaceBgeEmbeddings
    qa_chain: LLMChain
    input_key: str = "query"
    output_key: str = "result"
    top_k: int = 3

    @property
    def input_keys(self) -> List[str]:
        """Name of the single input key expected by the chain."""
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Name of the single output key produced by the chain."""
        return [self.output_key]

    def _call(self, inputs: Dict[str, str], run_manager=None) -> Dict[str, Any]:
        """Answer ``inputs[input_key]`` using chunks retrieved from Neo4j.

        Returns:
            A dict mapping ``output_key`` to the QA chain's answer.
        """
        question = inputs[self.input_key]
        embedding = self.embeddings.embed_query(question)

        rows = self.graph.query(
            vector_search,
            {'embedding': embedding, 'k': self.top_k}
        )
        # Join the retrieved passages into one text block. Passing the raw list
        # would put its Python repr (brackets, quotes, escaped newlines) into
        # the prompt. Rows without text are skipped.
        context = "\n\n".join(row['result'] for row in rows if row['result'])

        result = self.qa_chain({"question": question, "context": context})
        final_result = result[self.qa_chain.output_key]
        return {self.output_key: final_result}

# Instantiate the custom chain
# Wire the custom chain with the graph, embedding model and a QA LLMChain that
# uses the stock chat "stuff" prompt.
chain = Neo4jVectorChainCustom(
    graph=graph,
    embeddings=embeddings,
    qa_chain=LLMChain(llm=llm, prompt=CHAT_PROMPT),
    verbose=True
)

# =============================
# **16. Run the Custom Chain for QA**
# =============================

# Same question as the similarity search in section 14.
graph_result = chain.run(
    "How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?"
)

print("QA Result:")
print(graph_result)

# =============================
# **17. Alternative: Using GraphCypherQAChain**
# =============================

# Here the same LLM is used both to write the Cypher statement and to phrase
# the final answer.
# NOTE(review): intermediate steps are requested, but `.run()` below only
# yields the final answer -- confirm whether the steps are actually needed.
chain_alternative = GraphCypherQAChain.from_llm(
    cypher_llm=llm,
    qa_llm=llm,
    graph=graph,
    verbose=True,
    return_intermediate_steps=True,
    validate_cypher=True
)

graph_result_alternative = chain_alternative.run(
    "How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?"
)

print("Alternative QA Result:")
print(graph_result_alternative)

# =============================
# **18. Clean Up Neo4j Driver**
# =============================



  from .autonotebook import tqdm as notebook_tqdm


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/google/gemma-7b.
401 Client Error. (Request ID: Root=1-670fc297-3496578037ee00c4171f6528;a3640e26-020f-4726-9e75-77e490ab600b)

Cannot access gated repo for url https://huggingface.co/google/gemma-7b/resolve/main/config.json.
Access to model google/gemma-7b is restricted. You must have access to it and be authenticated to access it. Please log in.

In [3]:
from langchain.graphs import Neo4jGraph

# neo4j_uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
# neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
# neo4j_password = os.environ.get('NEO4J_PASSWORD', 'password')

# url = "bolt://localhost:7687"
# username ="neo4j"
# password = "password"
# graph = Neo4jGraph(
#     url=url,
#     username=username,
#     password=password
# )

In [4]:
import os
from getpass import getpass
from dotenv import load_dotenv 

load_dotenv()

# Add OpenAI key
# SECURITY: a real API key used to be hard-coded in this cell. Secrets must
# never be stored in a notebook; the key that was committed here has to be
# treated as leaked and revoked. The key is now taken from the environment
# (for example from the .env file loaded above) and only requested
# interactively, without echoing it, when it is not configured.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = api_key
os.environ["API_KEY_OPENAI"] = api_key


In [None]:
import os
from langchain_community.graphs.graph_document import GraphDocument
from langchain.schema import Document
from typing import List, Optional
from langchain.pydantic_v1 import Field, BaseModel
from neo4j import GraphDatabase

# Set environment variables for the Neo4j connection.
# NOTE(review): these assignments overwrite anything already present in the
# process environment (including values loaded from .env), and the user name
# is stored under NEO4J_USER while the first cell of this notebook reads
# NEO4J_USERNAME -- confirm which spelling is intended.
os.environ["NEO4J_URI"] = "bolt://localhost:7687"
os.environ["NEO4J_USER"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "password"  # placeholder credential; replace with the real password via configuration

# Pydantic model for one key/value property. The docstring and the field
# descriptions below are left untouched on purpose: they presumably end up in
# the schema generated from this model -- verify before rewording them.
class Property(BaseModel):
    """A single property consisting of key and value"""
    # Both fields are required (Field(...)) and typed as strings.
    key: str = Field(..., description="key")
    value: str = Field(..., description="value")

# class Node(BaseNode, BaseModel):
#     properties: Optional[List[Property]] = Field(
#         None, description="List of node properties"
#     )

# class Relationship(BaseRelationship, BaseModel):
#     properties: Optional[List[Property]] = Field(
#         None, description="List of relationship properties"
#     )


In [11]:
from langchain_community.graphs.graph_document import Node, Relationship, GraphDocument
from langchain.schema import Document
from typing import Dict, Any
from neo4j import GraphDatabase
from dotenv import load_dotenv
import os
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from .env file
load_dotenv()

# Define Neo4j connection parameters
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
# The first cell of this notebook reads the user name from NEO4J_USERNAME,
# this cell from NEO4J_USER. Accept both spellings so a single configuration
# works everywhere; NEO4J_USER keeps priority here for backward compatibility.
NEO4J_USER = os.getenv("NEO4J_USER") or os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")  # Update as needed

def get_neo4j_driver():
    """Create a Neo4j driver from the module-level NEO4J_* settings.

    Returns:
        The driver object; the caller is responsible for closing it.
    """
    credentials = (NEO4J_USER, NEO4J_PASSWORD)
    return GraphDatabase.driver(NEO4J_URI, auth=credentials)

# Shared driver used by the example at the end of this cell.
driver = get_neo4j_driver()

def format_property_key(s: str) -> str:
    """Turn a whitespace-separated phrase into a camelCase property key.

    The first word is lower-cased, every following word is capitalized, and
    the words are concatenated. A string without any word (empty or only
    whitespace) is returned unchanged.
    """
    tokens = s.split()
    if len(tokens) == 0:
        return s
    pieces = []
    for position, token in enumerate(tokens):
        pieces.append(token.lower() if position == 0 else token.capitalize())
    return "".join(pieces)

def map_to_node_data(node: Node) -> Dict[str, Any]:
    """Convert a graph ``Node`` into the plain dict used by the insert helpers.

    Returns:
        A dict with the keys ``id``, ``type`` (capitalized) and ``properties``
        (a copy of the node's properties; for string ids a ``name`` entry
        holding the title-cased id is added).

    Raises:
        AttributeError: If the object has no ``id`` or ``type`` attribute.
        TypeError: If ``id`` is not a str/int or ``type`` is not a str.
    """
    logger.info(f"Mapping Node: {node}")
    if not (hasattr(node, 'id') and hasattr(node, 'type')):
        raise AttributeError("Node is missing 'id' or 'type' attributes")
    if not (isinstance(node.id, (str, int)) and isinstance(node.type, str)):
        raise TypeError("'id' must be str or int and 'type' must be a string")

    # Work on a copy so the caller's node is never modified.
    if node.properties:
        properties = node.properties.copy()
    else:
        properties = {}
    # Optionally add a 'name' property if needed
    if isinstance(node.id, str):
        properties["name"] = node.id.title()

    node_data = {
        "id": node.id,
        "type": node.type.capitalize(),
        "properties": properties,
    }
    return node_data

def map_to_relationship_data(rel: Relationship) -> Dict[str, Any]:
    """Convert a graph ``Relationship`` into the dict used by the insert helpers.

    Returns:
        A dict with ``source`` and ``target`` (both mapped through
        ``map_to_node_data``), ``type`` and a copy of ``properties``.

    Raises:
        AttributeError: If ``type``, ``source`` or ``target`` is missing.
        TypeError: If ``type`` is not a str or an endpoint is not a ``Node``.
    """
    logger.info(f"Mapping Relationship: {rel}")
    required_attributes = ('type', 'source', 'target')
    if not all(hasattr(rel, attribute) for attribute in required_attributes):
        raise AttributeError("Relationship is missing 'type', 'source', or 'target' attributes")
    if not isinstance(rel.type, str):
        raise TypeError("'type' attribute must be a string")
    if not (isinstance(rel.source, Node) and isinstance(rel.target, Node)):
        raise TypeError("'source' and 'target' must be instances of Node")

    # Entries are evaluated top to bottom: source node, target node, properties.
    return {
        "source": map_to_node_data(rel.source),
        "target": map_to_node_data(rel.target),
        "type": rel.type,
        "properties": rel.properties.copy() if rel.properties else {},
    }

def create_node(tx, node_data):
    """MERGE a node by label and ``id`` and add its properties.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        node_data: Dict with the keys ``id``, ``type`` (used as the node
            label) and ``properties``.

    Returns:
        The single record returned by the statement.
    """
    logger.info(f"Inserting node: {node_data['id']} of type {node_data['type']}")
    # Labels cannot be passed as query parameters, so the label has to be
    # written into the statement text. It is quoted with backticks; a backtick
    # inside the label is doubled so that the label cannot terminate the
    # quoting and inject additional Cypher.
    label = str(node_data['type']).replace("`", "``")
    query = (
        f"MERGE (n:`{label}` {{id: $id}}) "
        "SET n += $properties "
        "RETURN n"
    )
    result = tx.run(query, id=node_data["id"], properties=node_data["properties"])
    return result.single()

def create_relationship(tx, rel_data):
    """MERGE a relationship between two existing nodes and add its properties.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        rel_data: Dict with ``type``, ``properties`` and the ``source`` /
            ``target`` node dicts (each providing ``id`` and ``type``).

    Returns:
        The single record returned by the statement (``None`` when either
        endpoint node does not exist, because the MATCH yields no row).
    """
    logger.info(f"Inserting relationship: {rel_data['type']} between {rel_data['source']['id']} and {rel_data['target']['id']}")

    def _quote(identifier):
        # Labels and relationship types cannot be query parameters. Quote them
        # with backticks and double any embedded backtick so the value cannot
        # break out of the quoting and inject Cypher.
        return "`" + str(identifier).replace("`", "``") + "`"

    query = (
        f"MATCH (a:{_quote(rel_data['source']['type'])} {{id: $source_id}}), "
        f"(b:{_quote(rel_data['target']['type'])} {{id: $target_id}}) "
        f"MERGE (a)-[r:{_quote(rel_data['type'])}]->(b) "
        "SET r += $properties "
        "RETURN r"
    )
    result = tx.run(
        query,
        source_id=rel_data["source"]["id"],
        target_id=rel_data["target"]["id"],
        properties=rel_data["properties"]
    )
    return result.single()

def insert_knowledge_graph(knowledge_graph: GraphDocument, driver):
    """Write every node and relationship of ``knowledge_graph`` to Neo4j.

    One session is opened on ``driver``; each element is written in its own
    write transaction. All nodes are written before any relationship, because
    the relationship statement matches its endpoint nodes.
    """
    with driver.session() as session:
        # Nodes first (map() is lazy, so mapping and writing alternate).
        for node_data in map(map_to_node_data, knowledge_graph.nodes):
            session.execute_write(create_node, node_data)

        # Then the relationships between them.
        for rel_data in map(map_to_relationship_data, knowledge_graph.relationships):
            session.execute_write(create_relationship, rel_data)

# Example Usage
# Builds a two-node, one-relationship graph and writes it to Neo4j through the
# helpers defined above.
if __name__ == "__main__":
    # Define your nodes
    node1 = Node(
        id="john_doe",
        type="Person",
        properties={
            "age": "30",
            "birthDate": "1990-01-01",
        }
    )

    node2 = Node(
        id="acme_corp",
        type="Organization",
        properties={
            "industry": "Technology",
        }
    )

    # Define a relationship
    relationship = Relationship(
        source=node1,
        target=node2,
        type="WORKS_AT",
        properties={
            "since": "2015",
        }
    )

    # Create GraphDocument
    knowledge_graph = GraphDocument(
        nodes=[node1, node2],
        relationships=[relationship],
        source=Document(page_content="John Doe works at Acme Corp since 2015.")
    )

    # Insert into Neo4j
    try:
        insert_knowledge_graph(knowledge_graph, driver)
        logger.info("Knowledge graph inserted into Neo4j successfully.")
    except Exception as e:
        # Only the message is logged; the exception is not re-raised.
        logger.error(f"Error inserting knowledge graph: {e}")
    finally:
        # This closes the module-level `driver`; any later code that needs a
        # connection has to create a new driver.
        driver.close()


INFO:__main__:Mapping Node: id='john_doe' type='Person' properties={'age': '30', 'birthDate': '1990-01-01'}
INFO:__main__:Inserting node: john_doe of type Person
INFO:__main__:Mapping Node: id='acme_corp' type='Organization' properties={'industry': 'Technology'}
INFO:__main__:Inserting node: acme_corp of type Organization
INFO:__main__:Mapping Relationship: source=Node(id='john_doe', type='Person', properties={'age': '30', 'birthDate': '1990-01-01'}) target=Node(id='acme_corp', type='Organization', properties={'industry': 'Technology'}) type='WORKS_AT' properties={'since': '2015'}
INFO:__main__:Mapping Node: id='john_doe' type='Person' properties={'age': '30', 'birthDate': '1990-01-01'}
INFO:__main__:Mapping Node: id='acme_corp' type='Organization' properties={'industry': 'Technology'}
INFO:__main__:Inserting relationship: WORKS_AT between john_doe and acme_corp
INFO:neo4j.notifications:Received notification from DBMS server: {severity: INFORMATION} {code: Neo.ClientNotification.Statem

# Using the GPT-3.5-turbo-16k model to create the knowledge graph

In [9]:
import os
from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# os.environ["OPENAI_API_KEY"] = "sk-"
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None,
    allowed_rels: Optional[List[str]] = None
    ):
    """Build the structured-output chain that extracts a knowledge graph.

    Args:
        allowed_nodes: Optional list of node labels; when given, a line
            listing them is added to the system prompt.
        allowed_rels: Optional list of relationship types; when given, a line
            listing them is added to the system prompt.

    Returns:
        The chain created by ``create_structured_output_chain`` for the
        ``KnowledgeGraph`` schema, the module-level ``llm`` and the prompt
        assembled here.

    NOTE(review): ``KnowledgeGraph`` is not defined in the visible part of
    this notebook -- it must exist before this function is called.
    """
    # Optional restriction lines; each is an empty string when no list is given.
    node_rule = '- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""
    rel_rule = '- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""

    system_prompt = f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
{node_rule}
{rel_rule}
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
          """

    # One system message followed by two human messages; {input} is filled in
    # when the chain is invoked.
    messages = [
        ("system", system_prompt),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Tip: Make sure to answer in the correct format"),
    ]
    prompt = ChatPromptTemplate.from_messages(messages)
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)

Failed to write data to connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687)))


In [12]:
def extract_and_store_graph(
    document: Document,
    nodes: Optional[List[str]] = None,
    rels: Optional[List[str]] = None) -> GraphDocument:
    """Extract a knowledge graph from ``document`` and store it in Neo4j.

    Args:
        document: Text chunk to extract entities and relationships from.
        nodes: Optional list of allowed node labels passed to the prompt.
        rels: Optional list of allowed relationship types passed to the prompt.

    Returns:
        The ``GraphDocument`` that was stored. (The previous annotation said
        ``None`` although the function always returned the graph document,
        which the calling loop relies on.)

    NOTE(review): relies on ``map_to_base_node``, ``map_to_base_relationship``
    and a module-level ``graph`` object, none of which is defined in the
    visible part of this notebook -- they must exist before this is called.
    """
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain(nodes, rels)
    data = extract_chain.invoke(document.page_content)['function']
    # Construct a graph document
    graph_document = GraphDocument(
        nodes=[map_to_base_node(node) for node in data.nodes],
        relationships=[map_to_base_relationship(rel) for rel in data.rels],
        source=document,
    )
    # Store information into a graph
    print(graph_document)
    graph.add_graph_documents([graph_document])
    return graph_document

In [13]:

from langchain.text_splitter import TokenTextSplitter

from langchain_community.document_loaders import PyPDFLoader

from datetime import datetime

# Path is relative to the notebook's working directory; the cell fails with
# "not a valid file or url" when the PDF is not present there.
loader = PyPDFLoader("../data/Football_news.pdf")

# Start of the timing window reported in the comparison JSON further below.
start_time = datetime.now()

pages = loader.load_and_split()

# Define chunking strategy
text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20)

# Only take the first 4 pages of the document
# NOTE(review): `pages` comes from load_and_split(), so its elements may be
# splits rather than whole PDF pages -- confirm that [:4] selects what is
# intended.
documents = text_splitter.split_documents(pages[:4])

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


ValueError: File path ../data/Football_news.pdf is not a valid file or url

In [14]:
from tqdm import tqdm

# Accumulators read by the comparison cell below.
distinct_nodes = set()
relations = []

for i, d in tqdm(enumerate(documents), total=len(documents)):
    graph_document = extract_and_store_graph(d)

    # Unique node ids across all chunks.
    distinct_nodes.update(node.id for node in graph_document.nodes)

    # Every relationship type, duplicates included.
    relations.extend(relation.type for relation in graph_document.relationships)


NameError: name 'documents' is not defined

In [15]:
import json

# End of the timing window opened when the PDF was loaded.
end_time = datetime.now()

LLM = "OpenAI-GPT-3.5-turbo-16k"
file = loader.file_path
processed_time = str(end_time - start_time)

# Summary of this run, appended to the comparison file below.
llm_data = {"LLM": LLM,
            "File": file,
            "Processing Time": processed_time,
            "Node count": len(distinct_nodes),
            "Relation count": len(relations),
            "Nodes": list(distinct_nodes),
            "Relations": relations}

json_file_path = '../data/llm_comparision.json'
# The comparison file does not exist on the very first run; start with an
# empty list instead of failing with FileNotFoundError.
try:
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)
except FileNotFoundError:
    data = []

# A file holding a single record (a dict) is upgraded to a list of records.
if isinstance(data, dict):
    data = [data]

data.append(llm_data)

with open(json_file_path, 'w') as json_file:
    json.dump(data, json_file, indent=4)

    
    


NameError: name 'loader' is not defined

Reference Document: https://bratanic-tomaz.medium.com/constructing-knowledge-graphs-from-text-using-openai-functions-096a6d010c17