In [1]:
import pandas as pd
import json
import os
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
# from langchain_community.chat_models import ChatOllama
from langchain.document_loaders import WikipediaLoader
from langchain_community.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts.chat import (ChatPromptTemplate,HumanMessagePromptTemplate,SystemMessagePromptTemplate)
from langchain import PromptTemplate
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.schema import (SystemMessage,HumanMessage,AIMessage)
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
load_dotenv()

True

In [3]:
groq_api = os.getenv("GROQ_API_KEY")

# Neo4j 
neo4j_url = os.getenv("NEO4J_CONNECTION_URL")
neo4j_user = os.getenv("NEO4J_USER")
neo4j_password = os.getenv("NEO4J_PASSWORD")

# https://api.python.langchain.com/en/latest/graphs/langchain_community.graphs.neo4j_graph.Neo4jGraph.html
graph = Neo4jGraph(neo4j_url,neo4j_user,neo4j_password)


# How to Load Any Text?

1. Text Loader (.txt, .md)

In [4]:
from langchain.document_loaders import TextLoader
loader = TextLoader('predictions1.txt')
documents = loader.load()
print(documents)

[Document(page_content='*Live* Commonwealth Health Cor\nPrint Scanned Documents\nPage: 4\nRussell,Mary Kathleen\nL00041735804\nDate: 02/22/24 16:16\nRafeedheen,Pahil\nInfo From Other Facilities - Page 4\nRUN DATE: 02/13/24\nEPS *LIVE*\nPAGE 1\nRUN TIME:\n1055\nMedications Active Data\nRUN UEER:\nA.U3:RF1\nRUSSELL, MARY, H., 187 / Fem\n\nIN -\n\nCARATICC11/A\nAAA\n1. ADM\n..\n165 , 1 ccm, 65 , 81 kg,\nA\nA0095493/Acct No. AA0814683113.5.\nTimhibitor\n. Unit No\n..\n1. See\nResuscitation Status;\nRESSION\nFULL ICODE\nCA\nAllergies/ADRBi\nStatins HMG COA.\nPenicillins,\nle and\ne and the\ne More Data May Bxist Than,\n..\n13 X New York Applications\ndinteger of t\nl Will Active prod\nDiiiis\nAll\namedication w\nsig/Sch\nss\n. Cosc\nStatus\nLast Admin/\nGeneric (Trade)\nRoute\nStop\n2.555\nAmmmm\nDose\nA Mupirocin\nIGM\n1\nHID\nFeb 13,24\nFebility 24, 08:254\n"Content GM"\n09\nCKD\nBACTROBAN\' NAS\nland\nNASAL\nFeb 17,24\n21\n,GM\nIt of 199\nAVER\nDose\n10 doses\n. Ketorolac Trom\n15 MG\nON

2. PDF Loader

In [6]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("DischargePDF_compressed01.pdf")
pages = loader.load_and_split()
print(pages)

[]


3. Website Loader

In [12]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://ahoyconsulting.in/")
pages = loader.load()
print(pages)

[Document(page_content="Study In Czech Republic Consulatant For Indian Students |Ahoy ConsultingPraguemenumenumenucloseclosecloseStudy in,CZECHBook A CallExplore CzechExplore the beauty of czechDiscover, Learn, Thrive: Your Academic Journey in the Heart of Europe Begins Herewhy CZECHRich Academic HeritageExplore centuries-old institutions that have cultivated academic excellence. \n        Czech universities boast a distinguished history of scholarship and research, \n        with many renowned scholars contributing to their legacy. Choose from a diverse range of programs \n        taught in English, ensuring an inclusive and international learning environment that prepares you for a global future.why CZECHCultural ImmersionImmerse yourself in the captivating culture of Czech Republic, where every corner tells a story. \n        From the grandeur of historic castles to the vibrancy of local festivals, you will find yourself\n        surrounded by a rich tapestry of traditions. Engage w

4. Wikipedia Loader

In [14]:
from langchain.document_loaders import WikipediaLoader
query = "Sam Altman"
pages = WikipediaLoader(query=query).load()
print(pages)



5. YouTube Transcript Loader

In [15]:
from langchain_community.document_loaders import YoutubeLoader
loader = YoutubeLoader.from_youtube_url("https://youtu.be/KMXQ4SVLwmo", add_video_info=False)
pages = loader.load()
print(pages)

[Document(page_content="hey wison yeah what is knowledge craft do we need it to enhance our LM performance oh and also do you know how to integrate it with L chain okay guys hold on take it easy I will explain to you in detail step by step stuff from the per and how to set up our Na 4y databas and then how to integrate it using L chain and of course all of that we will use an open source all app so without further ado let get started what is no squas Once Upon a Time way back in 1736 there was a sweet M named Leonard eer who faced a mindbending challenge the seven breach of kbur problem is there a way to walk across all bries ones starting and ending at the same place eer heis something more crucial what matter was how things were connected so you turn the city's lanmark into dots or nodes and its preaches into lives for ages creating a neat little Network known as the origin of the graph Theory story time is both for SP hold on instead of just buing about landmarks and Brides now Las 

# Load & Summarize Data

In [3]:
query = "Tim Cook"
raw_documents = WikipediaLoader(query=query).load()
raw_documents



  lis = BeautifulSoup(html).find_all('li')


[Document(page_content='Timothy Donald Cook (born November 1, 1960) is an American business executive who is the current chief executive officer of Apple Inc. Cook had previously been the company\'s chief operating officer under its co-founder Steve Jobs. Cook joined Apple in March 1998 as a senior vice president for worldwide operations, and then as executive vice president for worldwide sales and operations. He was appointed chief executive on August 24, 2011 after Jobs, who was ill and died that October, resigned. During his tenure as the chief executive, he has advocated for the political reform of international and domestic surveillance, cybersecurity, American manufacturing, and environmental preservation. \nSince 2011 when he took over Apple, to 2020, Cook doubled the company\'s revenue and profit, and the company\'s market value increased from $348 billion to $1.9 trillion. Cook is also on the boards of directors of Nike, Inc. and the National Football Foundation; he is a trust

In [14]:
# filtered_raw_documents = [raw_documents[i] for i in [0,1,4,7,8,9,10,12,13]] #0: Tim Cook (person), 1: Apple (company), 4: Mac (product), 10: Research, 11: Apple Maps, 13: App Store, 7: Apple TV, 8: Steve Jobs, 13: iPhone
docs = " ".join([d.page_content for d in documents]).replace("\n", "").replace("==", "")
print(docs)

*Live* Commonwealth Health CorPrint Scanned DocumentsPage: 4Russell,Mary KathleenL00041735804Date: 02/22/24 16:16Rafeedheen,PahilInfo From Other Facilities - Page 4RUN DATE: 02/13/24EPS *LIVE*PAGE 1RUN TIME:1055Medications Active DataRUN UEER:A.U3:RF1RUSSELL, MARY, H., 187 / FemIN -CARATICC11/AAAA1. ADM..165 , 1 ccm, 65 , 81 kg,AA0095493/Acct No. AA0814683113.5.Timhibitor. Unit No..1. SeeResuscitation Status;RESSIONFULL ICODECAAllergies/ADRBiStatins HMG COA.Penicillins,le ande and thee More Data May Bxist Than,..13 X New York Applicationsdinteger of tl Will Active prodDiiiisAllamedication wsig/Schss. CoscStatusLast Admin/Generic (Trade)RouteStop2.555AmmmmDoseA MupirocinIGM1HIDFeb 13,24Febility 24, 08:254"Content GM"09CKDBACTROBAN' NASlandNASALFeb 17,2421,GMIt of 199AVERDose10 doses. Ketorolac Trom15 MGONGE ONEFeb13,24-04-04-09-DO, Peb 13, 24, 205, 04Summer SRMG ServiceIV13,24Feb.04 :AmYodipine>Bes5 MGBIDFeb12,2454, 24, 24, 54, 54, 55, MG21 00(NORVASC, ISMG, TPO Metoprolol Tar25 MGBIDFe

In [6]:
# filtered_raw_documents

[Document(page_content='Timothy Donald Cook (born November 1, 1960) is an American business executive who is the current chief executive officer of Apple Inc. Cook had previously been the company\'s chief operating officer under its co-founder Steve Jobs. Cook joined Apple in March 1998 as a senior vice president for worldwide operations, and then as executive vice president for worldwide sales and operations. He was appointed chief executive on August 24, 2011 after Jobs, who was ill and died that October, resigned. During his tenure as the chief executive, he has advocated for the political reform of international and domestic surveillance, cybersecurity, American manufacturing, and environmental preservation. \nSince 2011 when he took over Apple, to 2020, Cook doubled the company\'s revenue and profit, and the company\'s market value increased from $348 billion to $1.9 trillion. Cook is also on the boards of directors of Nike, Inc. and the National Football Foundation; he is a trust

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=30
)
split_docs = text_splitter.create_documents([docs])
split_docs

[Document(page_content='*Live* Commonwealth Health CorPrint Scanned DocumentsPage: 4Russell,Mary KathleenL00041735804Date: 02/22/24 16:16Rafeedheen,PahilInfo From Other Facilities - Page 4RUN DATE: 02/13/24EPS *LIVE*PAGE 1RUN TIME:1055Medications Active DataRUN UEER:A.U3:RF1RUSSELL, MARY, H., 187 / FemIN -CARATICC11/AAAA1. ADM..165 , 1 ccm, 65 , 81 kg,AA0095493/Acct No. AA0814683113.5.Timhibitor. Unit No..1. SeeResuscitation Status;RESSIONFULL ICODECAAllergies/ADRBiStatins HMG COA.Penicillins,le ande and thee More Data May Bxist Than,..13 X New York Applicationsdinteger of tl Will Active prodDiiiisAllamedication wsig/Schss. CoscStatusLast Admin/Generic (Trade)RouteStop2.555AmmmmDoseA MupirocinIGM1HIDFeb 13,24Febility 24, 08:254"Content GM"09CKDBACTROBAN\' NASlandNASALFeb 17,2421,GMIt of 199AVERDose10 doses. Ketorolac Trom15 MGONGE ONEFeb13,24-04-04-09-DO, Peb 13, 24, 205, 04Summer SRMG ServiceIV13,24Feb.04 :AmYodipine>Bes5 MGBIDFeb12,2454, 24, 24, 54, 54, 55, MG21 00(NORVASC, ISMG, TPO

In [20]:
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter


#DOING MAP REDUCE HERE : https://js.langchain.com/v0.1/docs/modules/chains/document/map_reduce/

# llm = Ollama(model="mistral") # Define the mistral model
llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
# Define the map prompt template
map_template = """The following is a set of documents
{all_data}
Based on this list of docs, please find the important information from it (focus on entities and relationship)
Helpful Answer:"""
map_prompt = PromptTemplate.from_template(map_template)

# Define the map_chain
map_chain = LLMChain(llm=llm, prompt=map_prompt)

reduce_template = """The following is set of summaries:
{all_data}
Take these and distill it into a final, consolidated summary of the main themes. In one final paragraph
Helpful Answer:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain,
    document_variable_name="all_data"  # This should match the variable name in reduce_prompt
)

# Combines and iteravely reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=1024,
)

# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="all_data",
    # Return the results of the map steps in the output
    return_intermediate_steps=False,
)


# Run the MapReduce Chain
summarization_results = map_reduce_chain.run(split_docs)

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Token indices sequence length is longer than the specified maximum sequence length for this model (5570 > 1024). Running this sequence through the model will result in indexing errors


In [21]:
file_path = "summary.txt"

with open(file_path, 'a') as file:
    file.write(summarization_results)

# Extract Information

In [22]:
from langchain_groq import ChatGroq

groq_api = os.getenv("GROQ_API_KEY")

In [24]:
entity_types = ['person','school','award','company','product','characteristic']
relation_types = ['alumniOf','worksFor','hasAward','isProducedBy','hasCharacteristic','acquired','hasProject','isFounderOf']

system_prompt = PromptTemplate(
    template = """
    You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
    Your task is to identify the entities and relations requested with the user prompt, from a given text.
    You must generate the output in a JSON containing a list with JSON objects having the following keys: "head", "head_type", "relation", "tail", and "tail_type".
    The "head" key must contain the text of the extracted entity with one of the types from the provided list in the user prompt. 
    The "head_type" key must contain the type of the extracted head entity which must be similar to this {entity_types} Can vary depending on the data.
    The "relation" key must contain the type of relation between the "head" and the "tail" which must be simlar to the relations from {relation_types}, Can vary depending on the data at hand.
    The "tail" key must represent the text of an extracted entity which is the tail of the relation, and the "tail_type" key must contain the type of the tail entity similar to that of {entity_types}. 
    Attempt to extract as many entities and relations as you can. 
    
    IMPORTANT NOTES:
    - Don't add any explanation and text. 
    """,
    input_variables=["entity_types","relation_types"],
)


system_message_prompt = SystemMessagePromptTemplate(prompt = system_prompt)

examples = [
        {
            "text":"Adam is a software engineer in Microsoft since 2009, and last year he got an award as the Best Talent" ,    
            "head": "Adam",
            "head_type": "person",
            "relation": "worksFor",
            "tail": "Microsoft",
            "tail_type": "company"
        },
        {
            "text":"Adam is a software engineer in Microsoft since 2009, and last year he got an award as the Best Talent" ,    
            "head": "Adam",
            "head_type": "person",
            "relation": "hasAward",
            "tail": "Best Talent",
            "tail_type": "award"
        },
        {
            "text":"Microsoft is a tech company that provide several products such as Microsoft Word" ,    
            "head": "Microsoft Word",
            "head_type": "product",
            "relation": "isproducedBy",
            "tail": "Microsoft",
            "tail_type": "company"
        },
        {
            "text":"Microsoft Word is a lightweight app that accessible offline" ,    
            "head": "Microsoft Word",
            "head_type": "product",
            "relation": "hasCharacteristic",
            "tail": "lightweight app",
            "tail_type": "characteristic"
        },
        {
            "text":"Microsoft Word is a lightweight app that accessible offline" ,    
            "head": "Microsoft Word",
            "head_type": "product",
            "relation": "hasCharacteristic",
            "tail": "accesible offline",
            "tail_type": "characteristic"
        },
    ]

class ExtractedInfo(BaseModel):
    head: str = Field(description="extracted first or head entity like Microsoft, Apple, John")
    head_type: str = Field(description="type of the extracted head entity like person, company, etc")
    relation: str = Field(description="relation between the head and the tail entities")
    tail: str = Field(description="extracted second or tail entity like Microsoft, Apple, John")
    tail_type: str = Field(description="type of the extracted tail entity like person, company, etc")
    
parser = JsonOutputParser(pydantic_object=ExtractedInfo)

human_prompt = PromptTemplate(
    template = """ Based on the following example, extract entities and relations from the provided text.\n\n

    Use the following entity types, don't use other entity that is not defined below:
    # ENTITY TYPES:
    {entity_types}

    Use the following relation types, don't use other relation that is not defined below:
    # RELATION TYPES:
    {relation_types}

    Below are a number of examples of text and their extracted entities and relationshhips.
    {examples}

    For the following text, generate extract entitites and relations as in the provided example.\n{format_instructions}\nText: {text}""",
    input_variables=["entity_types","relation_types","examples","text"],
    # input_variables=["examples","text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

human_message_prompt = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])


# model = ChatOllama(model = "mistral",temperature=0)
# model = ChatOllama(model = "llama3",temperature=0)    
model = ChatGroq(temperature=0, model_name="llama3-70b-8192")
chain = LLMChain(llm=model, prompt=chat_prompt)

In [25]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"head": {"title": "Head", "description": "extracted first or head entity like Microsoft, Apple, John", "type": "string"}, "head_type": {"title": "Head Type", "description": "type of the extracted head entity like person, company, etc", "type": "string"}, "relation": {"title": "Relation", "description": "relation between the head and the tail entities", "type": "string"}, "tail": {"title": "Tail", "description": "extracted second or tail entity like Microsoft, Apple, John", "type": "string"}, "tail_type": {"title": "Tail

In [26]:
chain

LLMChain(prompt=ChatPromptTemplate(input_variables=['entity_types', 'examples', 'relation_types', 'text'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['entity_types', 'relation_types'], template='\n    You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.\n    Your task is to identify the entities and relations requested with the user prompt, from a given text.\n    You must generate the output in a JSON containing a list with JSON objects having the following keys: "head", "head_type", "relation", "tail", and "tail_type".\n    The "head" key must contain the text of the extracted entity with one of the types from the provided list in the user prompt. \n    The "head_type" key must contain the type of the extracted head entity which must be similar to this {entity_types} Can vary depending on the data.\n    The "relation" key must contain the type of relation between the "head" and the "tail" 

In [28]:
# file_path = "clean_summary.txt"
# with open(file_path, 'r') as file:
#     # Read the entire file contents into a string
#     file_contents = file.read()

# # Split the file contents into sentences
# sentences = file_contents.split('. ')

result = []
# Iterate over each sentence
for sentence in split_docs:
    # Process each sentence
    # response  = chain.run(examples = examples, text = docs)
    response  = chain.run(entity_types = entity_types, relation_types = relation_types, examples = examples, text = sentence)
    print(response)
    try:
        result.extend(eval(response))
    except:
        pass

[
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasCharacteristic', 'tail': 'Female', 'tail_type': 'characteristic'},
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasCharacteristic', 'tail': '65', 'tail_type': 'characteristic'},
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasCharacteristic', 'tail': '81 kg', 'tail_type': 'characteristic'},
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasCharacteristic', 'tail': '165 cm', 'tail_type': 'characteristic'},
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasAward', 'tail': 'AA0095493', 'tail_type': 'award'},
    {'head': 'Mary Kathleen Russell', 'head_type': 'person', 'relation': 'hasAward', 'tail': 'AA0814683', 'tail_type': 'award'},
    {'head': 'Mupirocin', 'head_type': 'product', 'relation': 'isProducedBy', 'tail': 'NASAL', 'tail_type': 'company'},
    {'head': 'Ketorolac Trom', 'head_type': 'prod

In [29]:
result

[{'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasCharacteristic',
  'tail': 'Female',
  'tail_type': 'characteristic'},
 {'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasCharacteristic',
  'tail': '65',
  'tail_type': 'characteristic'},
 {'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasCharacteristic',
  'tail': '81 kg',
  'tail_type': 'characteristic'},
 {'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasCharacteristic',
  'tail': '165 cm',
  'tail_type': 'characteristic'},
 {'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasAward',
  'tail': 'AA0095493',
  'tail_type': 'award'},
 {'head': 'Mary Kathleen Russell',
  'head_type': 'person',
  'relation': 'hasAward',
  'tail': 'AA0814683',
  'tail_type': 'award'},
 {'head': 'Mupirocin',
  'head_type': 'product',
  'relation': 'isProducedBy',
  'tail': 'NASAL',
  'tail_type': 'company'},
 {'head': 'Ketor

# Convert to Cypher Query

In [33]:
# with open("C:/Users/Geraldus Wilsen/Documents/Portfolio/KnowledgeGraphLLM/tutorial/2/clean_result.txt", "r") as file:
#     content = file.read()
entity_relations = eval(response)
# entity_relations = eval(content)
print(entity_relations)

[{'text': 'page_content', 'head': 'Pulse Ox', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': '961', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Pulse Ox', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': '127', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Pulse', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': '681', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Room', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': 'ail', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Temp', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': '97.6', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Resp', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tail': '201', 'tail_type': 'characteristic'}, {'text': 'page_content', 'head': 'Review of Vital Signs', 'head_type': 'product', 'relation': 'hasCharacteristic', 'tai

In [34]:
df = pd.DataFrame(entity_relations)
df

Unnamed: 0,text,head,head_type,relation,tail,tail_type
0,page_content,Pulse Ox,product,hasCharacteristic,961,characteristic
1,page_content,Pulse Ox,product,hasCharacteristic,127,characteristic
2,page_content,Pulse,product,hasCharacteristic,681,characteristic
3,page_content,Room,product,hasCharacteristic,ail,characteristic
4,page_content,Temp,product,hasCharacteristic,97.6,characteristic
5,page_content,Resp,product,hasCharacteristic,201,characteristic
6,page_content,Review of Vital Signs,product,hasCharacteristic,Reviewed,characteristic
7,page_content,Basic Physical Exam,product,hasCharacteristic,Basic PE GEN,characteristic
8,page_content,HEAD,product,hasCharacteristic,Atraumatic/NC,characteristic
9,page_content,EYES,product,hasCharacteristic,PERRL,characteristic


In [35]:
unique_entities = set()
for item in entity_relations:
    unique_entities.add((item['head'], item['head_type']))
    unique_entities.add((item['tail'], item['tail_type']))

unique_entities_list = list(unique_entities)
print(unique_entities_list)

[('NL thought content', 'characteristic'), ('PSYCH', 'product'), ('Reg rate & rnythm', 'characteristic'), ('aleoriented', 'characteristic'), ('Carbon Dioxide ||(21.0mnd', 'characteristic'), ('Laboratory Tests', 'product'), ('97.6', 'characteristic'), ('Chloride (98 - 110 mmol/1)10', 'characteristic'), ('Pulse Ox', 'product'), ('HEAD', 'product'), ('PERRL', 'characteristic'), ('ABDS', 'product'), ('EXT', 'product'), ('Chemistry', 'product'), ('gross movementi NL', 'characteristic'), ('Pulse', 'product'), ('02/11/24 1959', 'characteristic'), ('ail', 'characteristic'), ('Reviewed', 'characteristic'), ('Anion Gap (44.BUNE (2.0 -ma/3 of  8', 'characteristic'), ('127', 'characteristic'), ('961', 'characteristic'), ('681', 'characteristic'), ('Resp', 'product'), ('No rashes, warm/dry', 'characteristic'), ('EYES', 'product'), ('Atraumatic/NC', 'characteristic'), ('No gross abnormality', 'characteristic'), ('NECK', 'product'), ('Review of Vital Signs', 'product'), ('NEURO', 'product'), ('EMembr

In [36]:
with open("cypher_query.txt", "a") as file:
    for item in unique_entities_list:
        label, entity = item
        id = label.replace(" ","_").replace("-","").replace("'","").lower()
        merge_statement = f"""MERGE ({id}:{entity} {{id: "{label}"}})\n"""
        file.write(merge_statement)

In [37]:
with open("cypher_query.txt", "a") as file:
    for item in entity_relations:
        head = item['head'].replace(" ","_").replace("-","").replace("'","").lower()
        tail = item['tail'].replace(" ","_").replace("-","").replace("'","").lower()
        cypher = f"""MERGE ({head})-[:{item['relation']}]->({tail})\n"""
        file.write(cypher)

In [38]:
# Neo4j 
neo4j_url = os.getenv("NEO4J_CONNECTION_URL")
neo4j_user = os.getenv("NEO4J_USER")
neo4j_password = os.getenv("NEO4J_PASSWORD")

# https://api.python.langchain.com/en/latest/graphs/langchain_community.graphs.neo4j_graph.Neo4jGraph.html
graph = Neo4jGraph(neo4j_url,neo4j_user,neo4j_password)

ERROR:neo4j.io:Failed to write data to connection ResolvedIPv4Address(('34.126.171.25', 7687)) (ResolvedIPv4Address(('34.126.171.25', 7687)))
ERROR:neo4j.io:Failed to write data to connection IPv4Address(('e9fb3e64.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.126.171.25', 7687)))


In [39]:
graph.refresh_schema()
print(graph.schema)

Node properties:

Relationship properties:

The relationships:



In [44]:
with open("C:/Users/Geraldus Wilsen/Documents/Portfolio/KnowledgeGraphLLM/tutorial/2/cypher_query.txt", "r") as file:
    queries = file.read()

graph.query(queries)

[]

In [40]:
graph.refresh_schema()
print(graph.schema)

Node properties:

Relationship properties:

The relationships:

