In [1]:
import re
import os
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.graphs import Neo4jGraph
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langchain.text_splitter import TokenTextSplitter

from secret import *
import chromadb

# Export the OpenAI key through the process environment.
# NOTE(review): OPENAI_API_KEY is not defined in this notebook; presumably it is
# supplied by `from secret import *` above — confirm, and consider importing it by name.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [2]:
# Connection settings are taken from the environment when present; the fallbacks
# are the local-development values this notebook previously hard-coded, so an
# unconfigured machine behaves exactly as before.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "test1234"),
)

# WARNING: destructive. This removes every node and relationship from the
# connected database so the notebook starts from an empty graph. Double-check
# NEO4J_URI before running against anything other than a scratch instance.
graph.query(
    """
MATCH (n)
DETACH DELETE n
"""
)

# Re-read the (now empty) schema and show it as a sanity check.
graph.refresh_schema()
print(graph.schema)

Node properties are the following:

Relationship properties are the following:

The relationships are the following:



In [3]:
# Fetch Wikipedia pages matching the query.
raw_documents = WikipediaLoader(query="Atelectasis").load()
if not raw_documents:
    # Fail with a clear message instead of an IndexError on the slice below.
    raise ValueError("WikipediaLoader returned no documents for query 'Atelectasis'")

# Split into token-based chunks of up to 2048 tokens with a 24-token overlap.
text_splitter = TokenTextSplitter(chunk_size=2048, chunk_overlap=24)

# Only the first loaded page is split and used downstream; the remaining pages
# stay available in `raw_documents` but are ignored here.
# NOTE(review): this assumes the first result is the most relevant page for the
# query — confirm the loader's ordering before relying on it.
documents = text_splitter.split_documents(raw_documents[:1])

In [4]:
# Number of pages the loader returned (only the first one is split into `documents`).
len(raw_documents)

25

In [5]:
from tqdm import tqdm
from aug.graph_doc import (
    get_extraction_chain,
    data_to_graph_doc,
    chain_run,
    add_graph_documents,
)
from langchain.chat_models import ChatOpenAI

# Chat model used for entity/relationship extraction; temperature=0 to keep the
# output as repeatable as the API allows.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

# Optional restrictions handed to get_extraction_chain.
# NOTE(review): None presumably means "no restriction" — confirm in aug.graph_doc.
allowed_nodes = None
allowed_rels = None
# Example of a restricted vocabulary:
# allowed_nodes = ["Symptom", "Disease"]
# allowed_rels = ["CAN_CAUSE", "DESCRIBE", "HAS"]

extract_chain = get_extraction_chain(llm, allowed_nodes, allowed_rels)
gds = []

# Run the extraction exactly once per chunk. The call lives inside the try so a
# failure on one chunk is reported and skipped rather than aborting the cell.
# `i` is the 1-based chunk number used in the failure message.
for i, d in enumerate(tqdm(documents, total=len(documents)), start=1):
    try:
        data = chain_run(extract_chain, d.page_content)
        graph_document = data_to_graph_doc(data, d)
        gds.append(graph_document)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # loop, and surface the actual error for debugging.
        print(f"Not working for doc {i}: {exc!r}")

# Write all successfully extracted graph documents to Neo4j in one call.
add_graph_documents(graph, gds)

# Show the schema that resulted from the import.
graph.refresh_schema()
print(graph.schema)

  warn_deprecated(
  warn_deprecated(
100%|██████████| 1/1 [01:26<00:00, 86.67s/it]


Node properties are the following:
Condition {affects: STRING, name: STRING, description: STRING, definition: STRING, unilateral: STRING, id: STRING},Symptom {description: STRING, id: STRING, name: STRING},Cause {description: STRING, name: STRING, id: STRING},Risk factor {description: STRING, id: STRING, name: STRING},Diagnostic procedure {description: STRING, id: STRING, name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Condition)-[:CAN_CAUSE]->(:Symptom),(:Condition)-[:CAN_CAUSE]->(:Condition),(:Cause)-[:CAN_LEAD_TO]->(:Condition),(:Cause)-[:CAN_CONTRIBUTE_TO]->(:Condition),(:Cause)-[:CAN_CAUSE]->(:Condition),(:Risk factor)-[:ARE_AT_AN_INCREASED_RISK_OF]->(:Condition),(:Diagnostic procedure)-[:MAY_BE_NECESSARY_TO_EVALUATE]->(:Condition),(:Diagnostic procedure)-[:IS_COMMONLY_USED_TO_DETECT]->(:Condition)


In [6]:
from langchain.chains import GraphCypherQAChain
# Two separately constructed, identically configured chat models: one writes the
# Cypher query, the other turns the query result into a natural-language answer.
cypher_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")
answer_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")

chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=cypher_model,
    qa_llm=answer_model,
    # Validate relationship directions in the generated Cypher.
    validate_cypher=True,
    # Print the generated Cypher and the retrieved context for each run.
    verbose=True,
)

In [7]:
# Condition under study; interpolated into every question below.
lesion = "Atelectasis"

# Natural-language questions about the condition. Every entry is a complete
# question ending in "?" so the prompts are phrased consistently.
questions = [
    f"What is {lesion}?",
    f"What are the symptoms associated with {lesion}?",
    f"What can cause {lesion}?",
    f"What are the patient’s symptoms that are relevant for {lesion}?",
    f"What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
    f"What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
    f"What are the relevant clinical characteristics for the etiological diagnosis of {lesion}?",
    f"What are the patient’s personal relevant history for the etiological diagnosis of {lesion}?",
]

In [8]:
# Ask every question once and collect the answers keyed by question text.
res_dict = {}
for q in questions:
    try:
        res = chain.run(q)
        res_dict[q] = res.strip()
    except ValueError as exc:
        # The chain reports Cypher that Neo4j rejects as a ValueError; keep the
        # original placeholder for that case and print the details.
        # NOTE(review): other ValueErrors would be labelled the same way — the
        # printed message disambiguates.
        print(f"Invalid Cypher for {q!r}: {exc}")
        res_dict[q] = "Generated Cypher Statement is not valid"
    except Exception as exc:
        # Any other failure (network, auth, rate limit, ...) is recorded as
        # what it actually was instead of being mislabelled as bad Cypher.
        # KeyboardInterrupt is intentionally not caught.
        print(f"Query failed for {q!r}: {exc!r}")
        res_dict[q] = f"Query failed: {type(exc).__name__}: {exc}"



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: "Atelectasis"})
RETURN c[0m
Full Context:
[32;1m[1;3m[{'c': {'name': 'Atelectasis', 'affects': 'part or all of one lung', 'description': 'Atelectasis is a condition where the alveoli are deflated down to little or no volume, as distinct from pulmonary consolidation, in which they are filled with liquid. It is often referred to informally as a collapsed lung, although more accurately it usually involves only a partial collapse, and that ambiguous term is also informally used for a fully collapsed lung caused by a pneumothorax. It is a very common finding in chest X-rays and other radiological studies, and may be caused by normal exhalation or by various medical conditions. Although frequently described as a collapse of lung tissue, atelectasis is not synonymous with a pneumothorax, which is a more specific condition that can cause atelectasis.', 'definition': 'The partial col

In [9]:
# Emit the collected answers one per line, in the order the questions were asked.
for question in questions:
    answer = res_dict[question]
    print(answer)

Atelectasis is a condition where part or all of one lung is affected. It is characterized by the deflation of the alveoli, which are the tiny air sacs in the lungs. This condition is often referred to as a collapsed lung, although it usually involves only a partial collapse. Atelectasis is a common finding in chest X-rays and other radiological studies. It can be caused by normal exhalation or various medical conditions. It is important to note that atelectasis is not synonymous with a pneumothorax, which is a more specific condition that can cause atelectasis.
The symptoms associated with Atelectasis include Cyanosis and Increased Heart Rate.
Cyanosis and increased heart rate can be potential causes of atelectasis.
The patient's symptoms that are relevant for Atelectasis include Cyanosis and Increased Heart Rate.
The relevant clinical signs for the etiological diagnosis of Atelectasis include Cyanosis and Increased Heart Rate.
The relevant laboratory data for the etiological diagnosis

In [10]:
# NOTE(review): this cell always raises. It looks like a deliberate stop marker so
# that a top-to-bottom run halts before the ad-hoc query cells below — confirm,
# and consider moving those cells to a scratch notebook instead.
raise StopIteration()

StopIteration: 

In [None]:
# Ad-hoc single-question run of the QA chain.
# NOTE(review): the saved output for this cell lists node properties (causedBy,
# alsoKnownAs) that the schema printed after extraction does not contain, so it
# appears to come from an earlier graph build — re-run to refresh.
chain.run("What is Atelectasis?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})
RETURN c[0m
Full Context:
[32;1m[1;3m[{'c': {'name': 'Atelectasis', 'causedBy': 'normal exhalation or various medical conditions', 'affects': 'part or all of one lung', 'description': 'A condition where the alveoli are deflated down to little or no volume, as distinct from pulmonary consolidation, in which they are filled with liquid', 'definition': 'The collapse or closure of a lung resulting in reduced or absent gas exchange', 'unilateral': 'true', 'id': 'Atelectasis', 'alsoKnownAs': 'collapsed lung'}}][0m

[1m> Finished chain.[0m


'Atelectasis is a condition where part or all of one lung collapses or closes, resulting in reduced or absent gas exchange. It can be caused by normal exhalation or various medical conditions. The alveoli, which are small air sacs in the lungs, become deflated down to little or no volume. Atelectasis is also known as a collapsed lung.'

In [None]:
# Ad-hoc single-question run of the QA chain.
# NOTE(review): in the saved output the generated Cypher walks
# (:Condition)-[:HAS]->(:Section)-[:INCLUDES]->(:Symptom) and every returned
# name is None, so the chain answers that it has no information.
chain.run("What are the symptoms of Atelectasis?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:HAS]->(s:Section)-[:INCLUDES]->(sym:Symptom)
RETURN sym.name[0m
Full Context:
[32;1m[1;3m[{'sym.name': None}, {'sym.name': None}, {'sym.name': None}, {'sym.name': None}, {'sym.name': None}, {'sym.name': None}, {'sym.name': None}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information to answer your question about the symptoms of Atelectasis."

In [None]:
# Ad-hoc single-question run of the QA chain.
# NOTE(review): in the saved output the generated Cypher walks
# (:Condition)-[:HAS]->(:Section)-[:INCLUDES]->(:Cause) and every returned
# name is None, so the chain answers that it has no information.
chain.run("What can cause Atelectasis?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:HAS]->(s:Section)-[:INCLUDES]->(cause:Cause)
RETURN cause.name[0m
Full Context:
[32;1m[1;3m[{'cause.name': None}, {'cause.name': None}, {'cause.name': None}, {'cause.name': None}, {'cause.name': None}, {'cause.name': None}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have any information on what can cause Atelectasis."

In [None]:
# Ad-hoc single-question run of the QA chain.
# NOTE(review): the saved output shows the same :HAS/:Section/:INCLUDES query
# shape as the symptoms question, returning only None values.
chain.run("What clinical features radiologists need for diagnosing Atelectasis?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:HAS]->(s:Section)-[:INCLUDES]->(sym:Symptom)
RETURN sym.name AS ClinicalFeaturesRadiologistsNeed[0m
Full Context:
[32;1m[1;3m[{'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}, {'ClinicalFeaturesRadiologistsNeed': None}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information on the clinical features that radiologists need for diagnosing Atelectasis."

In [None]:
# Ad-hoc single-question run of the QA chain.
# NOTE(review): the saved output shows this call raising ValueError because the
# generated Cypher used a label containing a space without quoting it
# (`df:Diagnostic finding`), which Neo4j rejects as a syntax error.
chain.run("what are radiographic features of Atelectasis?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:HAS]->(s:Section)-[:INCLUDES]->(df:Diagnostic finding)
RETURN df.name[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'finding': expected ")", "WHERE", "{" or a parameter (line 1, column 91 (offset: 90))
"MATCH (c:Condition {name: 'Atelectasis'})-[:HAS]->(s:Section)-[:INCLUDES]->(df:Diagnostic finding)"
                                                                                           ^}