In [1]:
import re
import os
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.graphs import Neo4jGraph
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langchain.text_splitter import TokenTextSplitter

from secret import *
import chromadb

# Publish the API key through the process environment so that libraries which
# look up OPENAI_API_KEY there can find it. The name OPENAI_API_KEY is
# presumably supplied by the wildcard import from `secret` — TODO confirm.
os.environ.update({"OPENAI_API_KEY": OPENAI_API_KEY})

In [2]:
# Connect to Neo4j. The connection settings can be overridden with the
# NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD environment variables so that
# credentials do not have to be edited into the notebook; the fallbacks are the
# local development values this cell used before.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "test1234"),
)

# WARNING: destructive. This removes every node (and, via DETACH, every
# relationship attached to it) from the connected database so the notebook
# starts from an empty graph. Do not point this at a database you want to keep.
graph.query(
    """
MATCH (n)
DETACH DELETE n
"""
)

# Re-read the schema after the wipe and show it.
graph.refresh_schema()
print(graph.schema)

Node properties are the following:

Relationship properties are the following:

The relationships are the following:



In [3]:
# Fetch the Wikipedia search results for the query, then cut them into
# token-based chunks (2048 tokens per chunk, 24 tokens of overlap).
wiki_loader = WikipediaLoader(query="Atelectasis")
raw_documents = wiki_loader.load()

text_splitter = TokenTextSplitter(chunk_overlap=24, chunk_size=2048)

# TODO: open question from the author — should only the first (most relevant?)
# search result be used instead of every document the loader returns?
documents = text_splitter.split_documents(raw_documents)

In [4]:
from tqdm import tqdm
from aug.graph_doc import (
    get_extraction_chain,
    data_to_graph_doc,
    chain_run,
    add_graph_documents,
)
from langchain.chat_models import ChatOpenAI

# Chat model used for graph extraction; temperature 0 is requested for every call.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

# Passed straight through to get_extraction_chain. None presumably means
# "no restriction on node / relationship types" — confirm in aug.graph_doc.
allowed_nodes = None
allowed_rels = None
# Example of a constrained schema:
# allowed_nodes = ["Symptom", "Disease"]
# allowed_rels = ["CAN_CAUSE", "DESCRIBE", "HAS"]

extract_chain = get_extraction_chain(llm, allowed_nodes, allowed_rels)

# Graph documents that were extracted successfully; written to Neo4j in one go below.
gds = []

# `i` is the 1-based position of the chunk, used only for the failure message.
for i, d in enumerate(tqdm(documents, total=len(documents)), start=1):
    try:
        data = chain_run(extract_chain, d.page_content)
        graph_document = data_to_graph_doc(data, d)
        gds.append(graph_document)
    except Exception as exc:
        # Best effort: one failing chunk must not abort the whole (long) run.
        # Only Exception is caught so Ctrl-C / kernel interrupt still stops the
        # loop, and the cause is printed instead of being silently discarded.
        print(f"Not working for doc {i}: {exc!r}")

add_graph_documents(graph, gds)

# Show the schema that resulted from the import.
graph.refresh_schema()
print(graph.schema)

  warn_deprecated(
  warn_deprecated(
 84%|████████▍ | 21/25 [27:42<03:56, 59.00s/it] Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).
 96%|█████████▌| 24/25 [43:07<03:13, 193.50s/it]

Not working for doc 24


100%|██████████| 25/25 [44:08<00:00, 105.95s/it]


Node properties are the following:
Condition {affects: STRING, name: STRING, description: STRING, definition: STRING, unilateral: STRING, id: STRING, mechanism: STRING, treatment: STRING, epidemiology: STRING, diagnosis: STRING, signsandsymptoms: STRING, cause: STRING, alsoknownas: STRING, causedby: STRING, transmission: STRING, complications: STRING, prevention: STRING, symptoms: STRING},Section {name: STRING, id: STRING, description: STRING},Symptom {description: STRING, id: STRING, name: STRING, temperature: STRING},Cause {description: STRING, name: STRING, id: STRING},Risk factor {description: STRING, id: STRING, name: STRING},Type {name: STRING, id: STRING, description: STRING, characteristics: STRING},Anatomy {id: STRING, name: STRING, definition: STRING, plural: STRING, description: STRING},Part of eardrum {description: STRING, id: STRING, name: STRING},Factor {description: STRING, name: STRING, id: STRING},Information {description: STRING, name: STRING, id: STRING},Concept {nam

In [5]:
from langchain.chains import GraphCypherQAChain
# Both roles (Cypher generation and answer writing) use the same model settings,
# but each gets its own ChatOpenAI instance.
llm_settings = {"temperature": 0, "model": "gpt-3.5-turbo-16k"}
cypher_model = ChatOpenAI(**llm_settings)
qa_model = ChatOpenAI(**llm_settings)

chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=cypher_model,
    qa_llm=qa_model,
    # Validate relationship directions
    validate_cypher=True,
    verbose=True,
)

In [10]:
lesion = "Atelectasis"

# Question templates; "{lesion}" is substituted below. The wording is kept
# exactly as-is (the last two entries intentionally carry no trailing "?"
# here, matching what has been sent to the chain so far).
_question_templates = (
    "What is {lesion}?",
    "What are the symptoms associated with {lesion}?",
    "What can cause {lesion}?",
    "What are the patient’s symptoms that are relevant for {lesion}?",
    "What are the relevant clinical signs for the etiological diagnosis of {lesion}?",
    "What are the relevant laboratory data for the etiological diagnosis of {lesion}?",
    "What are the relevant clinical characteristics for the etiological diagnosis of {lesion}",
    "What are the patient’s personal relevant history for the etiological diagnosis of {lesion}",
)
questions = [template.format(lesion=lesion) for template in _question_templates]

In [12]:
# Run every question through the chain once and keep the whitespace-stripped
# answer, keyed by the question text.
res_dict = {}
for q in questions:
    try:
        res = chain.run(q)
        res_dict[q] = res.strip()
    except Exception as exc:
        # Only Exception is caught so a kernel interrupt still stops the loop.
        # The stored placeholder is unchanged, but the real cause is printed:
        # a failure here is not necessarily an invalid Cypher statement
        # (it can equally be a timeout or an API error).
        print(f"Question failed ({q!r}): {exc!r}")
        res_dict[q] = "Generated Cypher Statement is not valid"



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: "Atelectasis"})
RETURN c[0m
Full Context:
[32;1m[1;3m[{'c': {'name': 'Atelectasis', 'affects': 'part or all of one lung', 'description': 'Atelectasis is a condition where the alveoli are deflated down to little or no volume, as distinct from pulmonary consolidation, in which they are filled with liquid. It is often referred to informally as a collapsed lung, although more accurately it usually involves only a partial collapse, and that ambiguous term is also informally used for a fully collapsed lung caused by a pneumothorax. It is a very common finding in chest X-rays and other radiological studies, and may be caused by normal exhalation or by various medical conditions. Although frequently described as a collapse of lung tissue, atelectasis is not synonymous with a pneumothorax, which is a more specific condition that can cause atelectasis.', 'definition': 'The partial col

In [13]:
# Print the stored answers, one per line, in the order the questions were defined.
for answer in (res_dict[q] for q in questions):
    print(answer)

Atelectasis is a condition where part or all of one lung is affected. It is characterized by the deflation of the alveoli, which are the tiny air sacs in the lungs. This condition is often referred to as a collapsed lung, although it usually involves only a partial collapse. Atelectasis is a common finding in chest X-rays and other radiological studies. It can be caused by normal exhalation or various medical conditions. It is important to note that atelectasis is not synonymous with a pneumothorax, which is a more specific condition that can cause atelectasis.
I'm sorry, but I don't have the information to answer your question. It would be best to consult a medical professional for accurate information on the symptoms associated with Atelectasis.
I'm sorry, but I don't have enough information to provide a helpful answer.
I'm sorry, but I don't have the information to answer your question. It would be best to consult a medical professional for accurate and reliable information regardin

In [7]:
# NOTE(review): this cell always raises. It looks like a deliberate barrier so
# that "Run All" stops here and the ad-hoc chain.run(...) cells below are only
# executed by hand — confirm, and consider deleting the barrier together with
# the scratch cells once they are no longer needed.
raise StopIteration()

StopIteration: 

In [None]:
# Ad-hoc probe of the chain; the bare name on the last line keeps the answer
# as the cell's displayed result.
symptoms_answer = chain.run("What are the symptoms of Atelectasis?")
symptoms_answer



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information to answer your question. It would be best to consult a medical professional for accurate information on the symptoms of Atelectasis."

In [None]:
# Ad-hoc probe of the chain; the bare name on the last line keeps the answer
# as the cell's displayed result.
# NOTE: in the recorded run the generated Cypher returned an empty context, yet
# the model still produced a detailed answer — treat such output as not grounded
# in the graph.
causes_answer = chain.run("What can cause Atelectasis?")
causes_answer



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (:Condition {name: 'Atelectasis'})-[:HASCAUSE]->(cause:Cause)
RETURN cause.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


'There are several factors that can cause atelectasis. Some common causes include blockage of the airways due to mucus, a foreign object, or a tumor, as well as lung diseases such as chronic obstructive pulmonary disease (COPD) or pneumonia. Additionally, certain medical procedures, such as anesthesia or prolonged bed rest, can also contribute to the development of atelectasis. It is important to consult with a healthcare professional for a proper diagnosis and treatment plan.'

In [None]:
# Ad-hoc probe of the chain; the bare name on the last line keeps the answer
# as the cell's displayed result.
# NOTE: in the recorded run the generated Cypher returned an empty context, yet
# the model still produced a detailed answer — treat such output as not grounded
# in the graph.
clinical_features_answer = chain.run(
    "What clinical features radiologists need for diagnosing Atelectasis?"
)
clinical_features_answer



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:DESCRIBES]->(t:Term)
RETURN t.name AS ClinicalFeaturesRadiologistsNeedForDiagnosingAtelectasis[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


'To diagnose Atelectasis, radiologists typically look for certain clinical features. These may include signs such as lung collapse, airway obstruction, and reduced lung volume. Additionally, they may also consider the presence of symptoms such as shortness of breath, coughing, and decreased breath sounds. By evaluating these clinical features, radiologists can help in diagnosing Atelectasis.'

In [None]:
# Ad-hoc probe of the chain; the bare name on the last line keeps the answer
# as the cell's displayed result.
radiographic_features_answer = chain.run(
    "what are radiographic features of Atelectasis?"
)
radiographic_features_answer



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Condition {name: 'Atelectasis'})-[:HASCAUSE]->(r:Condition)-[:HASEFFECT]->(p:Process)
RETURN p.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information to answer your question about the radiographic features of Atelectasis."