In [1]:
import re
import os
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.graphs import Neo4jGraph
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langchain.text_splitter import TokenTextSplitter

from secret import *
import chromadb

# Publish the OpenAI key as a process environment variable.
# NOTE(review): OPENAI_API_KEY is not defined in this notebook; presumably it
# arrives through the star import from `secret` — confirm.
os.environ.update(OPENAI_API_KEY=OPENAI_API_KEY)

In [2]:
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD) so real credentials never have to be saved in the notebook.
# The fallbacks are the local development values that used to be hard-coded
# here, so an unconfigured local run connects exactly as before.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_username = os.environ.get("NEO4J_USERNAME", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "test1234")

graph = Neo4jGraph(url=neo4j_uri, username=neo4j_username, password=neo4j_password)

# WARNING: destructive. This matches every node in the connected database and
# deletes it together with its relationships, so the notebook starts from an
# empty graph. Only run it against a scratch instance.
graph.query(
    """
MATCH (n)
DETACH DELETE n
"""
)

# Reload the schema after the wipe and print it.
graph.refresh_schema()
print(graph.schema)


        Node properties are the following:
        []
        Relationship properties are the following:
        []
        The relationships are the following:
        []
        


In [3]:
import logging
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup

from urllib.parse import urlencode

# Collect the article links returned by a Radiopaedia search for `search_query`.
search_query = "atelectasis"
radio_address = "https://radiopaedia.org"

# urlencode percent-escapes the term, so queries containing spaces, '&' or
# non-ASCII characters still produce a valid URL. For a plain word the result
# is identical to the previously hand-built query string.
search_params = {"lang": "gb", "q": search_query, "scope": "articles"}
URL = f"{radio_address}/search?{urlencode(search_params)}"

# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status stops an HTTP error page from being parsed as results.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

# href=True skips anchors without a link target (which would raise KeyError);
# urljoin resolves site-relative hrefs and leaves absolute ones untouched.
all_address = [
    urljoin(radio_address, e["href"])
    for e in soup.find_all(
        "a", class_="search-result search-result-article", href=True
    )
]
all_address

['https://radiopaedia.org/articles/atelectasis-summary?lang=gb',
 'https://radiopaedia.org/articles/linear-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/lung-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/subsegmental-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/round-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/passive-atelectasis-1?lang=gb',
 'https://radiopaedia.org/articles/compressive-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/cicatrisation-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/segmental-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/adhesive-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/resorptive-obstructive-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/chronic-maxillary-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/middle-ear-atelectasis?lang=gb',
 'https://radiopaedia.org/articles/gravity-dependent-atelectasis-1?lang=gb',
 'https://radiopaedia.org/articles/c

In [4]:
from langchain.document_loaders import DiffbotLoader

# Load the first search hit through Diffbot, then cut it into token-based chunks.
# Open question carried over from the original author: should only the first
# result be taken, and is the first one actually the most relevant?
diffbot_loader = DiffbotLoader(urls=[all_address[0]], api_token=DIFFBOT_API_TOKEN)
raw_documents = diffbot_loader.load()

# 2048-token chunks with a 24-token overlap between neighbouring chunks.
text_splitter = TokenTextSplitter(chunk_size=2048, chunk_overlap=24)
documents = text_splitter.split_documents(raw_documents)

1

In [5]:
from tqdm import tqdm
from aug.graph_doc import (
    get_extraction_chain,
    data_to_graph_doc,
    chain_run,
    add_graph_documents,
)
from langchain.chat_models import ChatOpenAI

# Chat model (temperature 0) handed to the extraction chain.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")

# Both filters are passed to get_extraction_chain as None.
# NOTE(review): presumably None leaves node/relationship types unconstrained —
# confirm in aug.graph_doc. A restricted setup tried earlier looked like:
#   allowed_nodes = ["Symptom", "Disease"]
#   allowed_rels = ["CAN_CAUSE", "DESCRIBE", "HAS"]
allowed_nodes = allowed_rels = None

extract_chain = get_extraction_chain(llm, allowed_nodes, allowed_rels)

# One graph document per text chunk: run the chain on the chunk text, then
# convert the result, keeping the chunk itself as the second argument.
gds = [
    data_to_graph_doc(chain_run(extract_chain, chunk.page_content), chunk)
    for chunk in tqdm(documents, total=len(documents))
]

# Write all graph documents in a single call after extraction has finished.
add_graph_documents(graph, gds)

# Reload and print the schema so the newly created labels are visible.
graph.refresh_schema()
print(graph.schema)

100%|██████████| 1/1 [00:16<00:00, 16.20s/it]


        Node properties are the following:
        [{'labels': 'Medicalterm', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}]}, {'labels': 'Imagingtechnique', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}]}, {'labels': 'Topic', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}]}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Medicalterm)-[:HAS]->(:Topic)', '(:Medicalterm)-[:DESCRIBES]->(:Medicalterm)', '(:Topic)-[:HAS]->(:Imagingtechnique)']
        





In [6]:
from langchain.chains import GraphCypherQAChain
# Two separate chat-model instances with identical settings: one is passed as
# the Cypher LLM, the other as the QA LLM.
llm_settings = {"temperature": 0, "model": "gpt-3.5-turbo-16k"}
cypher_model = ChatOpenAI(**llm_settings)
qa_model = ChatOpenAI(**llm_settings)

chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=cypher_model,
    qa_llm=qa_model,
    validate_cypher=True,  # validate relationship directions in generated Cypher
    verbose=True,  # print the generated Cypher and the retrieved context
)

In [7]:
# Baseline definition question.
# NOTE(review): in the recorded run the retrieved context holds only the node's
# name and id, so the definition text in the answer is not taken from the graph.
question = "What is Atelectasis?"
chain.run(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Medicalterm {name: 'Atelectasis'}) RETURN m[0m
Full Context:
[32;1m[1;3m[{'m': {'name': 'Atelectasis', 'id': 'Atelectasis'}}][0m

[1m> Finished chain.[0m


'Atelectasis is a medical condition characterized by the collapse or closure of a lung or a part of it.'

In [8]:
# Symptom question.
# NOTE(review): in the recorded run the generated Cypher follows the DESCRIBES
# relationship and aliases the result as "Symptom"; the printed schema has no
# symptom-specific relationship.
question = "What are the symptoms of Atelectasis?"
chain.run(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Medicalterm {name: 'Atelectasis'})-[:DESCRIBES]->(s:Medicalterm)
RETURN s.name AS Symptom[0m
Full Context:
[32;1m[1;3m[{'Symptom': 'Collapse'}][0m

[1m> Finished chain.[0m


'The symptom of Atelectasis is collapse.'

In [9]:
# Cause question.
# NOTE(review): in the recorded run this produces the same DESCRIBES traversal
# as the symptom question, so 'Collapse' is reported as a cause as well.
question = "What can cause Atelectasis?"
chain.run(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Medicalterm {name: 'Atelectasis'})-[:DESCRIBES]->(m2:Medicalterm)
RETURN m2.name[0m
Full Context:
[32;1m[1;3m[{'m2.name': 'Collapse'}][0m

[1m> Finished chain.[0m


'A possible cause of Atelectasis is a condition called Collapse.'

In [10]:
# Clinical-features question.
# NOTE(review): in the recorded run the Cypher query returns an empty context,
# yet the chain still produces a detailed answer — that answer is not backed by
# anything retrieved from the graph.
question = "What clinical features radiologists need for diagnosing Atelectasis?"
chain.run(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Medicalterm {name: 'Atelectasis'})-[:DESCRIBES]->(m2:Medicalterm)-[:HAS]->(t:Topic)-[:HAS]->(i:Imagingtechnique)
RETURN i.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


'To diagnose Atelectasis, radiologists typically look for certain clinical features. These may include signs such as lung collapse, airway obstruction, and reduced lung volume. Additionally, they may also consider the presence of symptoms such as shortness of breath, coughing, and decreased breath sounds. By evaluating these clinical features, radiologists can help in diagnosing Atelectasis.'

In [11]:
# Radiographic-features question.
# NOTE(review): in the recorded run the Cypher query again returns an empty
# context, and this time the chain answers that it has no information.
question = "what are radiographic features of Atelectasis?"
chain.run(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Medicalterm {name: 'Atelectasis'})-[:DESCRIBES]->(m2:Medicalterm)-[:HAS]->(t:Topic)-[:HAS]->(i:Imagingtechnique)
RETURN i.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information to answer your question about the radiographic features of Atelectasis."