Vector Search

In [5]:
from langchain_community.vectorstores import Neo4jVector
from langchain_community.embeddings import HuggingFaceEmbeddings

In [6]:
# Embedding model shared by the vector stores below: the BioBERT checkpoint
# loaded through the HuggingFaceEmbeddings wrapper. The recorded output warns
# that no sentence-transformers model exists under this name, so a new one is
# created with MEAN pooling.
biobert = HuggingFaceEmbeddings(model_name="dmis-lab/biobert-base-cased-v1.1")

  from .autonotebook import tqdm as notebook_tqdm
No sentence-transformers model found with name dmis-lab/biobert-base-cased-v1.1. Creating a new one with MEAN pooling.


In [7]:
import os

# Neo4j connection settings. Each value can be overridden with an environment
# variable so real credentials never need to be committed with the notebook;
# the fallbacks are the local test-instance values the notebook was written with.
username = os.getenv("NEO4J_USERNAME", "tester")
password = os.getenv("NEO4J_PASSWORD", "password")
url = os.getenv("NEO4J_URI", "bolt://localhost:7689")
database = os.getenv("NEO4J_DATABASE", "ctgov")

In [8]:
from neo4j import GraphDatabase
# Open an unencrypted driver with the settings above and fail fast if the
# server cannot be reached.
driver = GraphDatabase.driver(
    url,
    auth=(username, password),
    encrypted=False,
)
driver.verify_connectivity()

In [115]:
def run_retriever(retriever, term:str):
    """Query ``retriever`` with ``term`` and return the hits as a single string.

    Args:
        retriever: Object exposing ``invoke(term)`` that returns an iterable of
            documents, each with a ``page_content`` attribute.
        term: Search text passed to the retriever unchanged.

    Returns:
        The ``page_content`` of every returned document, in the order
        returned, separated by newlines.
    """
    documents = retriever.invoke(term)
    contents = (document.page_content for document in documents)
    return "\n".join(contents)

In [34]:
def fromToCtTo_query(from_node:str, from_property:str, to_node:str, to_property:str)->str:
    """Build a retrieval query that hops from a matched node, via a trial, to ``to_node``.

    The generated Cypher starts from the ``node``/``score`` pair, optionally
    follows ``<from_node>ToStudyAssociation`` to a ``ClinicalTrial`` and then
    ``StudyTo<to_node>Association`` to the target label, and returns one row
    per source text with the distinct target values joined by ``', '``.

    Args:
        from_node: Label of the searched node; also used in the relationship name.
        from_property: Property of the searched node shown in the result text.
        to_node: Label of the target node; also used in the relationship name.
        to_property: Property of the target node collected into the result text.

    Returns:
        The Cypher fragment, returning ``text``, ``score`` and an empty ``metadata`` map.
    """
    substitutions = {
        "from_node": from_node,
        "from_property": from_property,
        "to_node": to_node,
        "to_property": to_property,
    }
    # Doubled braces in the template survive str.format as a literal `{}` map.
    template = """
    WITH node, score
    OPTIONAL MATCH path = (node)-[:{from_node}ToStudyAssociation]->(ct:ClinicalTrial)-[:StudyTo{to_node}Association]->(target:{to_node})
    WITH node.{from_property} AS from_node_txt, COLLECT(DISTINCT target.{to_property}) AS to_node_list, max(score) AS score // deduplicate parents
    RETURN "{from_node}: "+from_node_txt+". {to_node}: "+apoc.text.join(to_node_list, ', ') AS text, score, {{}} AS metadata
    """
    return template.format(**substitutions)

In [79]:
def fromToCt_query(from_node:str, from_property:str, ct_properties:list[str])->str:
    """Build a retrieval query that lists trial properties for each matched node.

    The generated Cypher starts from the ``node``/``score`` pair, optionally
    follows ``<from_node>ToStudyAssociation`` to a ``ClinicalTrial`` and
    returns one text row per (node, trial) pair.

    Every interpolated property is wrapped in ``coalesce(..., '')``. The match
    is optional, so ``ct`` (or an individual property) can be null, and Cypher
    string concatenation with null gives null, which would leave ``text`` null
    for that row.

    Args:
        from_node: Label of the searched node; also used in the relationship name.
        from_property: Property of the searched node shown in the result text.
        ct_properties: ``ClinicalTrial`` property names rendered as
            ``name = value`` pairs, in the given order. May be empty.

    Returns:
        The Cypher fragment, returning ``text``, ``score`` and an empty ``metadata`` map.
    """
    # Each fragment closes the surrounding Cypher string literal, appends the
    # property value, and reopens the literal; the final quote closes it.
    ct_properties_str = ", ".join(
        f"{p} = \"+coalesce(ct.{p}, '')+\" " for p in ct_properties
    ) + "\""

    # Doubled braces in the template survive str.format as a literal `{}` map.
    query = """
    WITH node, score
    OPTIONAL MATCH (node)-[:{from_node}ToStudyAssociation]->(ct:ClinicalTrial)
    WITH node, ct, max(score) AS score // deduplicate parents
    RETURN "{from_node}: "+coalesce(node.{from_property}, '')+". ClinicalTrial: {ct_properties_str} AS text, score, {{}} AS metadata
    """
    return query.format(
        from_node=from_node,
        from_property=from_property,
        ct_properties_str=ct_properties_str,
    )

In [117]:
# Intervention -> ClinicalTrial -> AdverseEvent retriever.
# Hybrid search over Intervention nodes; the custom retrieval query then lists
# the adverse-event terms reachable through each hit's clinical trials.
int_to_ae = Neo4jVector.from_existing_graph(
    # Connection
    url=url,
    username=username,
    password=password,
    database=database,
    # Nodes and properties to embed
    embedding=biobert,
    node_label="Intervention",
    text_node_properties=["name", "type"],
    embedding_node_property="biobert_emb",
    # Indexes and search behaviour
    index_name="intervention_biobert_emb",
    keyword_index_name="intervention_kw",
    search_type="hybrid",
    retrieval_query=fromToCtTo_query("Intervention", "name", "AdverseEvent", "term"),
).as_retriever()

context = run_retriever(int_to_ae, "tralokinumab")
print(context)



Intervention: Tralokinumab. AdverseEvent: Hypertension, Dyspnoea, Cough, Headache, Back pain, Arthralgia, Viral upper respiratory tract infection, Sinusitis, Oral candidiasis, Injection site pruritus, Injection site pain, Injection site erythema, Fatigue, Asthma, Vaginal prolapse, Acute kidney injury, Uterine leiomyoma, Breast cancer female, Tendonitis, Rotator cuff syndrome, Weight decreased, Pulmonary function test abnormal, Rib fracture, Hand fracture, Urinary tract infection, Influenza, Bronchitis, Colitis, Epistaxis, Rhinorrhoea, Oropharyngeal pain, Procedural pain, Weight increased, Non-cardiac chest pain, Injection site swelling, Rash, Nausea, Vomiting, Upper respiratory tract infection, Angioedema, Pulmonary embolism, Pharyngeal oedema, Atelectasis, Prostatitis, Metrorrhagia, Hallucination, Depression, Vertebrobasilar insufficiency, Vascular encephalopathy, Ischaemic stroke, Haemorrhagic stroke, Dizziness, Cerebral haemorrhage, Carpal tunnel syndrome, Uterine cancer, Cervix car

In [118]:
# Intervention -> ClinicalTrial retriever.
# Hybrid search over Intervention nodes; the custom retrieval query renders the
# id, study type and brief title of each linked clinical trial.
int_to_ct = Neo4jVector.from_existing_graph(
    # Connection
    url=url,
    username=username,
    password=password,
    database=database,
    # Nodes and properties to embed
    embedding=biobert,
    node_label="Intervention",
    text_node_properties=["name", "type"],
    embedding_node_property="biobert_emb",
    # Indexes and search behaviour
    index_name="intervention_biobert_emb",
    keyword_index_name="intervention_kw",
    search_type="hybrid",
    retrieval_query=fromToCt_query("Intervention", "name", ["id", "study_type","brief_title"]),
).as_retriever()

x = run_retriever(int_to_ct, "tralokinumab")
print(x)


Intervention: Tralokinumab. ClinicalTrial: id = NCT02281357 , study_type = INTERVENTIONAL , brief_title = Phase 3 Study to Evaluate the Efficacy & Safety of Tralokinumab in Adults & Adolescents With OCS Dependent Asthma 
Intervention: Tralokinumab. ClinicalTrial: id = NCT02449473 , study_type = INTERVENTIONAL , brief_title = Study to Evaluate Efficacy & Safety of Tralokinumab in Subjects With Asthma Inadequately Controlled on Corticosteroids 
Intervention: Tralokinumab. ClinicalTrial: id = NCT02161757 , study_type = INTERVENTIONAL , brief_title = A Phase 3 Study to Evaluate the Efficacy and Safety of Tralokinumab in Adults and Adolescents With Uncontrolled Asthma 
Intervention: Experimental: tralokinumab. ClinicalTrial: id = NCT02194699 , study_type = INTERVENTIONAL , brief_title = A Phase 3 Study to Evaluate the Efficacy and Safety of Tralokinumab in Adults and Adolescents With Uncontrolled Asthma 
Intervention: Serelaxin. ClinicalTrial: id = NCT02002702 , study_type = INTERVENTIONAL 

In [None]:
# Hybrid vector store over adverse-event nodes, embedding the `term` and
# `organ_system` properties. The label must be the adverse-event label: the
# original value "intervention" was a copy-paste slip that does not match the
# properties or index names used here.
adverse_event = Neo4jVector.from_existing_graph(
    embedding=biobert,
    node_label="AdverseEvent",
    embedding_node_property="biobert_emb",
    text_node_properties=["term", "organ_system"],
    url=url,
    index_name="adverse_event",
    keyword_index_name="adverse_event_kw",
    search_type="hybrid",
    username=username,
    password=password,
    database=database,
)

In [None]:
# Display the retrieval query currently set on the adverse-event store.
adverse_event.retrieval_query

In [None]:
# Patch the store's retrieval query so the node `id` is kept in the results:
# remove the "id: Null" entry, then collapse the doubled comma left behind
# (",," or ", ,") into a single one. Re-running the cell is harmless because
# the replacements no longer match once applied.
# NOTE(review): presumably "id: Null" is how the default query drops `id` from
# the metadata map; confirm against the query displayed by the previous cell.
# Earlier attempt, kept for reference:
#adverse_event.retrieval_query = adverse_event.retrieval_query.replace("id: Null", "`trial2vec_emb` : Null")
adverse_event.retrieval_query = adverse_event.retrieval_query.replace("id: Null", "").replace(",,",",").replace(", ,",",")

In [None]:
# Display the retrieval query again to check the result of the patch.
adverse_event.retrieval_query

In [None]:
# Ask the adverse-event store for the three best matches to "Anaemia" and
# print the raw result. The cells below index it as test[i][0] to get the
# document for hit i.
test = adverse_event.similarity_search_with_score("Anaemia", k=3)
print(test)

In [None]:
# Flatten the top hit's text onto one line: strip leading newlines, then join
# the remaining lines with " | ".
test[0][0].page_content.lstrip("\n").replace("\n", " | ")

In [None]:
# Read the "id" entry from the top hit's metadata.
# NOTE(review): presumably only populated once the retrieval query has been
# patched to stop nulling `id`; confirm.
test[0][0].metadata["id"]

In [None]:
# Hybrid vector store over Condition nodes; only the `id` property is embedded.
condition = Neo4jVector.from_existing_graph(
    # Connection
    url=url,
    username=username,
    password=password,
    database=database,
    # Nodes and properties to embed
    embedding=biobert,
    node_label="Condition",
    text_node_properties=["id"],
    embedding_node_property="biobert_emb",
    # Indexes and search behaviour
    index_name="condition",
    keyword_index_name="condition_kw",
    search_type="hybrid",
)

In [None]:
# Ask the condition store for the three best matches to "Cancer" and display
# the result.
condition.similarity_search_with_score("Cancer", k=3)

Text-to-SQL

In [None]:
from langchain_community.utilities import SQLDatabase

In [None]:
import os
from dotenv import load_dotenv

# Load variables from the local ".env" file, then read the AACT database
# credentials from the environment. os.getenv returns None for a variable
# that is not set, so both values may be None here.
load_dotenv(".env")
AACT_USER = os.getenv("AACT_USER")
AACT_PWD = os.getenv("AACT_PWD")

In [None]:
# NOTE(review): this cell repeats the two lookups already done in the previous
# cell. It is harmless to run but adds nothing; consider removing it.
AACT_USER = os.getenv("AACT_USER")
AACT_PWD = os.getenv("AACT_PWD")

In [None]:
# Tables exposed to the SQL tooling; passed as `include_tables` when the
# SQLDatabase is created further down.
tables = [
    "browse_interventions",
    "sponsors",
    "outcome_analysis_groups",
    "detailed_descriptions",
    "facilities",
    "studies",
    "outcomes",
    "browse_conditions",
    "outcome_analyses",
    "keywords",
    "eligibilities",
    "id_information",
    "design_group_interventions",
    "reported_events",
    "brief_summaries",
    "designs",
    "drop_withdrawals",
    "outcome_measurements",
    "countries",
]

In [None]:
from langchain_community.llms import Ollama
# Language model used by the SQL agent: the "sqlcoder" model accessed through
# the Ollama wrapper.
llm = Ollama(model="sqlcoder")

In [None]:
# Send a trivial prompt to the model and display its reply (presumably a quick
# check that the model responds).
llm.invoke("Hello, world!")

In [None]:
from urllib.parse import quote_plus

# Connection to the AACT PostgreSQL database, restricted to `tables`.
#
# The Neo4j settings `database` and `password` are deliberately NOT rebound
# here: earlier cells read them, and overwriting them made those cells fail
# when re-run after this one.
aact_database = "aact"
host = "aact-db.ctti-clinicaltrials.org"
user = AACT_USER
port = 5432

# Fail with a clear message instead of building a URI around the string "None".
if not (AACT_USER and AACT_PWD):
    raise RuntimeError(
        "AACT_USER and AACT_PWD must be set (for example in .env) before connecting to AACT."
    )

# Credentials are URL-encoded so characters such as '@', ':' or '/' in the
# password cannot corrupt the connection URI.
db_uri = (
    f"postgresql+psycopg2://{quote_plus(AACT_USER)}:{quote_plus(AACT_PWD)}"
    f"@{host}:{port}/{aact_database}"
)
sql_db = SQLDatabase.from_uri(db_uri, include_tables=tables)

In [None]:
from langchain_community.agent_toolkits import create_sql_agent
# SQL agent over the AACT database. `return_intermediate_steps` is forwarded to
# the underlying executor so the response also carries the agent's intermediate
# steps. (The original call had a stray comma after `verbose=True`, which is a
# SyntaxError.)
agent_executor = create_sql_agent(
    llm,
    db=sql_db,
    verbose=True,
    agent_executor_kwargs={"return_intermediate_steps": True},
)

In [None]:
# NOTE(review): no parentheses, so this displays the bound method itself rather
# than any table information. Looks like leftover exploration; confirm whether
# a call was intended.
sql_db.get_table_info

In [None]:
# Display the table names the SQLDatabase wrapper reports as usable.
sql_db.get_usable_table_names()

In [None]:
# Print the table information for every usable table.
print(sql_db.get_table_info(sql_db.get_usable_table_names()))

In [None]:
# Display the table information via the "no throw" variant (presumably returns
# an error description instead of raising; confirm against the library docs).
sql_db.get_table_info_no_throw()

In [None]:
# Natural-language question handed to the SQL agent; the clauses are joined
# with single spaces into one sentence.
query_str = " ".join(
    [
        "Which study ids are associated with",
        "the condition 'Asthma' and conducted in the United States, China, and India,",
        "while involving the intervention 'Xhance', and reporting more than five affected subjects",
        "in either 'deaths' or 'serious' adverse events?",
    ]
)

In [None]:
# Run the SQL agent on the question and keep its full response.
response = agent_executor.invoke(query_str)

In [None]:
# Print the agent's raw response object.
print(response)

In [None]:
# Print the `table_info` attribute of the SQLDatabase wrapper.
print(sql_db.table_info)

JSON Loader

In [None]:
from src.utils.utils import get_clinical_trial_study
from langchain_community.document_loaders import JSONLoader

In [None]:
# Fetch one clinical-trial record and key it by its NCT identifier, so `study`
# is a single-entry mapping {NCT id: record}.
study = {"NCT01164592": get_clinical_trial_study("NCT01164592")}

In [None]:
from langchain_text_splitters import RecursiveJsonSplitter
# JSON splitter configured with max_chunk_size=300 (unit not visible here;
# check the RecursiveJsonSplitter documentation).
splitter = RecursiveJsonSplitter(max_chunk_size=300)


In [None]:
# Split the single-study mapping into documents.
docs = splitter.create_documents(texts=[study])
