Vector Search

In [None]:
from langchain_community.vectorstores import Neo4jVector
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
# Embedding model shared by the Neo4jVector stores built in the cells below,
# loaded by its Hugging Face model name.
biobert = HuggingFaceEmbeddings(model_name="dmis-lab/biobert-base-cased-v1.1")

In [None]:
# Connection settings for the Neo4j instance queried by this notebook.
# An earlier revision used "tester" for both username and password.
# NOTE(review): credentials are hard-coded here; consider reading them from the
# environment instead.
url = "bolt://localhost:7687"
database = "ctgov"
username, password = "tester", "password"

In [None]:
from neo4j import GraphDatabase
# Open an unencrypted driver with the settings defined above and check that the
# server can be reached.
# NOTE(review): `driver` is not referenced again in the visible cells and is
# never closed — confirm whether it is still needed after the connectivity check.
driver = GraphDatabase.driver(url, auth=(username, password), encrypted=False)
driver.verify_connectivity()

In [None]:
# Hybrid store over nodes labelled "Intervention": the text to embed is taken
# from the `id` property and the vector is kept in `biobert_emb`.
intervention = Neo4jVector.from_existing_graph(
    # Connection
    url=url,
    username=username,
    password=password,
    database=database,
    # Graph mapping
    embedding=biobert,
    node_label="Intervention",
    text_node_properties=["id"],
    embedding_node_property="biobert_emb",
    # Indexes used for the hybrid (vector + keyword) search
    search_type="hybrid",
    index_name="intervention",
    keyword_index_name="intervention_kw",
)

In [None]:
# Sample query: the 3 best matches, with scores, for "electrocardiogram".
intervention.similarity_search_with_score("electrocardiogram", k=3)

In [None]:
# Hybrid store intended for adverse events: the text to embed is taken from the
# `term` and `organ_system` properties and the vector is kept in `biobert_emb`.
adverse_event = Neo4jVector.from_existing_graph(
    embedding=biobert,
    # NOTE(review): the label is lowercase "intervention", while the sibling
    # stores use capitalised labels ("Intervention", "Condition") and this
    # store's indexes are named adverse_event*. This looks like a copy-paste
    # slip — confirm the intended node label.
    node_label= "intervention", 
    embedding_node_property="biobert_emb", 
    text_node_properties=["term","organ_system"],
    url=url,
    index_name="adverse_event",
    keyword_index_name= "adverse_event_kw",
    search_type="hybrid",
    username=username, 
    password=password, 
    database=database,
    )

In [None]:
# Display the store's current retrieval query (before it is patched below).
adverse_event.retrieval_query

In [None]:
# Patch the retrieval query so that results include the node id: remove the
# literal "id: Null" entry, then collapse the doubled comma it leaves behind
# (with or without a space between the commas).
# Earlier variant, kept for reference:
#adverse_event.retrieval_query = adverse_event.retrieval_query.replace("id: Null", "`trial2vec_emb` : Null")
adverse_event.retrieval_query = (
    adverse_event.retrieval_query
    .replace("id: Null", "")
    .replace(",,", ",")
    .replace(", ,", ",")
)

In [None]:
# Display the retrieval query again to check the result of the patch above.
adverse_event.retrieval_query

In [None]:
# Sample query: the 3 best matches, with scores, for "Anaemia". The result is
# kept in `test` because the next cells inspect its first hit.
test = adverse_event.similarity_search_with_score("Anaemia", k=3)
print(test)

In [None]:
# Show the top hit's text on one line: strip leading newlines, then join the
# remaining lines with " | ".
test[0][0].page_content.lstrip("\n").replace("\n", " | ")

In [None]:
# Read the top hit's "id" metadata entry.
# NOTE(review): presumably only present because of the retrieval-query patch
# above — confirm.
test[0][0].metadata["id"]

In [None]:
# Hybrid store over nodes labelled "Condition": the text to embed is taken from
# the `id` property and the vector is kept in `biobert_emb`.
condition = Neo4jVector.from_existing_graph(
    # Connection
    url=url,
    username=username,
    password=password,
    database=database,
    # Graph mapping
    embedding=biobert,
    node_label="Condition",
    text_node_properties=["id"],
    embedding_node_property="biobert_emb",
    # Indexes used for the hybrid (vector + keyword) search
    search_type="hybrid",
    index_name="condition",
    keyword_index_name="condition_kw",
)

In [None]:
# Sample query: the 3 best matches, with scores, for "Cancer".
condition.similarity_search_with_score("Cancer", k=3)

Text-to-SQL

In [None]:
from langchain_community.utilities import SQLDatabase

In [None]:
import os

from dotenv import load_dotenv

# Load the local ".env" file, then read the AACT database credentials from the
# process environment (None when a variable is not set).
load_dotenv(".env")
AACT_USER = os.environ.get("AACT_USER")
AACT_PWD = os.environ.get("AACT_PWD")

In [None]:
# Re-read the AACT credentials from the process environment (None when unset).
AACT_USER, AACT_PWD = os.environ.get("AACT_USER"), os.environ.get("AACT_PWD")

In [None]:
# AACT tables exposed to the SQL database wrapper (passed as `include_tables`
# when the connection is created).
tables = [
    "browse_interventions", "sponsors", "outcome_analysis_groups",
    "detailed_descriptions", "facilities", "studies",
    "outcomes", "browse_conditions", "outcome_analyses",
    "keywords", "eligibilities", "id_information",
    "design_group_interventions", "reported_events", "brief_summaries",
    "designs", "drop_withdrawals", "outcome_measurements",
    "countries",
]

In [None]:
from langchain_community.llms import Ollama
# LLM used by the SQL agent below: the "sqlcoder" model served through Ollama.
llm = Ollama(model="sqlcoder")

In [None]:
# Quick check that the model responds before wiring it into the agent.
llm.invoke("Hello, world!")

In [None]:
from urllib.parse import quote_plus

# Connection settings for the AACT PostgreSQL database.
# NOTE: this rebinds `database` and `password`, which the Neo4j cells earlier in
# the notebook also read; re-run the Neo4j settings cell before re-running them.
database = "aact"
host = "aact-db.ctti-clinicaltrials.org"
user = AACT_USER
password = AACT_PWD
port = 5432

# os.getenv returns None for unset variables; without this guard the URI below
# would silently contain the literal text "None".
if not user or not password:
    raise RuntimeError(
        "AACT_USER and AACT_PWD must be set (e.g. in .env) before connecting to AACT."
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" are not
# mistaken for URI delimiters.
db_uri = (
    f"postgresql+psycopg2://{quote_plus(user)}:{quote_plus(password)}"
    f"@{host}:{port}/{database}"
)
# Restrict the wrapper to the tables listed in `tables`.
sql_db = SQLDatabase.from_uri(db_uri, include_tables=tables)

In [None]:
from langchain_community.agent_toolkits import create_sql_agent

# SQL agent over the AACT connection. Intermediate steps are requested from the
# executor so they are available in the response alongside the final answer.
# (The original call had a stray ", ," in the argument list, which is a
# SyntaxError.)
agent_executor = create_sql_agent(
    llm,
    db=sql_db,
    verbose=True,
    agent_executor_kwargs={"return_intermediate_steps": True},
)

In [None]:
# Displays the bound method itself; it is not called here.
# NOTE(review): if the table info was wanted, this needs parentheses — confirm.
sql_db.get_table_info

In [None]:
# List the table names the wrapper exposes.
sql_db.get_usable_table_names()

In [None]:
# Print the table info for every usable table.
print(sql_db.get_table_info(sql_db.get_usable_table_names()))

In [None]:
# Same lookup through the `_no_throw` variant.
# NOTE(review): presumably returns an error message instead of raising — confirm.
sql_db.get_table_info_no_throw()

In [None]:
# Natural-language question passed to the SQL agent; it combines condition,
# country, intervention and adverse-event constraints.
query_str = " ".join(
    [
        "Which study ids are associated with",
        "the condition 'Asthma' and conducted in the United States, China, and India,",
        "while involving the intervention 'Xhance', and reporting more than five affected subjects",
        "in either 'deaths' or 'serious' adverse events?",
    ]
)

In [None]:
# Run the agent on the question defined above.
response = agent_executor.invoke(query_str)

In [None]:
# Print the raw agent response.
print(response)

In [None]:
# Print the wrapper's `table_info` attribute.
print(sql_db.table_info)

JSON Loader

In [1]:
from src.utils.utils import get_clinical_trial_study
from langchain_community.document_loaders import JSONLoader

In [2]:
# Fetch one study and wrap it in a dict keyed by its NCT identifier.
nct_id = "NCT01164592"
study = {nct_id: get_clinical_trial_study(nct_id)}

In [3]:
from langchain_text_splitters import RecursiveJsonSplitter
# JSON splitter configured with a maximum chunk size of 300.
splitter = RecursiveJsonSplitter(max_chunk_size=300)


In [4]:
# Split the wrapped study dict into documents.
docs = splitter.create_documents(texts=[study])
