In [None]:
username = "tester"
password  = "password"
url = "bolt://localhost:7687"
database="ctgov"
node_label= "AdverseEvent", 
embedding_node_property="biobert_emb"
index_name = "adverse_event"
text_node_properties = ["term","organ_system"]
user_query = "Anaemia"

In [None]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding as LI_HF

from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
from llama_index.core import VectorStoreIndex

llm = Ollama(model="mistral", request_timeout=30.0)
li_biobert = LI_HF(model_name="dmis-lab/biobert-base-cased-v1.1")

Settings.llm = llm
Settings.embed_model = li_biobert

neo4j_vector = Neo4jVectorStore(
    username,
    password,
    url,
    768,
    node_label= node_label, 
    embedding_node_property= embedding_node_property, 
    text_node_property= text_node_properties,
    index_name=index_name,
    keyword_index_name= "adverse_event_kw",
    hybrid_search=True
)

loaded_index = VectorStoreIndex.from_vector_store(neo4j_vector).as_query_engine()
test= loaded_index.query(user_query)
print(test)

In [None]:
from langchain_community.vectorstores import Neo4jVector
from langchain_community.embeddings import HuggingFaceEmbeddings as LC_HF

lc_biobert = LC_HF(model_name="dmis-lab/biobert-base-cased-v1.1")

adverse_event = Neo4jVector.from_existing_graph(
    username=username, 
    password=password, 
    database=database,
    url=url,
    node_label= node_label, 
    embedding_node_property= embedding_node_property, 
    text_node_properties= text_node_properties,
    index_name=index_name,
    search_type="hybrid",
    keyword_index_name= "adverse_event_kw",
    embedding= lc_biobert,
    )

test = adverse_event.similarity_search_with_score(user_query, k=3)
print(test)

text2sql

In [1]:
import os
from dotenv import load_dotenv
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core import SQLDatabase
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
from sqlalchemy import create_engine
import pandas as pd
from tqdm import tqdm
from requests.exceptions import ReadTimeout, Timeout

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Possible solution to llama-index timeout
import nest_asyncio
nest_asyncio.apply()

In [3]:
import yaml

# Load the YAML file
with open( "./src/txt_2_sql/sql_queries_template.yaml", "r") as file:
    sql_queries_template = yaml.safe_load(file)

In [4]:
lm = Ollama(model="sqlcoder", temperature=0.0,  request_timeout=100)
Settings.llm = lm
Settings.embed_model="local"

In [5]:
load_dotenv(".env")
AACT_USER = os.getenv("AACT_USER")
AACT_PWD = os.getenv("AACT_PWD")

In [6]:
tables = [
    "browse_interventions",
    "sponsors",
    "detailed_descriptions",
    "facilities",
    "studies",
    "outcomes",
    "browse_conditions",
    "keywords",
    "eligibilities",
    "reported_events",
    "brief_summaries",
    "designs",
    "countries",
]

database = "aact"
host = "aact-db.ctti-clinicaltrials.org"
user = AACT_USER
password = AACT_PWD
port = 5432
db_uri = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}"


In [7]:
db_engine = create_engine(db_uri)
sql_db = SQLDatabase(db_engine, include_tables=tables)
query_engine = NLSQLTableQueryEngine(sql_database=sql_db)

In [8]:
condition = 'Asthma'
intervention = 'Xhance'
nctId = 'NCT01164592' 

In [9]:
sql_queries_template["query_1"]["question"].format(nctId=nctId)

'What is the title in study NCT01164592?'

In [10]:
response = query_engine.query(sql_queries_template["query_1"]["question"].format(nctId=nctId))
print(response)
print("######")
print(response.metadata["sql_query"])
print("######")
print(response.metadata["result"])

 SERVE-HF: Substudy on the Mechanistic Plausibility of the Clinical Benefits of Adaptive Servo-ventilation
######
SELECT official_title FROM studies WHERE nct_id = 'NCT01164592'
######
[('SERVE-HF: Substudy on the Mechanistic Plausibility of the Clinical Benefits of Adaptive Servo-ventilation',)]


In [11]:
sql_eval_cols = ["question", "gold_std_query", "gold_std_answer", "llamaIndex_query", "llamaIndex_answer"]
sql_eval_rows = list(sql_queries_template.keys())
sql_eval = pd.DataFrame([], index=sql_eval_rows, columns=sql_eval_cols)

for q, d in tqdm(sql_queries_template.items(), desc= "Evaluating llama index"):
    question = d["question"].format(nctId=nctId, condition=condition, intervention=intervention)
    sql_query = d["SQL"].format(nctId=nctId, condition=condition, intervention=intervention)
    
    print(f"{q} : {question}")
    
    sql_eval.at[q, "question"] =  question
    sql_eval.at[q, "gold_std_query"] = sql_query
    
    # Get gold standard answer
    try :
        answer = sql_db.run_sql(sql_query)[0]
    except:
        answer = "No answer"
        
    sql_eval.at[q, "gold_std_answer"] =  answer
    
    # Get LlamaIndex SQL query and answer
    try:
        response = query_engine.query(question)
        sql_eval.at[q, "llamaIndex_query"] = response.metadata["sql_query"]
        sql_eval.at[q, "llamaIndex_answer"] = response.response
    except (ReadTimeout, Timeout, TimeoutError):
        print("Time out!")
        sql_eval.at[q, "llamaIndex_query"] = "ReadTimeout"
        sql_eval.at[q, "llamaIndex_answer"] = "ReadTimeout"
    except Exception as e:
        print(e)
        sql_eval.at[q, "llamaIndex_query"] = e
        sql_eval.at[q, "llamaIndex_answer"] = e


Evaluating llama index:   0%|          | 0/22 [00:00<?, ?it/s]

query_1 : What is the title in study NCT01164592?


Evaluating llama index:   5%|▍         | 1/22 [00:13<04:53, 13.98s/it]

query_2 : Summarise study NCT01164592


Evaluating llama index:   9%|▉         | 2/22 [01:54<21:38, 64.92s/it]

timed out
query_3 : Is study NCT01164592 and interventional or observational study? [INTERVENTIONAL, OBSERVATIONAL].


Evaluating llama index:  14%|█▎        | 3/22 [02:07<13:01, 41.15s/it]

query_4 : What condition(s) is studied in study NCT01164592? Give mesh term.


Evaluating llama index:  18%|█▊        | 4/22 [03:48<19:24, 64.67s/it]

timed out
query_5 : Is Asthma studied in study NCT01164592?


Evaluating llama index:  23%|██▎       | 5/22 [03:54<12:20, 43.58s/it]

query_6 : What drugs / treatments is studied in study NCT01164592? Give mesh term.


Evaluating llama index:  27%|██▋       | 6/22 [04:09<09:00, 33.81s/it]

query_7 : In what phase is study NCT01164592? S


Evaluating llama index:  32%|███▏      | 7/22 [04:17<06:22, 25.48s/it]

query_9 : How many patients to be enrolled in study NCT01164592?


Evaluating llama index:  36%|███▋      | 8/22 [04:26<04:42, 20.17s/it]

query_10 : What is the eligibility criteria for study NCT01164592?


Evaluating llama index:  41%|████      | 9/22 [04:42<04:06, 18.97s/it]

query_11 : What is the min. and max. age range in study NCT01164592?


Evaluating llama index:  45%|████▌     | 10/22 [04:54<03:22, 16.85s/it]

query_13 : Describe the intervention model in study NCT01164592


Evaluating llama index:  50%|█████     | 11/22 [05:05<02:45, 15.02s/it]

query_15 : Describe the primary purpose of study NCT01164592


Evaluating llama index:  55%|█████▍    | 12/22 [05:21<02:34, 15.43s/it]

question_16 : What intervention types are used in study NCT01164592?


Evaluating llama index:  59%|█████▉    | 13/22 [07:02<06:11, 41.28s/it]

timed out
question_17 : Is blinding (a.k.a. masking) implemented in study NCT01164592? If so, select relevant from [PARTICIPANT, CARE_PROVIDER, INVESTIGATOR, OUTCOMES_ASSESSOR, NA]


Evaluating llama index:  64%|██████▎   | 14/22 [08:43<07:53, 59.22s/it]

timed out
question_18 : What is the allocation strategy employed in study NCT01164592?


Evaluating llama index:  68%|██████▊   | 15/22 [08:52<04:08, 35.53s/it]


AttributeError: 'NoneType' object has no attribute 'format'

In [None]:
if not os.path.isdir("./results/txt2sql/"):
    os.makedirs("./results/txt2sql/")
sql_eval.to_csv("./results/txt2sql/llamaindex.eval.tsv", sep="\t")

In [None]:
from llama_index.core.indices.struct_store.sql_query import (
    SQLTableRetrieverQueryEngine,
)
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core import VectorStoreIndex

# set Logging to DEBUG for more detailed outputs
table_node_mapping = SQLTableNodeMapping(sql_db)
table_schema_objs = [(SQLTableSchema(table_name=t)) for t in tables] 

obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
)
query_engine = SQLTableRetrieverQueryEngine(
    sql_db, obj_index.as_retriever(similarity_top_k=3)
)

In [None]:
response = query_engine.query(sql_queries_template["query_1"]["question"].format(nctId=nctId))
print(response)
print("######")
print(response.metadata["sql_query"])
print("######")
print(response.metadata["result"])

In [None]:
query_str = (
    "Which clinical trial are associated with the condition 'Asthma' "
    "and conducted in the United States, China, and India, while involving the "
    "intervention 'Xhance', and reporting more than five affected subjects "
    "in either 'deaths' or 'serious' adverse events?"
)

In [None]:
print(query_str)

In [None]:
response = query_engine.query(query_str)
print(response)
print("######")
print(response.metadata["sql_query"])
print("######")
print(response.metadata["result"])