In [None]:
!pip install llama-index docx2txt torch sentence-transformers

In [None]:
import os
from google.colab import drive

# Attach Google Drive to the Colab VM; force_remount=True re-mounts even when a
# mount from an earlier run is still present.
drive.mount('/content/gdrive', force_remount=True)
root_dir = "/content/gdrive/My Drive/"

# Folder on Drive that holds the source documents to be indexed.
doc_folder_path = root_dir + 'Super Doctor/Pharmbot_fulldoc'

# Sanity check: show what is inside the folder before reading it.
folder_entries = os.listdir(doc_folder_path)
print(folder_entries)

In [None]:
import os
from getpass import getpass

# Secrets must not be typed into the notebook source: they end up in saved
# copies, diffs and shared links. Reuse a key already exported in the
# environment; otherwise ask for it interactively (the input is not echoed).
openai_api_key = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')
if not openai_api_key.strip():
    raise ValueError('An OpenAI API key is required to run this notebook.')

# Export the key so libraries that read OPENAI_API_KEY from the environment see it.
os.environ['OPENAI_API_KEY'] = openai_api_key

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from llama_index import SimpleDirectoryReader

# Reader over `doc_folder_path`; recursive=True also descends into sub-folders.
reader = SimpleDirectoryReader(input_dir=doc_folder_path, recursive=True)

# `iter_data()` hands back the documents in batches (presumably one batch per
# input file — TODO confirm). Each document's text is upper-cased in place and
# the document is then collected into one flat list.
# NOTE(review): the upper-casing sat under a "do something with the doc"
# placeholder comment — confirm it is intended, since it changes the text that
# later gets chunked, embedded and shown to the LLM.
documents = []
for docs in reader.iter_data():
    for doc in docs:
        doc.text = doc.text.upper()
        documents.append(doc)

In [None]:
# Quick look at what was loaded: container type and size first ...
print(type(documents), "\n")
print(len(documents), "\n")

# ... then the type and contents of the first document.
first_document = documents[0]
print(type(first_document))
print(first_document)

In [None]:
from llama_index import Document

# Concatenate the text of every loaded document, separated by a blank line,
# and wrap the result in a single Document for the node parser.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [None]:
from llama_index.node_parser import HierarchicalNodeParser

# Chunk sizes handed to the hierarchical parser, largest first (presumably one
# size per level of the node hierarchy — TODO confirm against the library docs).
chunk_sizes = [2048, 512, 128]

# Build the parser; every option other than the chunk sizes is left at its default.
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)

In [None]:
# Run the node parser over the single merged `document`. The resulting `nodes`
# list is what later cells extract leaf nodes from and look parent nodes up in.
nodes = node_parser.get_nodes_from_documents([document])

In [None]:
from llama_index.node_parser import get_leaf_nodes

# Keep only the leaf nodes out of the full node list.
leaf_nodes = get_leaf_nodes(nodes)

# Spot-check one arbitrary leaf (index 30) by printing its text.
sample_leaf = leaf_nodes[30]
print(sample_leaf.text)

In [None]:
# Index every node by its id so a parent can be looked up directly.
nodes_by_id = dict((node.node_id, node) for node in nodes)

# Follow the parent reference of the same sample leaf (index 30) and print the
# parent's text for comparison with the leaf's own text.
sample_leaf = leaf_nodes[30]
parent_node = nodes_by_id[sample_leaf.parent_node.node_id]
print(parent_node.text)

In [None]:
from llama_index.llms import OpenAI

# LLM handle that is later passed into the ServiceContext; configured with the
# "gpt-4" model name and a temperature of 0.1.
# NOTE(review): presumably picks up the OPENAI_API_KEY environment variable set
# earlier in the notebook — confirm.
llm = OpenAI(model="gpt-4", temperature=0.1)

In [None]:
from llama_index import ServiceContext

# Embedding model identifier handed to the service context.
embed_model_name = "local:BAAI/bge-small-en-v1.5"

# Bundle the LLM, the embedding model and the hierarchical node parser into one
# ServiceContext that the index below is built with.
auto_merging_context = ServiceContext.from_defaults(
    node_parser=node_parser,
    embed_model=embed_model_name,
    llm=llm,
)

In [None]:
from llama_index import VectorStoreIndex, StorageContext

# Directory the storage context is persisted to.
persist_dir = "./merging_index"

# Register *all* nodes in the docstore, but build the vector index from the
# leaf nodes only.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

automerging_index = VectorStoreIndex(
    leaf_nodes,
    service_context=auto_merging_context,
    storage_context=storage_context,
)

# Write the index's storage context to disk.
automerging_index.storage_context.persist(persist_dir=persist_dir)

In [None]:
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.retrievers import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Stage 1: base vector retriever over the index, returning the 20 most similar
# nodes per query.
automerging_retriever = automerging_index.as_retriever(
    similarity_top_k=20
)

# Stage 2: wrap the base retriever in an AutoMergingRetriever. It is given the
# index's storage context so it can reach the nodes stored in the docstore
# (per the library docs, to replace groups of retrieved leaf chunks with their
# parent chunk). verbose=True makes it report what it does.
retriever = AutoMergingRetriever(
    automerging_retriever,
    automerging_index.storage_context,
    verbose=True
)

# Stage 3: rerank the retrieved nodes and keep the best 10.
rerank = SentenceTransformerRerank(top_n=10, model="BAAI/bge-reranker-base")

# Build the engine on the auto-merging `retriever`. Passing the base
# `automerging_retriever` here instead would leave `retriever` unused and skip
# the merging stage entirely.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[rerank]
)

In [None]:
# Drugs to query the index about; every task below is run once per entry.
medications = [
    'Enoxaparin',
    'Actrapid',
    'Lantus',
    'Novorapid',
    'Aspirin',
    'Clopidogrel',
    'Omeprazole',
    'Glyceryl Trinitrate',
    'Linagliptin',
    'Bisoprolol',
    'Perindopril',
    'Neurobion',
]

In [None]:
def _build_task_prompts(drug_names, section_titles):
    """Return one prompt per drug: the drug name, a space, then ``section_titles``."""
    return [f"{drug_name} {section_titles}" for drug_name in drug_names]


def _quote_join(prompts):
    """Wrap every prompt in double quotes and concatenate them into one string."""
    return '"' + '""'.join(prompts) + '"'


def _clean_prompts(prompts):
    """Strip surrounding whitespace and drop prompts that are blank afterwards."""
    return [prompt.strip() for prompt in prompts if prompt.strip()]


def _ask_each(query_engine, prompts):
    """Query ``query_engine`` once per prompt, in order, returning ``str`` answers."""
    return [str(query_engine.query(prompt)) for prompt in prompts]


# Section titles appended to each drug name to form the prompt for a task.
_INDICATION_SECTIONS = "Use: Labeled Indications, Use: Off-Label: Adult"
_DOSING_SECTIONS = "CrCl, Dosing: Adult, Dosing: Altered Kidney Function: Adult, Dosing: Hepatic Impairment: Adult"
_ADR_SECTIONS = "Adverse Reactions (Significant): Considerations, Adverse Reactions"
_DDI_SECTIONS = "Drug Interactions"
_MOA_SECTIONS = "Mechanism of Action, Pharmacokinetics (Adult Data Unless Noted)"

# One prompt per medication for each task.
medications_indication_task = _build_task_prompts(medications, _INDICATION_SECTIONS)
medications_dosing_task = _build_task_prompts(medications, _DOSING_SECTIONS)
medications_adr_task = _build_task_prompts(medications, _ADR_SECTIONS)
medications_ddi_task = _build_task_prompts(medications, _DDI_SECTIONS)
medications_moa_task = _build_task_prompts(medications, _MOA_SECTIONS)

# Quote-delimited string forms; kept because other cells read these names.
medications_indication_task_str = _quote_join(medications_indication_task)
medications_dosing_task_str = _quote_join(medications_dosing_task)
medications_adr_task_str = _quote_join(medications_adr_task)
medications_ddi_task_str = _quote_join(medications_ddi_task)
medications_moa_task_str = _quote_join(medications_moa_task)

# Prompts actually sent to the engine. They are taken straight from the prompt
# lists rather than by splitting the quoted strings back apart on '"': the
# result is the same for the current prompts, and a prompt that happens to
# contain a double quote is no longer cut into pieces.
medications_indication = _clean_prompts(medications_indication_task)
medications_dosing = _clean_prompts(medications_dosing_task)
medications_adr = _clean_prompts(medications_adr_task)
medications_ddi = _clean_prompts(medications_ddi_task)
medications_moa = _clean_prompts(medications_moa_task)

# Run every task (one engine query per medication, tasks in the order below)
# and keep both the individual answers and a newline-joined summary per task.
indication_responses = _ask_each(auto_merging_engine, medications_indication)
indication_combined_responses = "\n".join(indication_responses)

dosing_responses = _ask_each(auto_merging_engine, medications_dosing)
dosing_combined_responses = "\n".join(dosing_responses)

adr_responses = _ask_each(auto_merging_engine, medications_adr)
adr_combined_responses = "\n".join(adr_responses)

ddi_responses = _ask_each(auto_merging_engine, medications_ddi)
ddi_combined_responses = "\n".join(ddi_responses)

moa_responses = _ask_each(auto_merging_engine, medications_moa)
moa_combined_responses = "\n".join(moa_responses)

In [None]:
# Rebuild the per-drug prompt lists and their quote-delimited string forms from
# the current `medications` list. (The query cell before this one assigns the
# same names using the same expressions.)
indication_sections = "Use: Labeled Indications, Use: Off-Label: Adult"
dosing_sections = "CrCl, Dosing: Adult, Dosing: Altered Kidney Function: Adult, Dosing: Hepatic Impairment: Adult"
adr_sections = "Adverse Reactions (Significant): Considerations, Adverse Reactions"
ddi_sections = "Drug Interactions"
moa_sections = "Mechanism of Action, Pharmacokinetics (Adult Data Unless Noted)"

# Each prompt is the drug name, a single space, then the task's section titles.
medications_indication_task = [" ".join((medication, indication_sections)) for medication in medications]
medications_dosing_task = [" ".join((medication, dosing_sections)) for medication in medications]
medications_adr_task = [" ".join((medication, adr_sections)) for medication in medications]
medications_ddi_task = [" ".join((medication, ddi_sections)) for medication in medications]
medications_moa_task = [" ".join((medication, moa_sections)) for medication in medications]

# String form: prompts separated by a pair of double quotes, with one more
# double quote at each end.
indication_joined = '""'.join(medications_indication_task)
medications_indication_task_str = f'"{indication_joined}"'

dosing_joined = '""'.join(medications_dosing_task)
medications_dosing_task_str = f'"{dosing_joined}"'

adr_joined = '""'.join(medications_adr_task)
medications_adr_task_str = f'"{adr_joined}"'

ddi_joined = '""'.join(medications_ddi_task)
medications_ddi_task_str = f'"{ddi_joined}"'

moa_joined = '""'.join(medications_moa_task)
medications_moa_task_str = f'"{moa_joined}"'

In [None]:
# Display the quote-delimited MoA prompt string as this cell's output.
medications_moa_task_str

In [None]:
# A notebook cell only renders its *last* bare expression, so listing the five
# result strings one after another showed just the final one. Print each
# combined response under its own heading so all of them are visible.
for heading, combined in (
    ("Indications", indication_combined_responses),
    ("Dosing", dosing_combined_responses),
    ("Adverse reactions", adr_combined_responses),
    ("Drug interactions", ddi_combined_responses),
    ("Mechanism of action / pharmacokinetics", moa_combined_responses),
):
    print(f"=== {heading} ===")
    print(combined)
    print()

In [None]:
# Display the newline-joined MoA answers as this cell's output.
moa_combined_responses

In [None]:
# Recover the individual MoA prompts from the quote-delimited string: split on
# the double quote, trim each piece and discard the empty ones.
medications_moa = [
    piece.strip()
    for piece in medications_moa_task_str.split('"')
    if piece.strip()
]

# Re-run the MoA queries (one engine call per prompt, in order) and rebuild the
# newline-joined summary.
moa_responses = list(map(lambda prompt: str(auto_merging_engine.query(prompt)), medications_moa))
moa_combined_responses = "\n".join(moa_responses)

# Last expression of the cell: shown as the cell output.
moa_combined_responses

In [None]:
# Ask the engine a single question and keep the full response object (not just
# its string form) for the inspection cells that follow.
enoxaparin_moa_query = "Enoxaparin Mechanism of Action, Pharmacokinetics (Adult Data Unless Noted)"
auto_merging_response = auto_merging_engine.query(enoxaparin_moa_query)

In [None]:
# Show the response converted to a plain string as this cell's output.
str(auto_merging_response)

In [None]:
from llama_index.response.notebook_utils import display_response

# Render the same response through llama-index's notebook helper
# (presumably a richer, formatted view than the plain string — TODO confirm).
display_response(auto_merging_response)