In [211]:
import os
import time
import uuid

import pandas as pd
from elasticsearch.exceptions import NotFoundError
from elasticsearch.helpers import bulk
from google.cloud import aiplatform
from langchain.agents import initialize_agent, AgentType
from langchain.agents import Tool
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.prompts import PromptTemplate
from langchain.retrievers import PubMedRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch

# Deployment settings used by the cells below.
PROJECT_ID = "drasa-dev"
REGION = "asia-southeast1"
# SECURITY: never commit credentials. The Elasticsearch connection URL
# (including any user:password part) is read from the ES_URL environment
# variable; the fallback is a credential-free local endpoint.
# Any password that was previously committed here should be rotated.
ES_URL = os.environ.get("ES_URL", "http://localhost:9200")
ES_INDEX_NAME = "data-index"

In [146]:
# Point the Vertex AI SDK at the configured project and region.
aiplatform.init(project=PROJECT_ID, location=REGION)

# Embedding client; it is handed to the vector store here and is also used
# directly later on to embed the corpus chunks.
embeddings_service = VertexAIEmbeddings()

# Vector-store handle for the Elasticsearch index named by ES_INDEX_NAME.
db = ElasticVectorSearch(
    ES_URL,
    ES_INDEX_NAME,
    embedding=embeddings_service,
)

In [147]:
# Column names for the corpus file, which is read without a header row.
ddi_columns = [
    "DDI_ID",
    "Drug_1_Name",
    "Drug_1_Drugbankid",
    "Drug_1_type",
    "Drug_2_Name",
    "Drug_2_Drugbankid",
    "Is_DDI",
    "DDI_Type",
    "Drug_2_type",
    "Sentence_Text",
]

# The file is "$"-separated; the names are assigned after loading, so a column
# count mismatch raises instead of being silently absorbed.
df = pd.read_csv(
    "./datasets/DDICorpus2013Mapped.csv",
    sep="$",
    header=None,
    encoding="utf-8",
)
df.columns = ddi_columns

# Add documents: chunk, embed, and index the corpus

In [199]:
# Splitter that breaks text on "." and then on newlines, targeting chunks of
# 500 characters (measured with len) and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
    separators=[".", "\n"],
)

# Flat list of {"content": <chunk text>} records collected across all rows.
chunked = []
row_fields = zip(
    df["Drug_1_Name"],
    df["Drug_2_Name"],
    df["Is_DDI"],
    df["DDI_Type"],
    df["Sentence_Text"],
)
for drug_1, drug_2, is_interaction, typ_of_interaction, sentence in row_fields:
    # Flatten the structured fields and the sentence into one text so that the
    # drug names and interaction label are embedded together with the sentence.
    concat_text = (
        f"drug1:{drug_1} drug2:{drug_2} "
        f"is_interaction:{is_interaction} "
        f"typ_of_interaction:{typ_of_interaction} "
        f"sentence:{sentence}"
    )
    chunked.extend(
        {"content": doc.page_content}
        for doc in text_splitter.create_documents([concat_text])
    )

def retry_with_backoff(func, *args, retry_delay=5, backoff_factor=2, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying with exponential backoff on failure.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        retry_delay: Base delay in seconds.
        backoff_factor: Growth factor of the delay; the wait after the k-th
            failed attempt is ``retry_delay * backoff_factor ** k``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The value returned by the first successful call to ``func``.

    Raises:
        Exception: Whatever ``func`` raised on its final attempt, once all
            10 attempts have failed.
    """
    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"error: {e}")
            if attempt == max_attempts:
                # Out of attempts: surface the real failure instead of
                # sleeping once more and silently returning None, which would
                # only fail later in the caller with an unrelated error.
                raise
            wait = retry_delay * (backoff_factor**attempt)
            print(f"Retry after waiting for {wait} seconds...")
            time.sleep(wait)

# Embed the chunks in small batches and attach each vector to its record.
batch_size = 5
for start in range(0, len(chunked), batch_size):
    batch = chunked[start : start + batch_size]
    texts = [record["content"] for record in batch]
    vectors = retry_with_backoff(embeddings_service.embed_documents, texts)
    # Pair every record with the embedding returned at the same position.
    for record, vector in zip(batch, vectors):
        record["embedding"] = vector



In [200]:
def _default_text_mapping(dim: int) -> dict:
    return {
        "properties": {
            "text": {"type": "text"},
            "vector": {"type": "dense_vector", "dims": dim},
        }
    }

# The dense_vector dimension is taken from the first embedded chunk.
dim = len(chunked[0]["embedding"])
mapping = _default_text_mapping(dim)

# Create the index only when looking it up reports that it does not exist.
try:
    db.client.indices.get(index=ES_INDEX_NAME)
except NotFoundError:
    # just to save expensive steps for last
    db.create_index(db.client, db.index_name, mapping)

# One bulk "index" action per chunk, each stored under a fresh random UUID.
# NOTE(review): because the ids are random, re-running this cell presumably
# indexes the same chunks again under new ids — confirm before re-executing.
requests = [
    {
        "_op_type": "index",
        "_index": ES_INDEX_NAME,
        "_id": str(uuid.uuid4()),
        "vector": record["embedding"],
        "text": record["content"],
        "metadata": {},
    }
    for record in chunked
]
bulk(db.client, requests)

(2414, [])

# Test run

In [233]:
# Prompt for the retrieval QA chain. {context} receives the retrieved chunks
# (each formatted as "drug1:... drug2:... is_interaction:... typ_of_interaction:...
# sentence:..." when the corpus was indexed) and {question} the user question.
# The "don't know" instruction is conditional on the answer not being in the
# context, so the model is not told to refuse unconditionally.
prompt_template = """
    You are a drug researcher who answers questions about interactions between drugs and the type of each interaction.
    Each piece of context below gives two drugs, whether they interact, the type of interaction, and the research sentence it was taken from.
    Use only the following pieces of context to answer. If you don't know the answer, just say that you don't know; don't try to make up an answer.
    {context}
    {question}
    Answer (list the drugs that might interact):
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
# Passed to RetrievalQA so the "stuff" chain uses the custom prompt.
chain_type_kwargs = {"prompt": PROMPT}

# Windowed conversation memory (k=5), exposed under the `chat_history` key as
# message objects rather than one concatenated string.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True,
)

# Vertex AI text model shared by the QA chain and the agent.
llm = VertexAI(temperature=0.2)

# Retrieval QA over the Elasticsearch vector store, using the custom prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=False,
)

qa_pubmed = PubMedRetriever()

# Tool backed by the retrieval QA chain over the indexed interaction corpus.
drug_interaction_tool = Tool(
    name="Drug Interaction Question Answering",
    func=qa.run,
    description="This tool allows you to ask questions about drug interaction and get answers. ",
)

# Tool backed by the PubMed retriever.
pubmed_tool = Tool(
    name="Pubmed",
    func=qa_pubmed.run,
    description="PubMedRetriever is a tool that allows you to search for articles in the PubMed database.",
)

# Order is kept as originally declared: corpus QA first, PubMed second.
tools = [drug_interaction_tool, pubmed_tool]

# Conversational ReAct agent wired to the two tools and the windowed memory.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    memory=conversational_memory,
    # Cap the reasoning loop at 3 iterations; the "generate" early-stopping
    # method is requested for when that cap is reached.
    max_iterations=3,
    early_stopping_method="generate",
    # Print the intermediate steps shown in the cell output.
    verbose=True,
)

In [237]:
# Ask the agent a yes/no interaction question. The call is the last expression
# of the cell so that the returned dict (input, chat_history, output) is displayed.
query = "Does Paclitaxel interact with Dasatinib ?"
agent(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Drug Interaction Question Answering",
    "action_input": "Does Paclitaxel interact with Dasatinib?"
}
```[0m
Observation: [36;1m[1;3mYes, Paclitaxel interacts with Dasatinib.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Yes, Paclitaxel interacts with Dasatinib."
}
```[0m

[1m> Finished chain.[0m


{'input': 'Does Paclitaxel interact with Dasatinib ?',
 'chat_history': [HumanMessage(content='Which drug interact with Tylenol?', additional_kwargs={}, example=False),
  AIMessage(content='Sulfapyridine is a drug that interacts with Tylenol.', additional_kwargs={}, example=False),
  HumanMessage(content='Which drug interact with Proleukin?', additional_kwargs={}, example=False),
  AIMessage(content='Asparaginase, Methotrexate, Dacarbazine, Tamoxifen are drugs that interact with Proleukin.', additional_kwargs={}, example=False),
  HumanMessage(content='Does Proleukin interact with Tamoxifen ?', additional_kwargs={}, example=False),
  AIMessage(content='Yes, Proleukin interacts with Tamoxifen.', additional_kwargs={}, example=False)],
 'output': 'Yes, Paclitaxel interacts with Dasatinib.'}

In [203]:
# Rows whose first drug is Proleukin, shown to cross-check the agent's answers.
df.loc[df["Drug_1_Name"].eq("Proleukin")]

Unnamed: 0,DDI_ID,Drug_1_Name,Drug_1_Drugbankid,Drug_1_type,Drug_2_Name,Drug_2_Drugbankid,Is_DDI,DDI_Type,Drug_2_type,Sentence_Text
26,DDI-DrugBank.d114.s5.p1,Proleukin,DB00041,brand,Dacarbazine,DB00851,True,effect,drug,Hypersensitivity reactions have been reported ...
27,DDI-DrugBank.d114.s5.p3,Proleukin,DB00041,brand,Tamoxifen,DB00675,True,effect,drug,Hypersensitivity reactions have been reported ...


In [244]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
# Bare question/answer prompt for querying the model directly, with no
# retrieved context.
template = """
    Question: {question}
    Answer:
"""

prompt = PromptTemplate(template=template, input_variables=["question"])

# SECURITY: the Hugging Face API token must not be committed. It is read from
# the HUGGINGFACEHUB_API_TOKEN environment variable; a missing variable raises
# KeyError here. Any token that was previously committed should be revoked.
# NOTE: this rebinds `llm`, which earlier cells bound to the VertexAI model.
llm = HuggingFaceHub(
    repo_id="stanford-crfm/BioMedLM",
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)