#### Build BioMistral Medical RAG Chatbot using BioMistral Open Source LLM

In [None]:
# Installation

!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf

#### Importing libraries

In [None]:
# Class names are case-sensitive: later cells use `Chroma` and `LlamaCpp`,
# so those exact names must be imported here.
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain

#### Import the documents 

In [None]:
# PyPDFDirectoryLoader takes a directory that contains PDF files. The previous
# argument was a CSV file path wrapped in an extra pair of quotes, which does
# not name such a directory.
PDF_DIR = "datasets"  # TODO confirm: folder that holds the source PDF files
loader = PyPDFDirectoryLoader(PDF_DIR)
docs = loader.load()

# Fail early with a clear message instead of an IndexError in a later cell.
if not docs:
    raise FileNotFoundError(f"No documents were loaded from {PDF_DIR!r}; check the path.")

In [None]:
# Number of documents returned by the loader.
len(docs)

In [None]:
# Spot-check one loaded document; index 6 assumes at least 7 documents were loaded.
docs[6]

#### Chunking

In [None]:
# Break the loaded documents into small overlapping pieces for retrieval.
# Sizes are presumably measured in characters (the splitter's default) - TODO confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(docs)

In [None]:
# Number of chunks produced by the splitter.
len(chunks)

In [None]:
# Spot-check one chunk; index 600 assumes the split produced more than 600 chunks.
chunks[600]

In [None]:
# Inspect the following chunk (index 601) to compare it with its neighbour.
chunks[601]

#### Embedding creation

In [1]:
import os
from getpass import getpass

# Never hardcode the token in the notebook: reuse it when it is already set in
# the environment, otherwise prompt for it without echoing the input.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass("Enter your Hugging Face API token: ")

In [None]:
# Initialize the pubmedbert-base-embeddings sentence-embedding model.
embeddings = SentenceTransformerEmbeddings(
    model_name='NeuML/pubmedbert-base-embeddings',
)

#### Vector Store Creation

In [None]:
# Build the Chroma vector store from the chunks, embedding each one with the
# model created above so that queries can be matched by vector similarity.
# NOTE(review): an earlier comment said that omitting the embeddings gives
# keyword search; presumably Chroma then falls back to a default embedding
# function instead - confirm against the Chroma documentation.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [None]:
# Sanity check: run a sample question against the vector store and keep the
# matches for inspection in the next cell.
query = "who is at risk of the heart disease"

search_results = vectorstore.similarity_search(query=query)

In [None]:
# Display the matches returned by the similarity search.
search_results

In [None]:
# Wrap the vector store as a retriever; k=5 requests five results per query.
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 5},
)

In [None]:
# Fetch the documents for the sample query through the retriever.
# `invoke` is the Runnable entry point (the same call style the chain uses
# further down); `get_relevant_documents` is the deprecated spelling.
retriever.invoke(query)

#### LLM model loading

In [None]:
# The model file is 4 GB or more, so download it first and upload it to Google Drive,
# then mount Google Drive in this notebook so the model can be loaded from there.

In [None]:
# Load the quantized BioMistral model from Google Drive and keep the handle in
# `llm`, the name the chain cell further down pipes the prompt into.
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.2,  # keyword was misspelled as "temprature"
    max_tokens=2048,
    top_p=1,
)

#### Use LLM and retriever and query here to generate the final response

In [None]:
# prompt template creation

template = """
<|context|> #this indicates that this is a placeholder and will get the data will be filled here from the retrieval. basically dynamic
You are a medical assistant that follows the instructions and generate the accurate response based on the query and the context provided.
Please be truthful and give direct answers. #this is to reduce the hallucinations
</s> 
<|user|>
{query}
</s> #format separator
<|assistant|> #the role of an LLM, because it should provide you the answers.
"""

In [7]:
 # Only brace-wrapped names such as {query} are placeholders filled in at run time; <|context|>, <|user|> and <|assistant|> are literal role tags.

In [None]:
#### Building LLM Application

In [None]:
# using chain method

from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate # for building template


In [None]:
# Turn the raw template string into a prompt object the chain can fill in.
prompt = ChatPromptTemplate.from_template(
    template=template,
)

In [None]:
def _format_docs(documents):
    """Join the text of the retrieved documents into one plain-text string.

    Without this step the list of Document objects would be interpolated into
    the prompt as its raw repr rather than as readable text.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# Retrieval-augmented chain: the incoming question is sent to the retriever
# (to build the context) and is also passed through unchanged as the query.
reg_chain = (
    {
        "context": retriever | _format_docs,  # retrieved documents, as plain text
        "query": RunnablePassthrough(),       # the user's question, unchanged
    }
    | prompt             # fill the template with the values above
    | llm                # pass the prompt to the LLM
    | StrOutputParser()  # return the LLM output as a plain string
)

In [None]:
# Run the whole chain once on the sample query defined earlier.
response = reg_chain.invoke(query)

In [None]:
# Display the answer produced by the chain for the sample query.
response

In [None]:
# to make the chat interactive as a chatbot we will have to call all this in a loop

# Interactive chat loop: keep asking for questions until the user types "exit".
import sys  # no longer needed to leave the loop; kept in case other cells use it

while True:
    # Strip surrounding whitespace so " exit " and blank-looking input behave as expected.
    user_input = input("input query:").strip()
    if user_input == 'exit':
        print("Exiting")
        # `break` ends the loop cleanly; sys.exit() would raise SystemExit in the notebook.
        break
    if user_input == '':
        continue
    # The chain is defined as `reg_chain` earlier in the notebook.
    result = reg_chain.invoke(user_input)
    print("Answer:", result)