## BioMistral Medical RAG Chatbot Using the BioMistral Open-Source LLM


### Mount Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read the PDF folder and the
# GGUF model file from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
# Install the third-party packages this notebook depends on (LangChain, the
# embedding and vector-store backends, llama.cpp bindings, and the PDF parser).
!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf

### Importing libraries


In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain

### Importing Documents


In [None]:
# Load every PDF found in the project's Data folder on the mounted Drive.
PDF_DATA_DIR = "/content/drive/MyDrive/BioMistral_RAG_APP/Data"
loader = PyPDFDirectoryLoader(PDF_DATA_DIR)
docs = loader.load()


In [None]:
# Show how many documents the loader returned.
len(docs)

## Chunking


In [None]:
# Split the loaded documents into overlapping chunks (chunk_size=300,
# chunk_overlap=50) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(docs)

In [None]:
# Show how many chunks the splitter produced.
len(chunks)

In [None]:
# Peek at one sample chunk to sanity-check the split. The index is clamped to
# the last available chunk so a small corpus (fewer than 601 chunks) does not
# raise IndexError; an empty corpus displays nothing (None).
chunks[min(600, len(chunks) - 1)] if chunks else None

## Embedding

In [None]:
import os
# Publish the Hugging Face Hub access token through the process environment.
# The value below is a placeholder: replace it with a real token before running.
HF_TOKEN_ENV_VAR = 'HUGGINGFACEHUB_API_TOKEN'
os.environ[HF_TOKEN_ENV_VAR] = 'REPLACE YOUR ACCESS TOKEN'

In [None]:
# Embedding model used to vectorise both the chunks and the incoming queries.
EMBEDDING_MODEL_NAME = "NeuML/pubmedbert-base-embeddings"
embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)

## Vector Store Creation

In [None]:
# Embed every chunk and index the vectors in a Chroma store.
Vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [None]:
# Smoke-test the vector store with a sample question and display the matches.
query = "who is at risk of heart disease?"
search_results = Vectorstore.similarity_search(query)
search_results

In [None]:
# Wrap the vector store as a retriever that returns the 5 closest chunks.
retriever = Vectorstore.as_retriever(
    search_kwargs={"k": 5},
)


In [None]:
# Preview what the retriever returns for the sample query. `invoke` is the
# Runnable entry point (the same one the chain uses); the older
# `get_relevant_documents` method is deprecated.
retriever.invoke(query)

## LLM Model loading

In [None]:
llm=LlamaCpp(
    model_path="/content/drive/MyDrive/BioMistral_RAG_APP/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.2,
    max_tokens=2048,
    top_p=1,
)

### use LLM retrival query to generate final response

In [None]:
template ="""
<|context|>
You are an medical assistant that follows the instruction and generate the accurate response based on the query and the context provide.
Please be truthful and give direct answers.
</s>
<|user|>

{query}
</s>
<|assistant|>
"""

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate
# Compile the raw template string into a prompt object the chain can fill in.
prompt = ChatPromptTemplate.from_template(template=template)

In [None]:
# The chain input (the question string) is fanned out two ways: to the
# retriever, whose result is bound to "context", and passed through unchanged
# as "query". The filled prompt then goes to the LLM and the output is parsed
# to a plain string.
chain_inputs = {"context": retriever, "query": RunnablePassthrough()}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [None]:
# Run the full chain once on the sample query defined earlier in the notebook.
response=rag_chain.invoke(query)

In [None]:
# Display the generated answer.
response

In [None]:
from ast import Continue
import sys
# Minimal interactive loop: read a question, run it through the RAG chain and
# print the answer. Typing 'exit' terminates the program.
while True:
    # Strip surrounding whitespace so blank or whitespace-only input is skipped
    # instead of being sent to the model, and ' exit ' is still recognised.
    user_input = input("Input query:").strip()
    if not user_input:
        continue
    if user_input == 'exit':
        print("Exiting the program.")
        sys.exit()
    result = rag_chain.invoke(user_input)
    print("Answer:", result)
