# Build BioMistral Medical RAG Chatbot using BioMistral Open Source LLM

In the notebook we will build a Medical Chatbot with BioMistral LLM and Heart Health pdf file.

In [2]:
!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf

Collecting langchain
  Downloading langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting chromadb
  Downloading chromadb-0.5.11-py3-none-any.whl.metadata (6.8 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.1.tar.gz (63.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting langchain_community
  Downloading langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting pypdf
  Downloading pypdf-5.0.1-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain-core<0.4.0,>=0.3.6 (from langchain)
  Downloading langchain_core-0.3.7-py3-none-

## Installation

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Import libraries

In [10]:
pip install textwrap

[31mERROR: Could not find a version that satisfies the requirement textwrap (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for textwrap[0m[31m
[0m

In [11]:
from langchain_community.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain
import pandas as pd
import os
import textwrap
from IPython.display import display, Markdown

In [12]:
import pathlib
import textwrap
from IPython.display import display
from IPython.display import Markdown
def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [13]:
# Used to securely store your API key
from google.colab import userdata

## Setup HuggingFace Access Token

- Log in to [HuggingFace.co](https://huggingface.co/)
- Click on your profile icon at the top-right corner, then choose [“Settings.”](https://huggingface.co/settings/)
- In the left sidebar, navigate to [“Access Token”](https://huggingface.co/settings/tokens)
- Generate a new access token, assigning it the “write” role.


In [14]:
import os
from getpass import getpass
# Prompt the user to input their token
HUGGINGFACEHUB_API_TOKEN = getpass("Enter your Hugging Face API token: ")

# Set the token in the environment
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN


Enter your Hugging Face API token: ··········


## Import document

In [15]:
import pandas as pd

In [16]:
from langchain_community.document_loaders import DataFrameLoader

In [3]:
import pandas as pd

# Load your CSV file
file_path = "/content/drive/Shareddrives/298A_Team7/data/model_df_balanced.csv"
df = pd.read_csv(file_path)

# Print columns to verify
print(df.columns)

# Randomly sample 1000 records
sampled_df = df[['TEXT', 'readmitted']].sample(n=1000, random_state=42)

# Verify if the sample contains the correct columns
print(sampled_df.head())

Index(['TEXT', 'readmitted'], dtype='object')
                                                    TEXT  readmitted
51320  2127217 132 pm chest preop pa lat clip clip nu...           1
80965  nursing admit note1 infant potential sepsis2 a...           1
4569   2183118 826 chest portable ap clip clip number...           0
11181  sinus rhythmnormal ecg 2112616 643 pm chest po...           0
48414  sinus rhythm ventricular bigeminy previous tra...           1


In [7]:
import pandas as pd

# Load the original CSV file
file_path = "/content/drive/Shareddrives/298A_Team7/data/model_df_balanced.csv"
df = pd.read_csv(file_path)

# Select the first 20,000 records
df_1000 = df.head(1000)

# Save the selected records to a new CSV
output_path = "output_1000_records.csv"
df_1000.to_csv(output_path, index=False)

print(f"New CSV with 1000 records saved to {output_path}")

New CSV with 1000 records saved to output_1000_records.csv


In [4]:
sampled_df

Unnamed: 0,TEXT,readmitted
51320,2127217 132 pm chest preop pa lat clip clip nu...,1
80965,nursing admit note1 infant potential sepsis2 a...,1
4569,2183118 826 chest portable ap clip clip number...,0
11181,sinus rhythmnormal ecg 2112616 643 pm chest po...,0
48414,sinus rhythm ventricular bigeminy previous tra...,1
...,...,...
9023,2129426 141 pm ct head wo contrast clip clip n...,0
69264,2158926 1255 pm ct chest wcontrast ct abdomen ...,1
57035,sinus rhythm first degree atrioventricular con...,1
53022,ccu admission note per report hospital hospita...,1


In [5]:
sampled_df.head()

Unnamed: 0,TEXT,readmitted
51320,2127217 132 pm chest preop pa lat clip clip nu...,1
80965,nursing admit note1 infant potential sepsis2 a...,1
4569,2183118 826 chest portable ap clip clip number...,0
11181,sinus rhythmnormal ecg 2112616 643 pm chest po...,0
48414,sinus rhythm ventricular bigeminy previous tra...,1


In [6]:
# Combine 'TEXT' and 'readmitted' into a single column for document processing (Optional)
sampled_df['combined_text'] = sampled_df.apply(lambda row: f"Clinical Note: {row['TEXT']}, Readmission Status: {row['readmitted']}", axis=1)

# Use 'combined_text' for document loading
docs = sampled_df['combined_text'].tolist()

In [7]:
# Convert the list to LangChain document format
from langchain.docstore.document import Document

# Create a list of Document objects for LangChain
docs = [Document(page_content=text) for text in sampled_df['combined_text']]


In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the documents into chunks (300 characters with 50 overlap)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)

# Verify the number of chunks created
print(f"Total number of chunks: {len(chunks)}")


Total number of chunks: 34616


In [16]:
chunks[0]

Document(metadata={}, page_content='Clinical Note: 2127217 132 pm chest preop pa lat clip clip number radiology 60602 reason bladder mass lap cyst wneobladder final report reason examination preoperative evaluation pa lateral upright chest radiographs reviewed comparison 21261022 ct 21261023 heart size mildly enlarged bilateral significant fat pads present mediastinal position contour width stable multiple pulmonary nodules seen lungs predominantly lower lobes demonstrated prior study 82126 precise comparison chest radiograph chest ct difficult compared prior chest radiograph obtained day chest ct might potentially interval increase evaluation chest ct required precise documentation stability pleural effusion evidence failure acute cardiopulmonary process pneumonia atrial fibrillation rapid ventricular response compared previoustracing 21261022 inferior stt wave changes improved otherwisethere diagnostic interim change critical care staff saw examined mr known lastname 5654 dr first na

In [18]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Use the HuggingFace embeddings model
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Check if the embeddings model is properly initialized
print("Embeddings model initialized.")


Embeddings model initialized.


In [19]:
# Import the new Chroma class
from langchain.vectorstores import Chroma

# Specify the directory where Chroma will store the vector database
chroma_directory = "/content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/vector_1k"


In [20]:
from langchain.vectorstores import Chroma

# Create a Chroma vector store using the chunks and embeddings
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=chroma_directory)

# Verify if the vector store is created
print("Vector store created and stored at:", chroma_directory)


Vector store created and stored at: /content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/vector_1k


In [21]:

 # Example query to search the vector store
query = "who is at risk of heart disease"
search_results = vectorstore.similarity_search(query)

# Display the result in markdown format
from IPython.display import Markdown

def to_markdown(text):
    return Markdown(f"> {text}")

# Show the top result
to_markdown(search_results[0].page_content)


> un tpnright hip fracture prosthesismitral valve prolapseper patient extensive cardiac history worked ct chf although patient denies atrial fibrillation coumadin cath 2101 per patient showed cad showed ascar heartrecent stool cards 222 positive bloodcardiac risk factors diabetes dyslipidemiahypertensionsocial historysocial history originally location un 42751 lives hospital1 crossing alf denies tobaccoalcoholillicit drugsfamily historybrother x 2 died mi sister mvpphysical examvs t973 hr95117 bp14094 rr18 o2100raorthostaticsstanding 11574 84 sitting 12070 76 laying 12070 80bpmgen thin elderly female nad oriented x3 mood affectappropriateheent ncat sclera anicteric conjunctiva pink pallorneck supple jvdcv pmi located 5th intercostal space midclavicular lineirregular irregular rhythm mrgchest ctab crackles wheezes rhonchiabd soft ntnd hsm tenderness incisional hernia ntpinpoint fistula expressable dischargeext cceneuro aox3pertinent results2104328 0659pm ckcpk1342104328 0659pm ckmb5 ctropnt0012104328 1125am glucose92 urea n20 creat09 sodium140potassium43 chloride104 total co226 anion gap142104328 1125am ckcpk1432104328 1125am ctropnt0022104328 1125am ckmb72104328 1125am wbc86 rbc403 hgb119 hct346 mcv86mch294 mchc344 rdw1522104328 1125am neuts718 lymphs218 monos50 eos08basos052104328 1125am plt count3692104328 1125am pt229 ptt274 inrpt22brief hospital courseekg demonstrated 2104328 atrial flutter 31 block andoccasional pvcs compared prior atrial flutter asopposed atrial premature beatstelemetry demonstrated rates 80s90s cad cardiac cath 2101 intervention done perpatient although vague history scar heartpatient three sets cardiac enzymes negativeshe complaints chest pain ekgs showed sttwchanges essentially ruled mi pump nl tte 2103617 euvolemic exam rhythm admitted looked like atrial flutterwith 31 block chads2 score 1 ep consulted felt thatshe rhythm controlled could come ofcoumadin given recent guaiac positive stools wasstarted propafenone rhythm control continued hercoumadin episodes tachycardia 180 bpmovernight thus toprol restarted 125mg hospital1 shehad bradycardia long pauses telemetry requiringatropine transferred ccu furthermanagement arrhythmias potentially ppm placementccu stay initially monitored telemetry wasstill pauses tele thought secondary betablockade beta blocker wore hr picked shestopped pauses anxious prior procedure butwent ahead preprocedure dose vancomycin andafter completion flushing forehead scalp sheunderwent procedure without

In [22]:
# Set up the retriever with k=5 (top 5 results)
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})

# Test the retriever
retrieved_docs = retriever.get_relevant_documents(query)
for doc in retrieved_docs:
    to_markdown(doc.page_content)


  retrieved_docs = retriever.get_relevant_documents(query)


In [23]:
from langchain_community.llms import LlamaCpp

# Load the BioMistral model (make sure the path is correct)
llm = LlamaCpp(
    model_path= "/content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.3,
    max_tokens=2048,
    top_p=1
)
# Verify LLM is loaded
print("BioMistral LLM loaded.")

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/BioMistral-7B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = hub
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:

BioMistral LLM loaded.


In [24]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

# Define a prompt template
template = """
<|context|>
You are an AI assistant that follows instructions extremely well.
Please be truthful and give direct answers. also please tell if the patient will be readmitted again or not for sure
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

# Create a prompt using the template
prompt = ChatPromptTemplate.from_template(template)

# Set up the RAG chain
rag_chain = (
    {"context": retriever,  "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Test the RAG chain with a query
response = rag_chain.invoke(" A person is suffering from viral fever from past week; his upright chest radiographs are reviewed and it is mildly enlarged size without pericardial effusion;Will he get readmitted or not?")
to_markdown(response)


llama_perf_context_print:        load time =  263703.01 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   113 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    38 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  942672.68 ms /   151 tokens


> The patient is suffering from viral fever and the radiographs are normal. The chances of readmission are less. Please consult your doctor if you have any other symptoms. Good luck!

In [None]:
import sys

# Continuous input loop for chatbot interaction
while True:
    user_input = input("Input Prompt: ")
    if user_input.lower() == 'exit':
        print('Exiting...')
        sys.exit()
    if user_input.strip() == '':
        continue

    result = rag_chain.invoke(user_input)
    print("Answer: ", result)


Input Prompt: With the assessment indicating an increased risk for sepsis and pending results for blood cultures, should the infant be readmitted if he develops any clinical signs of infection?


Llama.generate: 57 prefix-match hit, remaining 45 prompt tokens to eval
llama_perf_context_print:        load time =  263703.01 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    45 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    55 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time = 1151322.26 ms /   100 tokens


Answer:  The patient is currently in the hospital and has not been discharged yet. If he develops any clinical signs of infection, it is likely that he will be readmitted to the hospital. However, I cannot say for sure if he will be readmitted or not.


In [None]:
!pip install datasets

In [None]:
!pip install evaluate



In [None]:
!pip install rouge_score



In [None]:
import evaluate

# Load the ROUGE metric
rouge = evaluate.load("rouge")

# Example usage
results = rouge.compute(predictions=["The patient is suffering from viral fever and the radiographs are normal. The chances of readmission are less."], references=["Mildly enlarged heart size without pericardial effusion might not indicate severe complications. However, other factors like the patient's overall health, underlying conditions, and response to treatment will influence the likelihood of readmission."])
print(results)


{'rouge1': 0.23529411764705885, 'rouge2': 0.08163265306122448, 'rougeL': 0.23529411764705885, 'rougeLsum': 0.23529411764705885}


In [None]:
import nltk
from nltk.translate.bleu_score import corpus_bleu

# Example reference sentences (ground truth)
references = [
    [""]
]

# Example candidate sentences (generated output)
candidates = [
    [""]
]

# Calculate BLEU score
bleu_score = corpus_bleu([[ref] for ref in references], candidates)

print(f'BLEU score: {bleu_score:.4f}')


In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.38.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m70.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K   [90m━━━━━━━━━━

In [None]:

import streamlit as st
from langchain.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [None]:
# Function to convert text to markdown
def to_markdown(text):
    return f"> {text}"

# Load the BioMistral model
llm = LlamaCpp(
    model_path="/content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.3,
    max_tokens=2048,
    top_p=1
)


llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/BioMistral-7B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = hub
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:

In [None]:
# Load the vector store
vectorstore = Chroma(persist_directory="/content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF", embedding_function=None)
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})

# Define a prompt template
template = """
<|context|>
You are an AI assistant that follows instructions extremely well.
Please be truthful and give direct answers. Also please tell if the patient will be readmitted again or not.
</s>
<|user|>
{query}
</s>
<|assistant|>
"""
prompt = ChatPromptTemplate.from_template(template)

# Set up the RAG chain
rag_chain = (
    {"context": retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Streamlit app UI
st.title("BioMistral Medical RAG Chatbot")

user_input = st.text_input("Enter your medical query here:")
if st.button("Submit"):
    if user_input.strip() != "":
        response = rag_chain.invoke(user_input)
        st.markdown(to_markdown(response))
    else:
        st.write("Please enter a valid query.")



In [None]:
!pip install streamlit pyngrok langchain langchain_community




In [None]:
from pyngrok import ngrok

# Set your ngrok authtoken
ngrok.set_auth_token("2mgd9wmuUIc1sfjxbQGmSSx8J66_23KrMuJcURM2UJYDkBB9P")

# Create a tunnel to the Streamlit app
public_url = ngrok.connect("8501")
print(f"Access the web app via: {public_url}")

# Run the Streamlit app
!streamlit run /content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/app.py &


Access the web app via: NgrokTunnel: "https://463b-34-145-74-20.ngrok-free.app" -> "http://localhost:8501"
Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Streamlit requires raw Python (.py) files, but the provided file has no extension.
For more information, please see https://docs.streamlit.io


In [None]:
from pyngrok import ngrok

# Replace 'YOUR_NGROK_AUTHTOKEN' with your actual authtoken
ngrok.set_auth_token("2mgd9wmuUIc1sfjxbQGmSSx8J66_23KrMuJcURM2UJYDkBB9P")

# Create a tunnel to the Streamlit app
public_url = ngrok.connect("8501")  # Ensure the correct port is used
print(f"Access the web app via: {public_url}")

Access the web app via: NgrokTunnel: "https://ce37-34-145-74-20.ngrok-free.app" -> "http://localhost:8501"


In [None]:
!streamlit run /content/drive/Shareddrives/298A_Team7/Bio Mistral Model HF/app.py &

Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Streamlit requires raw Python (.py) files, but the provided file has no extension.
For more information, please see https://docs.streamlit.io
