# Build a chatbot that gives advice on cardiovascular risk

# Libraries and load secrets

In [None]:
# %%bash
# uv pip install -q llama-index-core
# uv pip install -q llama-index-llms-groq
# uv pip install -q llama-index-readers-file
# uv pip install -q llama-index-embeddings-huggingface
# uv pip install -q llama-index-embeddings-instructor
# !pip install pypdf

In [None]:
import os
import keys
import gradio as gr

# Export the API credentials from the local `keys` module as environment
# variables (GROQ first, then Hugging Face). The Groq key is read back from
# the environment when the LLM client is created; the earlier
# `userdata.get(...)` lookups are no longer used.
for secret_name in ("GROQ_API_KEY", "HF_TOKEN"):
    os.environ[secret_name] = getattr(keys, secret_name)

# Run only for setup

## Store relevant data
Add all relevant data (.txt, .pdf, etc.) to `content/data` manually. Note that the loader cell below only reads `.pdf` files.

## Load the data

In [None]:
# from llama_index.core import SimpleDirectoryReader
# from llama_index.core import VectorStoreIndex
# from llama_index.core.node_parser import SentenceSplitter

# # load in the documents
# documents = SimpleDirectoryReader("./content/data", required_exts=[".pdf"]).load_data(
#     show_progress=True
# )

## Splitting the document

In [None]:
# from llama_index.core.node_parser import SentenceSplitter

# text_splitter = SentenceSplitter(chunk_size=800, chunk_overlap=150)

# docs = text_splitter.get_nodes_from_documents(documents)

## Creating vectors with embeddings

In [None]:
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# # embeddings
# embedding_model = "danielheinz/e5-base-sts-en-de"#sentence-transformers/all-MiniLM-L6-v2" # 
# # switched to work for german texts based on discussion https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
# embeddings_folder = "./content/embedding_model/" # if you're working locally instead of on colab

# embeddings = HuggingFaceEmbedding(
#     model_name=embedding_model, cache_folder=embeddings_folder
# )

## Create vector database

In [None]:
# from llama_index.core import VectorStoreIndex

# vector_index = VectorStoreIndex.from_documents(
#     documents, transformations=[text_splitter], embed_model=embeddings
# )

In [None]:
# vector_index.storage_context.persist(persist_dir="./content/vector_index")

# RAG - chatbot

In [None]:
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.base.llms.types import ChatMessage, MessageRole

# Groq-hosted chat model used for generation ("allam-2-7b" was tried as an
# alternative).
model = "llama-3.3-70b-versatile"

llm = Groq(
    model=model,
    # max_tokens=100,  # for allam as it is unable to be concise (as prompted)
    # `api_key` is the keyword the Groq wrapper documents for the credential;
    # the previous `token=` keyword is not part of its signature, so the key
    # only reached the client through the GROQ_API_KEY environment fallback.
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Embedding model; must match the one used when the persisted index was built.
# Switched from "sentence-transformers/all-MiniLM-L6-v2" to work for German
# texts, based on the discussion at
# https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
embedding_model = "danielheinz/e5-base-sts-en-de"
embeddings_folder = "./content/embedding_model/"

embeddings = HuggingFaceEmbedding(
    model_name=embedding_model, cache_folder=embeddings_folder
)

# Load the vector index persisted by the setup cells instead of rebuilding it.
storage_context = StorageContext.from_defaults(persist_dir="./content/vector_index")
vector_index = load_index_from_storage(storage_context, embed_model=embeddings)

# Answers get better with a higher top-k, but the token limit is also reached
# more quickly.
retriever = vector_index.as_retriever(similarity_top_k=2)

# System messages placed in front of every conversation: the assistant persona
# and behavior rules, followed by the instruction to stay within the retrieved
# context and the previous conversation.
prefix_messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content='''
        Context: You are a friendly, supportive health assistant helping adults understand heart risk based on self-reported lifestyle factors (smoking, diet, exercise). You can reference clinical risk factors (blood pressure, cholesterol, family history) and lifestyle changes. Risk comes from an external ML model as “low,” “medium,” or “high.”
        Objective: Explain lifestyle risk factors clearly and briefly, provide context about clinical factors, interpret risk categories, encourage achievable lifestyle changes, and advise seeing a doctor if risk is high or users ask clinical questions.
        Behavior:
        Keep answers concise (max 2 sentences).
        Do not give medical diagnoses or instructions.
        Highlight achievable lifestyle changes.
        Include a short motivational nudge in every answer.
        If a user asks about symptoms, medications, or is high risk, politely advise consulting a healthcare professional.
        Remind users this tool is not a substitute for a doctor.
        Tone:Friendly, encouraging, clear, concise, balanced between conversational and professional.'''
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="Answer the question based only on the following context and previous conversation.",
    ),
]

# Conversation memory for the main chat engine.
memory = ChatMemoryBuffer.from_defaults()

# Chat engine that retrieves context for each message before calling the LLM.
rag_bot = ContextChatEngine(
    llm=llm, retriever=retriever, memory=memory, prefix_messages=prefix_messages
)

In [None]:
# Separate engine with its own memory buffer for the console conversation, so
# this loop does not write into `rag_bot`'s memory.
bot_2 = ContextChatEngine(
    llm=llm,
    retriever=retriever,
    memory=ChatMemoryBuffer.from_defaults(),
    prefix_messages=prefix_messages,
)

# Start the conversation loop; type "end" to stop.
while True:
    try:
        # Strip surrounding whitespace so "end " or " END" still terminates.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input (non-interactive run) or the user interrupted the
        # cell: finish cleanly instead of surfacing a traceback.
        print("Ending the conversation. Goodbye!")
        break

    # Check for exit condition
    if user_input.lower() == "end":
        print("Ending the conversation. Goodbye!")
        break

    # Skip blank lines rather than spending a retrieval + LLM call on them.
    if not user_input:
        continue

    # Get the response from the chat engine and print its text.
    response = bot_2.chat(user_input)
    print(response.response)

# Create an interface and make the RAG chatbot available online

### Wrapper Function

In [None]:
def _gradio_history_to_messages(history):
    """Convert a Gradio chat history into a list of ``ChatMessage`` objects.

    Two layouts are accepted: a list of ``[user, assistant]`` pairs, and a
    list of ``{"role": ..., "content": ...}`` dicts. Entries that carry no
    plain text (``None`` placeholders, non-text content) are skipped.
    """

    def as_text(content):
        # Plain strings are used as-is.
        if isinstance(content, str):
            return content
        # NOTE(review): assumes list-style content holds
        # {"type": "text", "text": ...} parts - confirm against the installed
        # Gradio version.
        if isinstance(content, (list, tuple)):
            parts = [
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return "\n".join(part for part in parts if part)
        return ""

    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = (
                MessageRole.ASSISTANT
                if turn.get("role") == "assistant"
                else MessageRole.USER
            )
            entries = [(role, turn.get("content"))]
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            entries = [
                (MessageRole.USER, turn[0]),
                (MessageRole.ASSISTANT, turn[1]),
            ]
        else:
            # Unknown layout: ignore rather than fail the whole request.
            continue

        for role, content in entries:
            text = as_text(content)
            if text:
                messages.append(ChatMessage(role=role, content=text))
    return messages


def rag_bot_chat_interface(message, history):
    """Answer ``message`` with the RAG chat engine for a Gradio ChatInterface.

    Args:
        message: The new user message.
        history: The conversation so far, as supplied by Gradio for the
            current browser session.

    Returns:
        The assistant's reply as a string.
    """
    # `rag_bot` is a single module-level engine. Relying on its internal
    # memory alone would mix the conversations of every session and keep old
    # turns after the UI is cleared, so the session's own history is passed
    # explicitly and replaces the engine memory for this call.
    # NOTE(review): requests handled at the same time still share the one
    # engine; create one engine per session if strict isolation is required.
    chat_history = _gradio_history_to_messages(history)
    response = rag_bot.chat(message, chat_history=chat_history)

    # Return the response text
    return str(response)

### Interface

In [None]:

# Rose-tinted variant of Gradio's built-in "Soft" theme.
theme = gr.themes.Soft(primary_hue="rose", secondary_hue="rose", neutral_hue="rose")

# Example questions the user can click on instead of typing.
example_questions = [
    "What are risk factors for heart disease?",
    "How can I decrease my risk?",
    "Where can I find help?",
]

# Chat UI backed by the RAG wrapper function.
demo_chat_interface = gr.ChatInterface(
    fn=rag_bot_chat_interface,
    examples=example_questions,
    theme=theme,
    title="Love Your Heart",
    description="Ask me questions. I'll use my knowledge to respond.",
)

# Pass share=True to launch() for a public link.
demo_chat_interface.launch()

# Using users' self-reported data

### Callback function

In [None]:
def custom_rag_bot_callback(message, chat_history, bmi_value):
    """Answer one question using retrieved context and the user's BMI.

    Args:
        message: The user's question from the textbox.
        chat_history: Current chatbot history as a list of
            ``[user_message, bot_response]`` pairs; ``None`` is treated as an
            empty history.
        bmi_value: The BMI selected in the UI; inserted verbatim into the
            prompt.

    Returns:
        The chat history with the new ``[message, bot_response]`` pair
        appended. A blank message returns the history unchanged without
        calling the retriever or the LLM.
    """
    if chat_history is None:
        chat_history = []

    # Nothing to answer: avoid spending a retrieval + LLM call on blank input.
    if not message or not message.strip():
        return chat_history

    # 1. Build a retriever over the persisted index (fixed top-k of 2).
    retriever = vector_index.as_retriever(similarity_top_k=2)

    # 2. Retrieve the context passages for this question.
    nodes = retriever.retrieve(message)
    context_str = "\n\n".join(node.get_content() for node in nodes)

    # 3. Assemble the prompt: system instructions, then the user's data, the
    #    retrieved context and the question.
    system_prompt = '''
        Context: You are a friendly, supportive health assistant helping adults understand heart risk based on self-reported lifestyle factors (smoking, diet, exercise). You can reference clinical risk factors (blood pressure, cholesterol, family history) and lifestyle changes. Risk comes from an external ML model as “low,” “medium,” or “high.”
        Objective: Explain lifestyle risk factors clearly and briefly, provide context about clinical factors, interpret risk categories, encourage achievable lifestyle changes, and advise seeing a doctor if risk is high or users ask clinical questions.
        Behavior:
        Keep answers concise and succinct (1-2 sentences).
        Do not give medical diagnoses or instructions.
        Highlight achievable lifestyle changes.
        Include a short motivational nudge in every answer.
        If a user asks about symptoms, medications, or is high risk, politely advise consulting a healthcare professional.
        Remind users this tool is not a substitute for a doctor.
        Tone:Friendly, encouraging, clear, concise, balanced between conversational and professional.'''
    # The newline after the system prompt keeps the delimiter on its own line;
    # previously it was glued directly onto the final "Tone" sentence.
    full_prompt = (
        f"{system_prompt}\n"
        "---------------------\n"
        f"BMI: {bmi_value}\n"
        f"Context: {context_str}\n"
        f"Question: {message}"
    )

    # 4. Generate the response (using the base LLM, not the chat engine, so
    #    earlier turns are not part of the prompt).
    response_obj = llm.complete(full_prompt)
    bot_response = str(response_obj)

    # 5. Append the new [user_message, bot_response] pair; gr.Chatbot expects
    #    the *entire* updated history list to be returned.
    chat_history.append([message, bot_response])
    return chat_history

### Building the UI with Blocks

In [None]:
# Rose-tinted variant of Gradio's built-in "Soft" theme for the custom UI.
theme = gr.themes.Soft(
    primary_hue="rose",
    secondary_hue="rose",
    neutral_hue="rose",
)
# Custom layout: a narrow column with the user's self-reported data next to
# a wide chat column. Components are created in the order they should appear.
with gr.Blocks(title="Custom RAG Bot UI", theme=theme) as demo_custom:
    gr.Markdown("<h1>Love your heart</h1>")

    with gr.Row():
        # --- Sidebar/Parameter Column (scale=1 means it takes 1/5 of the width) ---
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("## Your data")
            # NOTE: despite its name, this slider holds the user's BMI
            # (15-45, default 30), not a retrieval top-k value.
            top_k_slider = gr.Slider(
                minimum=15,
                maximum=45,
                step=1,
                value=30,
                label="BMI",
                # No `info` text is set for this slider.
            )

        # --- Main Chat Column (scale=4 means it takes 4/5 of the width) ---
        with gr.Column(scale=4):
            # The Chatbot component displays the history
            chatbot = gr.Chatbot(
                label="AI Assistant",
                height=500,
                # Provide an initial welcome message (no user turn, bot text only)
                value=[[None, "Hello! I am your AI assistant. Ask me questions."]]
            )

            # The Textbox for user input
            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask me something about heart health..."
            )

            # The submission event
            # When the user presses Enter in 'msg_input'...
            msg_input.submit(
                fn=custom_rag_bot_callback,  # ...call our function
                # Pass in the current message, the chatbot history, and the BMI slider value
                inputs=[msg_input, chatbot, top_k_slider],
                # The function's return value updates the chatbot component
                outputs=[chatbot]
            ).then(
                # After submit, clear the textbox. We do this in a separate .then() step.
                fn=lambda: None, # A simple function that returns None
                inputs=None,
                outputs=[msg_input], # The None value clears the textbox
                queue=False # Run this step immediately
            )

# Launch the custom demo
demo_custom.launch()