# Build a chatbot that gives advice on cardiovascular risk

# Libraries and load secrets

In [None]:
# %%bash
# uv pip install -q llama-index-core
# uv pip install -q llama-index-llms-groq
# uv pip install -q llama-index-readers-file
# uv pip install -q llama-index-embeddings-huggingface
# uv pip install -q llama-index-embeddings-instructor
# !pip install pypdf

In [1]:
import os
import keys

# Set the token as an environ variable
os.environ["GROQ_API_KEY"] = keys.GROQ_API_KEY#userdata.get("GROQ_API_KEY")
os.environ["HF_TOKEN"] = keys.HF_TOKEN #userdata.get("HF_TOKEN")

## Store relevant data
Add all relevant data (.txt, .pdf etc) to contents/data manually

## Setting up LLM

In [2]:
from llama_index.llms.groq import Groq

# This info's at the top of each HuggingFace model page
model = "llama-3.3-70b-versatile"

llm = Groq(
    model=model,
    api_key=os.environ.get(
        "GROQ_API_KEY"
    ),  # you can also enter your API key here, either hard-coded or read from another file
)

  from .autonotebook import tqdm as notebook_tqdm


## Load the data

In [27]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

# load in the documents
documents = SimpleDirectoryReader("./content/data", required_exts=[".pdf"]).load_data(
    show_progress=True
)

Loading files: 100%|██████████| 3/3 [00:12<00:00,  4.08s/it]


## Splitting the document

In [28]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=800, chunk_overlap=150)

docs = text_splitter.get_nodes_from_documents(documents)

# Creating vectors with embeddings

In [5]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# embeddings
embedding_model = "danielheinz/e5-base-sts-en-de"#sentence-transformers/all-MiniLM-L6-v2" # 
# switched to work for german texts based on discussion https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
embeddings_folder = "./content/embedding_model/" # if you're working locally instead of on colab

embeddings = HuggingFaceEmbedding(
    model_name=embedding_model, cache_folder=embeddings_folder
)

## Create vector database

In [30]:
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex.from_documents(
    documents, transformations=[text_splitter], embed_model=embeddings
)

In [31]:
vector_index.storage_context.persist(persist_dir="./content/vector_index")

In [6]:
from llama_index.core import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="./content/vector_index")
vector_index = load_index_from_storage(storage_context, embed_model=embeddings)

### 3.6.&nbsp; Adding a prompt
We can guide our model's behavior with a prompt, similar to how we gave instructions to the chatbot.
> Google have a good page about [prompting best practices](https://ai.google.dev/docs/prompt_best_practices).

In [7]:
from llama_index.core.prompts import PromptTemplate

input_template = """Here is the context:
{context_str}

Answer the question based only on the following context. 
Use easy language and short sentences. Be encouraging and friendly.
Keep your answers short and succinct. 
Question to be answered: {query_str}
Answer:"""

prompt = PromptTemplate(template=input_template)

## RAG - chatbot

In [25]:
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.base.llms.types import ChatMessage, MessageRole

model = "llama-3.3-70b-versatile"

llm = Groq(
    model=model,
    token=os.environ.get("GROQ_API_KEY"),
)

embedding_model = "danielheinz/e5-base-sts-en-de"#sentence-transformers/all-MiniLM-L6-v2" # 
# switched to work for german texts based on discussion https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
embeddings_folder = "./content/embedding_model/"

embeddings = HuggingFaceEmbedding(
    model_name=embedding_model, cache_folder=embeddings_folder
)

storage_context = StorageContext.from_defaults(persist_dir="./content/vector_index")
vector_index = load_index_from_storage(storage_context, embed_model=embeddings)
retriever = vector_index.as_retriever(similarity_top_k=6)

prefix_messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="You are a nice chatbot having a conversation with a human",
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="Explain it like I am 15",#Use easy language and short sentences. ",
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="Be appreciative, supporting, encouraging and friendly.",
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="Answer the question based only on the following context and previous conversation.",
    ),
    ChatMessage(
        role=MessageRole.SYSTEM, content="Keep your answers short and succinct."
    ),
]

memory = ChatMemoryBuffer.from_defaults()

rag_bot = ContextChatEngine(
    llm=llm, retriever=retriever, memory=memory, prefix_messages=prefix_messages
)

In [24]:
bot_2 = ContextChatEngine(
    llm=llm,
    retriever=retriever,
    memory=ChatMemoryBuffer.from_defaults(),
    prefix_messages=prefix_messages,
)

# Start the conversation loop
while True:
    user_input = input("You: ")

    # Check for exit condition
    if user_input.lower() == "end":
        print("Ending the conversation. Goodbye!")
        break

    # Get the response from the conversation chain
    response = bot_2.chat(user_input)
    # Print the chatbot's response
    print(response.response)

People get heart disease because of some things they can't control and some things they can. The things they can control are like eating too much bad food, not exercising, smoking, and being too heavy. These things can hurt their heart and make them sick. But don't worry, we can all try to make good choices to keep our hearts healthy!


Retrying llama_index.llms.openai.base.OpenAI._chat in 1.0 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01ka8k4788eywsnjb4st7ckgky` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 99949, Requested 3399. Please try again in 48m12.672s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._chat in 1.3263949008507432 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01ka8k4788eywsnjb4st7ckgky` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 99948, Requested 3399. Please try again in 48m11.808s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code'

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01ka8k4788eywsnjb4st7ckgky` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 99947, Requested 3399. Please try again in 48m10.943999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}