# Build a chatbot that gives advice on cardiovascular risk

# Install libraries and load secrets

In [None]:
# %%bash
# # One-time setup: uncomment this cell to install the notebook's dependencies.
# uv pip install -q llama-index-core
# uv pip install -q llama-index-llms-groq
# uv pip install -q llama-index-readers-file
# uv pip install -q llama-index-embeddings-huggingface
# uv pip install -q llama-index-embeddings-instructor
# uv pip install -q pypdf

In [None]:
import os
import keys

# Export the API credentials from the local `keys` module as environment
# variables. Alternative source left over in the original comments:
# userdata.get("GROQ_API_KEY") / userdata.get("HF_TOKEN").
for _secret_name in ("GROQ_API_KEY", "HF_TOKEN"):
    os.environ[_secret_name] = getattr(keys, _secret_name)

# Run only for setup

## Store relevant data
Manually add all relevant data (.txt, .pdf, etc.) to `content/data`. Note that the loading cell below only reads `.pdf` files.

## Load the data

In [None]:
# # Setup only: imports for reading the source files and building the index.
# from llama_index.core import SimpleDirectoryReader
# from llama_index.core import VectorStoreIndex
# from llama_index.core.node_parser import SentenceSplitter

# # Load the documents from ./content/data; required_exts restricts this to .pdf files.
# documents = SimpleDirectoryReader("./content/data", required_exts=[".pdf"]).load_data(
#     show_progress=True
# )

## Splitting the document

In [None]:
# from llama_index.core.node_parser import SentenceSplitter

# # Splitter used to cut the loaded documents into overlapping chunks.
# text_splitter = SentenceSplitter(chunk_size=800, chunk_overlap=150)

# # NOTE(review): `docs` is not referenced by the other cells shown in this notebook;
# # the index cell passes `text_splitter` as a transformation on `documents` instead.
# docs = text_splitter.get_nodes_from_documents(documents)

## Creating vectors with embeddings

In [None]:
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# # Embedding model; previously "sentence-transformers/all-MiniLM-L6-v2".
# # Switched so that German texts work, based on the discussion at
# # https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
# embedding_model = "danielheinz/e5-base-sts-en-de"
# # Local cache folder for the model (when working locally instead of on Colab).
# embeddings_folder = "./content/embedding_model/"

# embeddings = HuggingFaceEmbedding(
#     model_name=embedding_model, cache_folder=embeddings_folder
# )

## Create vector database

In [None]:
# from llama_index.core import VectorStoreIndex

# # Build the vector index from the loaded documents, passing the splitter as a
# # transformation and the embedding model created in the previous setup cell.
# vector_index = VectorStoreIndex.from_documents(
#     documents, transformations=[text_splitter], embed_model=embeddings
# )

In [None]:
# # Persist the index to disk; the RAG cell loads it back from this same directory.
# vector_index.storage_context.persist(persist_dir="./content/vector_index")

# RAG - chatbot

In [None]:
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.base.llms.types import ChatMessage, MessageRole

# Groq-hosted chat model that generates the answers.
# Alternative kept for reference: "llama-3.3-70b-versatile"
model = "allam-2-7b"

# The Groq LLM class takes the credential through its `api_key` parameter
# (there is no `token` parameter), so pass the key under that name.
llm = Groq(
    model=model,
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Embedding model; previously "sentence-transformers/all-MiniLM-L6-v2".
# Switched so that German texts work, based on the discussion at
# https://discuss.huggingface.co/t/rag-embeddings-german-language/60840/4
# (same model name as in the setup cell that builds the index).
embedding_model = "danielheinz/e5-base-sts-en-de"

# Folder in which the downloaded model files are cached.
embeddings_folder = "./content/embedding_model/"

embeddings = HuggingFaceEmbedding(
    cache_folder=embeddings_folder,
    model_name=embedding_model,
)

# Reload the index that the setup section persisted to ./content/vector_index.
storage_context = StorageContext.from_defaults(
    persist_dir="./content/vector_index",
)
vector_index = load_index_from_storage(
    storage_context,
    embed_model=embeddings,
)

# Answers get better with a higher similarity_top_k, but the token limit is
# also reached more quickly.
retriever = vector_index.as_retriever(similarity_top_k=2)

# System instructions sent ahead of every conversation, one SYSTEM message per
# instruction, in this order.
prefix_messages = [
    ChatMessage(role=MessageRole.SYSTEM, content=instruction)
    for instruction in (
        "You are a nice chatbot having a conversation with a human",
        # Earlier wording, kept for reference: "Use easy language and short sentences. "
        "Explain it like I am 15",
        "Be appreciative, supporting, encouraging and friendly.",
        "Answer the question based only on the following context and previous conversation.",
        "Keep your answers short and succinct.",
    )
]

# Conversation memory with the library's default settings.
memory = ChatMemoryBuffer.from_defaults()

# Chat engine wiring together the LLM, the retriever over the persisted index,
# the memory buffer and the system instructions.
rag_bot = ContextChatEngine(
    prefix_messages=prefix_messages,
    memory=memory,
    retriever=retriever,
    llm=llm,
)

In [10]:
# Second chat engine with its own, fresh memory buffer; it shares the LLM,
# retriever and system instructions with `rag_bot`.
bot_2 = ContextChatEngine(
    prefix_messages=prefix_messages,
    memory=ChatMemoryBuffer.from_defaults(),
    retriever=retriever,
    llm=llm,
)

# Interactive conversation loop: read a message, send it to `bot_2`, print the
# answer. Typing "end" (any casing, surrounding whitespace ignored) stops it.
while True:
    try:
        # Strip so that inputs like " end " or a trailing space still match.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input is available or the kernel was interrupted at the
        # prompt: leave the loop cleanly instead of ending in a traceback.
        print("Ending the conversation. Goodbye!")
        break

    # Check for exit condition
    if user_input.lower() == "end":
        print("Ending the conversation. Goodbye!")
        break

    # Do not send blank messages to the chat engine; just prompt again.
    if not user_input:
        continue

    # Get the response from the chat engine
    response = bot_2.chat(user_input)
    # Print the chatbot's response
    print(response.response)

Heart disease, also known as cardiovascular disease (CVD), can develop over time due to various factors. Here's a simplified explanation:

1. High blood pressure: Keeping your blood pressure under control is important to prevent heart disease.
2. Unhealthy diet: Eating a balanced diet with plenty of fruits, vegetables, and whole grains can help protect your heart.
3. Physical inactivity: Regular exercise is essential for heart health. Aim for at least 150 minutes of moderate-intensity activity per week.
4. Smoking: Quitting smoking can significantly reduce your risk of heart disease.
5. High cholesterol: Maintaining healthy cholesterol levels is crucial for heart health.
6. Obesity: Staying at a healthy weight reduces the risk of heart disease.
7. Diabetes: Controlling blood sugar levels is vital for heart health.
8. Stress: Managing stress through relaxation techniques can help protect your heart.
9. Sleep apnea: Getting enough good-quality sleep is important for heart health.
10. Fam