# 🍎 Personal Wellness RAG Agent

## 📦 Setup and Installation

Let's start by installing and importing all the libraries we'll need for our Personal Wellness RAG Agent.


In [None]:
!pip install langchain langchain_community langchain_openai langgraph pypdf pydantic python-docx faiss-cpu docx2txt



In [None]:
import os
from pathlib import Path
from langgraph.graph import StateGraph, END, START
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_core.documents import Document
from langchain_community.document_loaders import Docx2txtLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from typing import List, Dict, Optional
from pydantic import BaseModel

# Keep an API key that is already configured in the environment. Only when none is
# set do we fall back to the empty placeholder, which must be filled in before running.
os.environ.setdefault("OPENAI_API_KEY", "")

## Create Prompts

In [None]:
class GenerationPrompts():
    """Prompt templates for the three LLM calls made by the workflow.

    Each step has a system prompt (class constant) and a static method that
    builds the matching user message from runtime values.
    """

    # System prompt for the input-validation call.
    INPUT_VALIDATION_SYSTEM = """ You are a meticulous reviewer of words and phrases. """

    @staticmethod
    def input_validation_user(query: str) -> str:
        """Build the user message asking whether ``query`` is inappropriate or harmful.

        The prompt instructs the model to answer with only ``True`` or ``False``.

        Args:
            query: The user's raw query text.

        Returns:
            String: The formatted user prompt.
        """
        return f""" Topic: {query}
                Determine whether the query is inappropriate or contains harmful requests.

                Only respond with True if it does or False if it doesnt

                """

    # System prompt for the biometric-synopsis call.
    BIODATA_SYNOPSIS_SYSTEM = """ You are a health specialist. Extract important information that
                               could be used to provide valuable advice about user issues.
                               """

    @staticmethod
    def biodata_synopsis_user(heart_rate: str, mood: str, did_exercise: str, sleep_description: str, docs: Dict[str, List[Document]]) -> str:
        """Build the user message that pairs each biometric value with its retrieved content.

        Args:
            heart_rate: The user's reported heart rate.
            mood: The user's reported mood.
            did_exercise: The user's answer to whether they exercised today.
            sleep_description: The user's description of how they slept.
            docs: Retrieved documents; must contain the keys "heart_rate", "mood",
                "did_exercise" and "sleep_description". The values are interpolated
                directly into the prompt text.

        Returns:
            String: The formatted user prompt.
        """
        return f""" User Biometric Data:
                - Heart rate: {heart_rate}
                - Heart rate content: {docs["heart_rate"]}

                - Mood: {mood}
                - Mood content: {docs["mood"]}

                - Did user exercise today?: {did_exercise}
                - Exercise content: {docs["did_exercise"]}

                - How user slept: {sleep_description}
                - Sleep content: {docs["sleep_description"]}

                Using the above user biometrics and their respective content provide a brief synopsis of the users health.
                Provide either motivation or concern depending on users biometric data and add any information from the content
                data that might be valuable to the user.

                """

    # System prompt for the advice-generation call.
    ADVICE_GENERATION_SYSTEM = """ You are a medical professional providing motivational or concerned advice on patient health
                               data and queries.
                               """

    @staticmethod
    def advice_generation_user(query_docs: List[Document], query: str, synopsis: str) -> str:
        """Build the user message that asks for advice on ``query`` plus a synopsis summary.

        Args:
            query_docs: Documents retrieved for the query; interpolated directly
                into the prompt text.
            query: The user's query text.
            synopsis: The previously generated biometric synopsis.

        Returns:
            String: The formatted user prompt.
        """
        return f"""Query: {query}
                Content: {query_docs}

                If there is a positive affirmation in the content add it here. Only add the phrase.
                if there are breathing exercises in the content choose one at random and add it here. Should be in a numbered step by step list.

                if the content is not about positive affirmations provide numbered snippets containing some professional wellness advice to answer the users query.

                Synopsis: {synopsis}

                Next provide a summary based on the users synopsis
                """

## Create State

In [None]:
# Create User State
class UserState(BaseModel):
    """State model for the workflow graph.

    The first five fields are required strings supplied when the state is
    created; the remaining fields default to None until a value is assigned.
    """
    # The user's free-text question.
    query: str
    # Biometric answers, all kept as the raw strings the user typed.
    heart_rate: str
    mood: str
    did_exercise: str
    sleep_description: str
    # LLM-generated synopsis of the user's biometrics.
    synopsis: Optional[str] = None
    # Documents retrieved for the user's query.
    retrieved_docs: Optional[List[Document]] = None
    # Retrieved documents grouped under string keys.
    docs_dict: Optional[Dict[str, List[Document]]] = None
    # LLM-generated advice text.
    advice: Optional[str] = None

## Create Workflow

In [None]:
class Workflow:
    """LangGraph pipeline: retrieve context, validate the query, then generate advice.

    Graph shape::

        START -> load_add_docs -> (input_validation) -> get_synopsis -> get_advice -> END
                                          \\-> END  (query rejected)

    The FAISS vector store is built lazily on the first query and reused for the
    lifetime of the instance, so documents added to ``docs_dir`` afterwards are
    only picked up by a new ``Workflow``.
    """

    # Queries mentioning any of these terms are answered from breathing-exercise content.
    _BREATHING_KEYWORDS = ("stress", "breathing", "anxiety")

    def __init__(self, docs_dir: str = "/content/drive/MyDrive/RAG_Documents"):
        """
        Create the LLM client, compile the graph and enable the global LLM cache.

        Args:
            docs_dir: Directory whose sub-folders hold the .pdf / .docx knowledge base.
        """
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
        self.prompts = GenerationPrompts()
        self.docs_dir = Path(docs_dir)
        self._vector_store = None  # built on first use by _get_vector_store()
        self.workflow = self._build_workflow()
        self.cache = InMemoryCache()
        set_llm_cache(self.cache)

    # Create LangGraph workflow
    def _build_workflow(self):
        """
        Assemble and compile the state graph.

        Returns:
            Object: The compiled LangGraph workflow.
        """
        graph = StateGraph(UserState)
        # input_validation is a router (it returns the next node name), so it is
        # attached as a conditional edge only and not registered as a node.
        graph.add_node("load_add_docs", self.load_add_documents)
        graph.add_node("get_synopsis", self.get_bio_synopsis)
        graph.add_node("get_advice", self.generate_advice)
        graph.add_edge(START, "load_add_docs")
        graph.add_conditional_edges(
            "load_add_docs",
            self.input_validation,
            {"get_synopsis": "get_synopsis", END: END},
        )
        graph.add_edge("get_synopsis", "get_advice")
        graph.add_edge("get_advice", END)
        return graph.compile()

    # Check input for inappropriate or harmful requests
    def input_validation(self, state: UserState) -> str:
        """
        Check the user's query for inappropriate or harmful content

        Args:
            state: The agent's saved information.

        Returns:
            String: The next node the agent should use ("get_synopsis"), or the
            LangGraph END sentinel when the query is rejected.
        """
        print("\n")
        print("Checking input for inappropriate or harmful requests...\n")
        messages = [
            SystemMessage(content=self.prompts.INPUT_VALIDATION_SYSTEM),
            HumanMessage(content=self.prompts.input_validation_user(state.query))
        ]
        response = self.llm.invoke(messages)

        # The model is asked for a bare True/False, but tolerate surrounding
        # whitespace, different casing and trailing punctuation ("True.", " true\n").
        verdict = str(response.content).strip().lower()
        if verdict.startswith("true"):
            print("This prompt is innappropriate or contains a harmful request.")
            # Must be the END sentinel; the literal string "END" is not a node name.
            return END
        return "get_synopsis"

    # Read the knowledge base from disk
    def _load_documents(self) -> List[Document]:
        """
        Load every .pdf and .docx file found one folder level below ``self.docs_dir``.

        Returns:
            Object: The loaded documents.
        """
        docs = []
        print("\n")
        print("Loading Documents...")
        for folder in self.docs_dir.iterdir():
            # Stray files next to the topic folders would make iterdir() raise.
            if not folder.is_dir():
                continue
            print(f"\tFolder: {folder}")
            for file_path in folder.iterdir():
                suffix = file_path.suffix.lower()
                if suffix == ".pdf":
                    loader = PyPDFLoader(str(file_path))
                elif suffix == ".docx":
                    loader = Docx2txtLoader(str(file_path))
                else:
                    # e.g. .gdoc shortcuts: say so instead of claiming they were loaded.
                    print(f"\t\tSkipping unsupported file: {file_path}")
                    continue
                print(f"\t\tLoading: {file_path}")
                docs.extend(loader.load())
        return docs

    # Build the vector store once and reuse it
    def _get_vector_store(self):
        """
        Return the FAISS store, loading, chunking and embedding documents on first use.

        Returns:
            Object: The FAISS vector store.

        Raises:
            ValueError: If no loadable content was found under ``self.docs_dir``.
        """
        if self._vector_store is None:
            docs = self._load_documents()

            # split documents into chunks
            print("\n")
            print(f"Total Documents Loaded: {len(docs)}")
            print("Processing Documents...")
            splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            chunks = splitter.split_documents(docs)
            print(f"\nTotal Chunks: {len(chunks)}")
            if not chunks:
                raise ValueError(f"No .pdf or .docx content found under {self.docs_dir}")

            # embed documents then add them to the vector store
            print("Creating Embeddings...")
            print("Adding Documents to Vector Store...")
            self._vector_store = FAISS.from_documents(chunks, OpenAIEmbeddings())
        return self._vector_store

    # Load documents (pdf, docx), preprocess, and add to vector store
    def load_add_documents(self, state: UserState):
        """
        Retrieve knowledge-base content for the user's biometrics and query

        Args:
            state: The agent's saved information.

        Returns:
            Object: State update with "retrieved_docs" (documents for the query) and
            "docs_dict" (documents per biometric, plus the query documents under "query").
        """
        retriever = self._get_vector_store().as_retriever(search_kwargs={"k": 20})

        # query based on user biometrics
        biometric_queries = {
            "heart_rate": f"my heart rate is {state.heart_rate}",
            "mood": f"I am {state.mood} today",
            "did_exercise": "benefits of exercise",
            "sleep_description": state.sleep_description,
        }
        userdata_docs = {
            key: retriever.invoke(text) for key, text in biometric_queries.items()
        }

        # make sure to get breathing exercises
        print("Retrieving Queried Documents...\n")
        query_text = state.query.lower()
        if any(keyword in query_text for keyword in self._BREATHING_KEYWORDS):
            retrieved_docs = retriever.invoke("breathing exercises")
        else:
            retrieved_docs = retriever.invoke(state.query)

        userdata_docs["query"] = retrieved_docs

        return {"retrieved_docs": retrieved_docs, "docs_dict": userdata_docs}

    # Get a structured synopsis from llm
    def get_bio_synopsis(self, state: UserState):
        """
        Get a structured synopsis from llm using content retrieved from the user's biometrics

        Args:
            state: The agent's saved information.

        Returns:
            Object: State update with the LLM response under "synopsis".
        """
        messages = [
            SystemMessage(content=self.prompts.BIODATA_SYNOPSIS_SYSTEM),
            HumanMessage(content=self.prompts.biodata_synopsis_user(state.heart_rate, state.mood, state.did_exercise, state.sleep_description, state.docs_dict))
        ]

        # get llm response
        response = self.llm.invoke(messages)
        # show bio response
        print(f"\nBio Synopsis:\n{response.content}\n")
        return {"synopsis": response.content}

    # Get advice from llm
    def generate_advice(self, state: UserState):
        """
        Get structured advice response from llm using content retrieved from the user's query and the user's synopsis

        Args:
            state: The agent's saved information.

        Returns:
            Object: State update with the LLM response under "advice".
        """
        messages = [
            SystemMessage(content=self.prompts.ADVICE_GENERATION_SYSTEM),
            HumanMessage(content=self.prompts.advice_generation_user(state.retrieved_docs, state.query, state.synopsis))
        ]

        # get llm response
        response = self.llm.invoke(messages)
        return {"advice": response.content}

    def run(self, query: str, heart_rate: str, mood: str, did_exercise: str, sleep_description: str) -> UserState:
        """
        Run the agent

        Args:
            query: The user's question.
            heart_rate: The user's reported heart rate.
            mood: The user's reported mood.
            did_exercise: Whether the user exercised today.
            sleep_description: How the user slept.

        Returns:
            Object: The final agent state; "advice" stays None when the query was rejected.
        """
        starting_state = UserState(query=query, heart_rate=heart_rate, mood=mood, did_exercise=did_exercise, sleep_description=sleep_description)
        finished_state = self.workflow.invoke(starting_state)
        return UserState(**finished_state)

## Run Program

In [None]:
def main():
    """Run the interactive console loop for the wellness agent.

    Repeatedly asks for a query, then for four biometric answers, runs the
    workflow and prints the resulting advice. Typing "quit" or "exit" ends
    the loop; an empty query is simply asked again.
    """
    workflow = Workflow()
    print("Personal Wellness Agent")

    while True:
        query = input("\nWhat can I help you with?: ").strip()

        # Check if the user wants to quit the agent or make another query
        if query.lower() in {"quit", "exit"}:
            break

        # Nothing to answer: re-prompt rather than collecting biometrics that
        # would be thrown away.
        if not query:
            continue

        print("\n🔍 Ok, lets get some Biometric data first: ")

        # Get user biometrics
        heart_rate = input("\n\t What is your current heartrate?: ")
        mood = input("\t What is your current mood. (Happy, Sad, Depressed): ")
        did_exercise = input("\t Did you exercise today? (Yes, No): ")
        sleep_description = input("\t How did you sleep last night?: ")

        print("\n\nAwesome, thank you for that information!\n\n")

        result = workflow.run(query, heart_rate, mood, did_exercise, sleep_description)
        print("\nLets see if I can help you out...")
        print("=" * 60)

        # Print final report (advice is empty when the workflow ended early)
        if result.advice:
            print("My advice: ")
            print("-" * 40)
            print(result.advice)


if __name__ == "__main__":
    main()

Personal Wellness Agent

What can I help you with?: stress break

🔍 Ok, lets get some Biometric data first: 

	 What is your current heartrate?: 85
	 What is your current mood. (Happy, Sad, Depressed): Sad
	 Did you exercise today? (Yes, No): No
	 How did you sleep last night?: I didn't sleep good


Awesome, thank you for that information!




Loading Documents...
	Folder: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Motivation_Affirmations_Self_Determination.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Affirmations .docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Motivation_Affimrations_PODCAST_How to feel hopeful.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Motivation_Affirmations_Being More Optimistic Could Add Years to Your Life.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & 

  embeddings = OpenAIEmbeddings()


		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Motivation_Affirmations_How to Create the Right Environment for Students to Develop a Growth Mindset.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Motivation_affirmations_How Gratitude Makes You Happier.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Motivation & affirmations/Positive_Affirmations.gdoc
	Folder: /content/drive/MyDrive/RAG_Documents/Wellness Agent Data_General_Knowledge_base
		Loading: /content/drive/MyDrive/RAG_Documents/Wellness Agent Data_General_Knowledge_base/Wellness Agent Data Acquisition_.docx
	Folder: /content/drive/MyDrive/RAG_Documents/Sleep issues & remedies
		Loading: /content/drive/MyDrive/RAG_Documents/Sleep issues & remedies/Sleep issues & remedies_Sleep_Hygiene.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Sleep issues & remedies/Sleep issues & remedies_Cant sleep_what to do.docx
		Loading: /content/drive/MyDrive/RAG_Documents/Sleep issu