In [1]:
import os
import dotenv
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser


os.environ.clear()
dotenv.load_dotenv()

True

# LLM

In [2]:
local_llm = "llama3.2:latest"
model_tested = "llama3.2:latest"
metadata = f"CRAG, {model_tested}"

# Create Index
Let's index 3 blog posts

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_nomic.embeddings import NomicEmbeddings  # local
from langchain_openai import OpenAIEmbeddings  # api

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# List of URLs to load documents from
urls = [
    "https://lpi.oregonstate.edu/sites/lpi.oregonstate.edu/files/pdf/mic/micronutrients_for_health.pdf?utm_source=chatgpt.com",
    "https://www.accessdata.fda.gov/scripts/InteractiveNutritionFactsLabel/assets/InteractiveNFL_Vitamins%26MineralsChart_October2021.pdf?utm_source=chatgpt.com",
    "https://www.hilarispublisher.com/open-access/essential-nutrients-in-human-body.pdf?utm_source=chatgpt.com",
]

# Load documents from the URLs
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Initialize a text splitter with specified chunk size and overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)

# Split the documents into chunks
doc_splits = text_splitter.split_documents(docs_list)

# # Embedding
embedding=NomicEmbeddings(
    model="nomic-embed-text-v1.5",
    inference_mode="local",
)

# embedding = OpenAIEmbeddings()


# Add the document chunks to the "vector store"
vectorstore = SKLearnVectorStore.from_documents(
    documents=doc_splits,
    embedding=embedding,
)
retriever = vectorstore.as_retriever(k=4)


KeyboardInterrupt: 

Embedding texts:  83%|████████▎ | 27264/32809 [13:03<02:22, 38.87inputs/s]

# Define Tools

In [None]:
class UserProfile(BaseModel):
    name: Optional[str] = Field(None, description="User's full name")
    age: int = Field(..., ge=0, le=120, description="User's age")
    gender: str = Field(..., description="Male, Female, or Other")
    height_cm: int = Field(..., gt=50, lt=250, description="Height in cm")
    weight_kg: float = Field(..., gt=20, lt=300, description="Weight in kg")
    activity_level: str = Field(..., description="Sedentary, Lightly active, Moderately active, Very active, Super active")
    dietary_preferences: List[Literal[
        "Vegetarian", "Vegan", "Pescatarian", "Keto", "Paleo", 
        "Gluten-Free", "Dairy-Free", "Nut-Free", "Halal", "Kosher",
        "Low-Carb", "Low-Fat", "High-Protein", "Mediterranean", "FODMAP", "Sugar-Free"
    ]] = Field(default=[], description="User's dietary preferences, can be one or more.")
    allergies: List[str] = Field(default=[], description="User's allergies")
    health_conditions: List[str] = Field(default=[], description="Any medical conditions")
    weight_goal: str = Field(..., description="Lose weight, Maintain weight, Gain muscle")


user_profile = {
    "name": "Space Cadet",
    "age": 23,
    "gender": "Male",
    "height_cm": 183,
    "weight_kg": 65,
    "activity_level": "Lightly active",
    "dietary_preferences": ["Dairy-Free", "Low-Carb"],
    "allergies": ["Peanuts"],
    "health_conditions": ["None"],
    "weight_goal": "Maintain weight"
}

user = UserProfile(**user_profile)
past_meals=[]
print(user.model_dump_json(indent=4))

{
    "name": "Space Cadet",
    "age": 23,
    "gender": "Male",
    "height_cm": 183,
    "weight_kg": 65.0,
    "activity_level": "Lightly active",
    "dietary_preferences": [
        "Dairy-Free",
        "Low-Carb"
    ],
    "allergies": [
        "Peanuts"
    ],
    "health_conditions": [
        "None"
    ],
    "weight_goal": "Maintain weight"
}


In [None]:
llm = ChatOllama(model="local_llm", format="json", temperature=0)

# Retrieval Prompt Template for Multiple Meal Plans
retriever_prompt = PromptTemplate(
    template="""
    You are a nutritionist AI assistant that helps users generate **personalized meal recommendations** based on their profile.
    
    The user profile is as follows:

    - Age: {age}
    - Gender: {gender}
    - Height: {height_cm} cm
    - Weight: {weight_kg} kg
    - Activity Level: {activity_level}
    - Dietary Preferences: {dietary_preferences}
    - Allergies: {allergies}
    - Health Conditions: {health_conditions}
    - Weight Goal: {weight_goal}
    - Past Meal History (if available): {past_meals}

    ### Task:
    Generate a structured JSON response with **multiple queries** for retrieving meal plans.  
    Each query should focus on **one meal category**:  
    - Breakfast  
    - Lunch  
    - Dinner  
    - Snacks  

    Ensure that meals align with **dietary preferences, allergies, and weight goals** while maintaining **nutritional balance**.

    Your output must be a valid JSON object structured as follows:
    ```json
    {{
        "queries": [
            {{
                "meal_type": "Breakfast",
                "query": "Retrieve high-protein breakfast meals suitable for {gender}, {age} years old, {activity_level} activity, avoiding {allergies}."
            }},
            {{
                "meal_type": "Lunch",
                "query": "Retrieve balanced lunch options with {dietary_preferences} for a {weight_goal} goal, avoiding {allergies}."
            }},
            {{
                "meal_type": "Dinner",
                "query": "Find nutritious dinners for {age}-year-old {gender} aiming to {weight_goal}."
            }},
            {{
                "meal_type": "Snacks",
                "query": "Suggest healthy snack options that fit within a {dietary_preferences} diet while avoiding {allergies}."
            }}
        ]
    }}
    ```
    """,
    input_variables=[
        "age",
        "gender",
        "height_cm",
        "weight_kg",
        "activity_level",
        "dietary_preferences",
        "allergies",
        "health_conditions",
        "weight_goal",
        "past_meals"
    ],
)

# Output Parser
output_parser = JsonOutputParser()

# Function to Generate Multiple Queries
def generate_retrieval_queries(user_profile):
    formatted_prompt = retriever_prompt.format(**user_profile)
    response = llm.invoke(formatted_prompt)
    return output_parser.parse(response)

# Example User Profile
user_profile = {
    "age": 28,
    "gender": "Male",
    "height_cm": 183,
    "weight_kg": 65,
    "activity_level": "Lightly active",
    "dietary_preferences": [],
    "allergies": ["Peanuts"],
    "health_conditions": ["None"],
    "weight_goal": "Maintain weight",
    "past_meals": ["Oatmeal with fruits", "Grilled chicken with rice", "Salmon with vegetables"],
}

# Generate queries
queries = generate_retrieval_queries(user_profile)
print(queries)

OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model is not found and you should pull the model with `ollama pull local_llm`.