# Dependencies

In [None]:
import os
import logging
import json
import chromadb

from llm_agent_toolkit.loader import PDFLoader  # type: ignore
from llm_agent_toolkit.encoder import OllamaEncoder  # type: ignore
from llm_agent_toolkit.memory import ChromaMemory  # type: ignore
from llm_agent_toolkit._memory import ShortTermMemory  # type: ignore
from llm_agent_toolkit.core.local import Image_to_Text  # type: ignore
from llm_agent_toolkit import ChatCompletionConfig  # type: ignore
from agents import EInvoiceAgent
from tools import DDGSearch

## Setup Logger

In [None]:
# Log file for this cookbook run.
LOG_FILE = "./output/log/cookbook-1.log"

# logging.basicConfig(filename=...) opens the file immediately and raises
# FileNotFoundError when the parent directory is missing (e.g. on a fresh
# checkout), so make sure the directory exists first.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)

logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger used by the cells below.
logger = logging.getLogger(__name__)

## Build Knowledge Base

In [None]:
def build_knowledge_base(
    mem: ChromaMemory, ldr: PDFLoader, src_dir: str, web_search: DDGSearch, topic: str
) -> None:
    """Populate the vector memory from local files and a web search.

    Every regular file directly inside ``src_dir`` is loaded with ``ldr`` and
    added to ``mem`` with its file name as metadata. Afterwards ``topic`` is
    searched through ``web_search`` and every returned page is added to
    ``mem`` with its title as metadata.

    Args:
        mem: Memory that receives the documents via ``mem.add``.
        ldr: Loader used to turn each local file into a string via ``ldr.load``.
        src_dir: Directory whose files are ingested (not recursive).
        web_search: Search tool; ``run`` is called with a JSON string holding
            ``query`` and ``top_n``.
        topic: Query string submitted to the web search.

    Raises:
        json.JSONDecodeError: If a non-empty page segment is not valid JSON.
        KeyError: If a page object lacks ``title``, ``body`` or ``html``.
    """
    # Separator this function expects between JSON page objects in the search
    # result string.
    page_separator = "$$$$$$$"

    logger.info("Building knowledge base...")
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for name in sorted(os.listdir(src_dir)):
        filepath = os.path.join(src_dir, name)
        if not os.path.isfile(filepath):
            # Sub-directories cannot be handed to the file loader.
            logger.info("Skipping non-file entry %s", filepath)
            continue
        logger.info("Loading from %s", filepath)
        f_content = ldr.load(input_path=filepath)
        logger.info("Adding to memory...")
        mem.add(document_string=f_content, metadata={"filename": name})

    params = {"query": topic, "top_n": 20}
    text_result = web_search.run(json.dumps(params))
    for raw_page in text_result.split(page_separator):
        raw_page = raw_page.strip()
        if not raw_page:
            # An empty result, or a leading/trailing separator, yields blank
            # segments; json.loads("") would raise, so skip them.
            continue
        page_object = json.loads(raw_page)
        metadata = {"filename": page_object["title"]}
        p_content = f"Body={page_object['body']}\nHTML={page_object['html']}"
        # Collapse runs of four newlines into one to trim blank padding.
        p_content = p_content.replace("\n\n\n\n", "\n")
        mem.add(document_string=p_content, metadata=metadata)

    logger.info("Finished")

# Parameters

In [None]:
# Directory whose files are ingested into the knowledge base.
DIR = r"./assets"
# Root for generated artefacts (Chroma storage, loader temp files, progress file).
TMP_DIRECTORY = "./output"
# Prefix used for the questions file and the progress file names.
PROJECT_NAME = "cookbook-1"
# Namespace handed to ChromaMemory.
NAMESPACE = "my-einv-2"
# Connection string of the Ollama server used by the encoder and the models.
OLLAMA_HOST = "http://localhost:11434"
# Passed to ChromaMemory as `overwrite`.
OVERWRITE = True
# Query string submitted to the web search when building the knowledge base.
TOPIC = "Malaysia e-Invoice Guidelines, SDK"
# Passed to ChromaMemory as `split_text_config`.
SPLIT_TEXT_CONFIG = {"chunk_size": 2048, "stride_rate": 0.8}

## Prompts

In [None]:
# System prompt for the pre-generation filter: grades how relevant a retrieved
# knowledge segment is to the user query. The output template must itself be
# well-formed JSON (comma between the keys, no trailing comma); otherwise the
# model is being shown a format that a JSON parser rejects.
PRE_GENERATION_PROMPT = """
You are a grading assistant. Your task is to evaluate the correlation between a user query and a knowledge segment retrieved via semantic search.

Analyze the semantic and contextual relationship between the user query and the knowledge segment.
Provide a score between 0.0 and 1.0 to represent the correlation:
    0.0: Completely unrelated.
    1.0: Perfectly related and fully relevant.
Justify your score with a concise reason.

Input Format:
User Query: "<insert user query here>" Knowledge Segment: "<insert knowledge segment here>"

Output Format (JSON):
{
  "reason": "<string>",
  "score": <float>
}

Note:
- Return only in JSON format with "reason" and "score" as keys. Nothing else!!!
"""

# System prompt for the post-generation filter: grades how well an LLM response
# is grounded in the supplied context. The output template must itself be
# well-formed JSON (comma between the keys, no trailing comma); otherwise the
# model is being shown a format that a JSON parser rejects.
POST_GENERATION_PROMPT = """
You are a grounding validator. Your task is to evaluate the grounding of an LLM response based on the provided context.

Compare the LLM response against the context to determine if the response is factually supported and aligns with the given information.
Provide a score between 0.0 and 1.0:
    0.0: Completely hallucinated or unsupported by the context.
    1.0: Fully grounded in the context and factually accurate.
Justify your score with a concise explanation that highlights specific aspects of the response and context.

Input Format:
LLM Response: "<insert LLM response here>" Context: "<insert context here>"

Output Format (JSON):
{
  "reason": "<string>",
  "score": <float>
}

Note:
- Return only in JSON format with "reason" and "score" as keys. Nothing else!!!
"""

# System prompt for the image-to-text model: asks for a structured description
# (type, foreground, background, vibe/purpose, interactions, relationships) of
# a photo or diagram, with one worked example for each.
IMAGE_INTERPRETOR_PROMPT = """
You are an image interpreter. Your task is to analyze and describe the provided image, whether it is a photograph or a diagram. Your description should comprehensively include:
- Image Type: State whether the image is a photo, diagram, chart, or another type.
- Foreground Elements: Identify and describe the primary objects, people, subjects, or components in focus.
- Background Elements: For photos, describe the setting or environment. For diagrams, describe any supporting elements like labels, annotations, or grids.
- Vibe and Atmosphere: If the image is a photo, explain the overall mood, tone, or emotional impression. If it is a diagram, describe its purpose or functional intent (e.g., instructional, explanatory).
- Interactions: For photos, highlight actions, movements, or dynamics between elements. For diagrams, explain how the components relate, interact, or connect logically.
- Relationships: Explain the connections or contextual significance between elements (e.g., people interacting in photos, or flow and dependencies in diagrams).

Output Example (Photo): "The image shows a bustling city street in the foreground, with people crossing a zebra crossing. The background includes tall skyscrapers under a clear blue sky. The vibe is energetic and lively. People are interacting by talking and walking together, suggesting a morning rush. The relation between the elements shows a harmonious urban scene with people and vehicles coexisting."

Output Example (Diagram): "The image is a flowchart showing a process with five connected nodes. The foreground features circles and arrows labeled with text. The background is plain white with no decorative elements. The purpose is instructional, conveying the sequence of steps in a workflow. Interactions are depicted through arrows showing directional flow, and relationships highlight dependencies between nodes."
"""

## Build Knowledge Base

In [None]:
# --- Vector store -----------------------------------------------------------
# Chroma client configured as persistent, storing under the output directory.
chroma_settings = chromadb.Settings(
    is_persistent=True, persist_directory=f"{TMP_DIRECTORY}/storage/chroma"
)
vdb = chromadb.Client(settings=chroma_settings)

# --- Embeddings -------------------------------------------------------------
# Use the first model listed in the encoder's supported-model table.
embedding_model_name = OllamaEncoder.SUPPORTED_MODELS[0]["name"]
local_encoder = OllamaEncoder(
    connection_string=OLLAMA_HOST, model_name=embedding_model_name
)

# --- Long-term memory -------------------------------------------------------
chroma_memory = ChromaMemory(
    vdb=vdb,
    encoder=local_encoder,
    split_text_config=SPLIT_TEXT_CONFIG,
    namespace=NAMESPACE,
    overwrite=OVERWRITE,
)
print(f"chroma_memory: {chroma_memory}")

# --- PDF loading ------------------------------------------------------------
# The image-to-text core is handed to the PDF loader (text_only=False).
vision_model_config = ChatCompletionConfig(
    name="llava:7b", temperature=0.3, max_tokens=1024
)
i2t = Image_to_Text(
    connection_string=OLLAMA_HOST,
    system_prompt=IMAGE_INTERPRETOR_PROMPT,
    config=vision_model_config,
    tools=None,
)
pdf_loader = PDFLoader(text_only=False, tmp_directory=TMP_DIRECTORY, core=i2t)

# --- Web search -------------------------------------------------------------
ddg = DDGSearch(region="my", safe_search="on", pause_second=0.5)

In [None]:
# Ingest the files under DIR plus the web-search results for TOPIC into the
# Chroma-backed memory.
build_knowledge_base(chroma_memory, pdf_loader, DIR, ddg, TOPIC)

In [None]:
# Main answering model: given the domain system prompt and the web-search tool.
core_section = {
    "connection_string": OLLAMA_HOST,
    "system_prompt": "You are an agent expert in Malaysia e-Invoice implementation.",
    "config": ChatCompletionConfig(
        name="qwen2.5:7b", max_tokens=128_000, temperature=0.5
    ),
    "tools": [ddg],
}

# Pre-generation grader configuration (uses PRE_GENERATION_PROMPT).
pre_filter_section = {
    "connection_string": OLLAMA_HOST,
    "system_prompt": PRE_GENERATION_PROMPT,
    "config": ChatCompletionConfig(
        name="llama3.2:3b", max_tokens=512, temperature=0.3
    ),
    "threshold": 0.4,
}

# Post-generation grader configuration (uses POST_GENERATION_PROMPT).
post_filter_section = {
    "connection_string": OLLAMA_HOST,
    "system_prompt": POST_GENERATION_PROMPT,
    "config": ChatCompletionConfig(
        name="llama3.2:3b", max_tokens=512, temperature=0.3
    ),
    "threshold": 0.4,
}

agent_config = {
    "core": core_section,
    "pre-filter": pre_filter_section,
    "post-filter": post_filter_section,
}

# The agent combines a 50-entry short-term memory with the Chroma vector memory.
conversation_memory = ShortTermMemory(max_entry=50)
agent = EInvoiceAgent(
    short_memory=conversation_memory,
    vector_memory=chroma_memory,
    config=agent_config,
)

# Ask Questions

In [None]:
# Ask the agent every question from the questions file and record each
# question/answer pair in the progress file.
#
# The progress path must be an f-string; without the prefix the literal
# "{TMP_DIRECTORY}/{PROJECT_NAME}-progress.md" is opened instead.
progress_path = f"{TMP_DIRECTORY}/{PROJECT_NAME}-progress.md"
questions_path = f"./{PROJECT_NAME}-questions.txt"

# Read the questions first so a missing questions file does not truncate an
# existing progress file. Trailing newlines are stripped so the agent gets a
# clean query, and blank lines are not sent as questions.
with open(questions_path, "r", encoding="utf-8") as reader:
    questions = [line.strip() for line in reader if line.strip()]

with open(progress_path, "w", encoding="utf-8") as progress:
    for qi, question in enumerate(questions, start=1):
        bot_answer = agent.ask(query=question)
        # The explicit blank line after the question keeps the layout the
        # newline-terminated input lines used to produce.
        progress.write(f"[{qi}] Question: {question}\n\nAnswer: {bot_answer}\n")
        # Flush per question so partial progress survives an interrupted run.
        progress.flush()