# monitoring

In [21]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment before any
# client is created.
# NOTE(review): presumably this is where the Langfuse and OpenAI credentials
# come from — confirm which keys the .env file must define.
load_dotenv()

True

In [22]:
from langfuse import get_client
 
# Build the Langfuse client; get_client() is called without arguments here.
langfuse = get_client()

# Probe the credentials once and report the outcome.
is_authenticated = langfuse.auth_check()
print(
    "Langfuse client is authenticated and ready!"
    if is_authenticated
    else "Authentication failed. Please check your credentials and host."
)

Langfuse client is authenticated and ready!


In [23]:
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
 
# Create the OpenInference instrumentor for smolagents and switch it on.
# NOTE(review): presumably this has to run before the agent is built so that
# its steps are traced — confirm against the OpenInference documentation.
SmolagentsInstrumentor().instrument()

# start

## visualization adjust

In [2]:
from tqdm.notebook import tqdm
import pandas as pd
from typing import Optional, List, Tuple
from datasets import Dataset
import matplotlib.pyplot as plt

# Passing None removes the column-width limit, so long text cells (e.g. retriever
# outputs) are displayed in full instead of being truncated.
pd.set_option("display.max_colwidth", None)

## PDF Extraction

### Standard text extraction with PyPDFLoader (no OCR)

In [3]:
from langchain_community.document_loaders import PyPDFLoader

# Relative path: it is resolved against the kernel's working directory, so the
# PDF is only found when the kernel runs one level below the `assets` folder.
file_path = "../assets/guidebooks/guidebook.pdf"
# The loader is only constructed here; load() is called in the next cell.
loader = PyPDFLoader(file_path)

In [4]:
# Read the PDF into `docs`. The progress bar printed by the next cell iterates
# over 370 of these entries (presumably one per page — confirm).
docs = loader.load()

In [5]:
from langchain.docstore.document import Document as LangchainDocument

# Re-wrap every loaded entry as a LangchainDocument that keeps the text and
# records the entry's "page_label" metadata under the "source" key.
RAW_KNOWLEDGE_BASE = []
for loaded_page in tqdm(docs):
    RAW_KNOWLEDGE_BASE.append(
        LangchainDocument(
            page_content=loaded_page.page_content,
            metadata={"source": loaded_page.metadata["page_label"]},
        )
    )

  0%|          | 0/370 [00:00<?, ?it/s]

## Chunking

See [5 Levels of Text Splitting](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb) for other chunking methods.

### RecursiveCharacterTextSplitter

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Hierarchical list of separators tailored for splitting Markdown documents,
# ordered from the coarsest boundary (headings) down to single characters.
# This list is taken from LangChain's MarkdownTextSplitter class.
# Several entries are regular expressions (heading, "***", "---" and "___"
# rules), so the splitter below must interpret the separators as regex.
MARKDOWN_SEPARATORS = [
    "\n#{1,6} ",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",
    "",
]

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # The maximum number of characters in a chunk: we selected this value arbitrarily
    chunk_overlap=100,  # The number of characters to overlap between chunks
    add_start_index=True,  # If `True`, includes chunk's start index in metadata
    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document
    separators=MARKDOWN_SEPARATORS,
    # Without this flag the patterns above are escaped and matched literally,
    # so the heading / horizontal-rule separators would never split anything.
    is_separator_regex=True,
)

# split_documents() takes the whole list and returns the chunks in input order,
# so no per-document accumulation loop is needed.
docs_processed = text_splitter.split_documents(RAW_KNOWLEDGE_BASE)

SentenceTransformer model could be changed

In [7]:
# from transformers import AutoTokenizer

# EMBEDDING_MODEL_NAME = "BAAI/bge-en-icl"

# tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
# lengths = [len(tokenizer.encode(doc.page_content)) for doc in tqdm(docs_processed)]

# # Plot the distribution of document lengths, counted as the number of tokens
# fig = pd.Series(lengths).hist()
# plt.title("Distribution of document lengths in the knowledge base (in count of tokens)")
# plt.show()

Document chunk is forced to be 512, probably there's a better way to do this

## Vector Database

### FAISS

use modal

## Use OpenAI

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the FAISS index when it is loaded in a later cell.
# NOTE(review): no API key is passed here, so it is presumably read from the
# environment populated by load_dotenv() — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [8]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

# vector_store = FAISS(
#     embedding_function=embeddings,
#     index=index,
#     docstore=InMemoryDocstore(),
#     index_to_docstore_id={},
# )

In [None]:
# vector_store = FAISS.from_documents(
#     documents=RAW_KNOWLEDGE_BASE,
#     embedding=embeddings,
# )

Save to local

In [None]:
# vector_store.save_local("faiss_index")

In [None]:
from pathlib import Path

# Folder that holds the persisted FAISS index (index.faiss + index.pkl).
FAISS_INDEX_DIR = Path("faiss_index")

if (FAISS_INDEX_DIR / "index.faiss").exists():
    # SECURITY: load_local() unpickles the stored docstore, hence the explicit
    # allow_dangerous_deserialization opt-in. Only load an index produced by
    # this notebook; never one obtained from an untrusted source.
    vector_store = FAISS.load_local(
        str(FAISS_INDEX_DIR), embeddings, allow_dangerous_deserialization=True
    )
else:
    # No saved index yet (e.g. a fresh checkout): build it once and persist it
    # so that Restart & Run All works. This mirrors the commented-out build and
    # save cells above and calls the embedding API for every document.
    # NOTE(review): the chunked `docs_processed` list is never indexed in the
    # cells shown; switching to it would require rebuilding the saved index.
    vector_store = FAISS.from_documents(
        documents=RAW_KNOWLEDGE_BASE,
        embedding=embeddings,
    )
    vector_store.save_local(str(FAISS_INDEX_DIR))

In [10]:
# Stand-alone MMR (maximal marginal relevance) retriever over the loaded index.
# NOTE(review): no later cell shown here reads `retriever`; the tool class
# below creates its own retriever — confirm whether this one is still needed.
retriever = vector_store.as_retriever(search_type="mmr")

Retriever tool

In [None]:
from smolagents import Tool

class PMBOKRetrieverTool(Tool):
    """smolagents tool that runs an MMR semantic search over the PMBOK vector store.

    The class attributes below (`name`, `description`, `inputs`, `output_type`)
    are the tool metadata; their text is part of the runtime behaviour and is
    kept verbatim.
    """

    name = "PMBOKRetriever"
    description = "Uses semantic search to retrieve the parts of Project Management Body of Knowledge (PMBOK) documentation that could be most relevant to answer your query."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, vector_store, k=4, **kwargs):
        """Create the tool around an existing vector store.

        Args:
            vector_store: Object exposing `as_retriever()`; in this notebook
                the FAISS store loaded from disk.
            k: Number of documents to return per query.
            **kwargs: Forwarded unchanged to `Tool.__init__`.
        """
        super().__init__(**kwargs)
        # `k` has to travel inside `search_kwargs`: as_retriever() only hands
        # the contents of that dict to the underlying search call, so a bare
        # `k=` keyword would not change the number of returned documents.
        self.retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k},
        )

    def forward(self, query: str) -> str:
        """Run the retrieval for `query` and format the hits as one string.

        Args:
            query: Search text; must be a `str`.

        Returns:
            A header line followed by one numbered section per retrieved
            document, each containing that document's `page_content`.

        Raises:
            AssertionError: If `query` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        # Retrieve relevant documents
        docs = self.retriever.invoke(query)

        # Number the documents from 0 so the agent can refer to them individually
        return "\nRetrieved documents:\n" + "".join(
            f"\n\n===== Document {i} =====\n" + doc.page_content
            for i, doc in enumerate(docs)
        )

# Instantiate the retriever tool on top of the loaded vector store, passing k=4
retriever_tool = PMBOKRetrieverTool(vector_store, k=4)

In [31]:
from smolagents import InferenceClientModel, CodeAgent

# Model behind the agent, served through the "nebius" provider. No model_id is
# passed, so the library default applies (noted by the author as
# "Qwen/Qwen2.5-Coder-32B-Instruct" — TODO confirm for the installed version).
agent_model = InferenceClientModel(provider="nebius")

# Code agent wired to the PMBOK retriever tool
agent = CodeAgent(
    model=agent_model,
    tools=[retriever_tool],  # List of tools available to the agent
    verbosity_level=2,  # Show detailed agent reasoning
    max_steps=4,  # Limit the number of reasoning steps
)

# To use a specific model, you can specify it like this:
# model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")

## Test 1 - 26/06/2025

Commissioning does not exist in PMBOK.

nanya pendapat skrg

add memory now

it seems that the model does not know the sequence of a project.

jangan kaku, kayak ngomong ke orang. end productnya mau jadi kayak manusia

Chat memory: ask the user about follow-up tasks, e.g. "Would you like a tea with that?"

needs another agent: project maker