In [24]:
%%writefile utils.py

import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings

# Prompt template for conversational RAG with three placeholders:
# {conversation}, {context} and {question}.
# NOTE(review): not referenced by any other code visible in this notebook.
RAG_PROMPT = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the latest question in the conversation.
If you don't know the answer, just say that you don't know.
The pre-existing conversation may provide important context to the question.
Use three sentences maximum and keep the answer concise.

Conversation: {conversation}
Context: {context}
Question: {question}
Answer:"""

def get_vector_db_retriever():
    """Return a retriever over the LangSmith docs, indexing them on first use.

    The vector store is persisted as ``union.parquet`` in the system temp
    directory. If that file already exists, the store is re-opened from it.
    Otherwise every page listed in the LangSmith docs sitemap is loaded,
    split into chunks (``chunk_size=500``, no overlap, via the tiktoken-based
    splitter), embedded with ``OpenAIEmbeddings`` and persisted to that path.

    Returns:
        The result of ``vectorstore.as_retriever(lambda_mult=0)``.
    """
    store_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(store_path):
        # No persisted index yet: crawl the docs, chunk them, embed and save.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        pages = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(pages)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )
        store.persist()
    else:
        # A persisted index exists: open it instead of re-indexing.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )

    return store.as_retriever(lambda_mult=0)


Writing utils.py


In [None]:
import os
from getpass import getpass


def _ensure_secret(name: str) -> None:
    """Prompt for the environment variable ``name`` only if it is not already set."""
    if not os.environ.get(name):
        os.environ[name] = getpass(f"{name}: ")


# Secrets must never be hardcoded in a notebook: they end up in version
# control and shared copies. Any key that was previously committed here
# should be treated as leaked and rotated.
_ensure_secret("LANGSMITH_API_KEY")
# OpenAI() and OpenAIEmbeddings() are used later in this notebook and need
# this variable; it is only prompted for when missing from the environment.
_ensure_secret("OPENAI_API_KEY")

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langsmith-academy"

In [None]:
!pip install langgraph langgraph-sdk langgraph-checkpoint-sqlite "langsmith>=0.2.0" langchain-community langchain-core langchain-openai notebook python-dotenv lxml scikit-learn pandas pyarrow utils


In [30]:
from openai import OpenAI
from typing import List
import nest_asyncio
from utils import get_vector_db_retriever
from langsmith.run_helpers import traceable

# Model configuration. MODEL_NAME is the default `model` argument of call_openai.
# MODEL_PROVIDER and APP_VERSION are not referenced in the visible cells.
MODEL_PROVIDER = "openai"
MODEL_NAME = "gpt-4o-mini"
APP_VERSION = 1.0
# System prompt sent as the first message by generate_response.
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the latest question in the conversation.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
"""

# Shared client used by call_openai.
openai_client = OpenAI()
# NOTE(review): presumably needed so asyncio-based code can run inside the
# notebook's already-running event loop - confirm which call requires it.
nest_asyncio.apply()
# Loads the persisted vector store, or builds it, when this cell executes.
retriever = get_vector_db_retriever()

@traceable
def retrieve_documents(question: str, langsmith_extra=None):
    """Fetch the documents the module-level ``retriever`` returns for ``question``.

    Args:
        question: The user question used as the retrieval query.
        langsmith_extra: Accepted in the signature but not read by the body.

    Returns:
        Whatever ``retriever.invoke(question)`` returns.
    """
    matches = retriever.invoke(question)
    return matches


@traceable
def generate_response(question: str, documents, langsmith_extra=None):
    """Build the chat messages for a RAG answer and send them to ``call_openai``.

    Args:
        question: The user question.
        documents: Iterable of objects exposing ``page_content``; their text is
            joined with blank lines to form the context.
        langsmith_extra: Accepted in the signature but not read by the body.

    Returns:
        The value returned by ``call_openai`` for the assembled messages.
    """
    context_text = "\n\n".join([doc.page_content for doc in documents])
    system_message = {"role": "system", "content": RAG_SYSTEM_PROMPT}
    user_message = {
        "role": "user",
        "content": f"Context: {context_text} \n\n Question: {question}",
    }
    return call_openai([system_message, user_message])


@traceable
def call_openai(
    messages: List[dict], model: str = MODEL_NAME, temperature: float = 0.0, langsmith_extra=None
):
    """Send ``messages`` to the OpenAI chat completions endpoint.

    Args:
        messages: Chat messages, each a dict with ``role`` and ``content``.
        model: Model name; defaults to ``MODEL_NAME``.
        temperature: Sampling temperature passed through to the API.
        langsmith_extra: Accepted in the signature but not read by the body.

    Returns:
        The full response object from ``openai_client.chat.completions.create``
        (not a string): callers read the answer text from
        ``response.choices[0].message.content``. The previous ``-> str``
        annotation was incorrect and has been removed.
    """
    return openai_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )

@traceable
def langsmith_rag(question: str, langsmith_extra=None):
    """Answer ``question`` by retrieving documents and generating a response.

    Args:
        question: The user question.
        langsmith_extra: Forwarded unchanged to ``retrieve_documents`` and
            ``generate_response``.

    Returns:
        The text content of the first choice in the generated response.
    """
    completion = generate_response(
        question,
        retrieve_documents(question, langsmith_extra=langsmith_extra),
        langsmith_extra=langsmith_extra,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [28]:
# Run the RAG pipeline end to end on a sample question and print the answer text.
question = "How do I add Metadata to a Run with @traceable?"
ai_answer = langsmith_rag(question)
print(ai_answer)

You can add metadata to a run by including a metadata key named `ls_run_name` in the `experimental_telemetry` object when calling the `generateText` function. For example, you can structure your call like this: 

```javascript
await generateText({
  model: openai("gpt-4.1-nano"),
  prompt: "Your prompt here.",
  experimental_telemetry: {
    isEnabled: true,
    metadata: { userId: "123", language: "english" },
  },
});
```

This metadata will be visible in your LangSmith dashboard and can be used for filtering and searching traces.


In [31]:
# Same pipeline, but with metadata supplied at call time through `langsmith_extra`.
# NOTE(review): the @traceable decorator is expected to attach this metadata to
# the run - confirm in the LangSmith UI that `runtime_metadata` appears on the trace.
question = "How do I add metadata at runtime?"
ai_answer = langsmith_rag(question, langsmith_extra={"metadata": {"runtime_metadata": "foo"}})
print(ai_answer)

To add metadata at runtime, you can attach it when you run an experiment in the SDK. This involves specifying the metadata related to your experiment, such as the model used or other relevant identifiers. Ensure that the same metadata is also attached to the trace for consistent filtering and analysis later.


# New section