In [1]:
from dotenv import load_dotenv
# Read the local .env file into the process environment before any client is built
# (presumably this is where the API keys used by later cells come from — confirm).
load_dotenv()

True

In [3]:
from app import langsmith_rag

# Smoke-test the `langsmith_rag` entry point imported from `app` with a
# LangSmith-specific question; the return value is displayed as the cell output.
question = "How do I set up tracing to LangSmith with @traceable?"
langsmith_rag(question)

"To set up tracing to LangSmith with the @traceable decorator in Python, ensure that you set the LANGSMITH_TRACING environment variable to 'true' and the LANGSMITH_API_KEY environment variable to your API key. Then, simply decorate any function you wish to trace with @traceable. Make sure to use the await keyword when calling wrapped sync functions to ensure the trace is logged correctly."

In [6]:
from langsmith import Client

# Seed examples for the dataset. Each entry is a 3-tuple in the order
# (question, context, output); the comprehensions later in this cell unpack them
# into the "question"/"context" inputs and the "output" reference value.
example_dataset = [
    (
        "What is the meaning of life?",
        """For centuries, philosophers and scientists have debated the meaning of life. Some argue there is a universal purpose, while others see meaning as a construct of consciousness and culture. Modern perspectives mix scientific insight with existential reflection, acknowledging the complexity of human experience, connection, and curiosity.""",
        "The meaning of life is not a single answer, but an evolving journey shaped by consciousness, connection, and the pursuit of understanding yourself, others, and the universe."
    ),
    (
        "Does free will truly exist, or is everything predetermined?",
        """The concept of free will versus determinism has challenged thinkers from ancient philosophy to modern neuroscience. Some physical theories imply that every event follows inevitably from prior states, while others emphasize quantum randomness or the unpredictable nature of human consciousness. Contemporary debates weigh biology, environment, and the subjective feeling of choice.""",
        "Free will is both an illusion and a reality; while physical laws and biology provide structure, conscious beings shape outcomes through choices within these boundaries."
    ),
    (
        "Is there objective morality, or is it all subjective?",
        """Morality shapes societies, yet its basis is deeply debated. Objective morality suggests certain ethical truths are universal, independent of belief, while subjectivity argues that values are shaped by individuals, cultures, and history. Empirical studies find overlapping principles across cultures, yet vast differences in application and belief.""",
        "Some moral principles, such as empathy and fairness, emerge consistently across cultures, suggesting a kernel of objective morality, yet personal and cultural perspectives shape much of our ethical framework."
    ),
]



client = Client()
dataset_name = "Deep Questions"

# Create the dataset, or reuse it when it already exists: creating a second dataset
# with the same name is rejected, which would make this cell fail on every re-run.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="Existence, consciousness, and reality"
    )

# Prepare inputs and outputs
inputs = [{"question": q, "context": c} for q, c, _ in example_dataset]
outputs = [{"output": o} for _, _, o in example_dataset]

# Upload the examples only while the dataset is still empty, so that re-running the
# cell does not append duplicate copies of the same three examples.
first_existing_example = next(
    iter(client.list_examples(dataset_id=dataset.id, limit=1)), None
)
upload_result = (
    client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )
    if first_existing_example is None
    else None
)

# Displayed as the cell output: the upload summary on first run, nothing afterwards.
upload_result


{'example_ids': ['bfbb4673-9f4c-42df-ad57-8e536cc0f9eb',
  '85a9a5dd-b3b3-4fd7-82d0-117ecd73f999',
  'f5f47333-3bd5-4d58-9883-d29d971db555'],
 'count': 3}

In [11]:
# Create a LANGSMITH_API_KEY in Settings > API Keys
from langsmith import Client
# Pull the prompt named "deep" using the `client` created in the dataset cell above.
# NOTE(review): with `include_model=True` the returned object may be a
# prompt-plus-model runnable rather than a bare prompt template — confirm what
# `prompt.invoke(...)` returns before relying on it downstream.
prompt = client.pull_prompt("deep", include_model=True)

In [15]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from langsmith.client import convert_prompt_to_openai_format
from openai import OpenAI
from typing import List
import nest_asyncio

# Model passed to the OpenAI chat completion call and recorded as `ls_model_name`
# in the trace metadata of `call_openai`.
MODEL_NAME = "gpt-4o-mini"
# Recorded as `ls_provider` in the trace metadata of `call_openai`.
MODEL_PROVIDER = "openai"
# Not referenced by any of the visible cells.
APP_VERSION = 1.0

# The prompt is not hard-coded in this cell: `generate_response` formats the
# `prompt` object pulled with `client.pull_prompt(...)` in an earlier cell.

# Shared OpenAI client used by `call_openai`.
openai_client = OpenAI()

def get_vector_db_retriever():
    """Return a retriever over the LangSmith docs, backed by a parquet file on disk.

    The vector store lives at `<system temp dir>/union.parquet`. When that file is
    present the store is opened from it; when it is missing, the LangSmith docs
    sitemap is loaded, split into chunks, embedded, and persisted to that path.

    Returns:
        The result of `as_retriever(lambda_mult=0)` on the vector store.
    """
    store_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(store_path):
        # No cached store yet: crawl the docs sitemap and build the index from scratch.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        raw_docs = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(raw_docs)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )
        # Write the freshly built index to disk so later runs take the cached branch.
        store.persist()
    else:
        # Cached store found: open it instead of re-indexing.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )

    return store.as_retriever(lambda_mult=0)

# Allow nested asyncio event loops — presumably required because the sitemap loader
# runs async code inside the notebook's already-running loop (TODO confirm).
nest_asyncio.apply()
# Build the retriever once at cell level (loading the persisted store when it exists);
# `retrieve_documents` reads this global on every call.
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    # TODO: Let's use our prompt pulled from Prompt Hub instead of manually formatting here!
    formatted_prompt = prompt.invoke({"context":formatted_docs, "question": question})
    messages = convert_prompt_to_openai_format([formatted_prompt])["messages"]

    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


In [16]:
# Run the notebook-defined `langsmith_rag` end to end, using the first question
# from `example_dataset`; the answer is displayed as the cell output.
question = "What is the meaning of life?"
langsmith_rag(question)

'The meaning of life is often seen as the pursuit of purpose and fulfillment, deeply rooted in personal values, relationships, and experiences. It can vary greatly from person to person, influenced by beliefs, culture, and individual circumstances. While some may find meaning through relationships, creativity, or contributions to society, others might seek it in personal growth, exploration, or spirituality. Ultimately, it transcends objective definitions and is centered on individual interpretation and the journey toward self-discovery.'