# Prompt Engineering Lifecycle

### Setup

In [8]:
# Load environment variables from a .env file.
# find_dotenv(usecwd=True) searches the current working directory and then its
# parents for a `.env` file, so the notebook is not tied to an absolute path on
# one particular machine.
from dotenv import find_dotenv, load_dotenv
load_dotenv(dotenv_path=find_dotenv(usecwd=True), override=True)

True

### Log a trace

In [12]:
from app import langsmith_rag
# Ask one sample question; this call uses the `langsmith_rag` imported from `app`.
question = "How do I set up tracing to LangSmith with @traceable?"
# Last expression of the cell, so the returned answer is displayed as cell output.
langsmith_rag(question)

Fetching pages: 100%|##########| 197/197 [01:16<00:00,  2.56it/s]


"To set up tracing to LangSmith using the @traceable decorator, first ensure the LANGSMITH_TRACING environment variable is set to 'true' and the LANGSMITH_API_KEY is configured with your API key. Then, simply decorate your desired function with @traceable to log traces. Additionally, remember to use the await keyword when calling the wrapped sync function to ensure traces are logged correctly."

### Create a Dataset

Let's create a dataset to evaluate this particular step of our application.

In [13]:
from langsmith import Client

# Custom example dataset for Copilot tweak.
# Each tuple is (question, context, reference output).
copilot_examples = [
    (
        "What is prompt engineering?",
        "Prompt engineering is the process of designing and refining prompts to effectively guide large language models (LLMs) to produce desired outputs.",
        "Prompt engineering involves crafting input prompts to optimize LLM responses for specific tasks."
    ),
    (
        "How do you evaluate a prompt's effectiveness?",
        "A prompt's effectiveness can be evaluated by measuring the relevance, accuracy, and consistency of the LLM's responses to a set of test cases.",
        "You can evaluate prompt effectiveness by testing it on various inputs and analyzing the quality of the outputs."
    ),
    (
        "Why is context important in prompt engineering?",
        "Context helps the LLM understand the user's intent and generate more accurate and relevant responses.",
        "Providing context in prompts leads to better and more targeted LLM outputs."
    )
]

client = Client()
dataset_name = "Copilot Custom Dataset"

# Prepare inputs and outputs
inputs = [{"question": q, "context": c} for q, c, _ in copilot_examples]
outputs = [{"output": o} for _, _, o in copilot_examples]

# Make the cell safe to re-run: only create the dataset (and upload its
# examples) when no dataset with this name exists yet. Otherwise reuse the
# existing one instead of attempting a second creation or appending duplicates.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
    print(f"[Info] Dataset '{dataset_name}' already exists; reusing it without adding examples.")
else:
    # Create custom dataset
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="A custom dataset for prompt engineering lifecycle experiments."
    )

    # Create examples in the dataset
    client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )
    print(f"[Info] Created custom dataset '{dataset_name}' with {len(copilot_examples)} examples.")

[Info] Created custom dataset 'Copilot Custom Dataset' with 3 examples.


### Update our Application to use Prompt Hub

We're going to pretty much define the same RAG application as before - with one crucial improvement.

Instead of pulling our `RAG_PROMPT` from `utils.py`, we're going to connect to the Prompt Hub in LangSmith.

Let's add the code snippet that will pull down our prompt that we just iterated on!

In [16]:
from langchain import hub
# Pull the prompt named "rag_for_copilot" from the hub; `generate_response`
# below reads this module-level `prompt`.
prompt=hub.pull("rag_for_copilot")

In [17]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from langsmith.client import convert_prompt_to_openai_format
from openai import OpenAI
from typing import List
import nest_asyncio

# Model identifier passed to the chat completions call and recorded in the
# trace metadata as `ls_model_name`.
MODEL_NAME = "gpt-4o-mini"
# Provider label recorded in the trace metadata as `ls_provider`.
MODEL_PROVIDER = "openai"
# NOTE(review): APP_VERSION is not referenced in the visible code — confirm it is still needed.
APP_VERSION = 1.0


# Shared OpenAI client used by `call_openai`; constructed with no explicit
# arguments (presumably picks up credentials from the environment — confirm).
openai_client = OpenAI()

def get_vector_db_retriever():
    """Return a retriever over the LangSmith docs, reusing a persisted index if present.

    The vector store is persisted as ``union.parquet`` in the system temp
    directory. When that file exists it is loaded directly; otherwise the
    LangSmith docs sitemap is loaded, split into chunks, embedded, and the new
    store is persisted to that path.

    Returns:
        The result of ``as_retriever(lambda_mult=0)`` on the SKLearnVectorStore.
    """
    cache_file = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(cache_file):
        # No persisted index yet: load the docs listed in the LangSmith sitemap.
        pages = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        ).load()

        # Split by token count: 500-token chunks with no overlap.
        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(pages)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet",
        )
        # Write the index to disk so later runs take the load branch instead.
        store.persist()
    else:
        # A persisted index exists: open it rather than re-indexing.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=cache_file,
            serializer="parquet",
        )

    return store.as_retriever(lambda_mult=0)

# Patch asyncio to permit nested event loops — presumably required so the
# sitemap loading can run inside the notebook's already-running loop (TODO confirm).
nest_asyncio.apply()
# Build (or load) the retriever once; `retrieve_documents` reads this global.
retriever = get_vector_db_retriever()

@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Fetch documents from the vector store based on the user's question.

    Args:
        question: The user's question, passed to the module-level `retriever`.

    Returns:
        Whatever `retriever.invoke(question)` returns, i.e. the retrieved documents.
    """
    return retriever.invoke(question)

@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Fill the Prompt Hub prompt with the retrieved documents and call the model.

    Args:
        question: The user's question, supplied as the prompt's `question` input.
        documents: Retrieved documents; each must expose a `page_content` string.

    Returns:
        The value returned by `call_openai` for the formatted messages.
    """
    # Join the document bodies with blank lines to form the prompt's `context` input.
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)

    # `prompt` is the module-level prompt pulled from Prompt Hub.
    formatted_prompt = prompt.invoke({"context": formatted_docs, "question": question})
    # Convert the prompt value to OpenAI's format and keep only its message list.
    messages = convert_prompt_to_openai_format(formatted_prompt)["messages"]
    return call_openai(messages)

@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]):
    """Send chat messages to OpenAI and return the chat completion response.

    Args:
        messages: The chat messages to send, as a list of dicts.

    Returns:
        The full response object from `chat.completions.create`, not a plain
        string. Callers read the generated text from
        `response.choices[0].message.content`, as `langsmith_rag` does.
    """
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer a question by retrieving documents and generating a response from them.

    Args:
        question: The user's question.

    Returns:
        The message content of the first choice in the model response.
    """
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    # `generate_response` returns the full completion object; extract the text.
    return response.choices[0].message.content


In [18]:
# Try the `langsmith_rag` defined in this notebook, which now uses the Prompt Hub prompt.
question = "How to learn Prompt Engineering?"
# Last expression of the cell, so the returned answer is displayed as cell output.
langsmith_rag(question)

"To learn prompt engineering, start by understanding the key concepts and practices outlined in resources like LangSmith's documentation. Practice by crafting, testing, and refining prompts using tools or SDKs, and consider working through a quickstart tutorial for hands-on experience. It may also be valuable to collaborate with domain experts or product managers to refine your approach."