In [1]:
from dotenv import load_dotenv
load_dotenv( override=True)

True

In [2]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio

# TODO: Configure this model!
MODEL_NAME = "gpt-3.5-turbo"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

openai_client = OpenAI()

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embd = OpenAIEmbeddings()

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)
     # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content

USER_AGENT environment variable not set, consider setting it to identify your requests.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langsmith import evaluate, Client

client = Client()
dataset_name = "RAG Application Golden Dataset"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs["output"]) < 1.5 * len(reference_outputs["output"])
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)

View the evaluation results for experiment: 'gpt-4o-14424044' at:
https://smith.langchain.com/o/b3cf18a4-4e38-4c04-bff3-e0df17ce5621/datasets/d7ceceb9-90d2-4db6-b6fb-0de6687d9b74/compare?selectedSessions=0f3cf386-3602-4f6b-97fd-70757a70ac44




20it [00:37,  1.88s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Who should I contact for admission-related que...,I don't have information on admission-related ...,,You can contact the university’s admission off...,0,1.857252,0986f241-604d-47a1-ae9c-c71984dc67f0,83c78b6f-cddf-4379-a8b1-752f788e26ea
1,When do college admissions usually start?,I don't have the specific information about wh...,,Most college admissions in India start between...,1,1.975113,13723608-b863-4919-8e1e-6e952ed9bd2d,98999364-351d-43b1-a21f-f8aa708d34cb
2,What should I do after getting selected?,"After getting selected, you should label examp...",,"Once selected, you need to confirm your admiss...",0,2.003205,173b7232-3c61-423a-b545-c5487a283486,70ff69a9-e5e8-4366-9345-8328aa618f13
3,How can I check my admission status?,I don't have information about how to check yo...,,"After applying, you can log into the universit...",1,1.121308,1a107389-f744-4862-b67a-7817163ffa12,5e8abe60-b108-4c91-adb6-b8b229feb712
4,Can international students apply for admission?,"I'm sorry, I don't know the answer to that que...",,"Yes, most universities have a separate process...",1,2.143642,3822a362-4d1b-4737-bea0-099f6d68b637,44a76419-c8bd-4cf6-a7c0-8e0e51137511
5,What happens if I miss the application deadline?,"If you miss the application deadline, typicall...",,"If you miss the deadline, you may have to wait...",0,2.353755,4aedaa19-b6da-4c70-b5f5-88a4b5b428de,21430821-4ce2-41c9-8065-babe8ec4ee94
6,Is there an entrance exam for admission?,I don't know.,,"Some colleges require entrance exams, while ot...",1,1.424873,861bbbf4-b757-4ce1-bf5c-90a377f5fce7,74e940f3-f346-435c-ba92-805ca6bf113a
7,Can I apply to multiple colleges at once?,I don't have information related to applying t...,,"Yes, you can apply to multiple colleges as lon...",1,1.665958,a8ecf3e2-14b0-4b2d-a6bc-9a9b52643239,bb8c9873-3df2-439a-9ec5-0b9e93af5cdb
8,What documents are needed for college admission?,I don't know. Would you like me to help you wi...,,"Typically, you need your high school transcrip...",1,1.72604,aa473cdc-c45c-496f-95df-164d4d147a75,8faeaa1e-e8cb-417c-8e8e-8672e809eea4
9,How can I apply for college admission?,I don't have enough information to help you wi...,,You can apply for college admission through th...,1,1.316335,d86afc5d-fee3-44f2-bf16-3150426969c8,afd114a5-3db0-4452-8a33-65d3a9373db1


In [4]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-3.5-turbo"
)

View the evaluation results for experiment: 'gpt-3.5-turbo-6de0c9fb' at:
https://smith.langchain.com/o/b3cf18a4-4e38-4c04-bff3-e0df17ce5621/datasets/d7ceceb9-90d2-4db6-b6fb-0de6687d9b74/compare?selectedSessions=771d1957-c1fe-41a3-ad30-e8172d6445ed




20it [00:33,  1.67s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Who should I contact for admission-related que...,I don't have information on admission-related ...,,You can contact the university’s admission off...,1,1.243504,0986f241-604d-47a1-ae9c-c71984dc67f0,a865583d-47e5-43bc-a697-22655ae50fea
1,When do college admissions usually start?,I don't have the specific information regardin...,,Most college admissions in India start between...,1,1.218091,13723608-b863-4919-8e1e-6e952ed9bd2d,4d1bf710-0595-4648-b327-2212380d56f7
2,What should I do after getting selected?,"After getting selected, you should create and ...",,"Once selected, you need to confirm your admiss...",0,1.494771,173b7232-3c61-423a-b545-c5487a283486,daaed3bc-cd01-456b-a362-86365fb10103
3,How can I check my admission status?,I don't have that information.,,"After applying, you can log into the universit...",1,1.574337,1a107389-f744-4862-b67a-7817163ffa12,abc58836-eb8d-4295-8ea4-5619f26a0218
4,Can international students apply for admission?,I don't know.,,"Yes, most universities have a separate process...",1,1.73118,3822a362-4d1b-4737-bea0-099f6d68b637,5dd7e352-c233-4d85-b356-1cd04ada41c5
5,What happens if I miss the application deadline?,"If you miss the application deadline, you may ...",,"If you miss the deadline, you may have to wait...",0,2.248495,4aedaa19-b6da-4c70-b5f5-88a4b5b428de,ebd3f09b-73ab-408c-b5d4-9109146fa6b5
6,Is there an entrance exam for admission?,I don't know.,,"Some colleges require entrance exams, while ot...",1,1.224374,861bbbf4-b757-4ce1-bf5c-90a377f5fce7,ce328965-4308-4d92-be30-e37fb865bfef
7,Can I apply to multiple colleges at once?,"I'm sorry, I couldn't find relevant informatio...",,"Yes, you can apply to multiple colleges as lon...",1,1.335646,a8ecf3e2-14b0-4b2d-a6bc-9a9b52643239,23c3ee8c-aed7-4d7e-86bd-a6c48c7ef0dd
8,What documents are needed for college admission?,I don't have the information to answer that qu...,,"Typically, you need your high school transcrip...",1,1.218674,aa473cdc-c45c-496f-95df-164d4d147a75,e60da25c-93a7-4b22-b55e-fb096be55390
9,How can I apply for college admission?,I don't have information on applying for colle...,,You can apply for college admission through th...,1,1.254462,d86afc5d-fee3-44f2-bf16-3150426969c8,ae44a6d3-6f16-4b58-803c-41cec465e492


In [5]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name),
    evaluators=[is_concise_enough],
    experiment_prefix="initial dataset version"
)

View the evaluation results for experiment: 'initial dataset version-1cdf8459' at:
https://smith.langchain.com/o/b3cf18a4-4e38-4c04-bff3-e0df17ce5621/datasets/d7ceceb9-90d2-4db6-b6fb-0de6687d9b74/compare?selectedSessions=b647da97-afc5-4aa1-91c0-61083f62a059




20it [00:36,  1.85s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Who should I contact for admission-related que...,I don't have information about who to contact ...,,You can contact the university’s admission off...,1,1.420736,0986f241-604d-47a1-ae9c-c71984dc67f0,03b04db6-cfaa-40b6-953c-4f42f6d82ce2
1,When do college admissions usually start?,I don't have the specific information regardin...,,Most college admissions in India start between...,1,1.236238,13723608-b863-4919-8e1e-6e952ed9bd2d,00d4b08e-50f0-44bb-b1c4-7c61e58648f5
2,What should I do after getting selected?,After selecting examples in your dataset and c...,,"Once selected, you need to confirm your admiss...",0,1.747048,173b7232-3c61-423a-b545-c5487a283486,cdab68e6-e67a-4416-8776-fa46dc12545a
3,How can I check my admission status?,"I'm sorry, I don't have the information on how...",,"After applying, you can log into the universit...",1,1.316911,1a107389-f744-4862-b67a-7817163ffa12,620457c5-102d-4ab0-83de-174cd3d045fb
4,Can international students apply for admission?,I don't know the answer to your question as it...,,"Yes, most universities have a separate process...",1,1.737107,3822a362-4d1b-4737-bea0-099f6d68b637,a59873d0-ec84-46d2-9ff2-16e65a1f6afb
5,What happens if I miss the application deadline?,"If you miss the application deadline, you typi...",,"If you miss the deadline, you may have to wait...",0,1.747687,4aedaa19-b6da-4c70-b5f5-88a4b5b428de,ab33b297-c2c7-4592-960f-b437fa130063
6,Is there an entrance exam for admission?,I don't know about an entrance exam for admiss...,,"Some colleges require entrance exams, while ot...",1,1.224561,861bbbf4-b757-4ce1-bf5c-90a377f5fce7,14fdf946-16c1-4d07-815b-daf72c1274e0
7,Can I apply to multiple colleges at once?,I don't have the information to answer that qu...,,"Yes, you can apply to multiple colleges as lon...",1,1.332357,a8ecf3e2-14b0-4b2d-a6bc-9a9b52643239,25a7714c-51df-4921-829b-6b079b70b1f3
8,What documents are needed for college admission?,I don't know the answer to that.,,"Typically, you need your high school transcrip...",1,1.209252,aa473cdc-c45c-496f-95df-164d4d147a75,ba63ac9b-6afe-45f5-9bb0-22c29fc9d0bf
9,How can I apply for college admission?,I don't have the information on how to apply f...,,You can apply for college admission through th...,1,1.227696,d86afc5d-fee3-44f2-bf16-3150426969c8,c0b93353-6c69-470b-8a50-d485376730e0


In [6]:
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name=dataset_name, 
        example_ids=[   # We pass in a specific list of example_ids
            # TODO: You will need to paste in your own example ids for this to work!
            "572163c6-51cc-48d1-932a-14b0d0fbf189",
            "16038691-db5c-47cb-8918-33e9a6dc30c4"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids"
)

LangSmithNotFoundError: Resource not found for /examples. HTTPError('404 Client Error: Not Found for url: https://api.smith.langchain.com/examples?offset=0&id=572163c6-51cc-48d1-932a-14b0d0fbf189&id=16038691-db5c-47cb-8918-33e9a6dc30c4&inline_s3_urls=True&limit=100&dataset=d7ceceb9-90d2-4db6-b6fb-0de6687d9b74', '{"detail":"Examples not found"}')

In [None]:
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name=dataset_name, 
        example_ids=[   # We pass in a specific list of example_ids
            # TODO: You will need to paste in your own example ids for this to work!
            "fe06737f-ae9a-440b-aee1-bfb1441dad11",
            "fc6c0c19-f8e7-4a8f-8e9b-8646bfe92cf4"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids"
)

In [None]:
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name=dataset_name, 
        example_ids=[   # We pass in a specific list of example_ids
            # TODO: You will need to paste in your own example ids for this to work!
            "b894201a-78cb-47b1-b84d-010b7a4190d3",
            "b6d78f73-b5dd-4fe7-a254-bc097fdea9e1"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids"
)

In [None]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

In [None]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

In [None]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME 
    }
)