# Experiments

### Setup

In [1]:
# Or you can use a .env file
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

True

Here is the RAG Application that we've been working with throughout this course

In [5]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio

# TODO: Configure this model!
MODEL_NAME = "gpt-3.5-turbo"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

openai_client = OpenAI()

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embd = OpenAIEmbeddings()

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)

    # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We pipe our dataset examples (dict) to the shape of input that our function `langsmith_rag` takes (str) using a target function

In [4]:
from langsmith import evaluate, Client

client = Client()
dataset_name = "TestDB"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs["output"]) < 1.5 * len(reference_outputs["output"])
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)

View the evaluation results for experiment: 'gpt-4o-7218f362' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=c97c0d9d-76ec-4fbb-9fd7-79a65c4d6e21




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,What are the benefits of using LangSmith for L...,LangSmith offers seamless integration with Lan...,,The benefits of using LangSmith for LLM develo...,1,3.045672,5b5dc1a5-d646-4752-91a2-41869ff00073,a206e09d-94eb-4900-bf95-d80110450863
1,How can I integrate LangSmith with existing ma...,LangSmith can be integrated with existing mach...,,To integrate LangSmith with existing machine l...,1,3.857788,05dc2df8-c3e7-4a76-919e-5272836ab243,09c07509-22ac-4cea-a5bc-4adb8e13c5df
2,What tools does LangSmith provide for debuggin...,"LangSmith provides tools for tracing, which gi...",,LangSmith offers a variety of tools for debugg...,1,3.076697,d42d9301-cc00-4913-bed2-fd77a99cad8b,2fd08ef8-5d4d-46da-b90c-102fcb4d30a9
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.129232,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,183a2983-d782-403a-9af7-5b5bbdd686dd
4,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,2.365204,344eceea-8ee6-40a1-807d-7991d0daffff,5af573b3-0562-4be6-aa79-fc9ccde542eb
5,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you can us...",,You can pass metadata with the @traceable deco...,0,5.23199,41e88b84-6db8-4279-8fac-2dd6712e307c,b37cb3d5-ae16-421e-90a0-3c2ca503170a
6,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.46004,5c231c7a-7243-4dc8-bec3-780d534cd0f3,30509648-d79c-49cc-9307-53801d576121
7,Can LangSmith be used for finetuning and model...,LangSmith is primarily a platform for LLM obse...,,"Yes, LangSmith can be used for fine-tuning and...",1,3.150949,70906009-6d20-4796-bfa8-07017675e478,58e14180-ace6-4dcd-aa5d-e52693ccdc9c
8,Does LangSmith support offline evaluation?,The provided context does not mention support ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.83844,b2ecd2d6-db1a-4010-90ba-68c00f2202e4,8ec9c347-e2eb-475d-83b1-166ef719604a
9,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.76748,bc7e17ef-d076-4ef3-812d-b1d8f5f6739e,55748595-5239-45f3-8010-0ed22f865fe7


### Modifying your Application

Now, let's change our model to gpt-35-turbo and see how it performs!

Make this change, and then run this code snippet!

In [6]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-3.5-turbo"
)

View the evaluation results for experiment: 'gpt-3.5-turbo-eae44d15' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=65d072fa-e555-4555-8368-59f19f43cd8f




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,What are the benefits of using LangSmith for L...,LangSmith offers seamless integration with Lan...,,The benefits of using LangSmith for LLM develo...,1,1.58985,5b5dc1a5-d646-4752-91a2-41869ff00073,52ea82cb-9917-4db7-954c-634a63e17ac9
1,How can I integrate LangSmith with existing ma...,You can integrate LangSmith with your existing...,,To integrate LangSmith with existing machine l...,0,2.072453,05dc2df8-c3e7-4a76-919e-5272836ab243,32dc5a3e-5c5a-4a95-964a-23132441834c
2,What tools does LangSmith provide for debuggin...,LangSmith provides the LangSmith SDK (Python a...,,LangSmith offers a variety of tools for debugg...,1,2.257108,d42d9301-cc00-4913-bed2-fd77a99cad8b,14780b0e-57a7-4716-893e-ff51ecd1c388
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.736093,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,02d543de-101b-4dc1-8a5c-0c2c3f30211f
4,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,2.043456,344eceea-8ee6-40a1-807d-7991d0daffff,14fd5131-f445-4a29-bedb-715bf112c135
5,How do I pass metadata in with @traceable?,To pass metadata with the @traceable decorator...,,You can pass metadata with the @traceable deco...,1,1.599374,41e88b84-6db8-4279-8fac-2dd6712e307c,3ec0f297-c6f4-4dfc-a9ba-6083d0885e70
6,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ne...",,To trace with the @traceable decorator in Pyth...,1,2.079151,5c231c7a-7243-4dc8-bec3-780d534cd0f3,013beea4-94ef-4abe-b1b5-50078165a2e8
7,Can LangSmith be used for finetuning and model...,LangSmith is mainly designed for observability...,,"Yes, LangSmith can be used for fine-tuning and...",0,2.141378,70906009-6d20-4796-bfa8-07017675e478,09e6dc2b-a391-46d6-ba32-9dd4c8f2fd70
8,Does LangSmith support offline evaluation?,LangSmith does not support offline evaluation....,,"Yes, LangSmith supports offline evaluation thr...",1,2.257762,b2ecd2d6-db1a-4010-90ba-68c00f2202e4,e660a37c-c4f8-4929-bbed-30639617f230
9,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation thro...",,"Yes, LangSmith supports online evaluation as a...",0,2.542366,bc7e17ef-d076-4ef3-812d-b1d8f5f6739e,c98ed4e0-2ff2-4325-811f-e0e0e780726b


### Running over Different pieces of Data

##### Dataset Version

You can execute an experiment on a specific version of a dataset in the sdk by using the `as_of` parameter in `list_examples`

Let's try running on just our initial dataset.

##### Dataset Split

You can run an experiment on a specific split of your dataset, let's try running on the Crucial Examples split.

In [8]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, splits=["Critical Examples"]),  # We pass in a list of Splits
    evaluators=[is_concise_enough],
    experiment_prefix="Crucial Examples split"
)

View the evaluation results for experiment: 'Crucial Examples split-732e9870' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=02d62248-575a-452d-a0e1-f534b466fcc5




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I integrate LangSmith with existing ma...,You can integrate LangSmith with existing mach...,,To integrate LangSmith with existing machine l...,1,1.832473,05dc2df8-c3e7-4a76-919e-5272836ab243,d1815b17-4102-4421-b7bb-b789655113bc
1,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.800298,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,d664d98b-b823-40f1-a482-1f745965c564
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,1.56032,344eceea-8ee6-40a1-807d-7991d0daffff,5fa96eec-a384-4e58-a36c-c7070a51366b
3,What are the benefits of using LangSmith for L...,LangSmith offers seamless integration with Lan...,,The benefits of using LangSmith for LLM develo...,1,1.918514,5b5dc1a5-d646-4752-91a2-41869ff00073,3116b783-7fad-4569-b633-e4087606936e
4,What tools does LangSmith provide for debuggin...,LangSmith provides tools for debugging LLM app...,,LangSmith offers a variety of tools for debugg...,0,2.070926,d42d9301-cc00-4913-bed2-fd77a99cad8b,ea5bb949-53cd-4706-a742-4245137c947e


##### Specific Data Points

You can specify individual data points to run an experiment over as well

### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [7]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

View the evaluation results for experiment: 'two repetitions-a8c047ef' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=f08144d7-ddf8-47fc-925e-2201cba05f0c




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I integrate LangSmith with existing ma...,You can integrate LangSmith with existing mach...,,To integrate LangSmith with existing machine l...,1,2.130409,05dc2df8-c3e7-4a76-919e-5272836ab243,4e0e5c10-8a56-4186-bd7b-3da9cfe25439
1,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.378818,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,a9ebadae-51c8-469c-a182-342ce1302aff
2,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.738592,344eceea-8ee6-40a1-807d-7991d0daffff,29b35a63-dd66-4fb5-b0e4-9c25f9c7f64b
3,What are the benefits of using LangSmith for L...,LangSmith provides seamless integration with L...,,The benefits of using LangSmith for LLM develo...,1,1.587132,5b5dc1a5-d646-4752-91a2-41869ff00073,6de5d96c-af08-499e-ba76-e5cb2b970da2
4,What tools does LangSmith provide for debuggin...,LangSmith provides tools such as the LangSmith...,,LangSmith offers a variety of tools for debugg...,0,6.398594,d42d9301-cc00-4913-bed2-fd77a99cad8b,0a1637d3-40cd-44a7-ba22-a8bbd5957835
5,How do I pass metadata in with @traceable?,"To pass metadata in with @traceable, you can i...",,You can pass metadata with the @traceable deco...,1,8.787112,41e88b84-6db8-4279-8fac-2dd6712e307c,cea35909-ebf8-4410-82d2-84a2bc3915f4
6,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,1.876831,5c231c7a-7243-4dc8-bec3-780d534cd0f3,20253edb-daf2-4d44-997d-deb6ef211bd3
7,Can LangSmith be used for finetuning and model...,LangSmith is primarily focused on observabilit...,,"Yes, LangSmith can be used for fine-tuning and...",1,7.975329,70906009-6d20-4796-bfa8-07017675e478,a37e504b-9bb2-4ac3-adac-53a97280c389
8,Does LangSmith support offline evaluation?,LangSmith supports online evaluations for real...,,"Yes, LangSmith supports offline evaluation thr...",0,7.68606,b2ecd2d6-db1a-4010-90ba-68c00f2202e4,35404e7e-406e-461f-b8b0-02744ec92f2e
9,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation thro...",,"Yes, LangSmith supports online evaluation as a...",0,1.604232,bc7e17ef-d076-4ef3-812d-b1d8f5f6739e,5c4a9e1a-8439-447c-8915-e2094f4a34d8


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [12]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

View the evaluation results for experiment: 'concurrency-2cabe782' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=56e711e0-ba62-4548-a2e6-df08a4783d22




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.924507,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,0d1e4a18-2598-4096-9921-f015e6376418
1,How can I integrate LangSmith with existing ma...,LangSmith is a platform for building productio...,,To integrate LangSmith with existing machine l...,1,1.938701,05dc2df8-c3e7-4a76-919e-5272836ab243,92890ca0-9025-4cf8-8b8d-5a971932d31b
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,2.365328,344eceea-8ee6-40a1-807d-7991d0daffff,95e5ba90-baa5-40f2-b6ba-407b58b8dec1
3,What are the benefits of using LangSmith for L...,LangSmith seamlessly integrates with LangChain...,,The benefits of using LangSmith for LLM develo...,1,1.461975,5b5dc1a5-d646-4752-91a2-41869ff00073,f3b3cb1e-8b16-4d3c-ac92-68d57c6f1347
4,What tools does LangSmith provide for debuggin...,LangSmith provides tools such as LangSmith SDK...,,LangSmith offers a variety of tools for debugg...,1,1.760743,d42d9301-cc00-4913-bed2-fd77a99cad8b,d3208e8c-8674-4591-9f5e-46e03c8ed8f2
5,How do I pass metadata in with @traceable?,"To pass metadata with @traceable in LangSmith,...",,You can pass metadata with the @traceable deco...,1,1.518324,41e88b84-6db8-4279-8fac-2dd6712e307c,096fba32-73d5-4c89-9546-cac22efb8df2
6,Can LangSmith be used for finetuning and model...,"LangSmith is primarily focused on monitoring, ...",,"Yes, LangSmith can be used for fine-tuning and...",1,1.647049,70906009-6d20-4796-bfa8-07017675e478,bf30064b-8246-41fd-8e2e-46e5a76d61e5
7,Does LangSmith support offline evaluation?,LangSmith does not support offline evaluation....,,"Yes, LangSmith supports offline evaluation thr...",1,2.145998,b2ecd2d6-db1a-4010-90ba-68c00f2202e4,d671b535-cb04-4b45-8d5d-0cdbceb376db
8,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you si...",,To trace with the @traceable decorator in Pyth...,1,2.691919,5c231c7a-7243-4dc8-bec3-780d534cd0f3,75548668-1b97-4b26-8648-2861114c7190
9,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation thro...",,"Yes, LangSmith supports online evaluation as a...",0,1.732612,bc7e17ef-d076-4ef3-812d-b1d8f5f6739e,b12495ac-b9bc-4953-bcf5-49db5c92b8c2


##### Metadata 

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [11]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME
    }
)

View the evaluation results for experiment: 'metadata added-ec2834d2' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=1a568c1d-8ef3-4d62-8d0f-6f358633ae24




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I integrate LangSmith with existing ma...,LangSmith is a platform for building LLM appli...,,To integrate LangSmith with existing machine l...,1,2.253515,05dc2df8-c3e7-4a76-919e-5272836ab243,3d2ef3df-c787-4b27-aea1-18d9e58455be
1,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",0,11.858699,22ca4c4e-d54a-4bb2-8e79-7013a9bb9f9c,c3c8cd51-1c9b-4856-a275-9ef9b73d125a
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,2.968303,344eceea-8ee6-40a1-807d-7991d0daffff,b96f6ef1-91ed-4b5c-a317-132b4814cfff
3,What are the benefits of using LangSmith for L...,LangSmith offers seamless integration with Lan...,,The benefits of using LangSmith for LLM develo...,1,1.639302,5b5dc1a5-d646-4752-91a2-41869ff00073,bd5af564-35ce-41a6-aa29-38cabad3ea94
4,What tools does LangSmith provide for debuggin...,LangSmith provides tools like the LangSmith SD...,,LangSmith offers a variety of tools for debugg...,0,2.351697,d42d9301-cc00-4913-bed2-fd77a99cad8b,c699670c-2e22-4c73-819f-f8ac26cff0af
5,How do I pass metadata in with @traceable?,"To pass metadata with @traceable, you can use ...",,You can pass metadata with the @traceable deco...,1,1.610261,41e88b84-6db8-4279-8fac-2dd6712e307c,8e9aa75b-ab94-4ea4-ba1c-f5c46eaea820
6,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ca...",,To trace with the @traceable decorator in Pyth...,1,1.760986,5c231c7a-7243-4dc8-bec3-780d534cd0f3,849ac41c-81cc-457f-b83a-76c9d011ce9f
7,Can LangSmith be used for finetuning and model...,LangSmith is primarily focused on observabilit...,,"Yes, LangSmith can be used for fine-tuning and...",0,1.605172,70906009-6d20-4796-bfa8-07017675e478,8cec2b9a-10ae-444d-b238-7b03b4d5cf76
8,Does LangSmith support offline evaluation?,LangSmith does not support offline evaluation ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.870259,b2ecd2d6-db1a-4010-90ba-68c00f2202e4,07fa67eb-8345-4c00-96a9-6a3fad06611d
9,Does LangSmith support online evaluation?,LangSmith supports online evaluation through i...,,"Yes, LangSmith supports online evaluation as a...",0,2.450691,bc7e17ef-d076-4ef3-812d-b1d8f5f6739e,d35908c6-6ec0-4dbe-8cf7-a76d5ebb0be9


In [10]:
# Experiment: Testing Different Temperature Values

TEMPERATURES = [0.0, 0.3, 0.7, 1.0]

@traceable(run_type="llm")
def call_openai_with_temp(messages: List[dict], temperature: float) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=temperature
    )

for temp in TEMPERATURES:
    @traceable(run_type="chain")
    def langsmith_rag_temp(question: str, t=temp):
        documents = retrieve_documents(question)
        formatted_docs = "\n\n".join(doc.page_content for doc in documents)
        messages = [
            {"role": "system", "content": RAG_SYSTEM_PROMPT},
            {"role": "user", "content": f"Context: {formatted_docs} \n\n Question: {question}"}
        ]
        response = call_openai_with_temp(messages, t)
        return response.choices[0].message.content
    
    def target_function(inputs: dict):
        return langsmith_rag_temp(inputs["question"])
    
    evaluate(
        target_function,
        data=dataset_name,
        evaluators=[is_concise_enough],
        experiment_prefix=f"temperature-{temp}",
        metadata={"temperature": temp, "model_name": MODEL_NAME}
    )

View the evaluation results for experiment: 'temperature-0.0-93b6a44e' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=d0850e05-5cd4-47e8-91f3-33e968d3552d




0it [00:00, ?it/s]

View the evaluation results for experiment: 'temperature-0.3-a6297d42' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=8eb0a993-9d34-4ad9-8e53-acd71fe66f8d




0it [00:00, ?it/s]

View the evaluation results for experiment: 'temperature-0.7-454bfdeb' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=4ab3ddd1-c36f-4695-b2b2-f7a40c64adc7




0it [00:00, ?it/s]

View the evaluation results for experiment: 'temperature-1.0-1d40b6f2' at:
https://smith.langchain.com/o/4ffb8661-bc93-48d9-919b-d7dd87377bc1/datasets/eca4f364-f2f9-4360-9679-2ba78775775c/compare?selectedSessions=b1238d26-d17b-47e3-9b00-a3a91da404c2




0it [00:00, ?it/s]