# Experiments

### Setup

In [1]:
# You can set them inline
import os
os.environ["OPENAI_API_KEY"] = "" # API key is sensitive to share
os.environ["LANGSMITH_API_KEY"] = "" # API key is sensitive to share
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langsmith-academy"

In [2]:
# Or you can use a .env file
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

False

In [3]:
!pip install langchain
!pip install langchain-community
!pip install langchain-openai
!pip install scikit-learn
!pip install openai

Collecting langchain-community
  Downloading langchain_community-0.3.30-py3-none-any.whl.metadata (3.0 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.3.30-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Here is the RAG Application that we've been working with throughout this course

In [4]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio

# TODO: Configure this model!
MODEL_NAME = "gpt-4o"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the latest question in the conversation.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
"""

openai_client = OpenAI()

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embd = OpenAIEmbeddings()

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)

    # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


Fetching pages: 100%|##########| 197/197 [00:37<00:00,  5.26it/s]


### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We pipe our dataset examples (dict) to the shape of input that our function `langsmith_rag` takes (str) using a target function

In [5]:
from langsmith import evaluate, Client

client = Client()
dataset_id = "e15cc577-5260-4be3-a7ab-258585878ade"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs["output"]) < 1.5 * len(reference_outputs["output"])
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data="e15cc577-5260-4be3-a7ab-258585878ade",
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)

View the evaluation results for experiment: 'gpt-4o-fce7d8ab' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=eda05b06-0b74-41e1-93f4-401c571faef3




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I trace with the @traceable decorator?,To trace with the @traceable decorator using t...,,To trace with the @traceable decorator in Pyth...,1,1.766822,1868b359-b3ee-4908-8655-ae9f0184bc56,a7a03b21-1b43-4431-8386-008452ce87d7
1,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,1.411823,78e2e0eb-f8fb-4d8e-b6a5-e6e1ca31808b,0f7a9617-c8d9-4cda-baaa-79b2e12b543a
2,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.868453,b4fd0e07-e10c-4462-bc59-1b1b24e76445,984e5653-bac0-45e3-91bd-56d9ddf11571
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.165362,ca92d9ca-b757-4ec9-b317-33c17f60046f,5414d3df-eaae-48ef-89e9-f2c1b70d2bb3
4,What testing capabilities does LangSmith have?,LangSmith allows running multiple experiments ...,,LangSmith offers capabilities for creating dat...,1,1.14806,fb838522-26d9-4966-b272-b687b41c4df7,3c9753f8-d2ed-4d03-a5b3-fea74537ec8c
5,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,1,1.31943,3d54ef8e-97e6-482d-97a4-2fb5171ecb1d,2949e386-3458-4fcb-9ced-5fa28b72e898
6,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. It...",,"Yes, LangSmith supports offline evaluation thr...",1,1.444862,5524be6a-278d-4d44-830c-49aa9a66fe4d,bd2804ec-2ca1-43c9-9e11-c3032a27d78c
7,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you can in...",,You can pass metadata with the @traceable deco...,0,1.840248,5a2f44cc-10d8-4d38-a734-3ddfea60fe24,495360ff-c797-462a-8a50-ccc08c364f32
8,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.173621,b317d976-030d-4bdc-ad85-71bde482f46c,a7ea734b-e5be-4296-8ae8-9404be8d64dd
9,Can LangSmith be used for finetuning and model...,"No, LangSmith is not designed for finetuning a...",,"Yes, LangSmith can be used for fine-tuning and...",1,1.187389,ce8a6050-8b40-4f24-b1c7-f81b3e4f23c0,15cd01c9-9790-463b-ab73-50f0eff78a53


### Modifying your Application

Now, let's change our model to gpt-35-turbo and see how it performs!

Make this change, and then run this code snippet!

In [11]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data="Meeting Transcripts",  # dataset name inside your LangSmith account
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-3.5-turbo"
)

View the evaluation results for experiment: 'gpt-3.5-turbo-4997fdc4' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/5925bc48-4e25-427a-9231-dfdbd04387ec/compare?selectedSessions=0a33896d-db55-4395-8545-48968addf828




0it [00:00, ?it/s]

ERROR:langsmith.evaluation._runner:Error running target function: 'question'
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/langsmith/evaluation/_runner.py", line 1923, in _forward
    fn(*args, langsmith_extra=langsmith_extra)
  File "/tmp/ipython-input-1093645800.py", line 5, in target_function
    return langsmith_rag(inputs["question"])
                         ~~~~~~^^^^^^^^^^^^
KeyError: 'question'
ERROR:langsmith.evaluation._runner:Error running evaluator <DynamicRunEvaluator is_concise_enough> on run 59b85afc-4797-4b35-a9af-1e49246d2765: TypeError("object of type 'NoneType' has no len()")
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/langsmith/evaluation/_runner.py", line 1619, in _run_evaluators
    evaluator_response = evaluator.evaluate_run(  # type: ignore[call-arg]
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/langsmith/evalu

Unnamed: 0,inputs.transcript,outputs.output,error,feedback.wrapper,execution_time,example_id,id
0,"Bob and Mr. Patel (CLOSED DEAL): Bob: Hello, M...",,KeyError('question')\n\nTraceback (most recent...,,0.002431,4f803b9f-c605-4e41-99fc-3cb062d00587,59b85afc-4797-4b35-a9af-1e49246d2765
1,"Bob and Ms. Thompson (NO DEAL): Bob: Hi, Ms. T...",,KeyError('question')\n\nTraceback (most recent...,,0.001499,55dc1cad-8516-4d80-b16c-634d8cacf72b,af81ec9a-d84f-4f1e-bfaa-b68ecb3ce05b
2,Bob and Mr. Johnson (CLOSED DEAL): Bob: Good m...,,KeyError('question')\n\nTraceback (most recent...,,0.001266,6211725a-f93b-4d98-8b09-a6c89e421078,ab14f6c6-aea1-4c82-9d2b-aa571206de3f
3,Bob and Mr. Carter (CLOSED DEAL): Bob: Welcome...,,KeyError('question')\n\nTraceback (most recent...,,0.000489,6efb2329-d624-4468-beba-e41134069113,1a00ab35-a0de-443a-a09b-e5e1369055cc
4,Bob and Ms. Nguyen (NO DEAL): Bob: Good aftern...,,KeyError('question')\n\nTraceback (most recent...,,0.000656,8d1c7d5f-ff17-4fc9-acf6-94bcdfce9fee,9bd4d20a-d6c4-43b6-9519-a68c048a06c4


### Running over Different pieces of Data

##### Dataset Version

You can execute an experiment on a specific version of a dataset in the sdk by using the `as_of` parameter in `list_examples`

Let's try running on just our initial dataset.

In [28]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name="RAG Application Golden Dataset", as_of="initial dataset"),
    evaluators=[is_concise_enough],
    experiment_prefix="Initial dataset"
)

View the evaluation results for experiment: 'Initial dataset-89b2f2a2' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=a075d071-af44-41ca-b185-ae5980abe822




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.562818,3392f6ef-a421-4bee-b347-7d5556480698,b65e67d9-c502-4acb-996d-183338d9b07c
1,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in your...,,To trace with the @traceable decorator in Pyth...,1,1.985098,5b2929e1-6b02-4bd5-aada-fc208a08dc28,1f71dd85-d51a-4a9e-b3b2-79a6494ec675
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,1.520779,7f031e8d-2536-42b0-9381-827d46b7f048,f54dd636-552d-4037-bc5c-56720d7aa5d4
3,Can LangSmith be used for finetuning and model...,LangSmith is designed for LLM observability an...,,"Yes, LangSmith can be used for fine-tuning and...",1,1.832546,80ef51fb-9c02-4e83-9b33-34603867ea0a,771d284b-4bb3-4296-98dc-80cc5ac39a1a
4,How do I pass metadata in with @traceable?,"To pass metadata in with `@traceable`, you sho...",,You can pass metadata with the @traceable deco...,1,1.722213,8b49c7b1-1535-45f0-b87f-fc2d3879b681,3b249125-6bc0-46ae-9dea-8c1f43bfcba7
5,What testing capabilities does LangSmith have?,LangSmith allows users to run multiple experim...,,LangSmith offers capabilities for creating dat...,1,1.14746,915513f0-5215-46c4-9ba2-5f063706bee2,c8c232e2-e20f-4e42-8d0e-879ce98d2d7a
6,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,0,4.701012,9ac0937d-6b53-4656-a9d9-ac3b25ee59b0,41e69c24-5880-40aa-94f1-412478f6d635
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.547458,9acc4f8c-7528-4b88-9b4a-683033a3a16f,6e8ef9c1-3d66-42f5-b9f5-6a745085e6a5
8,Does LangSmith support offline evaluation?,The provided context primarily discusses onlin...,,"Yes, LangSmith supports offline evaluation thr...",1,1.470351,c7dda75f-b182-4b11-920b-db6c12c63b39,09fa1bed-c610-4db2-be85-b5ce47ae5202
9,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.083499,fa99e052-566d-498c-af32-0d5dfc91c148,09d4c69c-12c5-4d54-bf34-cb5ba82a920f


##### Dataset Split

You can run an experiment on a specific split of your dataset, let's try running on the Crucial Examples split.

In [31]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name="RAG Application Golden Dataset", splits=["crucial"]),  # We pass in a list of Splits
    evaluators=[is_concise_enough],
    experiment_prefix="Crucial"
)

View the evaluation results for experiment: 'Crucial-91e551c4' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=81e678fe-441c-4ab0-ad2a-969c8806f1f9




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.21688,1868b359-b3ee-4908-8655-ae9f0184bc56,4ce3c205-0dc8-4e49-930c-9edd9549f21c
1,Does LangSmith support offline evaluation?,The provided context does not explicitly menti...,,"Yes, LangSmith supports offline evaluation thr...",1,1.430444,5524be6a-278d-4d44-830c-49aa9a66fe4d,5cf2f11b-d193-47b1-ba07-932407480254
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,1.793813,b317d976-030d-4bdc-ad85-71bde482f46c,4e82e98d-6549-479f-a133-21f247c4f45d
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.862795,ca92d9ca-b757-4ec9-b317-33c17f60046f,4cc923c0-acd8-4c12-bc32-1b4d432e59f5
4,Is there a Javascript Langsmith SDK,"Yes, there is a JavaScript SDK for LangSmith.",,"Yes, there is a Javascript Langsmith SDK!",1,0.601329,cc388aab-a574-48d3-b1ca-050b4e55eca4,86d60b01-d394-4aa4-ade6-a98c3b2cfa2d


##### Specific Data Points

You can specify individual data points to run an experiment over as well

In [34]:
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name= "RAG Application Golden Dataset",
        example_ids=[   # We pass in a specific list of example_ids
            # TODO: You will need to paste in your own example ids for this to work!
            "1868b359-b3ee-4908-8655-ae9f0184bc56",
            "ca92d9ca-b757-4ec9-b317-33c17f60046f"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids"
)

View the evaluation results for experiment: 'two specific example ids-e3faca88' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=16c8bb23-fbed-43f3-85c5-3f45bb0d3291




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.051688,1868b359-b3ee-4908-8655-ae9f0184bc56,0e6ba976-6e74-4740-b5d0-0473626f59dd
1,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.956684,ca92d9ca-b757-4ec9-b317-33c17f60046f,178b23c2-8dc9-45df-98d9-a026b3e7b309


### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [35]:
evaluate(
    target_function,
    data="RAG Application Golden Dataset",
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

View the evaluation results for experiment: 'two repetitions-2f9431a2' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=2d5bbfa6-10bb-441a-939f-57407975255a




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ne...",,To trace with the @traceable decorator in Pyth...,1,2.374679,1868b359-b3ee-4908-8655-ae9f0184bc56,5d28bcf8-8020-49d7-b29f-3820ba109015
1,Does LangSmith support offline evaluation?,The provided context does not specify if LangS...,,"Yes, LangSmith supports offline evaluation thr...",1,2.087093,5524be6a-278d-4d44-830c-49aa9a66fe4d,566541bf-cb50-44fd-ac03-4b6782aadac4
2,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.604643,b317d976-030d-4bdc-ad85-71bde482f46c,42f395bb-a0d1-4ce4-b293-54691fa43a3c
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.940506,ca92d9ca-b757-4ec9-b317-33c17f60046f,8d262221-49da-486b-a43e-ce5c207fde01
4,Is there a Javascript Langsmith SDK,"Yes, there is a JavaScript/TypeScript (JS/TS) ...",,"Yes, there is a Javascript Langsmith SDK!",0,0.834333,cc388aab-a574-48d3-b1ca-050b4e55eca4,1bfa4e37-8082-460f-b4e9-77f28f4c5e3a
5,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,1.782845,78e2e0eb-f8fb-4d8e-b6a5-e6e1ca31808b,6e856d88-0564-4cf9-8db6-f9032fd4afe0
6,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.462707,b4fd0e07-e10c-4462-bc59-1b1b24e76445,2b65131d-afe5-4c26-9fa1-9d7e04aa6c7f
7,What testing capabilities does LangSmith have?,LangSmith allows for running multiple experime...,,LangSmith offers capabilities for creating dat...,1,0.986781,fb838522-26d9-4966-b272-b687b41c4df7,436f1b72-f93c-46a3-b47a-5c365e88dd45
8,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,2.850469,3d54ef8e-97e6-482d-97a4-2fb5171ecb1d,b2e6edd3-e6e6-4ffe-8ba4-715b7af81ffc
9,How do I pass metadata in with @traceable?,"To pass metadata with @traceable in LangSmith,...",,You can pass metadata with the @traceable deco...,1,2.315801,5a2f44cc-10d8-4d38-a734-3ddfea60fe24,ff141964-f2d8-4571-9029-09e8a6da5126


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [36]:
evaluate(
    target_function,
    data="RAG Application Golden Dataset",
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

View the evaluation results for experiment: 'concurrency-0d8b5c50' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=1f05cc06-226d-4fae-874c-93b1f75310f2




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support offline evaluation?,The provided context does not mention offline ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.260514,5524be6a-278d-4d44-830c-49aa9a66fe4d,c0302016-6715-4062-8620-d8e6f39f4607
1,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.497214,b317d976-030d-4bdc-ad85-71bde482f46c,232d75af-d266-4c76-9f9a-3b3fd266693a
2,Is there a Javascript Langsmith SDK,"Yes, there is a JavaScript/TypeScript SDK avai...",,"Yes, there is a Javascript Langsmith SDK!",0,0.722813,cc388aab-a574-48d3-b1ca-050b4e55eca4,64c09461-9adf-47cd-b629-527efbf12a3b
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.505896,ca92d9ca-b757-4ec9-b317-33c17f60046f,fc82616f-6522-4450-9eab-c66200d1170b
4,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,3.766875,1868b359-b3ee-4908-8655-ae9f0184bc56,e794e158-decd-401d-873e-4e3cb640b54c
5,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.217379,b4fd0e07-e10c-4462-bc59-1b1b24e76445,ca9b63dc-d43b-4d72-987d-d9217112f45a
6,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,2.074592,78e2e0eb-f8fb-4d8e-b6a5-e6e1ca31808b,ec22a99f-bac3-4bb4-9ee5-3fc3b429b6d5
7,What testing capabilities does LangSmith have?,LangSmith allows users to run multiple experim...,,LangSmith offers capabilities for creating dat...,1,0.820311,fb838522-26d9-4966-b272-b687b41c4df7,68245e7c-b0fc-4e30-8373-d67442b77144
8,Can LangSmith be used for finetuning and model...,"No, LangSmith is not designed for finetuning o...",,"Yes, LangSmith can be used for fine-tuning and...",1,0.929543,ce8a6050-8b40-4f24-b1c7-f81b3e4f23c0,97e1e9f8-5d10-4026-bd41-0032e896ffe4
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,0,2.702279,3d54ef8e-97e6-482d-97a4-2fb5171ecb1d,66c1066b-abc2-4f20-876b-015bd81bc0c1


##### Metadata

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [37]:
evaluate(
    target_function,
    data="RAG Application Golden Dataset",
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME
    }
)

View the evaluation results for experiment: 'metadata added-fa6617d0' at:
https://smith.langchain.com/o/c1b67c13-bec2-4f98-8bbf-502c78449186/datasets/e15cc577-5260-4be3-a7ab-258585878ade/compare?selectedSessions=6250f336-d2b1-4451-891b-614679c5df91




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,1.859,1868b359-b3ee-4908-8655-ae9f0184bc56,dc0f03e5-cfed-46c8-9e9f-177e7ec3c15d
1,Does LangSmith support offline evaluation?,The provided context doesn't explicitly mentio...,,"Yes, LangSmith supports offline evaluation thr...",1,1.262746,5524be6a-278d-4d44-830c-49aa9a66fe4d,1f2ae6a9-c364-44a5-bbfa-b18cd5e91fdc
2,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,2.771505,b317d976-030d-4bdc-ad85-71bde482f46c,d6eda1ae-f1f9-472b-adbd-4e1e30bd0da6
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.714412,ca92d9ca-b757-4ec9-b317-33c17f60046f,976ccbf4-995a-417a-b916-fa030fb6d438
4,Is there a Javascript Langsmith SDK,"Yes, there is a JavaScript/TypeScript SDK avai...",,"Yes, there is a Javascript Langsmith SDK!",0,1.438662,cc388aab-a574-48d3-b1ca-050b4e55eca4,16531581-ee4f-4ba4-9477-afb5a8ce8993
5,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,1.979646,78e2e0eb-f8fb-4d8e-b6a5-e6e1ca31808b,423cbc2d-16c0-4cfa-bee7-ca65e07c6a77
6,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. Onl...",,"Yes, LangSmith supports online evaluation as a...",1,2.658897,b4fd0e07-e10c-4462-bc59-1b1b24e76445,21de36e1-56ed-42b5-9631-fce2bfcc1f56
7,What testing capabilities does LangSmith have?,LangSmith allows you to run multiple experimen...,,LangSmith offers capabilities for creating dat...,1,0.971981,fb838522-26d9-4966-b272-b687b41c4df7,f72cefcc-c6f4-4781-b5d2-dd268af0b0d4
8,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,0,2.015861,3d54ef8e-97e6-482d-97a4-2fb5171ecb1d,204751e7-f0fe-4eeb-87a4-a1f3589e2ae3
9,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you can in...",,You can pass metadata with the @traceable deco...,0,4.35817,5a2f44cc-10d8-4d38-a734-3ddfea60fe24,3918d7a8-dd0d-4f51-ae9a-476d422cc896
