# Experiments

### Setup

In [2]:
# Or you can use a .env file
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

True

Here is the RAG Application that we've been working with throughout this course

In [3]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio

# TODO: Configure this model!
MODEL_NAME = "gpt-4o"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

openai_client = OpenAI()

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embd = OpenAIEmbeddings()

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)

    # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content

### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We pipe our dataset examples (dict) to the shape of input that our function `langsmith_rag` takes (str) using a target function

In [5]:
from langsmith import evaluate, Client

client = Client()
dataset_name = "RAG Application Dataset"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs["output"]) < 1.5 * len(reference_outputs["output"])
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)

View the evaluation results for experiment: 'gpt-4o-1ed94a86' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/071097b9-5f0b-4058-9f08-66e3e793681a/compare?selectedSessions=7ff04b61-3cc5-4c69-8f2a-351d83bbe41e




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation, all...",,"Yes, LangSmith supports online evaluation as a...",1,2.859349,1b7662a9-223e-49f9-9847-d202687c5633,90ab3607-8d55-492e-b5f4-c9ae825ca06b
1,How do I pass metadata in with @traceable?,To pass metadata with the `@traceable` decorat...,,You can pass metadata with the @traceable deco...,0,3.562955,30aa0faa-c7f4-48be-918f-80b34b606b7f,680c9315-b3dc-467b-ae22-b2fd12d9e65c
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,6.098152,7b65a8c1-0ffb-4c66-8e3f-67f8292a7096,e341ca0c-0711-4677-877b-16c4d70938c4
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.016611,c805e821-7ed8-4348-aa93-7843de330100,3e864a8e-a214-4001-ab9e-a678b6f9c925
4,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,2.07807,eede1c97-8a01-4fab-839c-829515b8add2,ecf85237-3351-4bf9-8910-f29070dff03d
5,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,1,2.653023,338cf1ca-6f01-4d99-bd8e-db73b3a96f10,76f62d1b-26a6-4184-ac8c-287814add26e
6,What testing capabilities does LangSmith have?,LangSmith offers capabilities to run multiple ...,,LangSmith offers capabilities for creating dat...,1,2.60283,381cc6a3-7c93-483b-ae7b-cf35670412bd,8576e898-e73c-46b9-8f17-a0691c9c6902
7,Can LangSmith be used for finetuning and model...,"No, LangSmith is primarily designed for observ...",,"Yes, LangSmith can be used for fine-tuning and...",1,2.164712,94bdbb61-701f-411d-b674-906753ccf264,62da4403-ac61-48a1-b369-35e30af362f2
8,Does LangSmith support offline evaluation?,The provided context does not mention support ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.454102,c1dfe065-0886-441b-a07d-7952ae1295d3,39e482ae-301a-404e-babd-76a6432cbdf7
9,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,3.048716,f80059d7-12d5-4e94-b495-81c5a5bf86cd,c1df855f-0fd1-4852-9978-e996b9aea319


### Modifying your Application

Now, let's change our model to gpt-35-turbo and see how it performs!

Make this change, and then run this code snippet!

In [6]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-3.5-turbo"
)

View the evaluation results for experiment: 'gpt-3.5-turbo-cdeab676' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/071097b9-5f0b-4058-9f08-66e3e793681a/compare?selectedSessions=f4f40a59-914b-4fd4-ad26-1addeec442d0




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,1.77827,1b7662a9-223e-49f9-9847-d202687c5633,7523ed60-7421-42f5-bd4f-2351b7142349
1,How do I pass metadata in with @traceable?,"To pass metadata in with `@traceable`, you can...",,You can pass metadata with the @traceable deco...,0,3.684816,30aa0faa-c7f4-48be-918f-80b34b606b7f,d65e6a88-89e9-448f-a29b-66fa69d8ecbc
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,1.955177,7b65a8c1-0ffb-4c66-8e3f-67f8292a7096,156a4a76-28fc-4617-889d-44ac37a6f41f
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,1.837935,c805e821-7ed8-4348-aa93-7843de330100,7d34d242-ae0c-432f-b817-739e42417992
4,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.841267,eede1c97-8a01-4fab-839c-829515b8add2,1ad96b4e-ff79-4bf4-914e-161c83b8a038
5,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,1,2.950907,338cf1ca-6f01-4d99-bd8e-db73b3a96f10,bc66bfd5-fd31-4d44-9b16-c38a40a98e8e
6,What testing capabilities does LangSmith have?,LangSmith allows you to run multiple experimen...,,LangSmith offers capabilities for creating dat...,1,3.033927,381cc6a3-7c93-483b-ae7b-cf35670412bd,e49c5599-f87f-4fd9-9965-a46ad0a2218e
7,Can LangSmith be used for finetuning and model...,LangSmith is designed for LLM observability an...,,"Yes, LangSmith can be used for fine-tuning and...",1,1.965365,94bdbb61-701f-411d-b674-906753ccf264,977e6489-83b5-446d-998a-731b03d2b353
8,Does LangSmith support offline evaluation?,The provided context does not explicitly menti...,,"Yes, LangSmith supports offline evaluation thr...",1,1.669148,c1dfe065-0886-441b-a07d-7952ae1295d3,0404781e-8d56-4213-8728-c3dda479ae0e
9,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.136456,f80059d7-12d5-4e94-b495-81c5a5bf86cd,0688378b-7276-4d38-86bb-08ba24c8e136


### Running over Different pieces of Data

##### Dataset Version

You can execute an experiment on a specific version of a dataset in the sdk by using the `as_of` parameter in `list_examples`

Let's try running on just our initial dataset.

evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, as_of="initial dataset"),   # We use as_of to specify a version
    evaluators=[is_concise_enough],
    experiment_prefix="initial dataset version"
)

##### Dataset Split

You can run an experiment on a specific split of your dataset, let's try running on the Crucial Examples split.

##### Specific Data Points

You can specify individual data points to run an experiment over as well

### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [10]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

View the evaluation results for experiment: 'two repetitions-2cdb53eb' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/071097b9-5f0b-4058-9f08-66e3e793681a/compare?selectedSessions=e0fa3be1-a4ad-4756-bc27-dfc52f429c97




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation, pro...",,"Yes, LangSmith supports online evaluation as a...",1,2.380697,1b7662a9-223e-49f9-9847-d202687c5633,9d6b5f98-9f96-4682-aba6-d32b70b6a3ca
1,How do I pass metadata in with @traceable?,To pass metadata with the `@traceable` decorat...,,You can pass metadata with the @traceable deco...,0,4.247400,30aa0faa-c7f4-48be-918f-80b34b606b7f,7b72aee6-e071-44a3-9c45-63f94fc1d36d
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,2.768388,7b65a8c1-0ffb-4c66-8e3f-67f8292a7096,c6c18da3-d6cf-4ce3-9733-1b0471199c84
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.043296,c805e821-7ed8-4348-aa93-7843de330100,16fcabaa-60a5-403c-985d-3c5d1d44bc1c
4,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,2.971032,eede1c97-8a01-4fab-839c-829515b8add2,85742e76-8395-4360-9fd7-ee06466b7e85
...,...,...,...,...,...,...,...,...
59,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,2.170679,8409a9df-abd1-465a-8e57-87a49f7a9012,4532a3b2-0a1a-4f17-98ff-0a454280faab
60,Does LangSmith support offline evaluation?,The provided context does not mention offline ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.471712,8d2d01ef-de60-468e-84e1-c7df1aa3fb77,e69a9494-469f-4c7e-85fe-a9b739f3da3c
61,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.159068,c91778d6-37ad-4178-ad50-9531f4f5347b,a9334d2a-c5c0-4fea-a737-092e2998e5e3
62,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation by p...",,"Yes, LangSmith supports online evaluation as a...",0,2.120934,da49ab53-91c4-4957-aa50-c6e360ef63a4,cdd95b71-b996-44a3-b2ee-be4862423d7c


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [11]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

View the evaluation results for experiment: 'concurrency-7faeb534' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/071097b9-5f0b-4058-9f08-66e3e793681a/compare?selectedSessions=cb449a1d-6e53-432a-9f35-2e24734d73d1




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. It ...",,"Yes, LangSmith supports online evaluation as a...",1,2.147879,1b7662a9-223e-49f9-9847-d202687c5633,6c83b2ed-094e-4f04-ae91-153305bc266b
1,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,0,3.418271,30aa0faa-c7f4-48be-918f-80b34b606b7f,58b670c1-2ad4-4af0-b418-708ab01aaf13
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,3.975097,7b65a8c1-0ffb-4c66-8e3f-67f8292a7096,266f0269-1460-4d91-a087-35599284d3ae
3,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.882935,eede1c97-8a01-4fab-839c-829515b8add2,f621b037-8fbc-4d61-9a6c-fdec2284cb19
4,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,0,2.398068,338cf1ca-6f01-4d99-bd8e-db73b3a96f10,1726da69-acdd-48ef-bc5b-c626cfafd6db
5,What testing capabilities does LangSmith have?,LangSmith allows users to run multiple experim...,,LangSmith offers capabilities for creating dat...,1,1.906117,381cc6a3-7c93-483b-ae7b-cf35670412bd,e370084e-71ea-4252-a426-26b712a7c2ee
6,Can LangSmith be used for finetuning and model...,LangSmith is primarily designed for observabil...,,"Yes, LangSmith can be used for fine-tuning and...",1,1.813228,94bdbb61-701f-411d-b674-906753ccf264,57e05b19-bf66-4909-b46e-788a4e6c9ff2
7,Does LangSmith support offline evaluation?,The provided context does not mention support ...,,"Yes, LangSmith supports offline evaluation thr...",1,1.746143,c1dfe065-0886-441b-a07d-7952ae1295d3,d4c20b2a-6114-4cc8-b22b-7e3ebc3ef291
8,How can I trace with the @traceable decorator?,To trace with the @traceable decorator using L...,,To trace with the @traceable decorator in Pyth...,1,1.951237,f80059d7-12d5-4e94-b495-81c5a5bf86cd,547497fc-21f6-4a08-9062-107aaad9c27d
9,Is there a javascript language SDK?,"Yes, there is a JavaScript SDK for LangSmith, ...",,Yes. there is a Javascript language SDK!,0,1.300765,9fa188c8-d70f-40f6-a856-f035f761ff61,cc03e505-b8c9-434d-9573-c868bb4018b8


##### Metadata 

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [12]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME
    }
)

View the evaluation results for experiment: 'metadata added-4a283565' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/071097b9-5f0b-4058-9f08-66e3e793681a/compare?selectedSessions=05a1faaf-8ca3-4b1e-89b2-097bb9577997




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation, all...",,"Yes, LangSmith supports online evaluation as a...",1,1.732096,1b7662a9-223e-49f9-9847-d202687c5633,7050022a-699e-48a0-b792-0eadb494c11b
1,How do I pass metadata in with @traceable?,"To pass metadata in with `@traceable`, you can...",,You can pass metadata with the @traceable deco...,1,1.967885,30aa0faa-c7f4-48be-918f-80b34b606b7f,132d2a76-2f52-4334-9949-27f9939590fc
2,What is LangSmith used for in three sentences?,LangSmith is a platform designed for building ...,,LangSmith is a platform designed for the devel...,1,2.92945,7b65a8c1-0ffb-4c66-8e3f-67f8292a7096,92625b9f-f1c3-4951-9bb6-5b3ec66da08c
3,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,2.25257,c805e821-7ed8-4348-aa93-7843de330100,34a074f6-6627-4d42-9220-e3b1a2719e1b
4,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.71539,eede1c97-8a01-4fab-839c-829515b8add2,f3c0ec36-179e-4406-a21c-e600a8e733b7
5,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith using LangChain...,,To set up tracing to LangSmith while using Lan...,0,2.267294,338cf1ca-6f01-4d99-bd8e-db73b3a96f10,319d4aee-b7e1-40da-b251-d86c7cbae322
6,What testing capabilities does LangSmith have?,LangSmith allows for running multiple experime...,,LangSmith offers capabilities for creating dat...,1,1.726352,381cc6a3-7c93-483b-ae7b-cf35670412bd,aa744da5-8996-4248-834f-d4d9c8892119
7,Can LangSmith be used for finetuning and model...,LangSmith is primarily designed for LLM observ...,,"Yes, LangSmith can be used for fine-tuning and...",1,1.78988,94bdbb61-701f-411d-b674-906753ccf264,79a0f840-35fe-4a9f-9c6f-abe8db088bdf
8,Does LangSmith support offline evaluation?,The provided context does not specify if LangS...,,"Yes, LangSmith supports offline evaluation thr...",1,2.211323,c1dfe065-0886-441b-a07d-7952ae1295d3,826321f8-c3cd-4eba-96a4-24d3f646b425
9,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.216604,f80059d7-12d5-4e94-b495-81c5a5bf86cd,e40326fe-cee6-4ad9-92cb-5b616e7a55fd


In [21]:
from langsmith import evaluate, Client

client = Client()
dataset_id = "d10ba4e2-2a65-4dd9-a39d-88ada6c4efa3" 

examples = list(client.list_examples(dataset_id=dataset_id))
print(f"Number of examples: {len(examples)}")  

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs.get("output", "")) < 1.5 * len(reference_outputs.get("output", ""))
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    question_key = "question"
    return langsmith_rag(inputs.get(question_key, "")) 

evaluate(
    target_function,
    data=examples, 
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)


Number of examples: 10
View the evaluation results for experiment: 'gpt-4o-90beaf0e' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/d10ba4e2-2a65-4dd9-a39d-88ada6c4efa3/compare?selectedSessions=9c3d6a4b-1bdf-47c3-b162-2eafd759519a




0it [00:00, ?it/s]

Unnamed: 0,inputs.Question,outputs.output,error,reference.Output,feedback.is_concise,execution_time,example_id,id
0,How does a rook move?,"I'm sorry, I don't understand the question. Co...",,Rooks move any number of squares vertically or...,0,1.466587,42187db1-daee-42b7-832b-17881213e49a,1335bb19-3689-4c6d-8f34-2eb9218665e8
1,What is castling in chess?,"I'm sorry, I can't answer that without more co...",,Castling is a special move where the king move...,0,1.756279,54da8971-32c1-4218-b15d-d4fa6b615020,101ba529-a440-4551-a3c8-0c751bc21b78
2,How does the knight move?,"I'm sorry, but it seems like the context provi...",,The knight moves in an L-shape: two squares in...,0,1.730716,5571973e-7305-46bf-b16c-0922b5f7c935,942cc39c-5810-4625-a5ba-020d8013d36c
3,What are the moves for a pawn?,It looks like the question is missing. Could y...,,"Pawns move forward one square, but on their fi...",0,1.35555,61bd4092-f394-419b-a243-990027a8d620,c9b23ded-8cdf-4e83-b23c-6d06c09dda65
4,How do I start a game of chess?,"I'm sorry, but it seems like there isn't enoug...",,"To start a chess game, set up the board with w...",0,1.423096,7c5375ae-90d1-4bd7-93fe-f7673665813e,285c9c8f-353e-4769-8c7b-ffd997eaf954
5,What is check and checkmate?,"I'm sorry, but I can't determine the context o...",,Check is when the king is under threat of capt...,0,2.09303,8c4bd290-8429-4e68-8105-0ff822803e82,9ae9ddb6-dd2b-4efc-b4b1-b221b0352126
6,How does the queen move?,"I'm sorry, but it seems like the question wasn...",,The queen can move any number of squares horiz...,0,1.403024,c7f6fcd8-25cd-4f77-a905-7db937685e82,5e9626f2-8362-44f1-8c5a-f83e52803df6
7,How do bishops move?,"I'm sorry, the context provided doesn't contai...",,Bishops move any number of squares diagonally ...,0,1.347886,cb4cf025-b4d4-4b53-8cfe-c444df6b033f,ac833979-81f8-4929-a4e0-657080d55535
8,What are the rules for en passant?,It seems like the context provided is incomple...,,En passant allows a pawn to capture an opponen...,0,1.513717,e4878487-87dd-468c-9215-c0261668eec1,072f57bb-9807-4c22-b0c0-0084682c38ba
9,How does a game of chess end?,"I'm sorry, but there is no question provided. ...",,"A game of chess ends with checkmate, stalemate...",0,1.249015,f750bd8a-ff9c-4f76-8c71-8f104875fd89,c2b5dfc5-2f4e-42c4-82b7-f143f3eae95d


In [22]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={ 
        "model_name": MODEL_NAME
    }
)

View the evaluation results for experiment: 'metadata added-ebc949bc' at:
https://smith.langchain.com/o/feddc9d1-c3b7-40c4-a018-8da3f96e851a/datasets/d10ba4e2-2a65-4dd9-a39d-88ada6c4efa3/compare?selectedSessions=13f32be5-14e1-4eed-9b3b-da3af9e44446




0it [00:00, ?it/s]

Unnamed: 0,inputs.Question,outputs.output,error,reference.Output,feedback.is_concise,execution_time,example_id,id
0,How does a rook move?,"I'm sorry, but I don't have enough information...",,Rooks move any number of squares vertically or...,0,1.405203,42187db1-daee-42b7-832b-17881213e49a,031424e4-5c83-4997-8a41-8705c7369f4d
1,What is castling in chess?,"I'm sorry, but I need more information to answ...",,Castling is a special move where the king move...,0,1.528428,54da8971-32c1-4218-b15d-d4fa6b615020,55b66fa2-dcd7-48c7-83c9-71fcf0fd6d4f
2,How does the knight move?,"I'm sorry, but the provided context does not c...",,The knight moves in an L-shape: two squares in...,0,1.730738,5571973e-7305-46bf-b16c-0922b5f7c935,a121465c-a5f9-42cb-87c6-1892d08aab97
3,What are the moves for a pawn?,"I'm sorry, but it seems like the context provi...",,"Pawns move forward one square, but on their fi...",0,1.710924,61bd4092-f394-419b-a243-990027a8d620,1ddf2179-6452-4211-9d89-96941d043ae0
4,How do I start a game of chess?,I don't understand the question. Could you ple...,,"To start a chess game, set up the board with w...",0,1.347854,7c5375ae-90d1-4bd7-93fe-f7673665813e,6c2e0cf2-ff22-47aa-ba7d-d3d35374f130
5,What is check and checkmate?,"I'm sorry, but I don't have enough context to ...",,Check is when the king is under threat of capt...,0,2.629212,8c4bd290-8429-4e68-8105-0ff822803e82,5a4cf68b-73d5-4bbf-9ede-7d2f5df6bd0d
6,How does the queen move?,"I'm sorry, but I can't answer that as it seems...",,The queen can move any number of squares horiz...,0,1.605837,c7f6fcd8-25cd-4f77-a905-7db937685e82,4a694cf7-9c10-4046-a2d5-1f6a53d8842e
7,How do bishops move?,"I'm sorry, but I don't have enough information...",,Bishops move any number of squares diagonally ...,0,1.316433,cb4cf025-b4d4-4b53-8cfe-c444df6b033f,d38e112a-1685-4986-8a70-dff0d7885122
8,What are the rules for en passant?,"I'm sorry, but the context provided doesn't co...",,En passant allows a pawn to capture an opponen...,0,3.49756,e4878487-87dd-468c-9215-c0261668eec1,80ac5832-602d-402c-8389-1f49e24c71cb
9,How does a game of chess end?,"I'm sorry, the provided context does not conta...",,"A game of chess ends with checkmate, stalemate...",0,1.432524,f750bd8a-ff9c-4f76-8c71-8f104875fd89,f5e8adbd-c2db-465b-b90d-16ff3eabb978
