# Experiments

### Setup

In [None]:
# You can set them inline: fill in the empty placeholders below.
import os

# Settings to export into the process environment. `ANTHROPIC_API_KEY` is listed
# because the RAG application below builds its client with a bare `Anthropic()`,
# which is expected to read its credentials from that variable.
_INLINE_SETTINGS = {
    "ANTHROPIC_API_KEY": "",
    "OPENAI_API_KEY": "",  # not read by any code visible in this notebook; kept for compatibility
    "LANGSMITH_API_KEY": "",
    "LANGSMITH_TRACING": "true",
    "LANGSMITH_PROJECT": "langsmith-academy",
}

# Only export values that were actually filled in: writing "" would overwrite a
# real key that is already present in the environment (for example from a .env file).
for _setting_name, _setting_value in _INLINE_SETTINGS.items():
    if _setting_value:
        os.environ[_setting_name] = _setting_value

In [1]:
# Or you can use a .env file
import os
from dotenv import load_dotenv

# `override=True` lets values from the .env file replace variables that are
# already set in the process environment.
load_dotenv(dotenv_path="../../../.env", override=True)

# Extra process-level settings applied after the .env file is loaded.
# NOTE(review): the USER_AGENT value looks arbitrary; presumably any non-empty
# string is enough for the web loader used later — confirm.
# NOTE(review): TOKENIZERS_PARALLELISM="false" presumably quiets the HuggingFace
# tokenizers parallelism warning — confirm.
os.environ.update(
    {
        "USER_AGENT": "496",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

In [None]:
import warnings
import numpy as np

# Hide RuntimeWarnings that are raised from modules whose name starts with "sklearn"
warnings.filterwarnings(action="ignore", category=RuntimeWarning, module="sklearn")

# Tell numpy to stay silent on these floating-point conditions as well.
# Left as the cell's last expression, so the previous settings are displayed.
np.seterr(over="ignore", invalid="ignore", divide="ignore")

Here is the RAG Application that we've been working with throughout this course

In [2]:
# Cell 3: RAG Application with Anthropic + HuggingFace
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langsmith import traceable
from anthropic import Anthropic
from typing import List
import nest_asyncio

# --- Application configuration -------------------------------------------------
# TODO: Configure this model!
# MODEL_NAME is sent as `model=` on every `messages.create` call in
# `call_anthropic`, and is also recorded as `ls_model_name` in that function's
# trace metadata.
MODEL_NAME = "claude-sonnet-4-5-20250929"
# Recorded as `ls_provider` in the trace metadata of `call_anthropic`.
MODEL_PROVIDER = "anthropic"
# NOTE(review): not referenced anywhere else in the visible notebook.
APP_VERSION = 1.0
# System prompt passed as `system=` on every model call made by `call_anthropic`.
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the latest question in the conversation.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
"""

# Single shared client used by `call_anthropic`. It is constructed without
# arguments, so it presumably reads its credentials from the environment
# (e.g. ANTHROPIC_API_KEY) — confirm.
anthropic_client = Anthropic()

def get_vector_db_retriever():
    """Return a retriever over the LangSmith docs, reusing an on-disk index when one exists.

    The index is stored as a parquet file named ``union.parquet`` in the system
    temp directory. When that file is present the store is opened from it;
    otherwise the LangSmith docs sitemap is downloaded, split into chunks,
    embedded, and the resulting store is persisted to that path.

    Returns:
        The object produced by ``SKLearnVectorStore.as_retriever(lambda_mult=0)``.
    """
    index_file = os.path.join(tempfile.gettempdir(), "union.parquet")

    # Use HuggingFace embeddings (CPU, with normalized vectors)
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True}
    )

    # Arguments shared by both ways of obtaining the store
    store_settings = {
        "embedding": embedder,
        "persist_path": index_file,
        "serializer": "parquet",
    }

    if not os.path.exists(index_file):
        # No cached index yet: crawl the LangSmith docs sitemap, skipping pages that fail
        sitemap_pages = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True
        ).load()

        # Character-based splitting (no tiktoken): chunk sizes are measured with len()
        chunker = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            length_function=len
        )
        page_chunks = chunker.split_documents(sitemap_pages)

        vectorstore = SKLearnVectorStore.from_documents(
            documents=page_chunks,
            **store_settings
        )
        # Write the freshly built index to disk so later runs take the cached branch
        vectorstore.persist()
    else:
        # Cached index found: open the store from the persisted file
        vectorstore = SKLearnVectorStore(**store_settings)

    # NOTE(review): `lambda_mult` is normally an MMR search setting; confirm that
    # passing it directly to `as_retriever` has the intended effect.
    return vectorstore.as_retriever(lambda_mult=0)

# Patch asyncio so that event loops can be nested. Presumably needed because the
# sitemap loader fetches pages asynchronously inside the notebook's own running
# loop — TODO confirm.
nest_asyncio.apply()
# Build (or load from the on-disk cache) the retriever once, at cell execution
# time; `retrieve_documents` uses this module-level instance.
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Look up documents for a question using the module-level ``retriever``.

    Args:
        question: The user's question, handed to the retriever unchanged.

    Returns:
        Whatever ``retriever.invoke`` returns for the question; ``generate_response``
        later reads ``page_content`` from each returned item.
    """
    matched_documents = retriever.invoke(question)
    return matched_documents

"""
generate_response
- Calls `call_anthropic` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Build the prompt from the retrieved documents and ask the model for an answer.

    Args:
        question: The user's question.
        documents: Iterable of retrieved items; each must expose ``page_content``.

    Returns:
        The value returned by ``call_anthropic`` for the assembled user message.
    """
    # Concatenate the text of every retrieved document, separated by blank lines
    context_chunks = [doc.page_content for doc in documents]
    formatted_docs = "\n\n".join(context_chunks)
    return call_anthropic(f"Context: {formatted_docs} \n\n Question: {question}")

"""
call_anthropic
- Returns the chat completion output from Anthropic
"""
# The decorator arguments are evaluated once, when this definition runs, so the
# trace metadata keeps the MODEL_PROVIDER / MODEL_NAME values from that moment.
# The function body reads MODEL_NAME again on every call. Re-run this cell after
# changing MODEL_NAME so the two stay in sync.
@traceable(
    run_type="llm",
    metadata={"ls_provider": MODEL_PROVIDER, "ls_model_name": MODEL_NAME},
)
def call_anthropic(user_message: str):
    """Send one user message to the configured Anthropic model.

    Args:
        user_message: Full text of the single ``user`` turn (context plus question).

    Returns:
        The object returned by ``anthropic_client.messages.create``; the caller
        reads ``content[0].text`` from it.
    """
    conversation = [{"role": "user", "content": user_message}]
    return anthropic_client.messages.create(
        model=MODEL_NAME,
        max_tokens=1024,  # upper bound on the length of the generated reply
        system=RAG_SYSTEM_PROMPT,
        messages=conversation,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer a question end to end: retrieve documents, then generate a reply.

    Args:
        question: The user's question.

    Returns:
        The ``text`` attribute of the first content block of the model response.
    """
    retrieved = retrieve_documents(question)
    model_reply = generate_response(question, retrieved)
    # Only the first content block of the reply is returned
    first_block = model_reply.content[0]
    return first_block.text

Fetching pages: 100%|##########| 197/197 [00:34<00:00,  5.74it/s]


### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We pipe our dataset examples (dict) to the shape of input that our function `langsmith_rag` takes (str) using a target function

In [3]:
# Cell 4: Basic Experiment
from langsmith import evaluate, Client

# LangSmith client; later cells use it to read the dataset and list its examples.
client = Client()
# Name of the dataset that every experiment in this notebook runs against.
dataset_name = "RAG Application Golden Dataset"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    """Score whether the answer is shorter than 1.5x the reference answer.

    Args:
        reference_outputs: Reference example outputs; the ``"output"`` entry is measured with ``len``.
        outputs: Application outputs; the ``"output"`` entry is measured with ``len``.

    Returns:
        ``{"key": "is_concise", "score": 1}`` when the answer length is strictly
        below 1.5 times the reference length, otherwise the same dict with score 0.
    """
    answer_length = len(outputs["output"])
    length_budget = 1.5 * len(reference_outputs["output"])
    verdict = 1 if answer_length < length_budget else 0
    return {"key": "is_concise", "score": verdict}

def target_function(inputs: dict):
    """Adapt a dataset example to the RAG application's call signature.

    Args:
        inputs: Example inputs; the ``"question"`` entry is forwarded.

    Returns:
        The value returned by ``langsmith_rag`` for that question.
    """
    question = inputs["question"]
    return langsmith_rag(question)

# Run `target_function` over the named dataset and score each answer with
# `is_concise_enough`. The call is the cell's last expression, so the returned
# results are what the notebook displays. The experiment name is the prefix
# followed by a generated suffix.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="claude-sonnet-4.5"
)

View the evaluation results for experiment: 'claude-sonnet-4.5-5582f62e' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=70fc2101-5887-4427-a64e-60dbca2ce560




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Is there a Javascript Langsmith SDK?,"Based on the provided context, I cannot answer...",,Yes there is a Javascript Langsmith SDK.,0,3.660649,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,883fd1b9-e09e-4ad0-8d6e-f205ccc107bc
1,How can I trace with the @traceable decorator?,"Based on the context provided, to trace with t...",,To trace with the @traceable decorator in Pyth...,1,4.318079,20f7319c-7162-4314-ae6b-b754396424ba,8213345c-e652-42cf-b67d-38e934c6a45a
2,Can LangSmith be used for finetuning and model...,"Based on the provided context, I cannot determ...",,"Yes, LangSmith can be used for fine-tuning and...",1,3.767415,3a7d30d7-f443-4b35-9a37-57b3fd2c420f,969e0c48-26e6-4443-b89f-af884d033307
3,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,4.133823,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,ce2b0061-04bb-49b7-98ef-4a77da916bab
4,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith has t...",,LangSmith offers capabilities for creating dat...,1,3.790613,81997b32-c3e2-40de-b34d-ccba00d20004,acdbbfb6-7a45-430f-913d-d04a5019bcc5
5,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,3.633567,81c80399-456a-4b5c-b1b1-871f6f0750ab,3a664130-8421-4101-8364-01d94e396735
6,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.923594,89361799-0906-4cb3-85b0-24737c8450c0,93a2740b-4f35-48f4-88b0-112fa55f8c7c
7,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,4.105158,940d7c8a-01f9-490c-ba8a-e567e0906f5d,8328f429-3608-4b20-a9a3-5c029dfc9911
8,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,4.262214,a19131f2-ec0e-405b-8801-0ce7e556cbe6,593dcf3b-23fa-4e24-8437-466366dea4a1
9,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Yo...",,"Yes, LangSmith supports offline evaluation thr...",1,3.020006,c43a48b8-dbe1-4f31-b9cb-91443bf47847,d1250fb2-9f1c-414d-8fcf-bfa02a825617


### Modifying your Application

Now, let's change our model to a different one (for example, `claude-3-5-sonnet-20241022`) and see how it performs!

Make this change, and then run this code snippet!

In [4]:
# Cell 5: Test with Different Model
# Change MODEL_NAME in the RAG application cell to "claude-3-5-sonnet-20241022" or another model, re-run that cell, then run:

from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    """Bridge from a dataset example (dict) to ``langsmith_rag`` (str).

    Same adapter as the one defined in the basic experiment cell.

    Args:
        inputs: Example inputs containing a ``"question"`` entry.

    Returns:
        The answer produced by ``langsmith_rag`` for that question.
    """
    user_question = inputs["question"]
    answer = langsmith_rag(user_question)
    return answer

# Run the same experiment again for comparison against the earlier run.
#
# The experiment prefix is taken from MODEL_NAME instead of being typed by hand.
# The model that answers is whatever MODEL_NAME holds when this cell runs, so a
# hard-coded label would silently mislabel the experiment if the application
# cell had not been edited and re-run first.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix=MODEL_NAME,  # label always matches the model that produced the answers
)

View the evaluation results for experiment: 'claude-3.5-sonnet-70bbda08' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=21272554-b5e9-4861-86da-8fc39f2d6378




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Is there a Javascript Langsmith SDK?,"Based on the provided context, I cannot find a...",,Yes there is a Javascript Langsmith SDK.,0,3.493915,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,5e358907-cb25-461e-9c95-062ef52ba3f8
1,How can I trace with the @traceable decorator?,"Based on the provided context, here's how to t...",,To trace with the @traceable decorator in Pyth...,1,4.533391,20f7319c-7162-4314-ae6b-b754396424ba,8d637387-e197-4828-a0a2-e81a5029c614
2,Can LangSmith be used for finetuning and model...,"Based on the provided context, I don't have en...",,"Yes, LangSmith can be used for fine-tuning and...",1,3.536941,3a7d30d7-f443-4b35-9a37-57b3fd2c420f,5dcf109d-4d9c-4bae-abbc-9aa3d2985232
3,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,3.904815,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,1b7f3549-7b21-41be-a9b3-58ce79f35915
4,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith offer...",,LangSmith offers capabilities for creating dat...,1,4.547139,81997b32-c3e2-40de-b34d-ccba00d20004,11c5ed92-10fb-4200-8951-a149673108f5
5,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,3.305028,81c80399-456a-4b5c-b1b1-871f6f0750ab,fdc92707-d4a8-47b3-bd18-ffa76ea94034
6,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.252797,89361799-0906-4cb3-85b0-24737c8450c0,738a0d60-61f3-43c4-9cd1-3930e90dbacd
7,How do I pass metadata in with @traceable?,To pass metadata with the `@traceable` decorat...,,You can pass metadata with the @traceable deco...,1,4.152435,940d7c8a-01f9-490c-ba8a-e567e0906f5d,9ae289c7-e78c-444e-a58a-9191bc28d805
8,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.895166,a19131f2-ec0e-405b-8801-0ce7e556cbe6,ec163d19-06b5-4a65-8e75-cf851da30eb4
9,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluations. Y...",,"Yes, LangSmith supports offline evaluation thr...",1,2.984067,c43a48b8-dbe1-4f31-b9cb-91443bf47847,b96412a2-74c9-4c54-b37e-5033b9258452


### Running over Different pieces of Data

##### Dataset Version

You can run an experiment on a specific version of a dataset from the SDK by passing the `as_of` parameter to `list_examples` (below, the version tag `initial dataset`).

Let's try running on just our initial dataset.

In [8]:
# Look up the dataset record so its identifiers can be inspected
dataset = client.read_dataset(dataset_name=dataset_name)
print(f"Dataset ID: {dataset.id}", f"Dataset Name: {dataset.name}", sep="\n")

# Collect every example of the dataset into a list so they can be counted
examples = [example for example in client.list_examples(dataset_name=dataset_name)]
print(f"\nTotal examples in dataset: {len(examples)}")

# Tagged versions are not inspected here
# Note: You typically create versions via the UI or by tagging

Dataset ID: 978d5e03-9c8e-462b-b597-6007e8da6830
Dataset Name: RAG Application Golden Dataset

Total examples in dataset: 11


In [10]:
# Cell 6: Run on Specific Dataset Version
# Evaluate only the examples returned by `list_examples` for the requested
# dataset version instead of passing the dataset name directly.
# NOTE(review): assumes a version tagged "initial dataset" exists on this
# dataset — confirm in the LangSmith UI.
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, as_of="initial dataset"),   # We use as_of to specify a version
    evaluators=[is_concise_enough],
    experiment_prefix="initial dataset version"
)

View the evaluation results for experiment: 'initial dataset version-0f4a3c78' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=748ebe9b-d29b-49f2-8369-43031377e0f1




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Is there a Javascript Langsmith SDK?,"Based on the provided context, I cannot find a...",,Yes there is a Javascript Langsmith SDK.,0,3.790936,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,1359254f-a49a-4763-9969-7614cb317b4f
1,How can I trace with the @traceable decorator?,"Based on the provided context, to trace with t...",,To trace with the @traceable decorator in Pyth...,1,4.627904,20f7319c-7162-4314-ae6b-b754396424ba,4cb88a0e-3ba7-40d2-8767-ef99ca792726
2,Can LangSmith be used for finetuning and model...,"Based on the provided context, I cannot answer...",,"Yes, LangSmith can be used for fine-tuning and...",1,3.578209,3a7d30d7-f443-4b35-9a37-57b3fd2c420f,e4fae625-fe99-462c-9ce1-3fd2d91c4f88
3,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith with LangChain:...,,To set up tracing to LangSmith while using Lan...,0,3.765477,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,1a291190-002a-4d87-b63d-36a40a75e16e
4,What testing capabilities does LangSmith have?,LangSmith provides comprehensive testing capab...,,LangSmith offers capabilities for creating dat...,1,3.274475,81997b32-c3e2-40de-b34d-ccba00d20004,89cf5c82-1aaa-4eef-a8c8-a3d2ddc1ed96
5,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,4.164064,81c80399-456a-4b5c-b1b1-871f6f0750ab,abd2dc8b-9763-45a3-953a-b50437332bc5
6,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.563898,89361799-0906-4cb3-85b0-24737c8450c0,b542af27-a74d-4e74-aebc-f9ef6b862afe
7,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,4.408387,940d7c8a-01f9-490c-ba8a-e567e0906f5d,8e87ac0b-4bcb-4e38-a064-a26cb332f315
8,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.513992,a19131f2-ec0e-405b-8801-0ce7e556cbe6,1e2e0154-5b24-4a3e-bd61-f6ea6a81a8c7
9,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluations. Y...",,"Yes, LangSmith supports offline evaluation thr...",1,2.945531,c43a48b8-dbe1-4f31-b9cb-91443bf47847,60b9545f-8554-4c66-b8db-8a5ac654d17f


##### Dataset Split

You can run an experiment on a specific split of your dataset, let's try running on the Crucial Examples split.

In [12]:
# Cell 7: Run on Dataset Split
# Evaluate only the examples returned by `list_examples` for the listed splits.
# NOTE(review): assumes a split named "Crucial Examples" exists on this
# dataset — confirm in the LangSmith UI.
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, splits=["Crucial Examples"]),  # We pass in a list of Splits
    evaluators=[is_concise_enough],
    experiment_prefix="Crucial Examples split"
)

View the evaluation results for experiment: 'Crucial Examples split-656f1005' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=efae74b9-d792-4ff1-92db-f633d6d8a932




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,4.004058,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,3087cb85-b378-47a7-9790-facd88e83dd0
1,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith has s...",,LangSmith offers capabilities for creating dat...,0,4.205212,81997b32-c3e2-40de-b34d-ccba00d20004,3aedcbd8-87b8-487e-a639-66f9b35a133a
2,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,3.611986,81c80399-456a-4b5c-b1b1-871f6f0750ab,f4d742bb-1fb3-42e7-bc21-667dbaef40fb
3,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.079788,89361799-0906-4cb3-85b0-24737c8450c0,68686895-117c-40f4-a134-9a207637f07c
4,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,6.025403,940d7c8a-01f9-490c-ba8a-e567e0906f5d,5de9c4cf-a6a4-43e5-ba1d-025ba806d755
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.797636,a19131f2-ec0e-405b-8801-0ce7e556cbe6,e88bd6b5-ce01-4bac-aaf3-a25a10a6a0d2
6,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluations. Y...",,"Yes, LangSmith supports offline evaluation thr...",1,3.394723,c43a48b8-dbe1-4f31-b9cb-91443bf47847,f027116e-68cf-448b-9155-5f859d0dced6
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. Onl...",,"Yes, LangSmith supports online evaluation as a...",1,3.380342,f5b22f4b-564f-42dd-bff8-3f712fceaa4c,5aa75105-a7a8-4f2f-9c7a-aad9c0defdb0


##### Specific Data Points

You can specify individual data points to run an experiment over as well

In [13]:
# Cell 8: Run on Specific Data Points
# Evaluate only the examples whose IDs are listed below. The IDs are specific
# to this dataset; they match `example_id` values shown in the earlier result tables.
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name=dataset_name,
        example_ids=[
            "dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03",
            "81c80399-456a-4b5c-b1b1-871f6f0750ab",
            "940d7c8a-01f9-490c-ba8a-e567e0906f5d"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="three-specific-examples"
)

View the evaluation results for experiment: 'three-specific-examples-6eef5925' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=12bf258f-affe-4759-948b-58d677e2191c




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,3.918385,81c80399-456a-4b5c-b1b1-871f6f0750ab,0805aa8a-2eeb-49fe-a728-d10daa3ae641
1,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,4.069815,940d7c8a-01f9-490c-ba8a-e567e0906f5d,13a6d2f5-7000-41fc-9ec9-940e2f2e9d46
2,Is there a Javascript Langsmith SDK?,I don't know based on the provided context. Th...,,Yes there is a Javascript Langsmith SDK.,0,3.370113,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,bb1f7ec9-d715-41d4-8216-835c1318020e


### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [14]:
# Cell 9: Run with Repetitions (for consistency checking)
# Same experiment as the basic run, with `num_repetitions=2` so that
# run-to-run variation in the model's answers can be compared.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

View the evaluation results for experiment: 'two repetitions-2593f033' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=3255a987-2282-4d5e-b8e8-0fc2159b70d1




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,1,3.700712,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,c6ae90db-d591-4898-a1f5-22a8930388af
1,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith has s...",,LangSmith offers capabilities for creating dat...,1,4.572244,81997b32-c3e2-40de-b34d-ccba00d20004,862482aa-6680-4b5c-a008-bc116f449eb4
2,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,4.336213,81c80399-456a-4b5c-b1b1-871f6f0750ab,b22d4c69-db32-4358-9f5c-2473d85846bd
3,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.240655,89361799-0906-4cb3-85b0-24737c8450c0,8e2ef60a-5f18-4aa9-9b37-49f6da2c1d35
4,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,4.236536,940d7c8a-01f9-490c-ba8a-e567e0906f5d,faee4f47-90b9-47ce-a324-4128211acef6
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.694143,a19131f2-ec0e-405b-8801-0ce7e556cbe6,ce3e772d-dfc1-4d59-b56a-e4e3fc2b602e
6,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluations. Y...",,"Yes, LangSmith supports offline evaluation thr...",1,2.987214,c43a48b8-dbe1-4f31-b9cb-91443bf47847,5308388e-59ff-44e4-aea7-5aa16eb90ace
7,Does LangSmith support online evaluation?,"Yes, LangSmith does support online evaluation....",,"Yes, LangSmith supports online evaluation as a...",1,2.935647,f5b22f4b-564f-42dd-bff8-3f712fceaa4c,9a63fd3e-8152-4b74-9368-6b8805ae390e
8,Is there a Javascript Langsmith SDK?,I don't have information about a JavaScript La...,,Yes there is a Javascript Langsmith SDK.,0,3.611916,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,ebcdc443-4b3d-4a90-9b4a-a75d9aef17db
9,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Lang...,,To trace with the @traceable decorator in Pyth...,1,4.883144,20f7319c-7162-4314-ae6b-b754396424ba,9dd1e33f-3174-451f-906f-e550af4f1d61


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [15]:
# Cell 10: Run with Concurrency (faster execution)
# Same experiment as the basic run, allowing up to three concurrent workers.
# NOTE(review): the default value of `max_concurrency` (and what None means)
# may differ between langsmith SDK versions — confirm against the installed version.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

View the evaluation results for experiment: 'concurrency-891426da' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=4fb5181d-e3dd-441f-832e-ef0dfbaa9994




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,4.006184,81c80399-456a-4b5c-b1b1-871f6f0750ab,e9f593c1-e514-4589-80b2-406b6a1423dd
1,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,4.299176,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,7c9e22c8-8390-4023-9813-b48f6f7520e4
2,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith has t...",,LangSmith offers capabilities for creating dat...,1,4.405084,81997b32-c3e2-40de-b34d-ccba00d20004,2d7b916a-1263-4631-8746-d538b1cd15c6
3,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,3.909258,940d7c8a-01f9-490c-ba8a-e567e0906f5d,0e324698-1490-46aa-b992-b9240503ddba
4,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.937462,a19131f2-ec0e-405b-8801-0ce7e556cbe6,a4c6c1fa-2e36-4fba-b49b-459451705d4c
5,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,4.730778,89361799-0906-4cb3-85b0-24737c8450c0,5637e8c1-a9e7-4dd1-a959-e58841ff3cdb
6,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Yo...",,"Yes, LangSmith supports offline evaluation thr...",1,2.767025,c43a48b8-dbe1-4f31-b9cb-91443bf47847,e746fd98-0190-4ef3-a872-101c03cf03bc
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. Onl...",,"Yes, LangSmith supports online evaluation as a...",1,3.138817,f5b22f4b-564f-42dd-bff8-3f712fceaa4c,0d99c471-fb3c-4368-9e0c-c10fa56d99bf
8,Is there a Javascript Langsmith SDK?,"Based on the provided context, I don't have in...",,Yes there is a Javascript Langsmith SDK.,0,3.702554,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,6ff81db2-da08-4b4e-b4b2-7d2cc3ebe25f
9,Can LangSmith be used for finetuning and model...,"Based on the provided context, there is no inf...",,"Yes, LangSmith can be used for fine-tuning and...",1,3.322447,3a7d30d7-f443-4b35-9a37-57b3fd2c420f,1986d305-6882-4dd7-bed7-10e77b5d9cfc


##### Metadata 

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [16]:
# Cell 11: Add Metadata to Experiments
# Same experiment as the basic run, with descriptive metadata attached.
# MODEL_NAME and MODEL_PROVIDER are read when this cell runs, so the metadata
# reflects their current values.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME,
        "provider": MODEL_PROVIDER,
        # Matches the embedding model configured in `get_vector_db_retriever`
        "embeddings": "huggingface-all-MiniLM-L6-v2"
    }
)

View the evaluation results for experiment: 'metadata added-7ee82b91' at:
https://smith.langchain.com/o/072e35aa-3a5b-404d-bd5a-459a19c5e651/datasets/978d5e03-9c8e-462b-b597-6007e8da6830/compare?selectedSessions=594c2208-a2da-4c7a-9aa5-d1499b4a3dff




0it [00:00, ?it/s]

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,"To set up tracing to LangSmith with LangChain,...",,To set up tracing to LangSmith while using Lan...,0,4.624571,3c7db24e-be8c-4ef2-b0c6-3c04e65e9633,c563acad-3e91-4075-9ff0-bcb148c851e5
1,What testing capabilities does LangSmith have?,"Based on the context provided, LangSmith offer...",,LangSmith offers capabilities for creating dat...,1,4.465655,81997b32-c3e2-40de-b34d-ccba00d20004,bf2b1496-b03f-4685-8d6f-078788ca95dd
2,What is LangSmith used for in three sentences?,"Based on the provided context, LangSmith is us...",,LangSmith is a platform designed for the devel...,1,3.177047,81c80399-456a-4b5c-b1b1-871f6f0750ab,a53c2352-8dc8-4373-9d49-22ab59b56e98
3,How do I create user feedback with the LangSmi...,"Based on the provided context, I can see that ...",,To create user feedback with the LangSmith SDK...,1,3.757101,89361799-0906-4cb3-85b0-24737c8450c0,2b917490-688a-4b19-8a8a-7e3f421ee121
4,How do I pass metadata in with @traceable?,"To pass metadata with `@traceable`, you need t...",,You can pass metadata with the @traceable deco...,1,3.852299,940d7c8a-01f9-490c-ba8a-e567e0906f5d,2c1ba4a5-7723-483a-89dd-e73de9c2a656
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents....",,"Yes, LangSmith can be used to evaluate agents....",1,3.723285,a19131f2-ec0e-405b-8801-0ce7e556cbe6,9f3c0746-de5d-42f8-b98a-8c1f9e49ef3f
6,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Yo...",,"Yes, LangSmith supports offline evaluation thr...",1,3.561445,c43a48b8-dbe1-4f31-b9cb-91443bf47847,cd51e9eb-a43a-4c2b-89c6-54f0f4c469e6
7,Does LangSmith support online evaluation?,"Yes, LangSmith does support online evaluation....",,"Yes, LangSmith supports online evaluation as a...",0,3.159787,f5b22f4b-564f-42dd-bff8-3f712fceaa4c,7d62c852-c3af-4138-988a-058ab321d45a
8,Is there a Javascript Langsmith SDK?,"Based on the provided context, I don't have in...",,Yes there is a Javascript Langsmith SDK.,0,3.64951,dceb4c0c-68d6-4c31-9eea-3aef3d9c0e03,e6b3a647-7e66-4a4a-a9a5-10db100ea0f1
9,How can I trace with the @traceable decorator?,"Based on the provided context, to trace with t...",,To trace with the @traceable decorator in Pyth...,1,4.770239,20f7319c-7162-4314-ae6b-b754396424ba,02f3f6b7-8469-46a2-b830-9884694bc093
