In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key = os.environ["OPENAI_API_KEY"]

## LangSmith

*Log In at https://smith.langchain.com*

In [2]:
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
# LANGCHAIN_API_KEY=<your-api-key>
# LANGCHAIN_PROJECT=<your-project>  # if not specified, defaults to "default"

In [3]:
#!pip install langsmith

In [4]:
from langchain_openai import ChatOpenAI
from langchain.callbacks.tracers import LangChainTracer

llm = ChatOpenAI()
tracer = LangChainTracer(project_name="Napoleon v1")
llm.predict("How many brothers had Napoleon Bonaparte?", callbacks=[tracer])

  llm.predict("How many brothers had Napoleon Bonaparte?", callbacks=[tracer])


'Napoleon Bonaparte had three brothers: Joseph, Lucien, and Jerome.'

**See updates in the Projects Area in LangSmith**

## Basic LangSmith Operations

**Create a new project with LangChainTracer**

In [5]:
from langchain.callbacks.tracers import LangChainTracer

tracer = LangChainTracer(project_name="Churchill v1")
llm.predict("How old was Churchill when he was appointed PM?", callbacks=[tracer])

'Churchill was appointed Prime Minister of the United Kingdom on May 10, 1940, at the age of 65.'

**Check updates in the Projects Area in LangSmith**

**Alternative way to do the same**

In [6]:
from langchain.callbacks import tracing_v2_enabled

with tracing_v2_enabled(project_name="Cyrus v1"):
    llm.invoke("When did Cyrus The Great reign in Persia?")

## Creating Tags in LangSmith Projects

In [7]:
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(temperature=0, tags=["History"])

prompt = PromptTemplate.from_template("Say {input}")

chain = LLMChain(
    llm=llm, 
    prompt=prompt, 
    tags=["Cyrus", "Persia"])

chain("When did the first Cyrus king reign in Persia?", tags=["Cyrus"])

  chain = LLMChain(
  chain("When did the first Cyrus king reign in Persia?", tags=["Cyrus"])


{'input': 'When did the first Cyrus king reign in Persia?',
 'text': 'The first Cyrus king to reign in Persia was Cyrus the Great, who ruled from 559-530 BC.'}

*See that this went to the default project since we did not set that differently*

## Creating Groups in LangSmith Projects

In [8]:
from langchain.callbacks.manager import (
    trace_as_chain_group
)

with trace_as_chain_group("American History v1") as group_manager:
    pass

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

roman_llm = ChatOpenAI(temperature=0.9)

prompt = PromptTemplate(
    input_variables=["question"],
    template="What is the answer to {question}?",
)

chain = LLMChain(
    llm=roman_llm, 
    prompt=prompt
)

with trace_as_chain_group("Roman History v1") as group_manager:
    chain.run(question="Who did Julius Caesar marry?", callbacks=group_manager)
    chain.run(question="Where did Julius Caesar fight?", callbacks=group_manager)
    chain.run(question="What was the name of the horse of Julius Caesar?", callbacks=group_manager)

  chain.run(question="Who did Julius Caesar marry?", callbacks=group_manager)


## LangSmith Client

In [9]:
from langsmith import Client

client = Client()
project_runs = client.list_runs(project_name="default")
project_runs

<generator object Client.list_runs at 0x000001ADD3AC8040>

In [10]:
from datetime import datetime, timedelta

todays_runs = client.list_runs(
    project_name="default",
    start_time=datetime.now() - timedelta(days=1),
    run_type="llm"
)
todays_runs

<generator object Client.list_runs at 0x000001ADD3AC8220>

In [11]:
for run in todays_runs:
    print(run)

In [12]:
todays_runs = client.list_runs(
     project_name="Churchill v1",
     start_time=datetime.now() - timedelta(days=1),
     run_type="llm",
 )

for run in todays_runs:
     print(run)

id=UUID('41ba79f1-d22a-46d2-bdf0-d59b071d62a0') name='ChatOpenAI' start_time=datetime.datetime(2025, 6, 29, 16, 10, 13, 59137) run_type='llm' end_time=datetime.datetime(2025, 6, 29, 16, 10, 13, 836178) extra={'invocation_params': {'model': 'gpt-3.5-turbo', 'model_name': 'gpt-3.5-turbo', 'stream': False, '_type': 'openai-chat', 'stop': None}, 'options': {'stop': None}, 'batch_size': 1, 'metadata': {'ls_provider': 'openai', 'ls_model_name': 'gpt-3.5-turbo', 'ls_model_type': 'chat', 'ls_temperature': None, 'revision_id': '6d77281-dirty', 'ls_run_depth': 0}, 'runtime': {'sdk': 'langsmith-py', 'sdk_version': '0.3.45', 'library': 'langchain-core', 'platform': 'Windows-10-10.0.19045-SP0', 'runtime': 'python', 'py_implementation': 'CPython', 'runtime_version': '3.11.9', 'langchain_version': '0.3.25', 'langchain_core_version': '0.3.65', 'library_version': '0.3.65'}} error=None serialized=None events=[{'name': 'start', 'time': '2025-06-29T16:10:13.059137+00:00'}, {'name': 'end', 'time': '2025-06

## Adding metadata to filter runs
One possible use of this: making A/B tests.

In [13]:
chat_model = ChatOpenAI()
chain = LLMChain.from_string(
    llm=chat_model, 
    template="What's the answer to {input}?")

chain(
    {"input": "Who was the companion of Don Quixote?"}, 
    metadata={"source": "Cervantes"}
)

{'input': 'Who was the companion of Don Quixote?', 'text': 'Sancho Panza'}

In [14]:
runs = list(client.list_runs(
    project_name="default",
    filter='has(metadata, \'{"source": "Cervantes"}\')',
))

print(list(runs))

[]


## Evaluating your LLM App with a Test Dataset in LangSmith

In [15]:
from langsmith import Client

example_inputs = [
  ("What is the largest mammal?", "The blue whale"),
  ("What do mammals and birds have in common?", "They are both warm-blooded"),
  ("What are reptiles known for?", "Having scales"),
  ("What's the main characteristic of amphibians?", "They live both in water and on land"),
]

client = Client()

dataset_name = "Elementary Animal Questions v1"

# Storing inputs in a dataset lets us
# run chains and LLMs over a shared set of examples.
dataset = client.create_dataset(
    dataset_name=dataset_name, 
    description="Questions and answers about animal phylogenetics.",
)

for input_prompt, output_answer in example_inputs:
    client.create_example(
        inputs={"question": input_prompt},
        outputs={"answer": output_answer},
        dataset_id=dataset.id,
    )

LangSmithConflictError: Conflict for /datasets. HTTPError('409 Client Error: Conflict for url: https://api.smith.langchain.com/datasets', '{"detail":"Dataset with this name already exists."}')

In [None]:
from langsmith import Client
from langchain.smith import RunEvalConfig, run_on_dataset

evaluation_config = RunEvalConfig(
    evaluators=[
        "qa",
        "context_qa",
        "cot_qa",
    ]
)

In [None]:
client = Client()
llm = ChatOpenAI()
run_on_dataset(
    dataset_name=dataset_name,
    llm_or_chain_factory=llm,
    client=client,
    evaluation=evaluation_config,
    project_name="evalproject v1",
)