# In [1]:
import os
from dotenv import load_dotenv
from langsmith import Client

# --- Setup ---
# Load environment variables from the local .env file. This runs first so the
# settings are already in the environment when the client below is built
# (Client() is called with no explicit arguments).
load_dotenv()

# Initialize the LangSmith client; the dataset sections below reuse this one
# `client` instance for every API call.
client = Client()

# --- Part 1: Create a Q&A Dataset (from the lesson) ---
# Builds the "LangSmith Docs Q&A" dataset: each example maps a
# {"question": ...} input to a reference {"answer": ...} output.
print("--- Creating Q&A Dataset ---")
qa_dataset_name = "LangSmith Docs Q&A"

# (question, reference answer) pairs for our first dataset
qa_examples = [
    ("How do I set up tracing to LangSmith if I'm using LangChain?", "Set the `LANGCHAIN_TRACING_V2` env var to 'true' and ensure your `LANGCHAIN_API_KEY` is set."),
    ("What testing capabilities does LangSmith have?", "LangSmith allows you to create datasets of inputs and reference outputs to test LLM applications."),
]

# Prepare inputs and outputs for bulk creation. These are built unconditionally
# so the names stay available to later cells whichever branch runs below.
qa_inputs = [{"question": question} for question, _ in qa_examples]
qa_outputs = [{"answer": answer} for _, answer in qa_examples]

# Re-running this cell must not call create_dataset() a second time for a name
# that is already taken, and must not append duplicate examples, so an existing
# dataset is looked up and left untouched.
if client.has_dataset(dataset_name=qa_dataset_name):
    qa_dataset = client.read_dataset(dataset_name=qa_dataset_name)
    print(f"Dataset '{qa_dataset_name}' already exists; skipping creation and upload.")
else:
    # Create the dataset
    qa_dataset = client.create_dataset(
        dataset_name=qa_dataset_name,
        description="Questions and answers about LangSmith.",
    )

    # Add the examples to the dataset
    client.create_examples(
        inputs=qa_inputs,
        outputs=qa_outputs,
        dataset_id=qa_dataset.id,
    )
    print(f"Successfully created '{qa_dataset_name}' and uploaded examples.")


# --- Part 2: My Custom Tweak - A Summarization Dataset ---
# Builds the "Article Summaries" dataset: each example maps an
# {"article": ...} input to a reference {"summary": ...} output.
print("\n--- Creating Summarization Dataset (My Tweak) ---")
summary_dataset_name = "Article Summaries"

# (article text, one-sentence summary) pairs for a different kind of evaluation
summary_examples = [
    (
        "AI is transforming industries worldwide. From automating tasks in manufacturing to personalizing customer experiences in retail, its impact is undeniable. The field is rapidly evolving with new breakthroughs in machine learning and neural networks.",
        "AI is revolutionizing global industries through automation and personalization, driven by advances in machine learning."
    ),
    (
        "The global push for renewable energy is gaining momentum. Solar and wind power are becoming increasingly cost-competitive with traditional fossil fuels. This shift is crucial for combating climate change and ensuring a sustainable future.",
        "Renewable energy sources like solar and wind are becoming more affordable, which is vital for fighting climate change."
    ),
]

# Prepare inputs and outputs with different keys. These are built
# unconditionally so the names stay available to later cells whichever branch
# runs below.
summary_inputs = [{"article": article} for article, _ in summary_examples]
summary_outputs = [{"summary": summary} for _, summary in summary_examples]

# Re-running this cell must not call create_dataset() a second time for a name
# that is already taken, and must not append duplicate examples, so an existing
# dataset is looked up and left untouched.
if client.has_dataset(dataset_name=summary_dataset_name):
    summary_dataset = client.read_dataset(dataset_name=summary_dataset_name)
    print(f"Dataset '{summary_dataset_name}' already exists; skipping creation and upload.")
else:
    # Create the second dataset
    summary_dataset = client.create_dataset(
        dataset_name=summary_dataset_name,
        description="Articles and their one-sentence summaries for evaluation.",
    )

    # Add the new examples to the new dataset
    client.create_examples(
        inputs=summary_inputs,
        outputs=summary_outputs,
        dataset_id=summary_dataset.id,
    )
    print(f"Successfully created '{summary_dataset_name}' and uploaded examples.")

# Output:
# --- Creating Q&A Dataset ---
# Successfully created 'LangSmith Docs Q&A' and uploaded examples.
#
# --- Creating Summarization Dataset (My Tweak) ---
# Successfully created 'Article Summaries' and uploaded examples.
