In [1]:
# Load variables from a local .env file into the process environment before any
# client is constructed (presumably the LangSmith / OpenAI credentials that the
# later cells rely on — confirm against the project's .env).
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langsmith import Client
from langsmith.utils import LangSmithNotFoundError

# (question, reference answer) pairs used to seed the evaluation dataset.
example_inputs = [
    ("What is the largest mammal?", "The blue whale"),
    ("What do mammals and birds have in common?", "They are both warm-blooded"),
    ("What are reptiles known for?", "Having scales"),
    ("What's the main characteristic of amphibians?", "They live both in water and on land"),
]

client = Client()
dataset_name = "Elementary Animal Questions"

# Get-or-create: calling create_dataset a second time for a name that already
# exists fails, which made this cell impossible to re-run (e.g. after
# "Restart Kernel -> Run All"). Look the dataset up first and only create it
# when the lookup reports that it is missing.
# NOTE: an existing dataset is reused as-is, so any code that adds examples to
# `dataset` will append to whatever it already contains.
try:
    dataset = client.read_dataset(dataset_name=dataset_name)
except LangSmithNotFoundError:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Questions and answers about animal phylogenetics.",
    )


In [3]:
# Upload each (question, answer) pair as an example of `dataset`.
#
# The questions already stored in the dataset are collected first so that
# re-running this cell does not append the same examples a second time.
existing_questions = {
    (example.inputs or {}).get("question")
    for example in client.list_examples(dataset_id=dataset.id)
}

for input_prompt, output_answer in example_inputs:
    if input_prompt in existing_questions:
        # Already uploaded by a previous run of this cell.
        continue
    client.create_example(
        inputs={"question": input_prompt},
        outputs={"answer": output_answer},
        dataset_id=dataset.id,
    )

In [6]:
# Build a second dataset directly from a CSV file on disk.
client = Client()

# CSV path plus the column names passed to upload_csv as input / output keys.
csv_file = "extended_questions_answers.csv"
input_keys = ["Question"]
output_keys = [
    "Answer",
    "Explanation",
    "Category",
    "Difficulty",
]

# NOTE(review): this rebinds `dataset`, the same name the example-upload loop
# reads through `dataset.id`; re-running that loop after this cell would target
# the CSV dataset instead of the hand-built one.
dataset = client.upload_csv(
    csv_file=csv_file,
    input_keys=input_keys,
    output_keys=output_keys,
    data_type="kv",
    name="My Extended CSV Dataset",
    description="Dataset created from an extended CSV file",
)

In [8]:
from langsmith import Client
from langchain.smith import RunEvalConfig, run_on_dataset
from langchain.chat_models import ChatOpenAI

# Evaluators are selected by their string names and run with default settings.
evaluation_config = RunEvalConfig(evaluators=["qa", "context_qa", "cot_qa"])

client = Client()

# Run a default ChatOpenAI model over every example of the dataset and score
# the answers with the evaluators configured above.
# NOTE(review): the recorded output of this cell shows 'results' as a pending
# asyncio Task, so the evaluation may still be running when the cell returns —
# confirm against the installed langchain version whether it must be awaited.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    evaluation=evaluation_config,
    llm_or_chain_factory=ChatOpenAI(),
)

View the evaluation results for project '884bb5a0c4e34a74b2c0e1759c584ae1-ChatOpenAI' at:
https://smith.langchain.com/projects/p/87d1704a-7245-4de3-baf4-c9bdf4f02c76?eval=true


{'project_name': '884bb5a0c4e34a74b2c0e1759c584ae1-ChatOpenAI',
 'results': <Task pending name='Task-5' coro=<_arun_on_examples() running at c:\Users\User\Desktop\Langsmith\langsmithvenv\Lib\site-packages\langchain\smith\evaluation\runner_utils.py:816>>}

In [10]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate

# Grading prompt for the "qa" evaluator. The three placeholders are filled with
# the question ({query}), the reference answer ({answer}) and the model's
# prediction ({result}).
_PROMPT_TEMPLATE = """You are an expert professor specialized in grading students' answers to questions.
You are grading the following question:
{query}
Here is the real answer:
{answer}
You are grading the following predicted answer:
{result}
Respond with CORRECT or INCORRECT:
Grade:
"""

PROMPT = PromptTemplate(
    template=_PROMPT_TEMPLATE,
    input_variables=["query", "answer", "result"],
)

# Temperature 0.0 is used for the grading model.
eval_llm = ChatOpenAI(temperature=0.0)

# Same three evaluators as the string-based configuration, but each one is
# bound to `eval_llm`, and the QA evaluator additionally uses the custom prompt.
# This replaces any previously defined `evaluation_config`.
evaluation_config = RunEvalConfig(
    evaluators=[
        RunEvalConfig.QA(prompt=PROMPT, llm=eval_llm),
        RunEvalConfig.ContextQA(llm=eval_llm),
        RunEvalConfig.CoTQA(llm=eval_llm),
    ],
)

In [11]:
# Re-run the evaluation using the current `evaluation_config` and the `client`
# left in the kernel by earlier cells.
# NOTE(review): `eval_llm` is passed as the model under test while also being
# the LLM the evaluators grade with — confirm that this is intended.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    evaluation=evaluation_config,
    llm_or_chain_factory=eval_llm,
)

View the evaluation results for project '62ce2242a9fc408fb978be6ed3835f7e-ChatOpenAI' at:
https://smith.langchain.com/projects/p/dd8bd96c-cdee-47de-9223-87fde95d0153?eval=true


{'project_name': '62ce2242a9fc408fb978be6ed3835f7e-ChatOpenAI',
 'results': <Task pending name='Task-46' coro=<_arun_on_examples() running at c:\Users\User\Desktop\Langsmith\langsmithvenv\Lib\site-packages\langchain\smith\evaluation\runner_utils.py:816>>}