## Load environment variables

In [None]:
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file, then load its variables into the process
# environment so the API clients created later can read their keys.
env_file = find_dotenv()
load_dotenv(env_file)

## Import libraries and LLM model

In [None]:
from langsmith import wrappers, Client
from pydantic import BaseModel, Field
from openai import OpenAI

# LangSmith client used for dataset management and for running experiments.
client = Client()

# Wrap the OpenAI client with LangSmith's wrapper before using it for
# completions.
raw_openai_client = OpenAI()
openai_client = wrappers.wrap_openai(raw_openai_client)

## Prepare dataset

In [None]:
# Create the customer service dataset in LangSmith, or reuse it when an earlier
# run of this cell already created it. Calling create_dataset again with an
# existing name is rejected by LangSmith, which would make the cell fail on
# every re-run.
dataset_name = "Customer Service FAQ"
dataset_is_new = not client.has_dataset(dataset_name=dataset_name)
if dataset_is_new:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="A dataset of common customer service questions and responses.",
    )
else:
    dataset = client.read_dataset(dataset_name=dataset_name)

# Reference question/answer pairs: "inputs" is what the target function
# receives, "outputs" is the ground truth the evaluator compares against.
examples = [
    {
        "inputs": {"question": "How can I track my order?"},
        "outputs": {"answer": "You can track your order using the tracking link sent to your email after shipping."},
    },
    {
        "inputs": {"question": "What is your return policy?"},
        "outputs": {"answer": "We accept returns within 30 days of purchase as long as the item is in its original condition."},
    },
    {
        "inputs": {"question": "Do you offer international shipping?"},
        "outputs": {"answer": "Yes, we ship internationally. Shipping fees and delivery times vary by location."},
    },
    {
        "inputs": {"question": "Can I change my shipping address after placing an order?"},
        "outputs": {"answer": "If your order hasn’t shipped yet, please contact our support team to update your shipping address."},
    },
    {
        "inputs": {"question": "What should I do if I received a damaged product?"},
        "outputs": {"answer": "We’re sorry to hear that! Please contact our support team with a photo of the damaged item for a replacement or refund."},
    },
]

# Upload the examples only into a freshly created dataset, so re-running the
# cell does not append duplicate copies of the same five examples.
if dataset_is_new:
    client.create_examples(
        dataset_id=dataset.id,
        inputs=[example["inputs"] for example in examples],
        outputs=[example["outputs"] for example in examples],
    )

## Define the target function

In [None]:
# Target function to generate responses
def target(inputs: dict) -> dict:
    """Answer a single dataset question with gpt-4o-mini.

    Args:
        inputs: Example inputs; must contain the key "question".

    Returns:
        A dict with the single key "response" holding the model's reply with
        surrounding whitespace stripped, or an empty string when the model
        returned no text content.
    """
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Answer the following question accurately"},
            {"role": "user", "content": inputs["question"]},
        ],
    )
    # message.content is optional in the API response (e.g. on a refusal), so
    # fall back to "" rather than calling .strip() on None.
    content = response.choices[0].message.content or ""
    return {"response": content.strip()}

## Define the LLM-as-judge evaluator

In [None]:
# Evaluation instructions
# System prompt for the grader model: one rubric line per newline-terminated
# entry, assembled from adjacent string literals.
instructions = (
    "Evaluate the Customer Support Agent's Answer against the Ground Truth "
    "for conceptual similarity and classify as True or False:\n"
    "- False: If the agent's answer misses the main idea or gives incorrect/misleading info\n"
    "- True: If the answer covers the key concept, even with different wording\n"
    "- Key criteria: Conceptual match, not exact phrasing\n"
)

# Define the output schema for the LLM-based evaluator
class Grade(BaseModel):
    # Structured-output schema passed as `response_format` to the grader call.
    # Documented with comments rather than a class docstring so the generated
    # schema (including its description text) stays exactly as before.

    # The grader's verdict on the agent's response.
    score: bool = Field(
        description=(
            "Boolean indicating if the agent's response is accurate "
            "relative to the ground truth answer"
        ),
    )

def accuracy(outputs: dict, reference_outputs: dict) -> bool:
    """Grade an agent response against the ground truth using gpt-4o-mini.

    Args:
        outputs: Output of the target function; must contain "response".
        reference_outputs: Reference outputs of the dataset example; must
            contain "answer".

    Returns:
        The boolean `score` field of the `Grade` parsed from the grader's
        structured reply.

    Raises:
        ValueError: If the grader reply could not be parsed into a `Grade`
            (for example because the model refused to answer).
    """
    response = openai_client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": instructions},
            {
                "role": "user",
                "content": f"""Ground Truth (Expected Answer): {reference_outputs["answer"]}
Agent's Response: {outputs["response"]}
Is this a conceptually correct response? Answer True or False."""
            },
        ],
        response_format=Grade,
    )
    message = response.choices[0].message
    grade = message.parsed
    # `parsed` is None when the model refuses; fail with an explicit message
    # instead of an opaque AttributeError on `None.score`, and without
    # inventing a score that would distort the accuracy metric.
    if grade is None:
        raise ValueError(
            f"Grader returned no parsed Grade (refusal: {message.refusal!r})"
        )
    return grade.score

## Run the experiment

In [None]:
# Run `target` over the "Customer Service FAQ" dataset and score each run
# with the listed evaluators.
experiment_results = client.evaluate(
    target,
    data="Customer Service FAQ",
    evaluators=[accuracy],  # further evaluators can be appended to this list
    experiment_prefix="CSS Experiment",
    max_concurrency=2,
)

In [19]:
# Print a readable summary of every evaluated example: the question, the
# ground truth, the model's response and each evaluator's result.
print("Experiment Results:\n")
for result in experiment_results:
    run = result["run"]
    example = result["example"]
    eval_results = result.get("evaluation_results", {}).get("results", [])

    print(f"Input Question: {example.inputs.get('question')}")
    # example.outputs may be None for an example stored without a reference
    # answer; guard so the report does not crash on it.
    reference = example.outputs or {}
    print(f"Ground Truth Answer: {reference.get('answer')}")

    # run.outputs may be None (e.g. when the target function raised), so guard
    # before calling .get(). Fall back to the raw outputs when there is no
    # "response" key -- adjust the key if the target's return format changes.
    run_outputs = run.outputs or {}
    model_output = run_outputs.get("response") or run.outputs
    print(f"Model Response: {model_output}")

    for eval_result in eval_results:
        print(f"Evaluation Key: {eval_result.key}")
        print(f"Score: {eval_result.score}")
        if eval_result.comment:
            print(f"Comment: {eval_result.comment}")

    print("-" * 60)


Experiment Results:

Input Question: Can I change my shipping address after placing an order?
Ground Truth Answer: If your order hasn’t shipped yet, please contact our support team to update your shipping address.
Model Response: Yes, you can often change your shipping address after placing an order, but it depends on the retailer's policies and how far along the order process is. Here's what you can generally do:

1. **Contact Customer Service**: Reach out to the retailer's customer service as soon as possible. They can inform you if it’s possible to change the address.

2. **Check Your Order Status**: If your order has not yet been processed or shipped, you may have a better chance of changing the address.

3. **Online Account**: If you have an online account with the retailer, you may be able to update your shipping address directly through your account details.

4. **Cancellation and Re-order**: If changing the address is not possible, you might have to cancel the order and place a