# Pairwise Experiments

## What I Learned
This video introduced pairwise evaluations for comparing two or more experiments side-by-side using custom evaluators and LLM judges. It demonstrated how to set up a pairwise experiment to contrast different prompt versions or model behaviors.

## Changes in Code
I created a themed pairwise experiment comparing two different approaches to Q&A. I defined an LLM-as-judge evaluator to score the quality of responses and ran the comparison using LangSmith.

In [None]:
from langsmith import Client
from langsmith.evaluation import evaluate
from langchain_openai import ChatOpenAI

# LangSmith client handle.
# NOTE(review): `client` is not referenced again in the visible code — confirm
# whether it is still needed before removing it.
client = Client()

# Version 1: Simple prompting
def qa_simple(inputs: dict) -> dict:
    """Answer a question by sending it to the model verbatim.

    Args:
        inputs: Mapping that must contain a ``"question"`` entry.

    Returns:
        A dict with a single ``"answer"`` key holding the model's reply content.
    """
    model = ChatOpenAI(model="gpt-4o-mini")
    reply = model.invoke(inputs["question"])
    return {"answer": reply.content}

# Version 2: Detailed prompting
def qa_detailed(inputs: dict) -> dict:
    """Answer a question using a prompt that asks for a thorough response.

    Args:
        inputs: Mapping that must contain a ``"question"`` entry.

    Returns:
        A dict with a single ``"answer"`` key holding the model's reply content.
    """
    model = ChatOpenAI(model="gpt-4o-mini")
    user_question = inputs["question"]
    # The question is embedded in a fixed instruction template before sending.
    detailed_prompt = f"""Please provide a comprehensive and accurate answer to the following question:
    
Question: {user_question}

Answer:"""
    return {"answer": model.invoke(detailed_prompt).content}

# Pairwise evaluator
def compare_answers(run, example):
    """Score a run's answer against the reference answer using an LLM judge.

    Args:
        run: Object whose ``outputs`` mapping holds the generated ``"answer"``.
        example: Object whose ``outputs`` mapping holds the reference
            ``"answer"``.

    Returns:
        A dict with ``"key"`` set to ``"pairwise_quality"`` and ``"score"`` set
        to a float clamped to the range [0, 1]. When the judge's reply contains
        no usable number, ``"score"`` is ``None`` and a ``"comment"`` entry
        carries the raw reply.
    """
    import math
    import re

    llm = ChatOpenAI(model="gpt-4o-mini")

    # Guard against `outputs` being None so a missing payload degrades to an
    # empty answer instead of raising AttributeError.
    predicted = (run.outputs or {}).get("answer", "")
    expected = (example.outputs or {}).get("answer", "")

    judge_prompt = f"""Compare these two answers and rate the predicted answer's quality (0-1):

Expected: {expected}
Predicted: {predicted}

Return only a number."""

    reply = str(llm.invoke(judge_prompt).content).strip()

    try:
        score = float(reply)
    except ValueError:
        # The judge may wrap the number in text (e.g. "Score: 0.8"); take the
        # first numeric token rather than failing the whole evaluation.
        found = re.search(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)", reply)
        score = float(found.group()) if found else math.nan

    if not math.isfinite(score):
        return {
            "key": "pairwise_quality",
            "score": None,
            "comment": f"Judge reply was not a usable number: {reply!r}",
        }

    # Keep the score inside the 0-1 range requested in the prompt.
    return {"key": "pairwise_quality", "score": min(1.0, max(0.0, score))}

# Run both experiments against the same dataset with the same evaluator.
# NOTE(review): each call below is an independent experiment scored against the
# reference answer; a head-to-head judgement between the two outputs would need
# LangSmith's comparative evaluation API — confirm whether that was intended.
print("Running pairwise comparison...")
results_simple = evaluate(qa_simple, data="qa_examples", evaluators=[compare_answers])
results_detailed = evaluate(qa_detailed, data="qa_examples", evaluators=[compare_answers])

# `evaluate` returns a results object, not a dict: the experiment name is
# exposed as an attribute, so subscripting it would raise TypeError.
print(f"Simple: {results_simple.experiment_name}")
print(f"Detailed: {results_detailed.experiment_name}")