In [7]:
from langsmith.schemas import Example, Run

def correct_label(inputs: dict, reference_outputs: dict, outputs: dict) -> dict:
  """Score a run by exact match between its output and the reference label.

  Args:
    inputs: Example inputs. Accepted for the evaluator signature; not read here.
    reference_outputs: Reference outputs; the expected value is read from "label".
    outputs: Run outputs; the predicted value is read from "output".

  Returns:
    A dict with "key" set to "correct_label" and "score" set to 1 when the
    prediction equals the label, otherwise 0. A missing "output" or "label"
    key (or a None outputs/reference_outputs dict) always scores 0.
  """
  # A sentinel separates "key absent" from an explicit None value, so that two
  # absent keys are not silently scored as a match via None == None.
  missing = object()
  predicted = (outputs or {}).get("output", missing)
  expected = (reference_outputs or {}).get("label", missing)

  if predicted is missing or expected is missing:
    return {"score": 0, "key": "correct_label"}
  return {"score": int(predicted == expected), "key": "correct_label"}

In [8]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [9]:
from openai import OpenAI
from pydantic import BaseModel, Field

client = OpenAI()

class Similarity_Score(BaseModel):
    # Structured-output schema with a single integer field holding the judge's score.
    # The 1-10 range is only stated in the field description; no validator enforces it.
    similarity_score: int = Field(
        description=(
            "Semantic similarity score between 1 and 10, "
            "where 1 means unrelated and 10 means identical."
        ),
    )

def compare_semantic_similarity(inputs: dict, reference_outputs: dict, outputs: dict):
    """Ask an LLM judge how close a run's answer is in meaning to the reference answer.

    Args:
        inputs: Example inputs; must contain "question".
        reference_outputs: Reference outputs; must contain "output" (the correct answer).
        outputs: Run outputs; must contain "output" (the answer being judged).

    Returns:
        A dict with "key" set to "similarity" and "score" set to the integer the
        model returned in ``Similarity_Score.similarity_score``.

    Raises:
        KeyError: If one of the required keys listed above is missing.
        ValueError: If the completion carries no parsed result (for example,
            because the model refused to answer).
    """
    input_question = inputs["question"]
    reference_response = reference_outputs["output"]
    run_response = outputs["output"]

    completion = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a semantic similarity evaluator. Compare the meanings of two responses to a question, "
                    "Reference Response and New Response, where the reference is the correct answer, and we are trying to judge if the new response is similar. "
                    "Provide a score between 1 and 10, where 1 means completely unrelated, and 10 means identical in meaning."
                ),
            },
            {
                "role": "user",
                # The candidate answer is labelled "New Response" so the label matches
                # the name the system prompt uses for it.
                "content": f"Question: {input_question}\n Reference Response: {reference_response}\n New Response: {run_response}",
            },
        ],
        response_format=Similarity_Score,
    )

    message = completion.choices[0].message
    parsed = message.parsed
    if parsed is None:
        # Without this guard the attribute access below would fail with an
        # uninformative AttributeError on None.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Similarity judge returned no parsed score (refusal: {refusal!r})")
    return {"score": parsed.similarity_score, "key": "similarity"}

In [10]:
# 🔌 Electronics knowledge test case: judge a partially correct answer against the reference.
inputs = {"question": "How do I set up a basic LED circuit with Arduino Uno?"}

# Reference (correct) answer the judge compares against.
reference_outputs = {"output": "To set up a basic LED circuit with Arduino Uno, connect the LED's long leg (anode) to digital pin 13 through a 220-ohm resistor, and connect the short leg (cathode) to GND. In your Arduino sketch, use pinMode(13, OUTPUT) in setup() and digitalWrite(13, HIGH) to turn it on, digitalWrite(13, LOW) to turn it off. This is the classic 'Hello World' of Arduino projects!"}

# Response under test: partially correct.
outputs = {"output": "Connect LED to pin 13 and ground, then use digitalWrite to control it. You might need a resistor to protect the LED."}

similarity_score = compare_semantic_similarity(
    inputs,
    reference_outputs,
    outputs,
)
print(f"Semantic similarity score: {similarity_score}")

Semantic similarity score: {'score': 8, 'key': 'similarity'}


In [11]:
from langsmith.schemas import Run, Example

def _read_field(record, name):
    """Return ``record[name]`` when the record is subscriptable, else the attribute ``record.<name>``."""
    if hasattr(record, "__getitem__"):
        return record[name]
    return getattr(record, name)


def compare_semantic_similarity_v2(root_run: Run, example: Example):
    """Run/Example-style wrapper around ``compare_semantic_similarity``.

    Accepts either plain dicts (as the sample cell in this notebook passes) or
    objects that expose ``inputs`` / ``outputs`` as attributes, which is what
    the ``Run`` / ``Example`` annotations suggest — NOTE(review): confirm the
    attribute names against the installed langsmith version.

    Args:
        root_run: The run being judged; its "outputs" must contain "output".
        example: The dataset example; its "inputs" must contain "question" and
            its "outputs" must contain "output" (the reference answer).

    Returns:
        The dict returned by ``compare_semantic_similarity``:
        ``{"score": <int>, "key": "similarity"}``.
    """
    # Delegate instead of duplicating the judge call: both functions used the
    # same model and the same prompt text.
    return compare_semantic_similarity(
        inputs=_read_field(example, "inputs"),
        reference_outputs=_read_field(example, "outputs"),
        outputs=_read_field(root_run, "outputs"),
    )

In [12]:
# 🤖 Arduino vs Raspberry Pi Knowledge Test
# Dict stand-in for a run: only "outputs" -> "output" is read by the evaluator below.
sample_run = {
  "name": "Electronics Assistant Run",
  "inputs": {
    "question": "What's the difference between Arduino Uno and Raspberry Pi?"
  },
  "outputs": {
    "output": "Arduino is for simple projects, Raspberry Pi is like a computer. Arduino is cheaper and easier to use for beginners."
  },
  "is_root": True,
  "status": "success",
  "extra": {
    "metadata": {
      "topic": "microcontrollers",
      "difficulty": "beginner"
    }
  }
}

# Dict stand-in for a dataset example: supplies the question and the reference answer.
sample_example = {
  "inputs": {
    "question": "What's the difference between Arduino Uno and Raspberry Pi?"
  },
  "outputs": {
    "output": "Arduino Uno is a microcontroller board best for real-time hardware control, sensor reading, and simple repetitive tasks with low power consumption. Raspberry Pi is a single-board computer that runs a full operating system, making it ideal for complex applications, web servers, image processing, and projects requiring multitasking. Choose Arduino for dedicated hardware control, Raspberry Pi for computational tasks."
  },
  "metadata": {
    "dataset_split": [
      "electronics_expert",
      "verified_answer"
    ],
    "topic": "microcontroller_comparison"
  }
}

# Compute the score first, then print it. Printing before the assignment would
# show whatever `similarity_score` was left over from an earlier cell.
similarity_score = compare_semantic_similarity_v2(sample_run, sample_example)
print(f"Semantic similarity score: {similarity_score}")

Semantic similarity score: {'score': 8, 'key': 'similarity'}
