In [12]:
# Load env variables and create client
from dotenv import load_dotenv
from anthropic import Anthropic
import os

load_dotenv()
Anthropic_API_Key = os.getenv("Anthropic_API_Key")
if not Anthropic_API_Key:
    raise ValueError("Anthropic_API_Key is not set in the environment variables.")
client = Anthropic(api_key=Anthropic_API_Key)
model = "claude-3-5-haiku-latest"

In [13]:
# Helper functions
def add_user_message(messages, text):
    user_message = {"role": "user", "content": text}
    messages.append(user_message)


def add_assistant_message(messages, text):
    assistant_message = {"role": "assistant", "content": text}
    messages.append(assistant_message)


def chat(messages, system=None, temperature=1.0, stop_sequences=[]):
    params = {
        "model": model,
        "max_tokens": 1000,
        "messages": messages,
        "temperature": temperature,
        "stop_sequences": stop_sequences,
    }

    if system:
        params["system"] = system

    message = client.messages.create(**params)
    return message.content[0].text

In [14]:
import json


def generate_dataset():
    prompt = """
Generate a evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts
that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects,
each representing task that requires Python, JSON, or a Regex to complete.

Example output:
```json
[
    {
        "task": "Description of task",
    },
    ...additional
]
```

* Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a regular expression.
* Focus on tasks that do not require writing much code

Please generate 3 objects.
"""
    messages = []
    add_user_message(messages, prompt) 
    add_assistant_message(messages, "```json")
    text = chat(messages, stop_sequences=["```"])
    return json.loads(text)



In [16]:
dataset = generate_dataset()
dataset

[{'task': 'Create a Python function to extract the AWS region from an EC2 instance ARN'},
 {'task': 'Write a JSON configuration that defines IAM policy permissions for S3 bucket read access'},
 {'task': 'Develop a regular expression to validate an AWS CloudFormation stack name (lowercase, alphanumeric, max 128 characters)'}]

In [17]:
with open("dataset.json", "w") as f:
    json.dump(dataset, f, indent=2)

In [18]:
def run_prompt(test_case):
    """Merges the prompt and test case input, then returns the result"""
    prompt = """
please solve the following task:
{test_case["task"]}
"""
    messages = []
    add_user_message(messages, prompt)
    output = chat(messages)
    return output

In [19]:
def run_test_case(test_case):
    """Call run_prompt, then grades the result"""
    output = run_prompt(test_case)
    
    score = 10
    return {
        "output": output,
        "test_case": test_case,
        "score": score
    }

In [20]:
def run_eval(dataset):
    """Loads the dataset and calls run_test_case with each case"""
    results = []
    for test_case in dataset:
        result = run_test_case(test_case)
        results.append(result)
    return results


In [21]:
with open("dataset.json", "r") as f:
    dataset = json.load(f)
results = run_eval(dataset)

In [24]:
results

[{'output': "I apologize, but there is no task provided in your message. Could you please share the specific task you would like me to solve? I'm ready to help you with any problem or question you might have.",
  'test_case': {'task': 'Create a Python function to extract the AWS region from an EC2 instance ARN'},
  'score': 10},
 {'output': "I apologize, but it seems like there's no actual task or problem statement provided in your message. Could you please share the specific task or problem you would like me to solve? I'm ready to help you with various types of tasks, such as:\n\n- Mathematical problems\n- Coding challenges\n- Algorithm design\n- Data analysis\n- Writing tasks\n- Logical reasoning problems\n\nPlease provide the complete details of the task, and I'll be happy to assist you.",
  'test_case': {'task': 'Write a JSON configuration that defines IAM policy permissions for S3 bucket read access'},
  'score': 10},
 {'output': "I apologize, but it seems like there's no specific