In [1]:
from dotenv import load_dotenv
from anthropic import Anthropic
import json

# Load variables from a local .env file into the process environment first.
# NOTE(review): presumably Anthropic() picks up its API key from there — confirm.
load_dotenv()

model = "claude-sonnet-4-0"  # model identifier sent with every request in chat()
client = Anthropic()

In [2]:
def add_user_message(messages, text):
    """Append a user-role turn containing ``text`` to ``messages``.

    The list is modified in place and the same list object is returned.
    """
    user_turn = {"role": "user", "content": text}
    messages.append(user_turn)
    return messages

def add_assistent_messages(messages, text):
    """Append an assistant-role turn containing ``text`` to ``messages``.

    The list is modified in place and the same list object is returned.
    """
    assistant_turn = {"role": "assistant", "content": text}
    messages.append(assistant_turn)
    return messages

def chat(messages, system=None, temperature=1.0, stop_sequences=None):
    """Send ``messages`` to the model and return the text of the first content block.

    Args:
        messages: Conversation turns, forwarded as the ``messages`` request field.
        system: Optional system prompt; only included in the request when truthy.
        temperature: Sampling temperature, forwarded unchanged.
        stop_sequences: Optional stop sequences. ``None`` (the default) sends a
            fresh empty list.

    Returns:
        The ``text`` attribute of the first item in the response's ``content``.
    """
    params = {
        "model": model,
        "max_tokens": 1000,
        "messages": messages,
        "temperature": temperature,
        # A None sentinel (instead of a `[]` default) keeps calls from sharing
        # one mutable list object that any of them could accidentally modify.
        "stop_sequences": [] if stop_sequences is None else stop_sequences,
    }
    if system:
        params["system"] = system

    message = client.messages.create(**params)
    return message.content[0].text


In [3]:
# Load the evaluation test cases. The encoding is set explicitly so decoding
# does not depend on the platform's locale default.
with open("data/dataset.json", encoding="utf-8") as f:
    dataset = json.load(f)
dataset

[{'task': 'Write a Python function to create an AWS S3 bucket with a given name.'},
 {'task': 'Create a JSON object to configure an AWS Lambda function with a specified runtime, memory size, and timeout.'},
 {'task': "Write a regular expression to validate an AWS EC2 instance ID in the format 'i-0123456789abcdef'."}]

In [19]:
def run_prompt(test_case):
    """Build the prompt for ``test_case["task"]``, send it via chat(), and return the reply."""
    # The indentation and trailing whitespace are part of the prompt text that is sent.
    prompt = f"Please solve the following task:\n\n    {test_case['task']}\n    "
    conversation = add_user_message([], prompt)
    return chat(conversation)
    

def run_test_case(test_case):
    """Run the prompt for one test case and bundle its output with a score.

    Grading is not implemented yet: every case gets a fixed score of 10.
    """
    model_output = run_prompt(test_case)
    placeholder_score = 10  # stand-in until a real grader exists
    return {
        "output": model_output,
        "test_case": test_case,
        "score": placeholder_score,
    }
    
def run_eval(dataset):
    """Call run_test_case on every item of ``dataset``, in order.

    Returns the list of per-case results.
    """
    return [run_test_case(case) for case in dataset]

In [20]:
# Run the whole evaluation. Each dataset item goes through run_test_case,
# which issues one chat() request via run_prompt.
results = run_eval(dataset)

In [23]:
# Save the evaluation results as indented JSON, then echo the same JSON below.
with open("data/results.json", "w") as results_file:
    json.dump(results, results_file, indent=2)
print(json.dumps(results, indent=2))

[
  {
    "output": "Here's a Python function to create an AWS S3 bucket with a given name:\n\n```python\nimport boto3\nfrom botocore.exceptions import ClientError\nimport logging\n\ndef create_s3_bucket(bucket_name, region=None):\n    \"\"\"\n    Create an S3 bucket in a specified region\n    \n    :param bucket_name: Bucket to create\n    :param region: String region to create bucket in, e.g., 'us-west-2'\n    :return: True if bucket created successfully, False otherwise\n    \"\"\"\n    \n    # Create S3 client\n    try:\n        if region is None:\n            s3_client = boto3.client('s3')\n            s3_client.create_bucket(Bucket=bucket_name)\n        else:\n            s3_client = boto3.client('s3', region_name=region)\n            location = {'LocationConstraint': region}\n            s3_client.create_bucket(\n                Bucket=bucket_name,\n                CreateBucketConfiguration=location\n            )\n        \n        print(f\"Bucket '{bucket_name}' created succes