In [None]:
# imports

import os
import re
import math
import json
import random
from dotenv import load_dotenv
from item import Item
from tester import Tester
import matplotlib.pyplot as plt
import numpy as np
import pickle
from collections import Counter
from openai import OpenAI
from anthropic import Anthropic

In [None]:
# environment
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys for the OpenAI and
# Anthropic clients created in the next cell — confirm.

load_dotenv()

In [None]:
# API clients shared by the model functions further down.
# No credentials are passed explicitly here.
openai = OpenAI()
claude = Anthropic()

In [None]:
%matplotlib inline

In [None]:
# Load in the pickle files:
# NOTE: pickle.load can execute arbitrary code while unpickling, so these
# files must come from a trusted source.
# NOTE(review): presumably each pickle holds a list of Item objects (Item is
# imported above and test[0] is later passed to messages_for) — confirm.

with open('train_lite.pkl', 'rb') as file:
    train = pickle.load(file)

with open('test_lite.pkl', 'rb') as file:
    test = pickle.load(file)

## GPT-4o-mini

In [None]:
# Prompt for a Frontier model
# Removing the " to the nearest dollar"
# because a Frontier model needs no such simplification.
# And save a bit of tokens

def messages_for(item):
    system_message = "You estimate prices of items. Reply only with the price, no explanation"
    user_prompt = item.test_prompt().replace(" to the nearest dollar","").replace("\n\nPrice is $","")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": "Price is $"}
    ]

In [None]:
# Preview the message list built for the first test item
messages_for(test[0])

In [None]:
# A utility function to extract the price from a string

def get_price(s):
    """Extract the first number found in `s` and return it as a float.

    Dollar signs and commas are stripped first, so "$1,299.99" parses as
    1299.99. Returns 0.0 when `s` is None or contains no number.

    Args:
        s: The model reply to parse; None is tolerated, and non-string
            values are converted with str().

    Returns:
        float: The parsed number, or 0.0 if nothing could be parsed.
    """
    if s is None:
        # A chat reply's content can be None; treat that as "no price found"
        # instead of failing on .replace().
        return 0.0
    s = str(s).replace('$','').replace(',','')
    match = re.search(r"[-+]?\d*\.\d+|\d+", s)
    # Always return a float so callers get one consistent numeric type.
    return float(match.group()) if match else 0.0

get_price("The price is roughly $99.99 because something")

In [None]:
# The function for gpt-4o-mini

def gpt_4o_mini(item):
    """Ask gpt-4o-mini to price `item` and return the parsed estimate.

    Sends the messages from `messages_for(item)` through the module-level
    `openai` client and parses the reply text with `get_price`.

    Args:
        item: Object accepted by `messages_for`.

    Returns:
        Whatever `get_price` extracts from the reply (its "no price found"
        value when the reply is empty).
    """
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(item),
        seed=42,
        max_tokens=5
    )
    # message.content may be None; fall back to an empty string so get_price
    # returns its "no price found" value instead of raising mid-evaluation.
    reply = response.choices[0].message.content or ""
    return get_price(reply)

In [None]:
# Notes recorded from a previous run of this cell:
#   Usage: less than 50k input tokens, less than 1k output tokens
#   Price for API usage: <$0.01
#   Result: Error=$32.71, Hits=83.6%

Tester.test(gpt_4o_mini, test)

In [None]:
def gpt_4o_frontier(item):
    """Ask gpt-4o-2024-08-06 to price `item` and return the parsed estimate.

    Sends the messages from `messages_for(item)` through the module-level
    `openai` client and parses the reply text with `get_price`.

    Args:
        item: Object accepted by `messages_for`.

    Returns:
        Whatever `get_price` extracts from the reply (its "no price found"
        value when the reply is empty).
    """
    response = openai.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=messages_for(item),
        seed=42,
        max_tokens=5
    )
    # message.content may be None; fall back to an empty string so get_price
    # returns its "no price found" value instead of raising mid-evaluation.
    reply = response.choices[0].message.content or ""
    return get_price(reply)

In [None]:
# Notes recorded from a previous run of this cell:
#   Usage: less than 50k input tokens, less than 1k output tokens
#   Price for API usage: ~$0.14
#   Result: Error=$26.07, Hits=86.8%

Tester.test(gpt_4o_frontier, test)

In [None]:
# Claude Sonnet 3.5 v2
def claude_3_point_5_sonnet(item):
    """Ask claude-3-5-sonnet-20241022 to price `item` and return the estimate.

    The first entry from `messages_for(item)` is the system message; it is
    passed through the separate `system` argument and the remaining messages
    are sent as the conversation.

    Args:
        item: Object accepted by `messages_for`.

    Returns:
        Whatever `get_price` extracts from the reply (its "no price found"
        value when the reply is empty).
    """
    messages = messages_for(item)
    system_message = messages[0]['content']
    messages = messages[1:]
    response = claude.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=5,
        system=system_message,
        messages=messages
    )
    # The content list can be empty if the model ends its turn immediately;
    # fall back to an empty reply rather than raising IndexError.
    blocks = response.content
    reply = blocks[0].text if blocks else ""
    return get_price(reply)

In [None]:
# Notes recorded from a previous run of this cell:
#   Usage: less than 50k input tokens, less than 1k output tokens
#   Price for API usage: ~$0.19
#   Result: Error=$28.20, Hits=86.0%

Tester.test(claude_3_point_5_sonnet, test)

In [None]:
# Claude 3.7 Sonnet: brand new model at the time of testing (27.02.2025)

def claude_3_point_7_sonnet(item):
    """Ask claude-3-7-sonnet-20250219 to price `item` and return the estimate.

    The first entry from `messages_for(item)` is the system message; it is
    passed through the separate `system` argument and the remaining messages
    are sent as the conversation.

    Args:
        item: Object accepted by `messages_for`.

    Returns:
        Whatever `get_price` extracts from the reply (its "no price found"
        value when the reply is empty).
    """
    messages = messages_for(item)
    system_message = messages[0]['content']
    messages = messages[1:]
    response = claude.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=5,
        system=system_message,
        messages=messages
    )
    # The content list can be empty if the model ends its turn immediately;
    # fall back to an empty reply rather than raising IndexError.
    blocks = response.content
    reply = blocks[0].text if blocks else ""
    return get_price(reply)

In [None]:
# Notes recorded from a previous run of this cell:
#   Usage: less than 50k input tokens, less than 1k output tokens
#   Price for API usage: ~$0.18 (cheaper than Sonnet 3.5???)
#   Result: Error=$31.76, Hits=83.6%

Tester.test(claude_3_point_7_sonnet, test)