# The Product Pricer Continued

A model that can estimate how much something costs, from its description.

## Time for Fine Tuning!

In [None]:
# imports

import re
import json
import pickle
from dotenv import load_dotenv
from tester import Tester
from openai import OpenAI

In [None]:
# environment

load_dotenv()

In [None]:
openai = OpenAI()

In [None]:
%matplotlib inline

In [None]:
# Load in the pickle files:

# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these files must come from a trusted source (presumably they were written
# by an earlier notebook in this project - confirm).
with open('train_lite.pkl', 'rb') as file:
    train = pickle.load(file)

with open('test_lite.pkl', 'rb') as file:
    test = pickle.load(file)

In [None]:
# OpenAI recommends fine-tuning with populations of 50-100 examples
# But as the examples are very small, I'm suggesting 200 examples (and 1 epoch)

# The first 200 training items are used for fine-tuning; items 200-249 of the
# same list are held out as the validation set.
fine_tune_train = train[:200]
fine_tune_validation = train[200:250]

# Step 1

Prepare the data for fine-tuning in JSONL (JSON Lines) format and upload to OpenAI

In [None]:
# Prompt for a Frontier model
# Remove the " to the nearest dollar" instruction from the prompt,
# because a Frontier model needs no such simplification -
# and it saves a few tokens

def messages_for(item):
    """Build the three-message chat transcript for one fine-tuning example.

    The user turn is ``item.test_prompt()`` with the " to the nearest dollar"
    wording and the trailing "\\n\\nPrice is $" cue removed. The assistant
    turn carries the expected answer: "Price is $" followed by ``item.price``
    formatted with two decimals.

    Args:
        item: object exposing a ``test_prompt()`` method and a numeric
            ``price`` attribute.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in the order
        system, user, assistant.
    """
    question = item.test_prompt()
    for fragment in (" to the nearest dollar", "\n\nPrice is $"):
        question = question.replace(fragment, "")
    answer = f"Price is ${item.price:.2f}"
    transcript = (
        ("system", "You estimate prices of items. Reply only with the price, no explanation"),
        ("user", question),
        ("assistant", answer),
    )
    return [{"role": role, "content": content} for role, content in transcript]

In [None]:
messages_for(train[0])

In [None]:
# Convert the items into a "jsonl" string: one JSON object per line
# Each line represents one training example (a full conversation) in the form:
# {"messages" : [{"role": "system", "content": "You estimate prices...

def make_jsonl(items):
    """Serialize items as a JSONL string, one fine-tuning example per line.

    Each line is the JSON object ``{"messages": [...]}``, where the list is
    whatever ``messages_for(item)`` returns for that item.

    Args:
        items: iterable of items accepted by ``messages_for``.

    Returns:
        The lines joined by newline characters, with no trailing newline.
        An empty ``items`` yields the empty string.
    """
    # Let json.dumps build the whole object instead of splicing JSON text by
    # hand, and join once instead of growing a string inside the loop.
    return "\n".join(
        json.dumps({"messages": messages_for(item)}) for item in items
    )

In [None]:
print(make_jsonl(train[:3]))

In [None]:
# Convert the items into jsonl and write them to a file

def write_jsonl(items, filename):
    """Write the JSONL serialization of ``items`` to ``filename``.

    Args:
        items: iterable of items accepted by ``make_jsonl``.
        filename: path of the file to create or overwrite.
    """
    # Serialize before opening the file: if serialization raises, an existing
    # file is not truncated and no empty file is left behind.
    jsonl = make_jsonl(items)
    # Explicit encoding so the bytes written do not depend on the platform
    # default.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(jsonl)

In [None]:
write_jsonl(fine_tune_train, "fine_tune_train.jsonl")

In [None]:
write_jsonl(fine_tune_validation, "fine_tune_validation.jsonl")

In [None]:
# Upload the training JSONL written above; the id of the returned file object
# is what the fine-tuning job is later given as its training_file.
with open("fine_tune_train.jsonl", "rb") as f:
    train_file = openai.files.create(file=f, purpose="fine-tune")

In [None]:
train_file

In [None]:
# Upload the validation JSONL written above; the id of the returned file
# object is what the fine-tuning job is later given as its validation_file.
with open("fine_tune_validation.jsonl", "rb") as f:
    validation_file = openai.files.create(file=f, purpose="fine-tune")

In [None]:
validation_file

# Step 2 - Fine-tune!

In [None]:
train_file.id

In [None]:
# Launch the fine-tuning job on the two uploaded files.
# NOTE(review): the returned job object is not kept; the job id is looked up
# afterwards via jobs.list(limit=1).
openai.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,  # fixed seed, presumably for reproducible runs - confirm
    hyperparameters={"n_epochs": 1},  # one epoch, as planned when the 200-example split was chosen
    suffix="pricer"
)

In [None]:
openai.fine_tuning.jobs.list(limit=1)

In [None]:
# NOTE(review): this assumes jobs.list returns the newest job first, so that
# limit=1 yields the job launched above - confirm. If another job is started
# on the same account in between, this picks up the wrong one; keeping the
# return value of jobs.create(...) would avoid that.
job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id
job_id

In [None]:
openai.fine_tuning.jobs.retrieve(job_id)

In [None]:
openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data

# Step 3

Test the fine-tuned model

In [None]:
# NOTE(review): fine_tuned_model is presumably not set until the job has
# finished successfully - confirm against the API docs, and re-run this cell
# once the job events report completion.
fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model
fine_tuned_model_name

In [None]:
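# The prompt for testing: same system and user messages as for training,
# but the assistant message is only the prefix "Price is $" (no price).
# Note: this redefines messages_for, so re-running the JSONL cells after
# this point would use this version instead of the training one.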

def messages_for(item):
    """Build the chat messages used to query the model about ``item``.

    The user turn is ``item.test_prompt()`` with the " to the nearest dollar"
    wording and the trailing "\\n\\nPrice is $" cue removed. The final
    assistant turn contains only the prefix "Price is $"; the item's actual
    price is never read here.

    Args:
        item: object exposing a ``test_prompt()`` method.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in the order
        system, user, assistant.
    """
    instructions = "You estimate prices of items. Reply only with the price, no explanation"
    description = item.test_prompt()
    description = description.replace(" to the nearest dollar", "")
    description = description.replace("\n\nPrice is $", "")
    return [
        dict(role="system", content=instructions),
        dict(role="user", content=description),
        dict(role="assistant", content="Price is $"),
    ]

In [None]:
messages_for(test[0])

In [None]:
# A utility function to extract the price from a string

def get_price(s):
    """Extract the first number found in ``s`` and return it as a float.

    Dollar signs and commas are removed before searching, so "$1,234.50"
    parses as 1234.5.

    Args:
        s: text to search; ``None`` or an empty string is accepted.

    Returns:
        The first number in the text as a float, or 0.0 when ``s`` is
        empty/None or contains no number.
    """
    # The caller passes a model reply straight in; treat a missing reply like
    # a reply without a number instead of raising AttributeError.
    if not s:
        return 0.0
    cleaned = s.replace('$', '').replace(',', '')
    # The alternation is grouped so the optional sign applies to integers as
    # well as decimals; ungrouped, "-5" parsed as 5.0 while "-5.5" parsed as
    # -5.5.
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", cleaned)
    # Always return a float, including on the no-match path.
    return float(match.group()) if match else 0.0

get_price("The price is roughly $99.99 because something")

In [None]:
# Ask the fine-tuned gpt-4o-mini model for a price estimate

def gpt_fine_tuned(item):
    """Ask the fine-tuned model to price ``item`` and return the parsed number.

    Sends ``messages_for(item)`` to the model named by the module-level
    ``fine_tuned_model_name`` with ``seed=42`` and ``max_tokens=7``, then
    passes the text of the first choice to ``get_price``.

    Args:
        item: item accepted by ``messages_for``.

    Returns:
        Whatever ``get_price`` extracts from the model's reply.
    """
    create_completion = openai.chat.completions.create
    completion = create_completion(
        model=fine_tuned_model_name,
        messages=messages_for(item),
        seed=42,
        max_tokens=7,
    )
    answer_text = completion.choices[0].message.content
    return get_price(answer_text)

In [None]:
# Quick check on one test item: the true price, the fine-tuned model's
# estimate (this makes one API call), and the item's full test prompt.
print(test[0].price)
print(gpt_fine_tuned(test[0]))
print(test[0].test_prompt())

In [None]:
# Price for the fine tuning: $0.12
# Price for api usage: <$0.01
# Error=$38.65, Hits=77.6%

Tester.test(gpt_fine_tuned, test)