In [1]:
import functools
import io
import itertools

import IPython
import bs4
import pandas as pd
import requests

In [2]:
@functools.cache
def get_pricing_page():
    """Download the OpenAI API pricing page and return its HTML as text.

    The result is memoized by ``functools.cache``, so a successful download
    is reused by every later call in the same kernel session. Call
    ``get_pricing_page.cache_clear()`` to force a fresh download.

    Returns:
        str: the decoded body of the HTTP response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if connecting or reading exceeds the timeout.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get('https://openai.com/api/pricing/', timeout=30)
    # Fail here instead of returning (and caching) an error page; functools.cache
    # does not memoize calls that raise, so a later call will retry.
    response.raise_for_status()
    return response.text

In [3]:
def need(condition):
    """Raise ``ValueError`` unless ``condition`` is truthy.

    Used as a lightweight assertion while navigating the scraped page: a
    falsy ``condition`` means the expected page structure was not found.
    """
    if condition:
        return
    raise ValueError("can't find correct part of page")

In [4]:
def find_price():
    """Return the raw text of the price cell in the 'Ada' embedding-model row.

    Parses the page returned by ``get_pricing_page()``, locates the single
    ``<h3>`` whose string is 'Embedding models', walks three levels up to the
    enclosing ``<div>``, and searches the tables inside it for the one cell
    whose string is exactly 'Ada'. The text of the only other cell in that
    row is returned unmodified (surrounding whitespace is not stripped).

    Returns:
        str: the ``.text`` of the price cell.

    Raises:
        ValueError: (via ``need``) if any structural expectation about the
            page does not hold.
    """
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # GuessedAtParserWarning).
    doc = bs4.BeautifulSoup(get_pricing_page(), 'html.parser')

    # `string=` is the current name of the filter formerly spelled `text=`.
    headings = doc.find_all('h3', string='Embedding models')
    need(len(headings) == 1)
    heading = headings[0]

    # The section container is three ancestors above the heading; verify it
    # is a <div> that holds no other <h3>, i.e. exactly this one section.
    doc_row = heading.parent.parent.parent
    need(doc_row.name == 'div')
    need(len(doc_row.find_all('h3')) == 1)

    tables = doc_row.find_all('table')
    # Reject nested tables, which would make the row/cell walk ambiguous.
    need(all(table.find('table') is None for table in tables))

    ada_cells = [
        cell
        for table in tables
        for row in table.find_all('tr')
        for cell in row.find_all('td', string='Ada')
    ]
    need(len(ada_cells) == 1)
    ada_cell = ada_cells[0]

    # Every <td> in the same row whose string is not 'Ada'; exactly one such
    # cell (the price) is expected.
    price_cells = ada_cell.parent.find_all('td', string=lambda s: s != 'Ada')
    need(len(price_cells) == 1)
    price_cell = price_cells[0]
    return price_cell.text

In [5]:
# Display the raw (unstripped) price text scraped for the 'Ada' row.
find_price()

'\n$0.0004\n/ 1K tokens\n'

In [6]:
def find_embedding_model_prices():
    """Return a dict mapping embedding model names to their usage prices.

    Parses the page returned by ``get_pricing_page()``, locates the single
    ``<h3>`` whose string is 'Embedding models', walks three levels up to the
    enclosing section, and lets pandas read every table in that section
    (hidden ones included, via ``displayed_only=False``). All tables must
    have exactly the columns ('Model', 'Usage'); their rows are merged into
    one dict. If a model appears in more than one table, the last
    occurrence wins.

    Returns:
        dict: ``{model: usage}`` taken from the 'Model' and 'Usage' columns.

    Raises:
        ValueError: (via ``need``) if the page structure or table columns
            are not as expected.
    """
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # GuessedAtParserWarning).
    doc = bs4.BeautifulSoup(get_pricing_page(), 'html.parser')

    # `string=` is the current name of the filter formerly spelled `text=`.
    headings = doc.find_all('h3', string='Embedding models')
    need(len(headings) == 1)
    doc_row = headings[0].parent.parent.parent
    need(len(doc_row.find_all('h3')) == 1)

    # pandas deprecated passing literal HTML to read_html; wrap the markup
    # in a file-like object instead.
    frames = pd.read_html(io.StringIO(str(doc_row)), displayed_only=False)
    need({tuple(df.columns.values) for df in frames} == {('Model', 'Usage')})
    # Each row is a 2-element array [model, usage], which dict() consumes
    # as a key/value pair.
    data_rows = itertools.chain.from_iterable(df.to_numpy() for df in frames)
    return dict(data_rows)

In [7]:
# Display the model -> usage-price mapping scraped from the embedding tables.
find_embedding_model_prices()

{'Ada': '$0.0004 / 1K tokens',
 'Ada v1': '$0.0040 / 1K tokens',
 'Babbage v1': '$0.0050 / 1K tokens',
 'Curie v1': '$0.0200 / 1K tokens',
 'Davinci v1': '$0.2000 / 1K tokens'}