# Pricing for embeddings, from OpenAI website

SPDX-License-Identifier: 0BSD

In [1]:
import functools
import io
import itertools

import bs4
import pandas
import requests
from tabulate import tabulate

import fr2ex

In [2]:
# Partial application of the unbound bs4.Tag.find_all with the tag name and CSS
# class pre-filled. The element to search is passed as the first positional
# argument, and further filters (such as string=...) as keyword arguments.
_find_model_heading = functools.partial(
    bs4.Tag.find_all,
    name='h3',
    attrs={'class': 'f-heading-3'},
)
# The bare string below is an expression statement, not a real docstring; as
# the last expression in the cell, its value is echoed as the cell's output.
"""Select all h3.f-heading-3 in a given element, with given requirements."""

'Select all h3.f-heading-3 in a given element, with given requirements.'

In [3]:
# Download the pricing page. A timeout is given explicitly because requests
# applies none by default, so an unresponsive server would otherwise block
# this cell indefinitely.
response = requests.get('https://openai.com/pricing/', timeout=30)
# Raise on 4xx/5xx instead of going on to parse an error page.
response.raise_for_status()
# Decode response.text with the encoding detected from the body content,
# rather than the one derived from the HTTP headers.
response.encoding = response.apparent_encoding

In [4]:
# Parse the downloaded page using the lxml parser.
doc = bs4.BeautifulSoup(response.text, features='lxml')
# Narrow the h3.f-heading-3 search to headings whose string is
# 'Embedding models'.
headings = _find_model_heading(doc, string='Embedding models')
# Echo the matches as the cell's output.
headings

[<h3 class="f-heading-3">Embedding models</h3>]

In [5]:
# Climb four levels up from the first matching heading to an enclosing element.
# NOTE(review): the depth of four is tied to the page's markup at the time of
# writing; it raises IndexError if no heading matched.
doc_row = headings[0].parent.parent.parent.parent
# List the model headings inside that ancestor, presumably to confirm it does
# not also span other models' sections.
_find_model_heading(doc_row)

[<h3 class="f-heading-3">Embedding models</h3>]

In [6]:
# Parse every table inside the selected element into a DataFrame. The markup
# is wrapped in StringIO because passing literal HTML text straight to
# read_html is deprecated (pandas 2.1+); file-like input is accepted by older
# versions too. displayed_only=False also parses elements hidden with
# "display: none".
frames = pandas.read_html(io.StringIO(str(doc_row)), displayed_only=False)
frames

[       0                    1
 0  Model                Usage
 1    Ada  $0.0004 / 1K tokens]

In [7]:
# Chain the row arrays of all parsed frames into one stream, then split off
# the first row as the header and collect the rest as the data rows.
data_header, *data_rows = itertools.chain.from_iterable(
    frame.values for frame in frames
)
data_header

array(['Model', 'Usage'], dtype=object)

In [8]:
# Sanity-check the header row. Comparing as tuples always yields a plain bool,
# whereas the elementwise array comparison cannot broadcast against a header
# with more columns than expected and raises instead of evaluating to False.
tuple(data_header) == ('Model', 'Usage')

True

In [9]:
# Echo the rows that followed the header row.
data_rows

[array(['Ada', '$0.0004\xa0/ 1K tokens'], dtype=object)]

In [10]:
# Map each model name to the rate parsed from its usage text.
# NOTE(review): _parse_rate is a private (underscore-prefixed) helper of
# fr2ex.tokens, called directly here as part of the scratchwork.
{name: fr2ex.tokens._parse_rate(text) for name, text in data_rows}

{'Ada': Rate(numerator=Decimal('0.0004'), denominator=1000)}

*End scratchwork.*

In [11]:
# Obtain the embedding model prices through the library function; the result
# is consumed via .items() as (model, rate) pairs when the table is built.
# NOTE(review): displayed_only=False presumably has the same meaning as the
# pandas.read_html argument of that name — confirm in fr2ex.tokens.
prices = fr2ex.tokens.find_embedding_model_prices(displayed_only=False)

In [12]:
# Lazily pair each model name with the numerator of its rate.
# NOTE(review): the column header hard-codes "1K tokens" while only the
# numerator is shown; this assumes every rate's denominator is 1000 — confirm.
table = ((name, price.numerator) for name, price in prices.items())

# Render as an HTML table; as the last expression, it is the cell's output.
tabulate(
    table,
    headers=['Model', '$ / 1K tokens'],
    tablefmt='html',
    floatfmt='.4f',
)

Model,$ / 1K tokens
Ada,0.0004
