In [None]:
import tiktoken

In [None]:
# Resolve the tokenizer from a model name. The encoding could instead be
# requested directly by its own name:
# enc = tiktoken.get_encoding("cl100k_base")
enc = tiktoken.encoding_for_model("gpt-4")

## Counting Tokens in Text

In [None]:
# Encode a short string and take the length of the result to get its token count.
len(enc.encode("Hello world!"))

In [None]:
# Multi-paragraph sample passage reused by the token-counting and cost cells below.
book_text = """
Mrs. Darling quivered and went to the window. It was securely fastened.
She looked out, and the night was peppered with stars. They were
crowding round the house, as if curious to see what was to take place
there, but she did not notice this, nor that one or two of the smaller
ones winked at her. Yet a nameless fear clutched at her heart and made
her cry, “Oh, how I wish that I wasn’t going to a party to-night!”

Even Michael, already half asleep, knew that she was perturbed, and he
asked, “Can anything harm us, mother, after the night-lights are lit?”

“Nothing, precious,” she said; “they are the eyes a mother leaves
behind her to guard her children.”

She went from bed to bed singing enchantments over them, and little
Michael flung his arms round her. “Mother,” he cried, “I’m glad of
you.” They were the last words she was to hear from him for a long
time.
"""

In [None]:
# Same measurement on the longer passage, using the encoding held in `enc`.
len(enc.encode(book_text))

In [None]:
def num_tokens_from_string(string: str, model_name: str) -> int:
    """Count how many tokens `string` occupies under `model_name`'s tokenizer.

    The encoding is looked up with `tiktoken.encoding_for_model`; any error
    that lookup raises for the given model name propagates to the caller.
    """
    tokenizer = tiktoken.encoding_for_model(model_name)
    return len(tokenizer.encode(string))

In [None]:
# Token count of the passage under the tokenizer tiktoken associates with gpt-3.5-turbo.
num_tokens_from_string(book_text, "gpt-3.5-turbo")

In [None]:
# Repeat with a different model name; its tokenizer may differ, so the count may too.
num_tokens_from_string(book_text, "text-davinci-003")

In [None]:
# Price of a single token, derived from a rate of 0.002 per 1,000 tokens.
# NOTE(review): presumably USD and the gpt-3.5-turbo rate at the time of writing — confirm against current pricing.
price_per_token = 0.002 / 1000

In [None]:
# Estimated cost of the passage: its gpt-3.5-turbo token count times the per-token price.
num_tokens_from_string(book_text, "gpt-3.5-turbo") * price_per_token

## Counting Tokens in Messages (for Chat API)

In [None]:
# Updated 1/4/2024
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages.

    The estimate is the encoded length of every value in every message, plus
    a fixed per-message overhead, a per-"name" adjustment, and a constant for
    the primed assistant reply.

    Args:
        messages: Iterable of dicts mapping field names (such as "role",
            "name" and "content") to string values.
        model: Model name. It selects the tokenizer and, by family, the
            overhead constants.

    Returns:
        The estimated token count as an int.

    Raises:
        NotImplementedError: If `model` is neither "gpt-3.5-turbo-0301" nor a
            name containing "gpt-3.5-turbo" or "gpt-4".
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    # Only the overhead constants are shared across a model family. The
    # encoding resolved above stays tied to the model that was actually
    # requested; re-resolving it for a pinned snapshot name would count a
    # family member that has its own tokenizer with the wrong one.
    if model == "gpt-3.5-turbo-0301":
        tokens_per_message = (
            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        )
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model or "gpt-4" in model:
        # Dated -0314/-0613 snapshots, plus unversioned or other family names,
        # which are assumed to share the -0613 overheads.
        tokens_per_message = 3
        tokens_per_name = 1
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )

    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

In [None]:
# Sample conversation for the message-level counter: a system instruction,
# four example turns supplied as "system" messages tagged with a "name",
# and a final user message.
example_messages = [
    {
        "role": "system",
        "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
    },
    {
        "role": "system",
        "name": "example_user",
        "content": "New synergies will help drive top-line growth.",
    },
    {
        "role": "system",
        "name": "example_assistant",
        "content": "Things working well together will increase revenue.",
    },
    {
        "role": "system",
        "name": "example_user",
        "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
    },
    {
        "role": "system",
        "name": "example_assistant",
        "content": "Let's talk later when we're less busy about how to do better.",
    },
    {
        "role": "user",
        "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.",
    },
]

In [None]:
# Unversioned model name, so the counter applies its gpt-3.5-turbo family rules.
num_tokens_from_messages(example_messages, "gpt-3.5-turbo")

In [None]:
# Unversioned model name, so the counter applies its gpt-4 family rules.
num_tokens_from_messages(example_messages, "gpt-4")

### Confirming Token Counts With a Real Request

In [None]:
import openai
from dotenv import dotenv_values

# Read the API key from a local .env file so it is not hard-coded in the notebook.
# NOTE(review): a missing OPENAI_API_KEY entry would fail on the lookup below.
config = dotenv_values(".env")
openai.api_key = config["OPENAI_API_KEY"]

In [None]:
# Send the same messages to the API; max_tokens=1 keeps the generated reply minimal.
# NOTE(review): the prompt token count in the response's usage data is presumably what
# should be compared with num_tokens_from_messages(example_messages, "gpt-4") — confirm.
openai.chat.completions.create(model="gpt-4", messages=example_messages, max_tokens=1)