# Solution: Estimating costs of an LLM API

[![open in colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LinkedInLearning/generative-ai-and-llmops-deploying-and-managing-llms-in-production-4465782/blob/solution/ch-06/challenge_estimating_API_costs.ipynb)

In [4]:
!pip install openai -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
from openai import OpenAI
import getpass
import os

In [6]:
# Prompt for the API key at run time (input is not echoed) so the secret is
# never written into the notebook source.
client = OpenAI(api_key=getpass.getpass())

··········


In [3]:
# Per-token rates in dollars, i.e. $10 and $30 per million tokens.
# NOTE(review): confirm these against the current price list for the model in use.
input_token_cost = 1e-5
output_token_cost = 3e-5

In [11]:
def calculate_cost(input_tokens, output_tokens):
    """Return the dollar cost of one call given its token counts.

    Each count is multiplied by its notebook-level per-token rate
    (``input_token_cost`` / ``output_token_cost``) and the two charges
    are summed.
    """
    prompt_charge = input_tokens * input_token_cost
    completion_charge = output_tokens * output_token_cost
    return prompt_charge + completion_charge

def generate_text_and_calculate_cost(prompt, model="gpt-4-turbo"):
    """Send ``prompt`` to the chat completions API and price the call.

    Args:
        prompt: Text sent as a single user message.
        model: Name of the chat model to call. Defaults to "gpt-4-turbo".

    Returns:
        A tuple ``(generated_output, input_tokens, output_tokens, cost)``:
        the message content of the first choice, the prompt and completion
        token counts reported in ``response.usage``, and the value of
        ``calculate_cost`` for those two counts.

    Note:
        ``calculate_cost`` applies the notebook-level per-token rates
        whatever ``model`` is, so the cost is only meaningful for the model
        those rates describe.
    """
    response = client.chat.completions.create(
        # Use the caller-supplied model rather than a fixed name.
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
    )

    generated_output = response.choices[0].message.content

    # Token counts as reported by the API for this request.
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    cost = calculate_cost(input_tokens, output_tokens)

    return generated_output, input_tokens, output_tokens, cost


# Run a single request, then report its token usage and cost.
prompt = "Write a blog about taking Generative AI applications to production."
(generated_output, input_tokens, output_tokens, cost) = generate_text_and_calculate_cost(prompt)

print(
    f"Total input tokens: {input_tokens}",
    f"Total output tokens: {output_tokens}",
    f"Cost of the API call: ${cost:.4f}",
    sep="\n",
)

Total input tokens: 19
Total output tokens: 770
Cost of the API call: $0.0233


## Using tiktoken

In [1]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.7.0


In [20]:
import tiktoken

def calculate_cost(input_prompt, generated_output, model):
    """Estimate the dollar cost of a prompt/response pair by tokenizing locally.

    Both texts are encoded with the tiktoken encoding registered for
    ``model``; the resulting token counts are multiplied by the
    notebook-level ``input_token_cost`` / ``output_token_cost`` rates and
    summed.

    NOTE: this re-uses the name of the earlier ``calculate_cost(input_tokens,
    output_tokens)`` and replaces it once this cell has run, so code that
    still calls the two-argument form will fail afterwards.
    """
    tokenizer = tiktoken.encoding_for_model(model)

    def count_tokens(text):
        # Number of tokens the model's encoding produces for ``text``.
        return len(tokenizer.encode(text))

    prompt_charge = count_tokens(input_prompt) * input_token_cost
    completion_charge = count_tokens(generated_output) * output_token_cost
    return prompt_charge + completion_charge

# The model name selects which tiktoken encoding is used for counting.
model = "gpt-4"
# model = "gpt-3.5-turbo"

total_cost = calculate_cost(prompt, generated_output, model)
# Print the locally estimated value computed on the line above, not the
# `cost` variable left over from the earlier API call.
print(f"Cost of the prompt: ${total_cost:.4f}")

Cost of the prompt: $0.0233
