In [1]:
import os
from openai import OpenAI
import openai
from pydantic_settings import BaseSettings, SettingsConfigDict
from pathlib import Path

In [2]:
ENV_PATH = Path("../.envs/dev.env")

In [4]:
class Settings(BaseSettings):
    OPENAI_API_KEY: str
    HF_TOKEN: str
    LLAMAPARSE_API_TOKEN: str

    model_config = SettingsConfigDict(
        env_file=ENV_PATH if ENV_PATH.exists() else None,
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False
    )


In [5]:
settings = Settings()

In [6]:
client = OpenAI(api_key=settings.OPENAI_API_KEY,)

In [7]:
# --- Block 2: First Turn - Asking the Initial Question ---
print("\n--- Starting Conversation: Turn 1 ---")

# Define the system message (persona) for the AI Tutor
system_message = {"role": "system", "content": "You are a helpful AI Tutor explaining Large Language Model concepts simply."}

# Define the user's first question
user_message_1 = {"role": "user", "content": "Can you explain what 'tokens' are in the context of LLMs, like I'm new to this?"}

# Create the messages list for the *first* API call
messages_history = [
    system_message,
    user_message_1
]

print(f"Sending messages: {messages_history}")


--- Starting Conversation: Turn 1 ---
Sending messages: [{'role': 'system', 'content': 'You are a helpful AI Tutor explaining Large Language Model concepts simply.'}, {'role': 'user', 'content': "Can you explain what 'tokens' are in the context of LLMs, like I'm new to this?"}]


In [8]:
# Define parameters for this call
MODEL = "gpt-4o-mini"
TEMPERATURE = 0.5 # Slightly creative but still grounded explanation
MAX_TOKENS = 150  # Limit the length of the explanation
SEED = 123        # Make this explanation reproducible

In [9]:
try:
    print(f"\nMaking API call to {MODEL}...")
    # Use the client object's method to create a chat completion
    completion_1 = client.chat.completions.create(
        model=MODEL,
        messages=messages_history,
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        seed=SEED
    )
    print("API call successful.")

    # --- Process the response from the first turn ---
    # Extract the assistant's reply content
    assistant_response_1 = completion_1.choices[0].message.content
    # Extract the full message object to add to history later
    assistant_message_1 = completion_1.choices[0].message

    print("\nAI Tutor (Turn 1):")
    print(assistant_response_1)

    # Print token usage for this call
    usage_1 = completion_1.usage
    print(f"\nToken Usage (Turn 1): Prompt={usage_1.prompt_tokens}, Completion={usage_1.completion_tokens}, Total={usage_1.total_tokens}")
    finish_reason_1 = completion_1.choices[0].finish_reason
    print(f"Finish Reason: {finish_reason_1}")

except openai.APIError as e:
    # Handle API errors (e.g., server issues, rate limits)
    print(f"OpenAI API returned an API Error: {e}")
except openai.AuthenticationError as e:
    # Handle Authentication errors (e.g., invalid API key)
    print(f"OpenAI Authentication Error: {e}")
except Exception as e:
    # Handle other potential errors
    print(f"An unexpected error occurred: {e}")


Making API call to gpt-4o-mini...
API call successful.

AI Tutor (Turn 1):
Sure! In the context of Large Language Models (LLMs), a "token" is a basic unit of text that the model processes. Tokens can be words, parts of words, or even punctuation marks. 

Think of it this way: when you read a sentence, you break it down into smaller pieces to understand it better. Similarly, LLMs break down text into tokens to analyze and generate language.

For example, the sentence "I love apples!" might be broken down into the following tokens:
- "I"
- "love"
- "apples"
- "!"

In some cases, a token might represent just a part of a word, especially for longer or complex words. For instance, the word "unhappiness"

Token Usage (Turn 1): Prompt=46, Completion=150, Total=196
Finish Reason: length


In [18]:
# --- Block 3: Second Turn - Asking a Follow-up Question ---
print("\n--- Continuing Conversation: Turn 2 ---")

# Assume the first turn was successful and we have 'assistant_message_1'
# Define the user's second question, referencing the previous explanation
user_message_2 = {"role": "user", "content": "Thanks! So, based on your explanation, are common words like 'the' or 'is' usually single tokens?"}

# --- CRITICAL STEP: Update the message history ---
# Append the assistant's *previous* response to the history
messages_history.append(assistant_message_1)
# Append the user's *new* question to the history
messages_history.append(user_message_2)

print(f"\nSending updated messages: {messages_history}") # Notice how the list has grown


--- Continuing Conversation: Turn 2 ---

Sending updated messages: [{'role': 'system', 'content': 'You are a helpful AI Tutor explaining Large Language Model concepts simply.'}, {'role': 'user', 'content': "Can you explain what 'tokens' are in the context of LLMs, like I'm new to this?"}, ChatCompletionMessage(content='Sure! In the context of Large Language Models (LLMs), a "token" is a basic unit of text that the model processes. Tokens can be words, parts of words, or even punctuation marks. \n\nThink of it this way: when you read a sentence, you break it down into smaller pieces to understand it better. Similarly, LLMs break down text into tokens to analyze and generate language.\n\nFor example, the sentence "I love apples!" might be broken down into the following tokens:\n- "I"\n- "love"\n- "apples"\n- "!"\n\nIn some cases, a token might represent just a part of a word, especially for longer or complex words. For instance, the word "unhappiness"', refusal=None, role='assistant', a

In [19]:
# Parameters for the second call (could be the same or different)
# Let's make it slightly more deterministic for a factual answer
TEMPERATURE_2 = 0.2
MAX_TOKENS_2 = 100
# Using the same seed ensures the *entire conversation flow* is reproducible if inputs are identical
SEED_2 = 123

In [20]:
try:
    print(f"\nMaking API call to {MODEL} (Turn 2)...")
    completion_2 = client.chat.completions.create(
        model=MODEL,
        messages=messages_history, # Send the *full* history
        temperature=TEMPERATURE_2,
        max_tokens=MAX_TOKENS_2,
        seed=SEED_2
    )
    print("API call successful.")

    # --- Process the response from the second turn ---
    assistant_response_2 = completion_2.choices[0].message.content
    # We don't strictly need to save assistant_message_2 unless continuing the conversation

    print("\nAI Tutor (Turn 2):")
    print(assistant_response_2)

    # Print token usage for this call
    usage_2 = completion_2.usage
    print(f"\nToken Usage (Turn 2): Prompt={usage_2.prompt_tokens}, Completion={usage_2.completion_tokens}, Total={usage_2.total_tokens}")
    # Note: prompt_tokens for turn 2 will be larger as it includes the history from turn 1.
    finish_reason_2 = completion_2.choices[0].finish_reason
    print(f"Finish Reason: {finish_reason_2}")

except openai.APIError as e:
    print(f"OpenAI API returned an API Error: {e}")
except openai.AuthenticationError as e:
    print(f"OpenAI Authentication Error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Making API call to gpt-4o-mini (Turn 2)...
API call successful.

AI Tutor (Turn 2):
Yes, that's correct! Common words like "the," "is," "and," and other short, frequently used words are typically represented as single tokens in most language models. These words are so common that they are often treated as standalone units.

However, it's important to note that the way text is tokenized can vary depending on the specific model and its tokenization method. Some models might use different strategies that could break down words differently, but generally, common words tend to be single tokens.

Token Usage (Turn 2): Prompt=228, Completion=99, Total=327
Finish Reason: stop


In [22]:
# # Example usage object from completion_1 or completion_2:
print(usage_1.prompt_tokens)  # -> number of input tokens
print(usage_1.completion_tokens) # -> number of output tokens
print(usage_1.total_tokens) # -> sum of both

46
150
196


In [26]:
# --- Block 4: Cost Calculation Function & Example ---

def calculate_cost(usage, input_price_per_mil, output_price_per_mil):
    """Calculates the cost of an API call based on token usage and prices.

    Args:
        usage: The usage object from the OpenAI completion response
               (e.g., completion.usage). It should have attributes
               'prompt_tokens' and 'completion_tokens'.
        input_price_per_mil: Cost in USD per 1 million input tokens.
        output_price_per_mil: Cost in USD per 1 million output tokens.

    Returns:
        The total cost in USD for the API call, or None if usage is invalid.
    """
    if not usage or not hasattr(usage, 'prompt_tokens') or not hasattr(usage, 'completion_tokens'):
        print("Warning: Invalid usage object provided for cost calculation.")
        return None

    input_cost = (usage.prompt_tokens / 1_000_000) * input_price_per_mil
    output_cost = (usage.completion_tokens / 1_000_000) * output_price_per_mil
    total_cost = input_cost + output_cost
    return total_cost

In [23]:
# --- Current Prices (April 2025) for GPT-4o-mini ---
PRICE_INPUT_PER_MIL = 0.60
PRICE_OUTPUT_PER_MIL = 2.40

print(f"\n--- Cost Calculations (GPT-4o-mini, April 2025 Rates) ---")
print(f"Prices: Input=${PRICE_INPUT_PER_MIL:.2f}/1M, Output=${PRICE_OUTPUT_PER_MIL:.2f}/1M")


--- Cost Calculations (GPT-4o-mini, April 2025 Rates) ---
Prices: Input=$0.60/1M, Output=$2.40/1M


In [27]:
# Calculate cost for Turn 1 (assuming completion_1 and usage_1 exist from Block 2)
try:
    if 'usage_1' in locals(): # Check if usage_1 variable exists
         cost_1 = calculate_cost(usage_1, PRICE_INPUT_PER_MIL, PRICE_OUTPUT_PER_MIL)
         if cost_1 is not None:
              print(f"\nCost for Turn 1:")
              print(f"  Prompt Tokens: {usage_1.prompt_tokens}, Completion Tokens: {usage_1.completion_tokens}")
              print(f"  Total Cost: ${cost_1:.8f}")
    else:
         print("\nSkipping Turn 1 cost calculation (usage_1 not found).")

    # Calculate cost for Turn 2 (assuming completion_2 and usage_2 exist from Block 3)
    if 'usage_2' in locals(): # Check if usage_2 variable exists
        cost_2 = calculate_cost(usage_2, PRICE_INPUT_PER_MIL, PRICE_OUTPUT_PER_MIL)
        if cost_2 is not None:
            print(f"\nCost for Turn 2:")
            print(f"  Prompt Tokens: {usage_2.prompt_tokens}, Completion Tokens: {usage_2.completion_tokens}")
            print(f"  Total Cost: ${cost_2:.8f}")
    else:
         print("\nSkipping Turn 2 cost calculation (usage_2 not found).")

    # Calculate total conversation cost
    if 'cost_1' in locals() and 'cost_2' in locals() and cost_1 is not None and cost_2 is not None:
        total_conversation_cost = cost_1 + cost_2
        print(f"\nTotal Conversation Cost (Turn 1 + Turn 2): ${total_conversation_cost:.8f}")

except NameError as e:
    print(f"\nCould not calculate costs, a required variable is missing: {e}")
except Exception as e:
    print(f"An error occurred during cost calculation: {e}")


Cost for Turn 1:
  Prompt Tokens: 46, Completion Tokens: 150
  Total Cost: $0.00038760

Cost for Turn 2:
  Prompt Tokens: 228, Completion Tokens: 99
  Total Cost: $0.00037440

Total Conversation Cost (Turn 1 + Turn 2): $0.00076200
