# IN_SYS - SW13 Exercise 4

## Showing probabilities of the next token

In [2]:
from openai import OpenAI
import math
import json
import os
from dotenv import load_dotenv

# Call python-dotenv's loader first (it picks up variables from a local .env
# file, if one exists), then build the API client from the OPENAI_API_KEY
# environment variable.
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [14]:
# Name of the chat model that get_answer_from_openai passes to the API.
model = "gpt-4o-mini"

def get_answer_from_openai(user_prompt):
    """
    Send a prompt to the OpenAI chat API and print the reply with token probabilities.

    The request uses a fixed "You are a poet." system message, a sampling
    temperature of 0.8, and asks for log probabilities including the top 5
    candidates per token position. The prompt, the reply text and a per-token
    probability breakdown are printed to stdout.

    Args:
    user_prompt: The text sent as the user message.

    Returns:
    The reply text of the first choice (whatever the API put in
    message.content, which may be None).
    """
    completion = client.chat.completions.create(
        model=model,
        logprobs=True,
        top_logprobs=5,
        temperature=0.8,
        messages=[
            {"role": "system", "content": "You are a poet."},
            {"role": "user", "content": user_prompt},
        ],
    )
    answer = completion.choices[0].message.content    # completion is the response object
    print(f"User Prompt: {user_prompt}")
    print(f"ChatGPT response: \n{answer}\n")

    # Work on a plain dictionary because print_token_probabilities indexes
    # the entries by key.
    completion_dict = completion.to_dict()
    # Fall back to an empty list when the response carries no log
    # probabilities, so the printing step does not fail on a missing/None entry.
    logprobs = completion_dict["choices"][0].get("logprobs") or {}
    logprobs_content = logprobs.get("content") or []
    # Log probabilities are not printed directly; they are converted to
    # percentages by the printing helper.
    print_token_probabilities(logprobs_content)

    return answer

def format_prob(logprob):
    """
    Turn a natural-log probability into a percentage string.

    Args:
    logprob: Natural logarithm of a probability (0 corresponds to 100%).

    Returns:
    The probability as a percentage with two decimals, e.g. "50.00%".
    """
    # exp() undoes the logarithm; very negative inputs simply underflow to 0.
    percentage = math.exp(logprob) * 100
    return "{:.2f}%".format(percentage)

def print_token_probabilities(logprobs_content):
    """
    Print every generated token with its probability and the competing candidates.

    One line is printed per token: the chosen token in bold blue followed by
    its probability, then, in square brackets and grey, each entry of
    "top_logprobs" whose token text differs from the chosen token.

    Args:
    logprobs_content: Iterable of dicts with the keys "token", "logprob" and
        "top_logprobs" (a list of dicts with "token" and "logprob").
    """
    # ANSI escape codes
    RESET = "\033[0m"
    BOLD = "\033[1m"
    BLUE = "\033[34m"
    GREY = "\033[90m"

    lines = []

    for token_data in logprobs_content:
        chosen_token = token_data["token"]
        chosen_prob_str = format_prob(token_data["logprob"])

        # Chosen token: bold + blue for the token, blue for the probability
        chosen_str = f"{BOLD}{BLUE}{chosen_token}{RESET} {BLUE}(p={chosen_prob_str}){RESET}"

        # Alternatives (everything except the chosen token): bold + grey for
        # the token, grey for the probability. A missing or empty
        # "top_logprobs" simply yields no alternatives.
        alternatives = [
            f"{BOLD}{GREY}{opt['token']}{RESET} {GREY}(p={format_prob(opt['logprob'])}){RESET}"
            for opt in token_data.get("top_logprobs") or []
            if opt["token"] != chosen_token
        ]

        if alternatives:
            lines.append(f"{chosen_str} [{', '.join(alternatives)}]")
        else:
            lines.append(chosen_str)

    # Join with newlines only: joining newline-terminated entries with a space
    # would prefix every line after the first with a stray blank.
    print("\n".join(lines))

# Demo call: ask for a haiku about snowy Luzern; the function prints the reply
# and the per-token probabilities.
get_answer_from_openai("Write a poem about the city of snowy Luzern in the form of haiku. ")


User Prompt: Write a poem about the city of snowy Luzern in the form of haiku. 
ChatGPT response: 
Snow blankets the streets,  
Lake reflects the mountains' grace,  
Luzern sleeps in white.  

[1m[34mSnow[0m [34m(p=96.83%)[0m [[1m[90mL[0m [90m(p=1.08%)[0m, [1m[90mWinter[0m [90m(p=0.58%)[0m, [1m[90mSilent[0m [90m(p=0.51%)[0m, [1m[90mWh[0m [90m(p=0.31%)[0m]
 [1m[34m blankets[0m [34m(p=67.93%)[0m [[1m[90mflakes[0m [90m(p=22.05%)[0m, [1m[90m dr[0m [90m(p=3.38%)[0m, [1m[90m whispers[0m [90m(p=2.63%)[0m, [1m[90m clo[0m [90m(p=1.60%)[0m]
 [1m[34m the[0m [34m(p=99.83%)[0m [[1m[90m rooft[0m [90m(p=0.12%)[0m, [1m[90m still[0m [90m(p=0.01%)[0m, [1m[90m each[0m [90m(p=0.01%)[0m, [1m[90m Luc[0m [90m(p=0.01%)[0m]
 [1m[34m streets[0m [34m(p=14.94%)[0m [[1m[90m lake[0m [90m(p=40.61%)[0m, [1m[90m roofs[0m [90m(p=19.18%)[0m, [1m[90m town[0m [90m(p=4.85%)[0m, [1m[90m hills[0m [90m(p=3.33%)[0m]
 [1m[34m,