# Playing with Temperature and Top-p in Open AI's API

This Jupyter Notebook lets you experiment with the logprob values returned by OpenAI's API. It is a companion to the blog post [Playing with Temperature and Top-p in Open AI's API](https://lundgren.io/posts/playing-with-temperature-and-top-p-in-open-ais-api/).

In [1]:
from dotenv import load_dotenv
import openai
import math
import pandas as pd
import os

# Optional fallback: put a valid Open AI API key here, or leave it as None and
# define OPENAI_API_KEY in the .env file (or the environment) instead.
open_ai_api_key = None

# Load the .env file first so that a key defined there already counts as
# present when the environment is inspected below.
load_dotenv()

# Fail early when neither the environment nor the fallback provides a key.
if not ("OPENAI_API_KEY" in os.environ or open_ai_api_key):
    raise Exception(
        "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable or provide a valid API key."
    )

# An existing environment value always wins; the fallback is only written
# when the variable is absent.
os.environ.setdefault("OPENAI_API_KEY", open_ai_api_key)

Set `logprobs` to `True` and `top_logprobs` to the number of candidate tokens you want returned (between 1 and 20). Optionally, remove `max_tokens=1` to generate more than one token; the rest of the code only reads the first generated token, so it would need to be updated as well.

In [2]:
def fetch_logprobs(prompt, model="gpt-4o", top_logprobs=10):
    """Request a single next token and return its most likely candidates.

    The prompt is sent as the only message of the conversation (using the
    "system" role) and generation is capped at one token, so the response
    carries exactly one position worth of log probabilities.

    Args:
        prompt: Text used as the content of the system message.
        model: Name of the chat model to query.
        top_logprobs: How many of the most likely candidate tokens to request
            for the generated position. Defaults to 10, the value that was
            previously hard-coded; the API caps this at 20.

    Returns:
        The ``top_logprobs`` list attached to the first generated token of
        the first choice in the response.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": prompt,
            },
        ],
        logprobs=True,
        top_logprobs=top_logprobs,
        max_tokens=1,
    )

    # Only one token is generated (max_tokens=1), so content[0] is the sole
    # position available.
    return response.choices[0].logprobs.content[0].top_logprobs

This function applies Temperature scaling and Top-p filtering to the raw log probabilities returned by `fetch_logprobs`, and returns a pandas DataFrame that can be inspected.

In [3]:
def apply_temperature_top_p(logprobs, temperature=1.0, top_p=1.0):
    """Simulate Temperature scaling and Top-p filtering on token logprobs.

    Args:
        logprobs: Iterable of objects exposing ``token`` and ``logprob``
            attributes. Any order is accepted; entries are ranked from most
            to least likely before filtering.
        temperature: Non-negative sampling temperature. ``0`` is treated as
            greedy selection: the most likely token receives all of the
            probability mass.
        top_p: Cumulative probability threshold for nucleus filtering.

    Returns:
        A pandas DataFrame with one row per token (most likely first) and the
        columns "Token", "Original Probability", "After Temperature",
        "Cumulative Probability", "Top-p Status" and "Final Probability".

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")

    # Top-p walks the tokens from most to least likely, so rank them first.
    # sorted() is stable, so input that already arrives in descending order
    # keeps exactly that order.
    entries = sorted(logprobs, key=lambda entry: entry.logprob, reverse=True)
    tokens = [entry.token for entry in entries]
    original_probs = [math.exp(entry.logprob) for entry in entries]

    # 1-2. Apply Temperature scaling and normalize so the values sum to 1
    if not entries:
        temperature_probs = []
    elif temperature == 0:
        # Dividing by zero is undefined; the limit of the scaled distribution
        # puts all mass on the most likely token.
        temperature_probs = [1.0] + [0.0] * (len(entries) - 1)
    else:
        scaled = [entry.logprob / temperature for entry in entries]
        # Shift by the largest value before exponentiating. The normalized
        # result is mathematically unchanged, but very low temperatures can
        # no longer underflow every weight to zero.
        peak = scaled[0]
        weights = [math.exp(value - peak) for value in scaled]
        total_weight = sum(weights)
        temperature_probs = [weight / total_weight for weight in weights]

    # 3. Apply Top-p (nucleus) filtering
    cumulative_prob = 0.0
    cumulative_probs = []
    top_p_filter_status = []
    for prob in temperature_probs:
        # The decision uses the mass accumulated *before* this token, so the
        # token that crosses the threshold is itself still kept.
        top_p_filter_status.append(cumulative_prob <= top_p)
        cumulative_prob += prob
        cumulative_probs.append(cumulative_prob)

    # 4. Re-normalize probabilities of the tokens that passed the Top-p filter
    kept_total = sum(
        prob
        for prob, kept in zip(temperature_probs, top_p_filter_status)
        if kept
    )
    final_probs = [
        prob / kept_total if kept and prob > 0 else 0.0
        for prob, kept in zip(temperature_probs, top_p_filter_status)
    ]

    return pd.DataFrame(
        {
            "Token": tokens,
            "Original Probability": original_probs,
            "After Temperature": temperature_probs,
            "Cumulative Probability": cumulative_probs,
            "Top-p Status": top_p_filter_status,
            "Final Probability": final_probs,
        }
    )

Finally, alter the prompt and the scenarios to see the different probabilities.

In [4]:
# Prompt whose next-token candidates are fetched once and then reused by
# every scenario below.
prompt = "The quick brown fox jumps over the"
logprobs = fetch_logprobs(prompt=prompt)


# Low temperature combined with a tight nucleus.
print("Scenario 1: Temperature=0.2, Top_p=0.5")
df1 = apply_temperature_top_p(logprobs, top_p=0.5, temperature=0.2)
display(df1)


# High temperature combined with a wide nucleus.
print("\nScenario 2: Temperature=1.5, Top_p=0.95")
df2 = apply_temperature_top_p(logprobs, top_p=0.95, temperature=1.5)
display(df2)


Scenario 1: Temperature=0.2, Top_p=0.5


Unnamed: 0,Token,Original Probability,After Temperature,Cumulative Probability,Top-p Status,Final Probability
0,lazy,0.943006,0.9999997,1.0,True,1.0
1,The,0.04695,3.059023e-07,1.0,False,0.0
2,Sorry,0.004948,3.978958e-12,1.0,False,0.0
3,I'm,0.00086,6.30511e-16,1.0,False,0.0
4,Lazy,0.00067,1.806444e-16,1.0,False,0.0
5,l,0.000406,1.4828200000000002e-17,1.0,False,0.0
6,...,0.000316,4.248358e-18,1.0,False,0.0
7,the,0.000316,4.248358e-18,1.0,False,0.0
8,"""",0.000246,1.217175e-18,1.0,False,0.0
9,What,0.000246,1.217175e-18,1.0,False,0.0



Scenario 2: Temperature=1.5, Top_p=0.95


Unnamed: 0,Token,Original Probability,After Temperature,Cumulative Probability,Top-p Status,Final Probability
0,lazy,0.943006,0.828892,0.828892,True,0.857977
1,The,0.04695,0.112178,0.941071,True,0.116115
2,Sorry,0.004948,0.02503,0.966101,True,0.025909
3,I'm,0.00086,0.007795,0.973896,False,0.0
4,Lazy,0.00067,0.006598,0.980494,False,0.0
5,l,0.000406,0.004728,0.985221,False,0.0
6,...,0.000316,0.004002,0.989223,False,0.0
7,the,0.000316,0.004002,0.993225,False,0.0
8,"""",0.000246,0.003387,0.996613,False,0.0
9,What,0.000246,0.003387,1.0,False,0.0
