# Exploring Constrained Decoding

Constrained decoding is a method that enforces structured outputs from LLMs. It works at the logits level: the model is only allowed to generate specific tokens, in a specific order.

It's different from JSON mode, where LLMs are fine-tuned to generate accurate JSON.
## Requirements
- We need access to the logits, either from the API provider or by using self-hosted LLMs.
- OpenAI supports this through the `logprobs` and `logit_bias` request parameters.

In [8]:
from openai import OpenAI
import tiktoken

First, let's see how effective logits are.

In [9]:
# Inspect how the sample sentence is split into token ids.
encoder = tiktoken.encoding_for_model("gpt-4o-mini")
sentence = "Once upon a time."
token_ids = encoder.encode(sentence)
print(token_ids)

# Decode id 1058 back to text, then encode "time" without and with a
# leading space to compare the ids of the two spellings.
token = encoder.decode([1058])
print(token)
for spelling in ("time", " time"):
    print(encoder.encode(spelling))

[18049, 7557, 261, 1058, 13]
 time
[4580]
[1058]


In [10]:
# Chat prompt: the system message asks the model to complete the user's
# unfinished sentence in a single sentence.
system_prompt = "Your job is to finish user's sentences. Keep it to a single sentence."
user_message = "Once upon a"
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_message),
]


In [11]:
from pprint import pprint
# Baseline: no restrictions on which tokens the model may generate.
client = OpenAI()
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    logprobs=True  # also return the log probability of each generated token
)
# Print the completed sentence, then the per-token logprob entries.
pprint(resp.choices[0].message.content)
pprint(resp.choices[0].logprobs.content)

('time, in a faraway land, there lived a brave young knight who dreamed of '
 'adventure.')
[ChatCompletionTokenLogprob(token='time', bytes=[116, 105, 109, 101], logprob=-0.00015633940347470343, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=',', bytes=[44], logprob=-0.20142541825771332, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' in', bytes=[32, 105, 110], logprob=-0.010051299817860126, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' a', bytes=[32, 97], logprob=0.0, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' far', bytes=[32, 102, 97, 114], logprob=-1.1378127336502075, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='away', bytes=[97, 119, 97, 121], logprob=-0.005545004736632109, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' land', bytes=[32, 108, 97, 110, 100], logprob=-0.3132808804512024, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=',', bytes=[44], logprob=-0.0007227989844977856, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='

In [15]:
# Same request, but with the word "time" banned. The word can appear as two
# different tokens: " time" with a leading space (id 1058) and "time" on its
# own (id 4580), so both ids are given a bias of -100.
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    logprobs=True,
    logit_bias={1058:-100, 4580: -100} # token id -> bias; -100 blocks the token
)
# Print the completed sentence, then the per-token logprob entries.
pprint(resp.choices[0].message.content)
pprint(resp.choices[0].logprobs.content)

('a midnight dreary, a weary traveler stumbled upon an ancient forest filled '
 'with secrets.')
[ChatCompletionTokenLogprob(token='a', bytes=[97], logprob=-7.625537872314453, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' midnight', bytes=[32, 109, 105, 100, 110, 105, 103, 104, 116], logprob=-15.375000953674316, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' dre', bytes=[32, 100, 114, 101], logprob=-0.005293054040521383, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='ary', bytes=[97, 114, 121], logprob=-5.512236498361744e-07, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=',', bytes=[44], logprob=0.0, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' a', bytes=[32, 97], logprob=-0.4927191436290741, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' weary', bytes=[32, 119, 101, 97, 114, 121], logprob=-2.674888849258423, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' traveler', bytes=[32, 116, 114, 97, 118, 101, 108, 101, 114], logprob=-0.000865249

In [41]:
# Case two: increase the likelihood of generating a chosen word.
# The system prompt hints at cats and birds.
system_prompt = "Guess what user might like. for one a cat also birds"
user_message = "which pet do i like?"
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_message),
]
# Let's block "cat" and increase the chance of "dog": print the token ids of
# each spelling (singular/plural, with and without a leading space).
for word in ("cat", " cat", "cats", " cats", " dog", "dog", " dogs", "dogs"):
    print(encoder.encode(word))

[8837]
[9059]
[70782]
[28854]
[6446]
[30146]
[16798]
[154045]


In [42]:
# Boost the "dog" tokens and block the "cat" tokens, using the ids printed by
# the encoder:
#   +10  : 6446 (" dog"), 30146 ("dog")
#   -100 : 8837 ("cat"), 9059 (" cat"), 70782 ("cats"), 28854 (" cats")
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    logprobs=True,
    logit_bias={
        6446: 10,
        30146: 10,
        8837: -100,
        9059:-100,
        70782: -100,
        28854:-100
    } # a boost of 1 is usually enough; 5 enforces it more strongly for this case.
      # NOTE(review): the original note says 10 "will break things", yet 10 is
      # the value used here -- confirm whether this over-steering is intended.
)
# Print the answer, then the per-token logprob entries.
pprint(resp.choices[0].message.content)
pprint(resp.choices[0].logprobs.content)

('Based on your mention of liking a dog, a dog would likely be a pet you '
 'enjoy! If you have a dog, you might also appreciate dog-related activities '
 'or dog breeds.')
[ChatCompletionTokenLogprob(token='Based', bytes=[66, 97, 115, 101, 100], logprob=-0.03236684203147888, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' on', bytes=[32, 111, 110], logprob=0.0, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' your', bytes=[32, 121, 111, 117, 114], logprob=-0.10628903657197952, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' mention', bytes=[32, 109, 101, 110, 116, 105, 111, 110], logprob=-0.03819664195179939, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' of', bytes=[32, 111, 102], logprob=-6.635164754698053e-05, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' liking', bytes=[32, 108, 105, 107, 105, 110, 103], logprob=-0.5692939162254333, top_logprobs=[]),
 ChatCompletionTokenLogprob(token=' a', bytes=[32, 97], logprob=-3.1077194213867188, top_logprobs=[]