In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Single source of truth for the checkpoint so the model and tokenizer always match.
MODEL_NAME = 'Qwen/Qwen2.5-0.5B-Instruct'

# The model is moved to `device` and put in eval mode because it is only used for inference here.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


# Prompts are written directly in the chat markup (<|im_start|> ... <|im_end|>) and end with an
# open assistant turn, so the model's continuation is the assistant reply.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n'
)
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, '
    '"sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n'
)

# Generation stops at the end-of-turn marker used in the prompts above. Its id is looked up in
# the tokenizer instead of being hard-coded, so it stays correct if MODEL_NAME changes
# (for this checkpoint the lookup is expected to give 151645 -- TODO confirm on first run).
eos_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')
# Upper bound on the number of newly generated tokens per call.
max_length = 1000


In [5]:
def greedy_decode(input_text, max_length=max_length, eos_token_id=eos_token_id):
    """Continue ``input_text`` by repeatedly appending the single most likely next token.

    Args:
        input_text: Prompt string; tokenized with the notebook-level ``tokenizer``.
        max_length: Maximum number of NEW tokens appended to the prompt.
        eos_token_id: Token id that stops generation; it is kept in the output.

    Returns:
        The decoded prompt followed by the generated continuation, with special
        tokens left in place.
    """
    encoding = tokenizer(input_text, return_tensors='pt')
    generated = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        for _ in range(max_length):
            logits = model(input_ids=generated, attention_mask=attention_mask).logits
            # Only the prediction after the last position is needed; argmax already
            # lives on the model's device, so no extra transfer is required.
            next_token_id = torch.argmax(logits[0, -1]).view(1, 1)
            generated = torch.cat([generated, next_token_id], dim=1)
            # Keep the mask the same length as the sequence: every generated token is a
            # real (non-padding) token, so it is marked with 1.
            attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=1)
            if next_token_id.item() == eos_token_id:
                break
    return tokenizer.decode(generated[0].cpu(), skip_special_tokens=False)

# Greedy decoding always picks the argmax token, so a single run per prompt is enough.
story, json_output = (
    greedy_decode(prompt) for prompt in (input_text_hedgehog, input_text_json)
)

# Story first, then the JSON answer (both include the original prompt text).
print("Story about Sonic:\n", story)
print("\nGenerated JSON:\n", json_output)

Story about Sonic:
 <|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were grateful for the help they received from Sonic.

Sonic's story became

In [6]:
def sample_decode(input_text, n_samples=3, max_length=max_length, eos_token_id=eos_token_id):
    """Draw several independent continuations of ``input_text`` by plain sampling.

    Each next token is sampled from the model's full softmax distribution
    (no temperature, top-k or top-p adjustment).

    Args:
        input_text: Prompt string; tokenized with the notebook-level ``tokenizer``.
        n_samples: Number of independent continuations to generate.
        max_length: Maximum number of NEW tokens appended per continuation.
        eos_token_id: Token id that stops a continuation; it is kept in the output.

    Returns:
        A list of ``n_samples`` decoded strings (prompt + continuation, special
        tokens left in place).
    """
    # The prompt is the same for every sample, so tokenize it once up front.
    encoding = tokenizer(input_text, return_tensors='pt')
    prompt_ids = encoding['input_ids'].to(device)
    prompt_mask = encoding['attention_mask'].to(device)

    results = []
    with torch.no_grad():
        for _ in range(n_samples):
            # torch.cat below always builds new tensors, so the prompt tensors are never mutated.
            generated, attention_mask = prompt_ids, prompt_mask
            for _step in range(max_length):
                logits = model(input_ids=generated, attention_mask=attention_mask).logits
                probs = torch.softmax(logits[0, -1], dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1).view(1, 1)
                generated = torch.cat([generated, next_token_id], dim=1)
                # Keep the mask aligned with the growing sequence (new tokens are never padding).
                attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=1)
                if next_token_id.item() == eos_token_id:
                    break
            results.append(tokenizer.decode(generated[0].cpu(), skip_special_tokens=False))
    return results


# Three sampled stories: each call to sample_decode draws fresh random continuations.
print("Stories about Sonic:")
sampled_stories = sample_decode(input_text_hedgehog, n_samples=3)
for i, story in enumerate(sampled_stories, start=1):
    print(f"\n--- Story {i} ---\n{story}")


# Same experiment for the structured (JSON) prompt.
print("\nGenerated JSONs:")
sampled_jsons = sample_decode(input_text_json, n_samples=3)
for i, js in enumerate(sampled_jsons, start=1):
    print(f"\n--- JSON {i} ---\n{js}")

Stories about Sonic:

--- Story 1 ---
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Nestled in a patch of evergreen trees, nestled within the vast expanse of eucalyptus that dotted the green expanse of the countryside, was Sonic, a tiny hedgehog with a sidekick named Xiao. They were inseparable, except for their tiny nuclei, which only touched the surface of their dimensional chessboard. 

One bright sunny morning, the cereal jars in the hedgehog haven froze solid with cereal lumps and jellies, sealed in a manner the creatures lovingly tilted never to be opened. Earlier that day, Sonic woke up to find it all before them - their promising dreams of small-truck performance racing suddenly reduced to scattering disorganized crumbs. 


Not knowing he was in for something unexpected, Sonic found himself heart-broken - the joyful cartoon 

In [7]:
def sample_with_temperature(input_text, temperature=1.0, max_length=max_length, eos_token_id=eos_token_id):
    """Continue ``input_text`` by sampling from the temperature-scaled distribution.

    Logits are divided by ``temperature`` before the softmax: values below 1
    sharpen the distribution, values above 1 flatten it.

    Args:
        input_text: Prompt string; tokenized with the notebook-level ``tokenizer``.
        temperature: Non-negative scaling factor. ``0`` is treated as the limiting
            case and falls back to greedy (argmax) decoding instead of dividing by zero.
        max_length: Maximum number of NEW tokens appended to the prompt.
        eos_token_id: Token id that stops generation; it is kept in the output.

    Returns:
        The decoded prompt followed by the generated continuation, with special
        tokens left in place.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")

    encoding = tokenizer(input_text, return_tensors='pt')
    generated = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        for _ in range(max_length):
            next_token_logits = model(input_ids=generated, attention_mask=attention_mask).logits[0, -1]
            if temperature == 0:
                # Dividing by zero would produce inf/nan probabilities; argmax is the limit.
                next_token_id = torch.argmax(next_token_logits).view(1, 1)
            else:
                probs = torch.softmax(next_token_logits / temperature, dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1).view(1, 1)
            generated = torch.cat([generated, next_token_id], dim=1)
            # Keep the mask aligned with the growing sequence (new tokens are never padding).
            attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=1)
            if next_token_id.item() == eos_token_id:
                break
    return tokenizer.decode(generated[0].cpu(), skip_special_tokens=False)

# From near-greedy (0.001) to almost uniform sampling (10.0).
temperatures = [0.001, 0.1, 0.5, 1.0, 10.0]

# One generation per temperature for the story prompt.
print('--- Hedgehog Stories by Temperature ---')
for temp in temperatures:
    story = sample_with_temperature(input_text_hedgehog, temp)
    print(f'\n[Temperature: {temp}]\n{story}\n')

# Same sweep for the JSON prompt.
print('--- JSONs by Temperature ---')
for temp in temperatures:
    js = sample_with_temperature(input_text_json, temp)
    print(f'\n[Temperature: {temp}]\n{js}\n')

--- Hedgehog Stories by Temperature ---

[Temperature: 0.001]
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were grateful for the help they 

In [8]:
def nucleus_sample(input_text, temperature=1.0, top_p=0.9, max_length=max_length, eos_token_id=eos_token_id):
    """Continue ``input_text`` with nucleus (top-p) sampling.

    At every step the tokens are sorted by probability and the smallest prefix
    whose cumulative probability exceeds ``top_p`` is kept (the token that crosses
    the threshold is included). The next token is sampled from that prefix after
    renormalisation.

    Args:
        input_text: Prompt string; tokenized with the notebook-level ``tokenizer``.
        temperature: Non-negative logit scaling factor applied before the softmax.
            ``0`` falls back to greedy (argmax) decoding instead of dividing by zero.
        top_p: Cumulative-probability threshold of the nucleus. If the cumulative
            sum never exceeds it, the whole vocabulary is kept.
        max_length: Maximum number of NEW tokens appended to the prompt.
        eos_token_id: Token id that stops generation; it is kept in the output.

    Returns:
        The decoded prompt followed by the generated continuation, with special
        tokens left in place.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")

    encoding = tokenizer(input_text, return_tensors='pt')
    generated = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        for _ in range(max_length):
            next_token_logits = model(input_ids=generated, attention_mask=attention_mask).logits[0, -1]
            if temperature == 0:
                # Dividing by zero would produce inf/nan probabilities; argmax is the limit.
                next_token_id = torch.argmax(next_token_logits).view(1, 1)
            else:
                probs = torch.softmax(next_token_logits / temperature, dim=-1)
                sorted_probs, sorted_indices = torch.sort(probs, descending=True)
                cumulative_probs = torch.cumsum(sorted_probs, dim=0)

                # First sorted position whose cumulative mass exceeds top_p; keep it too.
                # When the top token alone exceeds top_p this is position 0, i.e. a
                # nucleus of size 1, so no separate special case is needed.
                over_threshold = torch.nonzero(cumulative_probs > top_p).flatten()
                if over_threshold.numel() > 0:
                    cutoff = int(over_threshold[0]) + 1
                else:
                    cutoff = sorted_probs.numel()

                nucleus_probs = sorted_probs[:cutoff]
                nucleus_probs = nucleus_probs / nucleus_probs.sum()
                # multinomial returns a position inside the nucleus; map it back to a vocab id.
                choice = torch.multinomial(nucleus_probs, num_samples=1)
                next_token_id = sorted_indices[:cutoff][choice].view(1, 1)

            generated = torch.cat([generated, next_token_id], dim=1)
            # Keep the mask aligned with the growing sequence (new tokens are never padding).
            attention_mask = torch.cat([attention_mask, attention_mask.new_ones((1, 1))], dim=1)
            if next_token_id.item() == eos_token_id:
                break
    return tokenizer.decode(generated[0].cpu(), skip_special_tokens=False)

# Every combination of {default, low} temperature with {wide, narrow} nucleus.
param_sets = [
    {"temperature": 1.0, "top_p": 0.9},
    {"temperature": 1.0, "top_p": 0.15},
    {"temperature": 0.5, "top_p": 0.9},
    {"temperature": 0.5, "top_p": 0.15},
]

# The dict keys match nucleus_sample's keyword names, so each set is unpacked directly.
print('--- Hedgehog Stories by Nucleus Sampling ---')
for i, params in enumerate(param_sets, start=1):
    story = nucleus_sample(input_text_hedgehog, **params)
    print(f'\n[{i}] temperature={params["temperature"]}, top_p={params["top_p"]}\n{story}\n')

# Same sweep for the JSON prompt.
print('--- JSONs by Nucleus Sampling ---')
for i, params in enumerate(param_sets, start=1):
    js = nucleus_sample(input_text_json, **params)
    print(f'\n[{i}] temperature={params["temperature"]}, top_p={params["top_p"]}\n{js}\n')

--- Hedgehog Stories by Nucleus Sampling ---

[1] temperature=1.0, top_p=0.9
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Sonic was a tiny, golden-tipped hedgehog with tiny scales and whiskers. He was very different from the other hedgehogs on the plains, and he was always mischievous and curious. One day, Sonic wandered into a bustling city where he was greeted by a group of friendly-looking guards. The guards were on high alert, but Sonic did not see it that way. He found a tiny red ball in the grass and began playing with it. Then he fell into a puddle and realized that he had forgotten to open his secret pocket calendar. He ran back to the guards, excited, to tell them about the ball, but they ignored him and let him continue his game. At night, Sonic sat in a tree and sang about how he would make the world better if he could, 

In [11]:
import torch.nn.functional as F

class BeamCandidate:
    """One hypothesis tracked during beam search.

    Attributes:
        tokens: Token ids stored with a leading batch axis, so ``tokens[0]`` is
            the sequence itself.
        score: Score accumulated for this hypothesis.
        finished: Flag telling whether the hypothesis is complete.
    """

    def __init__(self, tokens, score, finished):
        self.tokens, self.score, self.finished = tokens, score, finished

    def __len__(self):
        """Return the number of entries in the first row of ``tokens``."""
        sequence = self.tokens[0]
        return len(sequence)

def beam_search(input_text, num_beams=4, length_penalty=1.0, max_length=max_length, eos_token_id=eos_token_id):
    """Continue ``input_text`` with beam search and return the best hypothesis.

    At each step every live hypothesis is extended with its ``num_beams`` most
    likely next tokens. Extensions ending in ``eos_token_id`` move to the completed
    pool; of the rest, the ``num_beams`` with the highest summed log-probability
    stay live. The search stops when ``num_beams`` hypotheses have completed, when
    no live hypothesis is left, or when ``max_length`` new tokens have been generated.

    Completed hypotheses are ranked by ``score / generated_len ** length_penalty``,
    where ``generated_len`` counts only the tokens after the prompt, because the
    score only sums log-probabilities of generated tokens. If nothing completed
    within the budget, the live hypothesis with the highest raw score is returned.

    Args:
        input_text: Prompt string; tokenized with the notebook-level ``tokenizer``.
        num_beams: Beam width (must be at least 1).
        length_penalty: Exponent applied to the generated length when ranking
            completed hypotheses.
        max_length: Maximum number of NEW tokens per hypothesis. At least one token
            is always generated.
        eos_token_id: Token id that marks a hypothesis as complete; it is kept in
            the output.

    Returns:
        The decoded prompt followed by the best continuation, with special tokens
        left in place.

    Raises:
        ValueError: If ``num_beams`` is smaller than 1.
    """
    if num_beams < 1:
        raise ValueError(f"num_beams must be at least 1, got {num_beams}")

    encoding = tokenizer(input_text, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    prompt_len = input_ids.shape[1]

    def expand(tokens, base_score):
        """Return the ``num_beams`` best one-token extensions of ``tokens``."""
        with torch.no_grad():
            # A single unpadded sequence: every position is a real token.
            logits = model(input_ids=tokens, attention_mask=torch.ones_like(tokens)).logits
        logprobs = F.log_softmax(logits[0, -1, :], dim=-1)
        topk_logprobs, topk_indices = torch.topk(logprobs, num_beams)
        extensions = []
        for logprob, idx in zip(topk_logprobs, topk_indices):
            extended = torch.cat([tokens[0], idx.view(1)]).unsqueeze(0)
            finished = (idx.item() == eos_token_id)
            extensions.append(BeamCandidate(extended, base_score + logprob.item(), finished))
        return extensions

    completed = []
    candidates = []
    for cand in expand(input_ids, 0.0):
        (completed if cand.finished else candidates).append(cand)

    # The initial expansion already produced one token, so at most max_length - 1
    # further steps are allowed. This bound also guarantees termination when the
    # model never emits EOS on enough beams.
    for _ in range(max_length - 1):
        if len(completed) >= num_beams or not candidates:
            break
        live = []
        for cand in candidates:
            for new_cand in expand(cand.tokens, cand.score):
                (completed if new_cand.finished else live).append(new_cand)
        # Stable sort, best score first; only the top num_beams stay in the beam.
        live.sort(key=lambda c: c.score, reverse=True)
        candidates = live[:num_beams]

    if completed:
        def rank(c):
            # Normalise by generated length only: the prompt contributes nothing to the score.
            generated_len = c.tokens.shape[1] - prompt_len
            return c.score / (generated_len ** length_penalty)
        best = max(completed, key=rank)
    else:
        best = max(candidates, key=lambda c: c.score)
    return tokenizer.decode(best.tokens[0].cpu(), skip_special_tokens=False)

# Beam width 1 serves as the greedy baseline; the other sets vary width and length penalty.
param_sets = [
    {"num_beams": 1, "length_penalty": 1.0},
    {"num_beams": 4, "length_penalty": 1.0},
    {"num_beams": 4, "length_penalty": 0.5},
    {"num_beams": 4, "length_penalty": 2.0},
    {"num_beams": 8, "length_penalty": 1.0},
]

# The dict keys match beam_search's keyword names, so each set is unpacked directly.
print('--- Hedgehog Stories by Beam Search ---')
for i, params in enumerate(param_sets, start=1):
    story = beam_search(input_text_hedgehog, **params)
    print(f'\n[{i}] num_beams={params["num_beams"]}, length_penalty={params["length_penalty"]}\n{story}\n')

# Same sweep for the JSON prompt.
print('--- JSONs by Beam Search ---')
for i, params in enumerate(param_sets, start=1):
    js = beam_search(input_text_json, **params)
    print(f'\n[{i}] num_beams={params["num_beams"]}, length_penalty={params["length_penalty"]}\n{js}\n')

--- Hedgehog Stories by Beam Search ---

[1] num_beams=1, length_penalty=1.0
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were grateful for