# ***Task 1***

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the instruct checkpoint and put it in evaluation mode.
model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-0.5B-Instruct')
model = model.eval()
tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-0.5B-Instruct')
# Prefer the GPU when CUDA is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

def greedy_decode(model, tokenizer, input_text, max_new_tokens=1000, eos_token_id=151645):
    """Continue ``input_text`` by always picking the highest-scoring next token.

    The whole sequence generated so far is fed back to the model on every
    step (no key/value cache is used), and the arg-max of the last-position
    logits is appended.

    Args:
        model: Callable accepting ``input_ids=`` and returning an object with
            a ``.logits`` tensor; must expose a ``.device`` attribute.
        tokenizer: Callable that encodes text (``return_tensors='pt'``) and
            provides ``decode``.
        input_text: Prompt text to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Token id that stops generation once it is produced.

    Returns:
        The decoded prompt plus generated tokens, with special tokens kept
        (``skip_special_tokens=False``).
    """
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(model.device)
    generated = input_ids.clone()
    # Inference only: disable autograd so the forward passes do not save
    # activations for a backward pass that never happens.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            logits = model(input_ids=generated).logits
            # Only the first sequence in the batch is decoded.
            next_token = torch.argmax(logits[0, -1, :], dim=-1, keepdim=True)
            generated = torch.cat([generated, next_token.unsqueeze(0)], dim=1)
            if next_token.item() == eos_token_id:
                break
    return tokenizer.decode(generated[0], skip_special_tokens=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [2]:
# Show which device was selected by the setup cell (CUDA if available, otherwise CPU).
print(device)

cuda


In [3]:
# Chat-formatted prompt asking for a short story; it ends with the opening
# assistant tag so the model continues as the assistant.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n'
)
# Chat-formatted prompt asking for a JSON object in a fixed format.
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, "sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n'
)

In [4]:
# Run greedy decoding once per prompt (default token budget and EOS id).
story = greedy_decode(model=model, tokenizer=tokenizer, input_text=input_text_hedgehog)
json_result = greedy_decode(model=model, tokenizer=tokenizer, input_text=input_text_json)

In [5]:
# Print each heading followed by the corresponding greedy generation.
for heading, text in (
    ("Сказка про ёжика:", story),
    ("\nСгенерированный JSON:", json_result),
):
    print(heading)
    print(text)

=== Сказка про ёжика ===
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were grateful for the help they received from Sonic.

Sonic's story b

***Выводы:***
1. Нет, greedy decoding всегда выбирает самый вероятный токен на каждом шаге, поэтому результат будет одинаковым при каждом запуске (если не меняются промпт и параметры).

2. Сказка: Текст получается однообразным, предсказуемым, не креативным. Все истории будут почти одинаковыми.

   JSON:
Обычно хорошо подходит для строгих форматов, но если модель ошиблась на каком-то шаге, исправить ошибку уже нельзя — результат всегда один и тот же, даже если он невалидный.



--------------------------------------------------------------------------------

# ***Task 2***

In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): the same checkpoint is also loaded by the first cell of this
# notebook; the reload here keeps the cell self-contained.
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'
# Load the checkpoint, move it to the selected device, then switch to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

def sample_generate(model, tokenizer, input_text, max_new_tokens=1000, eos_token_id=151645, temperature=1.0, device='cpu'):
    """Continue ``input_text`` by sampling one token at a time.

    On every step the last-position logits are divided by ``temperature``,
    converted to probabilities with softmax, and the next token is drawn with
    ``torch.multinomial``.  The full sequence is re-fed to the model each step.

    Args:
        model: Callable accepting ``input_ids=`` and ``attention_mask=`` and
            returning an object with ``.logits``; ``model.eval()`` is called.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        input_text: Prompt text to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Generation stops once every sampled token equals this id.
        temperature: Logit divisor. ``0`` selects the arg-max token
            deterministically instead of dividing by zero.
        device: Device the encoded prompt is moved to.

    Returns:
        1-D tensor with the prompt ids followed by the generated ids of the
        first sequence.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")
    model.eval()
    encoding = tokenizer(input_text, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    generated = input_ids.clone()

    # Inference only: one no_grad context around the whole loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            outputs = model(input_ids=generated, attention_mask=attention_mask)
            last_logits = outputs.logits[:, -1, :]
            if temperature == 0:
                # Dividing by zero would give inf/nan probabilities; the
                # zero-temperature limit of sampling is the arg-max token.
                next_token_id = torch.argmax(last_logits, dim=-1, keepdim=True)
            else:
                probs = torch.softmax(last_logits / temperature, dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1)
            generated = torch.cat([generated, next_token_id], dim=-1)
            # Extend the mask so it keeps covering every token fed to the model.
            attention_mask = torch.cat([attention_mask, torch.ones_like(next_token_id, device=device)], dim=-1)
            if (next_token_id == eos_token_id).all():
                break
    return generated[0]

# Chat-formatted story prompt; it ends with the opening assistant tag.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n')

# Chat-formatted prompt asking for a JSON object in a fixed format.
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, "sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n')


def _sample_text(prompt):
    """Sample one continuation of ``prompt`` and decode it, keeping special tokens."""
    token_ids = sample_generate(model, tokenizer, prompt, device=device)
    return tokenizer.decode(token_ids, skip_special_tokens=False)


# Three independent samples per prompt to show run-to-run variation.
print("Сказки про ёжика:")
for i in range(3):
    output_text = _sample_text(input_text_hedgehog)
    print(f"\nStory {i+1}:\n{output_text}\n")

print("JSON генерации:")
for i in range(3):
    output_text = _sample_text(input_text_json)
    print(f"\nJSON {i+1}:\n{output_text}\n")

=== Сказки про ёжика ===

Story 1:
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a big, vibrant forest, there lived a tiny hedgehog named Sonic. His name was cracked, but like any good little hedgehog, he knew it couldn't affect his other traits.

Sonic spent his days nidying in a cozy, snug hole in a tree, ready for anyone who might stumble by and catch a glimpse of him. This spot had been perfect until something happened. One day, Sonic's little pillow fell through his hole, lying among the leaves. The hole grew bigger, turning into a duffel bag and tossing him all over the forest.

With his breath ever-present and never looked at, a group of amusing bugs found him again, called the Fellaghat. They took him home, as they defined their territory - the holes they drew around Sonic. Little yellow puppets with ove

***Выводы:***
1. Да, генерации будут различаться, потому что на каждом шаге следующий токен выбирается случайно согласно вероятностному распределению, а не всегда берётся самый вероятный токен.

2. Сказка:

   Плюсы: разнообразие историй, креативность.

   Минусы: возможны бессмысленные или нелогичные фрагменты.

   JSON:

   Минусы: высокая вероятность получить невалидный JSON (синтаксические ошибки, неправильные кавычки, отсутствие нужных полей), случайные значения, неструктурированный вывод. Такой способ плохо подходит для задач, где требуется строгий формат и точность.

--------------------------------------------------------------------------------

# ***Task 3***

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): earlier cells in this notebook load the same checkpoint;
# the reload here keeps the cell self-contained.
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'
# Load the checkpoint, move it to the selected device, then switch to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

def sample_generate(model, tokenizer, input_text, max_new_tokens=1000, eos_token_id=151645, temperature=1.0, device='cpu'):
    """Continue ``input_text`` by sampling one token at a time.

    On every step the last-position logits are divided by ``temperature``,
    converted to probabilities with softmax, and the next token is drawn with
    ``torch.multinomial``.  The full sequence is re-fed to the model each step.

    Args:
        model: Callable accepting ``input_ids=`` and ``attention_mask=`` and
            returning an object with ``.logits``; ``model.eval()`` is called.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        input_text: Prompt text to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Generation stops once every sampled token equals this id.
        temperature: Logit divisor. ``0`` selects the arg-max token
            deterministically instead of dividing by zero.
        device: Device the encoded prompt is moved to.

    Returns:
        1-D tensor with the prompt ids followed by the generated ids of the
        first sequence.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")
    model.eval()
    encoding = tokenizer(input_text, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    generated = input_ids.clone()

    # Inference only: one no_grad context around the whole loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            outputs = model(input_ids=generated, attention_mask=attention_mask)
            last_logits = outputs.logits[:, -1, :]
            if temperature == 0:
                # Dividing by zero would give inf/nan probabilities; the
                # zero-temperature limit of sampling is the arg-max token.
                next_token_id = torch.argmax(last_logits, dim=-1, keepdim=True)
            else:
                probs = torch.softmax(last_logits / temperature, dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1)
            generated = torch.cat([generated, next_token_id], dim=-1)
            # Extend the mask so it keeps covering every token fed to the model.
            attention_mask = torch.cat([attention_mask, torch.ones_like(next_token_id, device=device)], dim=-1)
            if (next_token_id == eos_token_id).all():
                break
    return generated[0]

# Chat-formatted story prompt; it ends with the opening assistant tag.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n')

# Chat-formatted prompt asking for a JSON object in a fixed format.
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, "sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n')


def _sample_text_at(prompt, temperature):
    """Sample one continuation of ``prompt`` at ``temperature`` and decode it."""
    token_ids = sample_generate(model, tokenizer, prompt, temperature=temperature, device=device)
    return tokenizer.decode(token_ids, skip_special_tokens=False)


# Sweep from near-greedy (0.001) to very flat (10.0) sampling distributions.
temperatures = [0.001, 0.1, 0.5, 1.0, 10.0]
print("Сравнение генераций для температур \n")

for temp in temperatures:
    print(f" Температура: {temp} ")
    output_text = _sample_text_at(input_text_hedgehog, temp)
    print(f"Сказка:\n{output_text}\n")

    output_text = _sample_text_at(input_text_json, temp)
    print(f"JSON:\n{output_text}\n")

=== Сравнение генераций для разных температур ===

--- Температура: 0.001 ---
Сказка:
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were gra

***Выводы:***
- Чем меньше температура, тем более предсказуемый и однообразный текст.
- Чем выше температура, тем больше случайности, разнообразия, но и ошибок.
- Для строгих форматов (JSON, код) лучше низкая температура (0.001–0.1).
- Для креативных задач (сказки) — средняя (0.5–1.0).
- Очень высокая температура (>1.0) приводит к бессмысленному тексту.

--------------------------------------------------------------------------------

# ***Task 4***

In [11]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): earlier cells in this notebook load the same checkpoint;
# the reload here keeps the cell self-contained.
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'
# Load the checkpoint, move it to the selected device, then switch to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

def nucleus_sample_generate(model, tokenizer, input_text, max_new_tokens=1000, eos_token_id=151645, temperature=1.0, top_p=0.9, device='cpu'):
    """Continue ``input_text`` with nucleus (top-p) sampling.

    On every step the temperature-scaled probabilities are sorted in
    descending order and only the smallest prefix whose total probability
    reaches ``top_p`` is kept (the token that crosses the threshold is
    included).  The kept probabilities are renormalised and one token is
    sampled from them.

    Args:
        model: Callable accepting ``input_ids=`` and ``attention_mask=`` and
            returning an object with ``.logits``; ``model.eval()`` is called.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        input_text: Prompt text to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Generation stops once every sampled token equals this id.
        temperature: Logit divisor. ``0`` selects the arg-max token
            deterministically instead of dividing by zero.
        top_p: Probability mass of the nucleus.  The most probable token is
            always kept, so values ``<= 0`` reduce to picking that token.
        device: Device the encoded prompt is moved to.

    Returns:
        1-D tensor with the prompt ids followed by the generated ids of the
        first sequence.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")
    model.eval()
    encoding = tokenizer(input_text, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    generated = input_ids.clone()

    # Inference only: one no_grad context around the whole loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            outputs = model(input_ids=generated, attention_mask=attention_mask)
            last_logits = outputs.logits[:, -1, :]

            if temperature == 0:
                # Zero-temperature limit: the arg-max token (avoids dividing by zero).
                next_token_id = torch.argmax(last_logits, dim=-1, keepdim=True)
            else:
                probs = torch.softmax(last_logits / temperature, dim=-1)
                sorted_probs, sorted_indices = torch.sort(probs, descending=True)
                cumulative_probs = torch.cumsum(sorted_probs, dim=-1)

                # Keep a token while the mass accumulated BEFORE it is still
                # below top_p.  Comparing the inclusive cumulative sum with
                # `<= top_p` would drop the token that crosses the threshold
                # and leave a nucleus with less than top_p mass.
                mass_before = cumulative_probs - sorted_probs
                mask = mass_before < top_p
                mask[..., 0] = True  # never end up with an empty nucleus

                filtered_probs = torch.where(mask, sorted_probs, torch.zeros_like(sorted_probs))
                filtered_probs = filtered_probs / filtered_probs.sum(dim=-1, keepdim=True)
                next_token_in_sorted = torch.multinomial(filtered_probs, num_samples=1)
                # Map the position in the sorted order back to a vocabulary id.
                next_token_id = sorted_indices.gather(-1, next_token_in_sorted)

            generated = torch.cat([generated, next_token_id], dim=-1)
            attention_mask = torch.cat([attention_mask, torch.ones_like(next_token_id, device=device)], dim=-1)
            if (next_token_id == eos_token_id).all():
                break
    return generated[0]

# Chat-formatted story prompt; it ends with the opening assistant tag.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n')

# Chat-formatted prompt asking for a JSON object in a fixed format.
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, "sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n')

# Every combination of two temperatures and two nucleus sizes.
params = [
    {"temperature": 1.0, "top_p": 0.9},
    {"temperature": 1.0, "top_p": 0.15},
    {"temperature": 0.5, "top_p": 0.9},
    {"temperature": 0.5, "top_p": 0.15}]


def _nucleus_text(prompt, settings):
    """Run nucleus sampling on ``prompt`` with ``settings`` and decode the result."""
    token_ids = nucleus_sample_generate(
        model, tokenizer, prompt,
        temperature=settings['temperature'], top_p=settings['top_p'], device=device)
    return tokenizer.decode(token_ids, skip_special_tokens=False)


for p in params:
    print(f" Параметры: temperature={p['temperature']}, top_p={p['top_p']} ")
    output_text = _nucleus_text(input_text_hedgehog, p)
    print(f"Сказка:\n{output_text}\n")

    output_text = _nucleus_text(input_text_json, p)
    print(f"JSON:\n{output_text}\n")

=== Параметры: temperature=1.0, top_p=0.9 ===
Сказка:
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a lush, green forest, there lived a tiny hedgehog named Sonic. Sonic was small and inconspicuous, yet he had a unique way of looking out for his friends. One day, while walking through the dense forest, Sonic noticed a fallen log that he had come across and had forgotten about.

He found it and decided to help it by using his sharp claws to poke it, hoping to catch some bugs or plants. As he worked, he accidentally scraped his front legs, feeling sore and uncomfortable. But to his surprise, the log was quite comfortable, offering him shelter and a place to rest.

As night fell, Sonic decided to go to sleep and wake up earlier for a chance to get more bugs and plants. But just as he was about to leave, he remembere

***Выводы:***
1. Как отличаются генерации с разными параметрами?
- temperature=1, top_p=0.9: разнообразный, креативный текст, но иногда бывают ошибки.
- temperature=1, top_p=0.15: текст становится более предсказуемым, меньше случайных вставок, но иногда теряется креативность.
- temperature=0.5, top_p=0.9: текст более сдержанный, но всё ещё разнообразный, меньше ошибок.
- temperature=0.5, top_p=0.15: текст очень предсказуемый, почти всегда одинаковый, мало креатива.

2. Помог ли nucleus sampling исправить проблемы простого sampling?
- Да, nucleus sampling позволяет контролировать баланс между разнообразием и предсказуемостью, убирает совсем маловероятные (часто бессмысленные) токены, что особенно полезно для формальных задач (например, генерация JSON).

--------------------------------------------------------------------------------

# ***Task 5***

In [12]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F

# NOTE(review): earlier cells in this notebook load the same checkpoint;
# the reload here keeps the cell self-contained.
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'
# Load the checkpoint, move it to the selected device, then switch to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

def beam_search_generate(model, tokenizer, input_text, num_beams=4, length_penalty=1.0, max_new_tokens=1000, eos_token_id=151645, device='cpu'):
    """Continue ``input_text`` with beam search and return the best hypothesis.

    Every live hypothesis is expanded with its ``num_beams`` most probable next
    tokens.  Expansions ending in ``eos_token_id`` are moved to the finished
    pool; of the rest, the ``num_beams`` with the highest summed log-probability
    stay alive.  The search stops when ``num_beams`` hypotheses have finished,
    when no live hypothesis remains, or when ``max_new_tokens`` tokens have been
    generated (the live hypotheses are then treated as finished).  The winner
    maximises ``score / generated_length ** length_penalty``.

    NOTE(review): every EOS expansion found in a beam's top-k is finalised,
    even when its score would not rank among the best ``num_beams`` expansions
    overall.  This can end the search early with weak hypotheses; the behaviour
    is kept as-is here.

    Args:
        model: Callable accepting ``input_ids=`` and ``attention_mask=`` and
            returning an object with ``.logits``; ``model.eval()`` is called.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        input_text: Prompt text to continue.
        num_beams: Number of live hypotheses and of expansions per hypothesis.
        length_penalty: Exponent applied to the generated length when ranking.
            Scores are sums of log-probabilities (non-positive), so larger
            values favour longer hypotheses.
        max_new_tokens: Upper bound on the number of generated tokens.
        eos_token_id: Token id that finishes a hypothesis.
        device: Device the encoded prompt is moved to.

    Returns:
        1-D tensor with the prompt ids followed by the generated ids of the
        selected hypothesis (just the prompt ids if ``max_new_tokens <= 0``).

    Raises:
        ValueError: If ``num_beams`` is smaller than 1.
    """
    if num_beams < 1:
        raise ValueError(f"num_beams must be at least 1, got {num_beams}")
    model.eval()
    encoding = tokenizer(input_text, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    prompt_len = input_ids.shape[1]

    if max_new_tokens <= 0:
        # Nothing may be generated; also avoids ranking zero-length hypotheses.
        return input_ids[0]

    def rank_score(cand):
        # Length-normalised score; the generated length is always >= 1 here.
        length = cand['tokens'].shape[1] - prompt_len
        return cand['score'] / (length ** length_penalty)

    # Start from the bare prompt so the first step uses the same expansion
    # code as every later step.
    candidates = [{'tokens': input_ids, 'score': 0.0, 'finished': False}]
    finished = []

    with torch.no_grad():
        while candidates and len(finished) < num_beams:
            # All live hypotheses have the same length.  Check the budget
            # BEFORE expanding so that exactly max_new_tokens tokens are
            # produced, including when max_new_tokens == 1.
            if candidates[0]['tokens'].shape[1] - prompt_len >= max_new_tokens:
                finished.extend(candidates)
                break

            expansions = []
            for cand in candidates:
                tokens = cand['tokens']
                # Prompt mask from the tokenizer plus ones for generated tokens.
                mask = torch.cat([attention_mask, torch.ones_like(tokens[:, prompt_len:])], dim=-1)
                logits = model(input_ids=tokens, attention_mask=mask).logits[:, -1, :]
                logprobs = F.log_softmax(logits, dim=-1)
                topk_logprobs, topk_indices = torch.topk(logprobs, num_beams, dim=-1)
                for i in range(num_beams):
                    next_token = topk_indices[0, i].unsqueeze(0).unsqueeze(0)
                    is_finished = (next_token.item() == eos_token_id)
                    hypothesis = {
                        'tokens': torch.cat([tokens, next_token], dim=-1),
                        'score': cand['score'] + topk_logprobs[0, i].item(),
                        'finished': is_finished,
                    }
                    if is_finished:
                        finished.append(hypothesis)
                    else:
                        expansions.append(hypothesis)

            # Keep the num_beams best unfinished expansions alive.
            expansions.sort(key=lambda hyp: hyp['score'], reverse=True)
            candidates = expansions[:num_beams]

    best = max(finished or candidates, key=rank_score)
    return best['tokens'][0]

# Chat-formatted story prompt; it ends with the opening assistant tag.
input_text_hedgehog = (
    '<|im_start|>system\nYou are a storyteller. Generate a story based on user message.<|im_end|>\n'
    '<|im_start|>user\nGenerate me a short story about a tiny hedgehog named Sonic.<|im_end|>\n'
    '<|im_start|>assistant\n')

# Chat-formatted prompt asking for a JSON object in a fixed format.
input_text_json = (
    '<|im_start|>system\nYou are a JSON machine. Generate a JSON with format {"contractor": string with normalized contractor name, "sum": decimal, "currency": string with uppercased 3-letter currency code} based on user message.<|im_end|>\n'
    '<|im_start|>user\nTransfer 100 rubles and 50 kopeck to Mike<|im_end|>\n'
    '<|im_start|>assistant\n')

# Beam widths and length penalties to compare.
params = [
    {"num_beams": 1, "length_penalty": 1.0},
    {"num_beams": 4, "length_penalty": 1.0},
    {"num_beams": 4, "length_penalty": 0.5},
    {"num_beams": 4, "length_penalty": 2.0},
    {"num_beams": 8, "length_penalty": 1.0}]


def _beam_text(prompt, settings):
    """Run beam search on ``prompt`` with ``settings`` and decode the result."""
    token_ids = beam_search_generate(
        model, tokenizer, prompt,
        num_beams=settings['num_beams'], length_penalty=settings['length_penalty'], device=device)
    return tokenizer.decode(token_ids, skip_special_tokens=False)


for p in params:
    print(f"Параметры: num_beams={p['num_beams']}, length_penalty={p['length_penalty']} ")

    output_text = _beam_text(input_text_hedgehog, p)
    print(f"Сказка:\n{output_text}\n")

    output_text = _beam_text(input_text_json, p)
    print(f"JSON:\n{output_text}\n")

=== Параметры: num_beams=1, length_penalty=1.0 ===
Сказка:
<|im_start|>system
You are a storyteller. Generate a story based on user message.<|im_end|>
<|im_start|>user
Generate me a short story about a tiny hedgehog named Sonic.<|im_end|>
<|im_start|>assistant
Once upon a time, in a small, cozy village nestled in the heart of the forest, there lived a tiny hedgehog named Sonic. Sonic was a curious and adventurous creature, always eager to explore the world around him. One day, while wandering through the forest, Sonic stumbled upon a hidden cave.

Inside the cave, Sonic discovered a treasure chest filled with magical items. As he opened the chest, he was amazed to see that the items were not just ordinary, but enchanted. Sonic was thrilled to find that he could use the items to help others in need.

From that day on, Sonic became a hero in the village. He used his magical powers to help people in need, and soon, the village was filled with people who were grateful for the help they rec

***Выводы:***
1. Как отличаются результаты с разными параметрами?
- num_beams=1, length_penalty=1.0: это обычный greedy search, текст детерминированный, часто короткий.
- num_beams=4, length_penalty=1.0: больше разнообразия, выше шанс получить более логичную и завершённую последовательность.
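- num_beams=4, length_penalty=0.5: нормировка по длине слабее, поэтому относительно чаще выигрывают более короткие ответы, текст может быть слишком кратким.
- num_beams=4, length_penalty=2.0: нормировка по длине сильнее, поэтому поощряются более длинные ответы, текст может быть избыточным.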
- num_beams=8, length_penalty=1.0: ещё больше вариантов, выше шанс найти оптимальную последовательность, но выше затраты.

2. Есть ли закономерность при увеличении/уменьшении num_beams и length_penalty?
- Большее num_beams = больше вариантов, выше качество, но медленнее.
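- Большее length_penalty = длиннее ответы, меньшее — короче: итоговая оценка — это (отрицательная) сумма лог-вероятностей, делённая на длину в степени length_penalty.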

3. Помог ли beam search исправить проблемы greedy decoding?
- Да, beam search позволяет находить более вероятные и логичные последовательности, избегая "жадных" локальных максимумов.

4. Для каких задач beam search лучше nucleus sampling?
- Для задач, где важна максимальная правдоподобность и структурированность (перевод, резюме, генерация кода).

--------------------------------------------------------------------------------