In [2]:
import os
import re
import warnings
import random
from collections import defaultdict
from typing import Dict, List, Tuple

import torch
import torch.nn.functional as F
import numpy as np
import numpy as np
import torch
from tqdm.notebook import tqdm
from transformers import GPT2LMHeadModel, GPT2Tokenizer

warnings.filterwarnings("ignore")

In [3]:
def seed_everything(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and puts
    cuDNN into deterministic mode.

    Args:
        seed: Value used to seed every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # processes); the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN autotune and pick different kernels from run
    # to run, so it has to be off for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

## Задание

1) Реализовать методы `greedy_sampling` и `generate` (1 балл)
2) Реализовать метод `random_sampling` и поддержать его в `generate` (1 балл)
3) Реализовать метод `_beam_search_generate` и поддержать его в `generate` (2 балла)
4) Реализовать методы `apply_top_p`, `apply_top_k`, `apply_temperature` и поддержать их в `generate` (1 балл)  
Все методы необходимо реализовать через векторные операции в torch/numpy везде, где это возможно.

In [28]:
# Temperatures with |T| <= EPS are treated as zero, i.e. greedy decoding.
EPS = 1e-5

class Model:
    """Thin wrapper around a GPT-2 language model with hand-written decoding.

    Supports greedy decoding, random sampling (optionally with temperature,
    top-k and top-p filtering) and beam search. All generation methods work
    on a single prompt (batch size 1).
    """

    def __init__(self, model_name: str = "gpt2"):
        """Load the pretrained model and tokenizer and move the model to the device.

        Args:
            model_name: Name or path passed to ``from_pretrained``.
        """
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        # Reuse the end-of-text token as the padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.vocab_size = self.tokenizer.vocab_size
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def _encode_prompt(self, prompt: str) -> torch.Tensor:
        """Tokenize ``prompt`` into a ``(1, seq_len)`` tensor on ``self.device``.

        An empty prompt would give the model a zero-length input, so it is
        replaced by the tokenizer's BOS token.

        Raises:
            ValueError: If the prompt yields no tokens and there is no BOS token.
        """
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        if input_ids.shape[-1] == 0:
            bos_id = self.tokenizer.bos_token_id
            if bos_id is None:
                raise ValueError("Prompt produced no tokens and the tokenizer has no BOS token")
            input_ids = torch.tensor([[bos_id]], dtype=torch.long)
        # The model lives on self.device, so its inputs must live there too.
        return input_ids.to(self.device)

    def greedy_sampling(self, logits: torch.Tensor) -> int:
        """Return the id of the highest-scoring token.

        ``logits`` must describe exactly one position (e.g. shape
        ``(1, vocab)``), because the result is converted with ``.item()``.
        """
        return torch.argmax(logits, dim=-1).item()

    def random_sampling(self, logits: torch.Tensor) -> int:
        """Sample one token id from ``softmax(logits)``.

        ``logits`` must describe exactly one position (e.g. shape
        ``(1, vocab)``), because the result is converted with ``.item()``.
        """
        probs = F.softmax(logits, dim=-1)
        return torch.multinomial(probs, 1).item()

    def _beam_search_generate(
        self,
        prompt: str,
        max_length: int,
        num_beams: int
    ) -> str:
        """Generate a continuation of ``prompt`` with beam search.

        Hypotheses are ranked by the sum of their token log-probabilities
        (no length normalisation). A hypothesis that has produced EOS is
        frozen: it stays in the pool with its score but is not extended.
        All unfinished hypotheses are scored in one batched forward pass.

        Args:
            prompt: Text to continue.
            max_length: Maximum total length in tokens, prompt included.
            num_beams: Number of hypotheses kept after every step.

        Returns:
            The decoded best hypothesis with special tokens removed.

        Raises:
            ValueError: If ``num_beams`` is smaller than 1.
        """
        if num_beams < 1:
            raise ValueError("num_beams must be a positive integer")

        input_ids = self._encode_prompt(prompt)
        eos_id = self.tokenizer.eos_token_id

        # Each beam is (1-D token tensor, summed log-probability, finished flag).
        beams = [(input_ids[0], 0.0, False)]

        for _ in range(max_length - input_ids.shape[1]):
            active = [beam for beam in beams if not beam[2]]
            if not active:
                # Every surviving hypothesis has already emitted EOS.
                break

            # Unfinished beams always share the same length, so they stack.
            active_seqs = torch.stack([beam[0] for beam in active])
            active_scores = torch.tensor(
                [beam[1] for beam in active], device=active_seqs.device
            )

            with torch.no_grad():
                next_token_logits = self.model(active_seqs).logits[:, -1, :]
            # log_softmax is numerically safer than log(softmax(...)).
            log_probs = F.log_softmax(next_token_logits.float(), dim=-1)

            # Score every (beam, token) continuation and take the global top
            # num_beams over the flattened (n_active * vocab) scores.
            vocab = log_probs.size(-1)
            totals = (active_scores.unsqueeze(1) + log_probs).reshape(-1)
            top_scores, flat_idx = torch.topk(totals, min(num_beams, totals.numel()))
            parent_idx = torch.div(flat_idx, vocab, rounding_mode="floor")
            token_idx = flat_idx % vocab
            children = torch.cat(
                [active_seqs[parent_idx], token_idx.unsqueeze(1)], dim=1
            )

            # Finished hypotheses compete with the new children unchanged.
            candidates = [beam for beam in beams if beam[2]]
            candidates.extend(
                (seq, score, tok == eos_id)
                for seq, score, tok in zip(children, top_scores.tolist(), token_idx.tolist())
            )
            candidates.sort(key=lambda beam: beam[1], reverse=True)
            beams = candidates[:num_beams]

        best_sequence = max(beams, key=lambda beam: beam[1])[0]
        return self.tokenizer.decode(best_sequence, skip_special_tokens=True)

    def apply_temperature(self, logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
        """Divide ``logits`` by ``temperature``.

        Values above 1 flatten the resulting distribution, values below 1
        sharpen it.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError("Temperature must be positive")
        return logits / temperature

    def _apply_top_k(self, logits: torch.Tensor, top_k: int = None) -> torch.Tensor:
        """Keep the ``top_k`` largest logits along the last axis; set the rest to -inf.

        Tokens tied with the k-th largest value are all kept. The input
        tensor is never modified; a new tensor is returned when filtering
        takes place. ``None``, non-positive values and values not smaller
        than the vocabulary axis disable the filter.
        """
        if top_k is None or top_k <= 0 or top_k >= logits.size(-1):
            return logits

        # k-th largest value per row; trailing axis kept for broadcasting.
        kth_value = torch.topk(logits, int(top_k), dim=-1).values[..., -1:]
        # Out-of-place: `logits` may be a view into the model's output.
        return logits.masked_fill(logits < kth_value, float('-inf'))

    def _apply_top_p(self, logits: torch.Tensor, top_p: float = 1.0) -> torch.Tensor:
        """Nucleus filtering: keep the smallest top set whose mass exceeds ``top_p``.

        Tokens are sorted by probability; a token is removed when the
        cumulative probability of the tokens before it already exceeds
        ``top_p``. The most likely token is therefore always kept. Removed
        logits are set to -inf in a new tensor. ``top_p >= 1`` disables the
        filter.
        """
        if top_p >= 1.0:
            return logits

        probs = F.softmax(logits, dim=-1)
        sorted_probs, sorted_indices = torch.sort(probs, descending=True, dim=-1)
        cumulative_probs = torch.cumsum(sorted_probs, dim=-1)

        sorted_mask = cumulative_probs > top_p
        # Shift right by one so the token that crosses the threshold is kept.
        sorted_mask[..., 1:] = sorted_mask[..., :-1].clone()
        sorted_mask[..., 0] = False

        # Map the mask from sorted order back to vocabulary order.
        mask = torch.zeros_like(sorted_mask).scatter(-1, sorted_indices, sorted_mask)
        return logits.masked_fill(mask, float('-inf'))

    # Public aliases matching the method names used in the task statement.
    apply_top_k = _apply_top_k
    apply_top_p = _apply_top_p

    def generate(
        self,
        prompt: str,
        max_length: int = 50,
        strategy: str = "greedy",
        temperature: float = 1.0,
        top_k: int = 0,
        top_p: float = 1.0,
        num_beams: int = 3
    ) -> str:
        """Generate a continuation of ``prompt``.

        Args:
            prompt: Text to continue.
            max_length: Maximum total length in tokens, prompt included.
            strategy: ``"greedy"``, ``"random"`` or ``"beam_search"``.
            temperature: Divisor applied to the logits; ``|temperature| <= EPS``
                switches to greedy decoding. Ignored by beam search.
            top_k: Keep only the k most likely tokens (``0`` or ``None``
                disables). Ignored by beam search.
            top_p: Nucleus threshold (``>= 1`` disables). Ignored by beam search.
            num_beams: Beam width, used only by ``"beam_search"``.

        Returns:
            The decoded text (prompt plus continuation), special tokens removed.

        Raises:
            ValueError: On an unknown strategy or a negative temperature.
        """
        if strategy == "beam_search":
            return self._beam_search_generate(prompt, max_length, num_beams)
        if strategy not in ("greedy", "random"):
            raise ValueError(f"Unknown strategy: {strategy}")

        use_temperature = abs(temperature) > EPS
        if use_temperature and temperature <= 0:
            raise ValueError("Temperature must be positive")
        if not use_temperature:
            # Zero temperature is the argmax limit of sampling.
            strategy = "greedy"
        pick_token = self.greedy_sampling if strategy == "greedy" else self.random_sampling

        generated = self._encode_prompt(prompt)

        for _ in range(max_length - generated.shape[1]):
            # Logits for the position following the last generated token.
            with torch.no_grad():
                next_token_logits = self.model(generated).logits[:, -1, :]

            # Order matters: temperature first, then top-k, then top-p.
            if use_temperature:
                next_token_logits = self.apply_temperature(next_token_logits, temperature)
            if top_k is not None and top_k > 0:
                next_token_logits = self._apply_top_k(next_token_logits, top_k)
            if top_p < 1.0:
                next_token_logits = self._apply_top_p(next_token_logits, top_p)

            next_token_id = pick_token(next_token_logits)

            # Create the new token on the same device as the running sequence.
            next_token = torch.tensor(
                [[next_token_id]], dtype=generated.dtype, device=generated.device
            )
            generated = torch.cat([generated, next_token], dim=1)

            # Stop once the model emits end-of-text.
            if next_token_id == self.tokenizer.eos_token_id:
                break

        return self.tokenizer.decode(generated[0], skip_special_tokens=True)


In [30]:
# Demonstrate the output of `generate` under different parameters.
# The wrapper is instantiated once here and reused by the demo cells below.
model = Model()

In [22]:
# Prompt shared by the decoding-strategy demos that follow.
prompt = "Artificial intelligence has made great strides forward. This is"

Greedy sampling

In [23]:
# Greedy decoding: the highest-scoring token is chosen at every step.
result = model.generate(prompt, strategy="greedy", max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is a great achievement, but it is not the only one.

The next step is to


Random sampling

In [24]:
# Random sampling with the defaults: temperature 1.0, no top-k / top-p filtering.
result = model.generate(prompt, strategy="random", max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is partly because intelligent machines nowadays can be tweaked more rapidly than humans. Other problems may be much smaller


Random sampling + temp=2

In [25]:
# Temperature 2.0 divides the logits by 2 before sampling, flattening the distribution.
result = model.generate(prompt, strategy="random", temperature=2.0, max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is happens bacon∴ fil)." >>> Once It AC Jobs Thomas Conglees Sal Meielve Dal Griffin


Random sampling + temp=0.5

In [26]:
# Temperature 0.5 divides the logits by 0.5 (doubles them), sharpening the distribution.
result = model.generate(prompt, strategy="random", temperature=0.5, max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is because it is able to find and understand patterns in the world and to find and understand patterns in


Random sampling + top_k=10

In [31]:
# Sample only among the 10 highest-scoring tokens at each step.
result = model.generate(prompt, strategy="random", top_k=10, max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is a great thing for our nation," said the President.

But there's another side to


Random sampling + top_p=0.8

In [32]:
# Nucleus (top-p) sampling with a cumulative-probability threshold of 0.8.
result = model.generate(prompt, strategy="random", top_p=0.8, max_length=30)
print(result)

Artificial intelligence has made great strides forward. This is true because in recent years, our methods have improved and we have more information to apply to more


Beam search (num_beams=3)

In [33]:
# Beam search keeping the 3 best-scoring hypotheses after every step.
result = model.generate(prompt, strategy="beam_search", max_length=30, num_beams=3)
print(result)

Artificial intelligence has made great strides forward. This is not to say that AI is going to be the next big thing, but it's clear that


Комбинации параметров

In [34]:
# Combined settings: `generate` applies temperature first, then top-k, then top-p.
result = model.generate(
    prompt, 
    strategy="random", 
    temperature=0.7, 
    top_k=8, 
    top_p=0.95, 
    max_length=50
)
print(result)

Artificial intelligence has made great strides forward. This is the first time in the history of the world that a single AI is able to predict the future, and this is a big step forward for the human race.

The AI that we know


In [35]:
# Second prompt, used by the remaining demo cells.
prompt1 = "And they tell us that the leg is shorter than the hypotenuse. And I tell you: Enough!"

In [39]:
# Temperature 1.5 (flatter distribution) with sampling restricted to the top 8 tokens.
result = model.generate(
    prompt1, 
    strategy="random", 
    temperature=1.5, 
    top_k=8, 
    max_length=100
)
print(result)

And they tell us that the leg is shorter than the hypotenuse. And I tell you: Enough!

But they don't know what's happening to the other one, because you can see how far the leg will fall off in the ground, so you can't really tell what is actually going on here.

The leg that's going up, you have the leg. You have the knee. You have the back of the thigh that's just above the elbow where it can


In [40]:
# Greedy decoding on the second prompt; the recorded output below repeats the same sentence.
result = model.generate(
    prompt1, 
    strategy="greedy",
    max_length=100
)
print(result)

And they tell us that the leg is shorter than the hypotenuse. And I tell you: Enough!

I'm not saying that the leg is shorter than the hypotenuse. I'm saying that the leg is shorter than the hypotenuse.

I'm not saying that the leg is shorter than the hypotenuse.

I'm not saying that the leg is shorter than the hypotenuse.

I'm not saying that the leg is shorter than the


In [41]:
# Temperature 1.2 followed by nucleus filtering with top_p=0.6.
result = model.generate(
    prompt1, 
    strategy="random", 
    temperature=1.2, 
    top_p=0.6, 
    max_length=100
)
print(result)

And they tell us that the leg is shorter than the hypotenuse. And I tell you: Enough!

The other issue is that the final table shows an astonishing amount of subtle differences in surface area. The pentagonal paper covers only 3.5-inches across. Because the backboard is long, this leaves little room for slightly over 10 feet of board space. But it also leaves room for nearly 2 feet of gain. The foot-depth was 28 inches and not even 3.
