In [2]:
import os
import json
import numpy as np
from dataclasses import dataclass

import torch
import torch.nn.functional as F
from torch.serialization import safe_globals
from transformers import AutoTokenizer

from src.reflex_model import GPT, Block, Attention, RMSNorm, MLP, SwiGLU

In [3]:
import torch._dynamo
# Per the PyTorch docs, suppress_errors makes TorchDynamo log a compilation
# failure and fall back to eager execution instead of raising.
# NOTE(review): this can mask real torch.compile problems — confirm it is still needed.
torch._dynamo.config.suppress_errors = True

In [None]:
# Base directory for the checkpoint and tokenizer paths built in later cells
# ('rugpt/ckpt_4000.pt', 'models/rugpt'). It is the kernel's current working
# directory, so those relative locations are resolved against it.
main_path = os.getcwd()

In [4]:
@dataclass
class GPTConfig:
    """Settings object handed to ``GPT(config)`` in this notebook.

    Every field has a default, so ``GPTConfig()`` yields a complete instance.
    The notebook also attaches two undeclared attributes to the instance after
    construction (``pretrained_model_path`` and ``init_type``); this is a
    plain, non-slotted dataclass, so that keeps working.

    How each field is interpreted is decided by ``src.reflex_model.GPT``,
    which is not visible here; the groupings below go by field names only.
    """

    # Batch / sequence / vocabulary sizes.
    batch_size: int = 8
    block_size: int = 2048
    vocab_size: int = 50257  # presumably matches the tokenizer vocabulary — TODO confirm

    # Network dimensions.
    n_layer: int = 6
    n_head: int = 8
    n_embd: int = 1536

    # Remaining switches.
    dropout: float = 0.1
    bias: bool = True
    local_files_only: bool = True

In [5]:
# Start from the default settings, then attach two extra attributes that are
# not declared GPTConfig fields. NOTE(review): presumably GPT(config) reads
# them to decide how to initialise and which checkpoint to load — confirm.
config = GPTConfig()
config.init_type = 'load_pretrained'
config.pretrained_model_path = os.path.join(main_path, 'rugpt/ckpt_4000.pt')

In [6]:
# Classes allowlisted for torch's weights-only unpickling while the model is
# constructed. NOTE(review): assumes GPT(config) calls torch.load internally —
# confirm in src.reflex_model.
allowed_classes = [
    GPTConfig,
    GPT,
    Block,
    Attention,
    RMSNorm,
    MLP,
    SwiGLU,
]
with safe_globals(allowed_classes):
    # The allowlist is only in effect inside this context manager.
    model = GPT(config)

Loaded trained weights


In [10]:
# Both calls return the module itself; keeping them as assignments means the
# cell still displays nothing.
model = model.cuda()  # move the model to the current CUDA device
model = model.eval()  # switch the model to evaluation mode

In [11]:
# Sampling settings; all three are passed positionally to model.generate in the
# generation cell. NOTE(review): exact semantics are defined by GPT.generate,
# which is not visible here.
max_new_tokens = 200
temperature = 0.8
top_k = 20

In [12]:
# Load the tokenizer from the local models/rugpt directory under main_path.
tokenizer = AutoTokenizer.from_pretrained(os.path.join(main_path, "models/rugpt"), use_fast=True)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# First id the tokenizer produces for '#'; handed to model.generate later.
cur_pad_token = tokenizer('#')['input_ids'][0]

# Ids obtained by encoding the BOS string followed by those for the EOS
# string, concatenated into one flat list; also handed to model.generate.
tokenized = [
    token_id
    for special in (tokenizer.bos_token, tokenizer.eos_token)
    for token_id in tokenizer(special)['input_ids']
]

In [19]:
# Prompt: BOS token followed by a question/answer template.
text = tokenizer.bos_token + '### Вопрос: я люблю россию. скажи, кто лучший российский писатель? ### Ответ:'

# Encode and pad out to tokenizer.model_max_length (the pad token was set to
# EOS when the tokenizer was configured).
tokens = tokenizer(
    text,
    max_length=tokenizer.model_max_length,
    padding='max_length',
)['input_ids']

# Wrapping in a list adds a leading batch dimension of 1; the tensor is
# created on the current CUDA device.
idx = torch.tensor([tokens], device='cuda')

In [20]:
# Seed torch's random number generators right before sampling so that
# re-running this cell (or Restart & Run All) should reproduce the same
# continuation for the same settings. NOTE(review): assumes model.generate
# draws its samples through torch's RNG — confirm.
GENERATION_SEED = 0
torch.manual_seed(GENERATION_SEED)

# Inference only: disable autograd tracking for the duration of the call.
# This is harmless if generate already does so internally.
with torch.no_grad():
    output = model.generate(idx, max_new_tokens, tokenized, cur_pad_token, temperature, top_k)

# Turn the ids of the first (only) sequence back into text, dropping special
# tokens along the way.
out = tokenizer.convert_ids_to_tokens(output[0], skip_special_tokens=True)
res = tokenizer.convert_tokens_to_string(out)

In [21]:
# Display the decoded text (the prompt followed by the generated continuation).
print(res)

### Вопрос: я люблю россию. скажи, кто лучший российский писатель? ### Ответ: Кто-нибудь Я##
