In [1]:
import os
import sys

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, LoraModel, get_peft_model

model_name = 'ai-forever/rugpt3medium_based_on_gpt2'

sys.path.append(os.path.abspath('../'))


In [2]:
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

In [3]:
model.save_pretrained('.')

In [9]:
tokenizer.save_pretrained('.')

('.\\tokenizer_config.json',
 '.\\special_tokens_map.json',
 '.\\vocab.json',
 '.\\merges.txt',
 '.\\added_tokens.json',
 '.\\tokenizer.json')

In [3]:
total_memory = 0
for param in model.parameters():
    total_memory += param.numel() * param.element_size()

print(f"Общий объём памяти весов: {total_memory / (1024 ** 2):.2f} МБ")

Общий объём памяти весов: 477.72 МБ


In [4]:
sum(p.numel() for p in model.parameters())

125231616

In [5]:
lora_config = LoraConfig(
    r=8,
    target_modules=['c_attn'], 
    lora_alpha=16,
    lora_dropout=0.1,
    bias='none',
    fan_in_fan_out=True,
    task_type="CAUSAL_LM",
)

lora_model = get_peft_model(model, lora_config, adapter_name='adptr')

In [6]:
lora_model.print_trainable_parameters()

trainable params: 294,912 || all params: 125,526,528 || trainable%: 0.2349


In [7]:

inputs = tokenizer('Вышел заяц', return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    outputs = model(**inputs)

print(outputs.logits.shape)  # должно быть [batch_size, seq_len, vocab_size]

torch.Size([1, 4, 50264])


In [8]:
tokenizer.decode([  618, 12090,   280,   370,   430])

'Анекдот 1'

In [35]:
model(torch.LongTensor(tokenizer.encode('привет, как дела')))

IndexError: too many indices for tensor of dimension 1

In [29]:
tokenizer.decode(tokenizer.encode('привет'))

'привет'

In [32]:
tokenizer.encode('привет, как дела')

[960, 577, 16, 428, 1101]

In [51]:
lora_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 1024)
        (wpe): Embedding(2048, 1024)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-23): 24 x GPT2Block(
            (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=3072, nx=1024)
                (lora_dropout): ModuleDict(
                  (adptr): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (adptr): Linear(in_features=1024, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (adptr): Linear(in_features=8, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
            

In [52]:
type(lora_model)

peft.peft_model.PeftModelForCausalLM

In [53]:
lora_model.save_pretrained('.')

In [23]:
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path='ai-forever/rugpt3small_based_on_gpt2')

In [24]:
from peft import PeftModel

In [25]:
m = PeftModel.from_pretrained(model, 
                              r'D:/workspace/projects/anek/models/checkpoints/peftmodelforcausallm_adamw_lambdalr/7')

In [26]:
m

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50264, 768)
        (wpe): Embedding(2048, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=2304, nx=768)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
           

In [27]:
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path='ai-forever/rugpt3small_based_on_gpt2')

In [28]:
r = m.generate()
tokenizer.decode(r[0])

'<s>- А что такое "плохая" жена?\n- Это когда она не может родить,'