In [1]:
import os
os.environ["HF_TOKEN"] = "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"


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import warnings
warnings.filterwarnings('ignore')



In [2]:
# Пути и параметры
BASE_MODEL_NAME = "ExplosionNuclear/Llama-2.3-3B-Instruct-special"
CHECKPOINT_PATH = "./VectorSFT-checkpoints-upd/checkpoint-1100"
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Специальные токены из конфигурации
SPECIAL_TOKENS = ["<simple_talk>", "</simple_talk>"]

print(f"Используется устройство: {DEVICE}")


Используется устройство: cuda:0


In [3]:
# Загрузка и настройка токенайзера
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

print(f"Размер словаря токенайзера: {len(tokenizer)}")
print(f"ID специальных токенов: {[tokenizer.convert_tokens_to_ids(token) for token in SPECIAL_TOKENS]}")


Размер словаря токенайзера: 128258
ID специальных токенов: [128256, 128257]


In [4]:
# Загрузка базовой модели
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map=DEVICE,
    attn_implementation="flash_attention_2",
)

print(f"Базовая модель загружена")
print(f"Размер словаря модели: {base_model.config.vocab_size}")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Базовая модель загружена
Размер словаря модели: 128258


In [5]:
repo_id = "ExplosionNuclear/Llama-2.3-3B-Instruct-special"

In [6]:
tokenizer.push_to_hub(repo_id, use_auth_token=True)
base_model.push_to_hub(repo_id, use_auth_token=True)

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ExplosionNuclear/Llama-2.3-3B-Instruct-special/commit/a3015fb699ea5abbeed699f99e390a7719a34bd9', commit_message='Upload LlamaForCausalLM', commit_description='', oid='a3015fb699ea5abbeed699f99e390a7719a34bd9', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ExplosionNuclear/Llama-2.3-3B-Instruct-special', endpoint='https://huggingface.co', repo_type='model', repo_id='ExplosionNuclear/Llama-2.3-3B-Instruct-special'), pr_revision=None, pr_num=None)

In [9]:
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model     = AutoModelForCausalLM.from_pretrained(repo_id)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   2%|1         | 83.9M/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [13]:
tokenizer.tokenize("fhfhf</simple_talk>")

['fh', 'fh', 'f', '</simple_talk>']

In [6]:
tokenizer.decode([128256, 0, 2, 3, 4, 128257])

'<simple_talk>!#$%</simple_talk>'

In [8]:
rm -rf ~/.cache/huggingface/transformers

In [None]:
import inspect
print(inspect.getsource(base_model.forward))

In [70]:
# Загрузка модели с LoRA адаптерами
model = PeftModel.from_pretrained(base_model, CHECKPOINT_PATH)

# Переключение в режим инференса
model.eval()

print("LoRA адаптеры загружены успешно")
print(f"Модель в режиме: {'eval' if not model.training else 'train'}")


LoRA адаптеры загружены успешно
Модель в режиме: eval


In [68]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128258, 3072)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=128, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=128, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Line

In [9]:
from datasets import load_dataset

In [10]:
dataset = load_dataset("ExplosionNuclear/ExpNew7")

In [54]:
Companies benefit from access to a wider talent pool, as geographical limitations are removed, and can also experience reduced overhead costs associated with office space. A distributed workforce can also lead to enhanced productivity, as employees may find fewer distractions in their home environments. Furthermore, remote work fosters greater autonomy, empowering individuals to manage their schedules more effectively. Overall, it's a transformative approach to work that benefits many.

In [91]:
def generate_response(prompt, max_length=2000, temperature=0, top_p=0.9):
   
    
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
    
    # Генерация
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            top_p=top_p,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Декодирование результата
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    
    # Удаление исходного промпта из результата
    #response = generated_text[len(prompt):].strip()
    
    return response

print("Функция генерации готова")


Функция генерации готова


In [93]:

q = 1
prompt = dataset['train'][q]['question']
response = generate_response(prompt, max_length=1200)
print(response)


. 

Your answer:  The user experience (UX) design is a human-centered approach to creating products, services, and


. 

Your answer: The user interface (UI) design is a crucial aspect of software development, focusing on creating an intuitive


## dataset['train'][q]['final_answer']

In [None]:
# Пример с использованием специальных токенов
math_prompt = "<simple_talk>Объясните простыми словами, как решить квадратное уравнение.</simple_talk>"

print(f"Промпт с спец. токенами: {math_prompt}")
print("\nОтвет модели:")
response = generate_response(math_prompt, max_length=200)
print(response)


In [None]:
# Информация о конфигурации модели
print("=== Информация о модели ===")
print(f"Архитектура: {model.config.architectures}")
print(f"Размер скрытого слоя: {model.config.hidden_size}")
print(f"Количество слоев: {model.config.num_hidden_layers}")
print(f"Количество attention головок: {model.config.num_attention_heads}")
print(f"Размер словаря: {model.config.vocab_size}")
print(f"Максимальная длина позиции: {model.config.max_position_embeddings}")

# Информация о LoRA
print("\n=== Информация о LoRA ===")
peft_config = model.peft_config['default']
print(f"LoRA rank (r): {peft_config.r}")
print(f"LoRA alpha: {peft_config.lora_alpha}")
print(f"LoRA dropout: {peft_config.lora_dropout}")
print(f"Целевые модули: {peft_config.target_modules}")
