In [3]:
import json
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
import torch

# 1. Data preparation
def prepare_dataset():
    """Load the JSONL training file and render every record as a ChatML example.

    Reads ``prepared_training_data.jsonl`` from the current working directory.
    Each non-blank line must be a JSON object with the keys ``instruction``,
    ``input`` and ``output``; they become the system, user and assistant turns
    of one training example.

    Returns:
        Dataset: a dataset with a single ``text`` column, one row per record,
        in file order.

    Raises:
        FileNotFoundError: if the data file does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks one of the three required keys.
    """
    texts = []
    # Explicit UTF-8 so non-ASCII training text is decoded the same way on
    # every platform instead of depending on the locale's default encoding.
    with open("prepared_training_data.jsonl", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. extra newlines at the end of the file);
            # json.loads would otherwise raise on them.
            if not line.strip():
                continue
            entry = json.loads(line)

            # Render the record as a system / user / assistant conversation
            formatted_text = f"""<|im_start|>system
{entry['instruction']}<|im_end|>
<|im_start|>user
{entry['input']}<|im_end|>
<|im_start|>assistant
{entry['output']}<|im_end|>"""

            texts.append(formatted_text)

    return Dataset.from_dict({"text": texts})

# 2. Model configuration
def setup_model(model_name="NousResearch/Nous-Hermes-2-Yi-34B"):
    """Load a causal language model in 4-bit NF4 quantization with its tokenizer.

    Args:
        model_name: Hugging Face Hub id (or local path) of the checkpoint to
            load. Defaults to the 34B Nous-Hermes-2 model; pass a different id
            to fine-tune another size (e.g. a 13B variant).

    Returns:
        tuple: ``(model, tokenizer)``. The tokenizer is guaranteed to have a
        padding token.
    """
    # 4-bit NF4 weights with double quantization; matmuls are computed in fp16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )

    # NOTE(review): trust_remote_code=True lets the checkpoint repository run
    # its own Python code on load. Keep it only for repositories you trust.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Fall back to EOS for padding only when the tokenizer defines no pad
    # token; an existing pad token must not be silently replaced.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

# 3. LoRA setup
def setup_peft(model):
    """Wrap ``model`` with LoRA adapters on its attention projections.

    If the model was loaded in 4-bit or 8-bit precision it is first passed
    through PEFT's ``prepare_model_for_kbit_training``, the preparation step
    PEFT documents for training quantized models.

    Args:
        model: the base causal LM, quantized or not.

    Returns:
        The PEFT-wrapped model in which only the LoRA parameters are trainable.
    """
    # Imported locally so this function does not depend on an edit to the
    # notebook's import cell.
    from peft import prepare_model_for_kbit_training

    # Only quantized models need the k-bit preparation; a full-precision model
    # is left untouched so its parameters are not upcast unnecessarily.
    if getattr(model, "is_loaded_in_4bit", False) or getattr(model, "is_loaded_in_8bit", False):
        model = prepare_model_for_kbit_training(model)

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        # Adapt all four attention projection matrices
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    return get_peft_model(model, lora_config)

# 4. Training
def train_model():
    """Run the whole fine-tuning pipeline and save the result.

    Steps: load and shuffle the dataset, load the quantized base model, attach
    LoRA adapters, tokenize with causal-LM labels, train with ``Trainer`` and
    finally write the trained model and the tokenizer to
    ``./nous-hermes2-13b-custom``. Checkpoints go to ``./results-13b``.

    NOTE(review): both directory names mention "13b" although ``setup_model``
    loads a 34B checkpoint by default - confirm the intended naming.
    """
    # Data preparation
    dataset = prepare_dataset().shuffle(seed=42)
    model, tokenizer = setup_model()

    # PEFT setup
    model = setup_peft(model)

    # Tokenization
    def tokenize_function(examples):
        """Tokenize a batch of texts and attach causal-LM labels."""
        encoded = tokenizer(
            examples["text"],
            truncation=True,
            max_length=2048,
            padding="max_length"
        )
        # Without a "labels" column the model returns no loss and Trainer
        # cannot train. Labels are the input ids, with padding positions
        # (attention_mask == 0) set to -100 so the loss ignores them. Masking
        # by attention mask rather than by token id keeps real end-of-sequence
        # tokens in the loss even when the pad token equals EOS.
        encoded["labels"] = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
        ]
        return encoded

    # Drop the raw "text" column so only model inputs reach the data collator.
    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names,
    )

    # Training parameters (effective batch size per device: 2 * 4 = 8)
    training_args = TrainingArguments(
        output_dir="./results-13b",
        num_train_epochs=5,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        fp16=True,
        save_strategy="epoch",
        logging_steps=50,
        optim="paged_adamw_32bit",
        report_to="none",
        lr_scheduler_type="cosine",
        warmup_ratio=0.03
    )

    # Start training
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    trainer.train()
    model.save_pretrained("./nous-hermes2-13b-custom")
    tokenizer.save_pretrained("./nous-hermes2-13b-custom")

# Entry point: run the fine-tuning pipeline when this code is executed as the
# main module (which includes running the cell in a notebook kernel).
if __name__ == "__main__":
    train_model()

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.8k [00:00<?, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/1.69G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

OSError: NousResearch/Nous-Hermes-2-SOLAR-10.7B does not appear to have files named ('model-00001-of-00005.safetensors', 'model-00002-of-00005.safetensors', 'model-00003-of-00005.safetensors', 'model-00004-of-00005.safetensors', 'model-00005-of-00005.safetensors'). Checkout 'https://huggingface.co/NousResearch/Nous-Hermes-2-SOLAR-10.7B/tree/main'for available files.

In [2]:
from transformers import AutoModel, AutoTokenizer
import shutil

import os


def clear_model_cache(model_name, cache_root=None):
    """Delete the locally cached Hugging Face Hub copy of ``model_name``.

    Args:
        model_name: Hub repository id such as ``"org/name"``.
        cache_root: directory that holds the ``models--*`` folders. When
            omitted it is resolved like the Hub client does: ``HF_HUB_CACHE``
            (or the legacy ``HUGGINGFACE_HUB_CACHE``), else ``$HF_HOME/hub``,
            else ``$XDG_CACHE_HOME/huggingface/hub``, else
            ``~/.cache/huggingface/hub``.

    Returns:
        str: the path of the model's cache directory (removed if it existed).
    """
    if cache_root is None:
        cache_root = os.environ.get("HF_HUB_CACHE") or os.environ.get("HUGGINGFACE_HUB_CACHE")
    if cache_root is None:
        xdg_cache = os.environ.get("XDG_CACHE_HOME") or os.path.join(os.path.expanduser("~"), ".cache")
        hf_home = os.environ.get("HF_HOME") or os.path.join(xdg_cache, "huggingface")
        cache_root = os.path.join(hf_home, "hub")
    cache_root = os.path.expanduser(os.fspath(cache_root))

    target = os.path.join(cache_root, f"models--{model_name.replace('/', '--')}")
    # Best effort: a missing directory simply means there is nothing to clear.
    shutil.rmtree(target, ignore_errors=True)
    return target


# Remove the local files so the next load downloads the model afresh
model_name = "NousResearch/Nous-Hermes-2-Yi-34B"
clear_model_cache(model_name)