In [None]:
!pip install unsloth

In [None]:
import os
from unsloth import FastLanguageModel
import torch
# Sequence length shared by the model loader below and the SFT trainer in a later cell.
max_seq_length = 3056
# Forwarded unchanged to from_pretrained; None presumably lets Unsloth choose
# the dtype itself — TODO confirm against the Unsloth documentation.
dtype = None
# Forwarded to from_pretrained as the 4-bit loading switch.
load_in_4bit = True

# Catalogue of Unsloth checkpoint ids kept for reference; nothing in the
# visible notebook reads this list.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
]

# Load the base checkpoint; the call returns the model together with its tokenizer,
# both of which are used by the following cells.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)


In [None]:
# Wrap the loaded model through Unsloth's get_peft_model using the LoRA
# settings below; the result replaces `model`.
# NOTE: because `model` is rebound to the wrapped result, re-running this cell
# on its own would pass the already-wrapped model back in. Re-run the loading
# cell first when repeating this step.
model = FastLanguageModel.get_peft_model(
    model,
    r = 24,
    # Names of the projection modules the adapters are attached to.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    # Same value as the `seed` given to the trainer configuration later on.
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)


In [None]:
import json
from datasets import Dataset

# Location of the JSONL training data. The SFT_DATA_PATH environment variable
# overrides it so the notebook can run on another machine; when the variable
# is unset the original absolute path is used.
data_path = os.environ.get(
    "SFT_DATA_PATH",
    "/home/jupyter/datasphere/project/category_1_math_cs_qwen_with_qdrant.jsonl",
)

# End-of-sequence marker of the loaded tokenizer; appended to the training texts.
EOS_TOKEN = tokenizer.eos_token

def _format_qdrant_block(qdrant_answer):
    """Build the optional reference-information suffix for the prompt.

    Args:
        qdrant_answer: Value of the record's "qdrant_answer" field: a list of
            text snippets, a single string, or an empty/missing value.

    Returns:
        The separator-prefixed block with the snippets joined by blank lines,
        or an empty string when there is nothing to add.
    """
    if not qdrant_answer:
        return ""
    if isinstance(qdrant_answer, str):
        # A str is iterable: joining it directly would put "\n\n" between its
        # characters, so a bare string is treated as a single snippet.
        qdrant_answer = [qdrant_answer]
    qdrant_text = "\n\n".join(qdrant_answer)
    return f"\n\n---\nПолезная справочная информация:\n{qdrant_text}" if qdrant_text else ""


def create_chat_dataset(file_path):
    """Read a JSONL file and build a dataset of prompt/answer pairs.

    Each non-blank line is parsed as a JSON object. A record is used only when
    "original_question", "cleaned_reasoning" and "model_extracted_answer" are
    all present and non-empty; other records are skipped. The optional
    "qdrant_answer" field is appended to the prompt as reference information.

    Args:
        file_path: Path to the UTF-8 encoded JSONL file.

    Returns:
        A `Dataset` with two string columns, "user" (prompt) and "assistant"
        (expected answer).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    formatted_data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # A blank line (for example an extra empty line at the end of the
            # file) is not valid JSON and would make json.loads raise.
            if not line.strip():
                continue

            record = json.loads(line)
            original_question = record.get("original_question")
            cleaned_reasoning = record.get("cleaned_reasoning")
            model_answer = record.get("model_extracted_answer")

            # Incomplete records cannot form a training pair.
            if not (original_question and cleaned_reasoning and model_answer):
                continue

            qdrant_block = _format_qdrant_block(record.get("qdrant_answer"))

            # NOTE(review): EOS_TOKEN is appended to the prompt as well as to
            # the answer, so the concatenated training text contains it between
            # the two parts — confirm this is intended as a turn separator.
            prompt = f"""Реши задачу пошагово на русском языке, объясняя каждое действие. Покажи все размышления, вычисления, логические выводы и проверь расчеты перед окончательным ответом.
Задача, которую нужно решить: {original_question}{qdrant_block}

Твой ответ должен быть кратким, точным и содержать только следующие блоки:

Рассуждение:
[Шаг 1 – объяснение и вычисления]
[Шаг 2 – проверка расчетов]
Ответ: [Только одна буква: A, B, C или D].
""" + EOS_TOKEN

            answer = f"**Рассуждение:**\n{cleaned_reasoning}\n**Ответ: {model_answer}**" + EOS_TOKEN

            formatted_data.append({
                "user": prompt,
                "assistant": answer
            })

    return Dataset.from_list(formatted_data)

# Build the dataset and hold out 10% of it for evaluation. The split is seeded
# with the same value used for the LoRA and trainer seeds in this notebook, so
# a fresh run reproduces the same train/eval partition.
dataset = create_chat_dataset(data_path)
split = dataset.train_test_split(test_size=0.1, seed=3407)
train_dataset = split["train"]
eval_dataset = split["test"]


In [None]:
def simple_formatting_func(examples):
    """Concatenate prompt and answer into training texts.

    Accepts either a batch (the "user" entry is a list, paired by position with
    the "assistant" entry) or a single example (scalar entries).

    Returns:
        A list of strings: one per batch item, or a one-element list for a
        single example.
    """
    prompts = examples["user"]
    # Single example: wrap the one concatenated text in a list.
    if not isinstance(prompts, list):
        return [f"{prompts}{examples['assistant']}"]
    # Batch: pair each prompt with the answer at the same index.
    return [
        f"{prompt}{examples['assistant'][idx]}"
        for idx, prompt in enumerate(prompts)
    ]

from trl import SFTConfig, SFTTrainer
# Supervised fine-tuning trainer over the prompt/answer dataset.
# The former `task_type="QUESTION_ANS"` argument was dropped: it is not an
# SFTTrainer/SFTConfig parameter (it is a PEFT task-type name), so it either
# raised on construction or was silently ignored.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    # NOTE(review): no evaluation strategy is configured below, so the eval set
    # is presumably only used if trainer.evaluate() is called — confirm.
    eval_dataset = eval_dataset,
    # NOTE(review): the dataset built in this notebook has "user"/"assistant"
    # columns and no "text" column; the training texts come from
    # formatting_func, so this field is presumably unused — confirm.
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = True,
    formatting_func = simple_formatting_func,
    args = SFTConfig(
        # Effective batch per device = 2 * 8 accumulated steps.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 8,
        warmup_ratio = 0.1,
        # A positive max_steps overrides num_train_epochs (see the
        # TrainingArguments documentation), so training stops after 200 steps.
        num_train_epochs = 2,
        max_steps = 200,
        learning_rate = 3e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.005,
        lr_scheduler_type = "cosine",
        max_grad_norm=0.5,
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)


In [None]:
# Run the fine-tuning loop; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()
print(">>> Сохранение обученных адаптеров...")
# NOTE(review): the directory name mentions "qwen3_14b", while the model-loading
# cell loads "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit" — confirm the name
# is intended before relying on it to identify the base model.
output_dir = "lora_model_qwen3_14b (category_1_math_cs_qwen_with_qdrant)"
# Persist the trained model through the trainer into output_dir.
trainer.save_model(output_dir)
print(f">>> Модель сохранена в папку: {output_dir}")