In [None]:
import os
from unsloth import FastLanguageModel
import torch
# Loading options. They stay as module-level names because the trainer cell
# reuses `max_seq_length`.
load_in_4bit = True
dtype = None  # no explicit dtype is passed to the loader
max_seq_length = 3056

# Collect the loader arguments in one place, then load the model together
# with its tokenizer.
pretrained_kwargs = dict(
    model_name="unsloth/Qwen3-14B-unsloth-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**pretrained_kwargs)


In [None]:
# Attach LoRA adapters to the loaded model. `model` is rebound to the wrapped
# result.
# NOTE(review): because the name is rebound, re-running this cell feeds the
# already wrapped model back in -- presumably not intended; rerun the loading
# cell first.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

lora_settings = {
    "r": 24,
    "target_modules": lora_target_modules,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": False,
    "loftq_config": None,
}

model = FastLanguageModel.get_peft_model(model, **lora_settings)


In [None]:
import json
from datasets import Dataset

# End-of-sequence token as reported by the loaded tokenizer.
# NOTE(review): not referenced by any of the cells visible here.
EOS_TOKEN = tokenizer.eos_token

# Location of the JSONL file with the training records.
data_path = "/home/jupyter/datasphere/project/category_3_social_humanities_qwen.jsonl"

def create_chat_dataset(file_path):
    """Build a chat-style dataset from a JSONL file.

    Every non-blank line is parsed as one JSON object. A record is kept only
    when its ``original_question``, ``cleaned_reasoning`` and
    ``model_extracted_answer`` fields are all truthy; it is then converted
    into a three-message conversation (system, user, assistant).

    Args:
        file_path: Path to a UTF-8 encoded JSONL file.

    Returns:
        A ``Dataset`` built from a list of ``{"messages": [...]}`` rows, one
        row per kept record.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Both prompt pieces are identical for every record, so build them once
    # instead of on each loop iteration.
    system_prompt = (
        "Реши задачу пошагово на русском языке, объясняя каждое действие. "
        "Покажи все размышления, вычисления, логические выводы и проверь расчеты перед окончательным ответом.\n"
        "Твой ответ должен быть кратким, точным и содержать только следующие блоки:\n\n"
        "Рассуждение:\n"
        "[Шаг 1 – объяснение и вычисления]\n"
        "[Шаг 2 – проверка расчетов]\n"
        "Ответ: [Только одна буква: A, B, C или D]."
    )
    user_prompt_template = "Задача, которую нужно решить: {original_question}"

    formatted_data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # A blank line (for example an empty line at the end of the file)
            # is not a record; json.loads would raise on it.
            if not line.strip():
                continue

            record = json.loads(line)
            original_question = record.get("original_question")
            cleaned_reasoning = record.get("cleaned_reasoning")
            model_answer = record.get("model_extracted_answer")

            # Incomplete records are skipped rather than trained on.
            if not (original_question and cleaned_reasoning and model_answer):
                continue

            assistant_response = (
                f"**Рассуждение:**\n{cleaned_reasoning}\n**Ответ: {model_answer}**"
            )

            formatted_data.append({
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt_template.format(original_question=original_question)},
                    {"role": "assistant", "content": assistant_response}
                ]
            })

    return Dataset.from_list(formatted_data)

# Build the dataset and hold out 10% of it for evaluation.
# The split is seeded with the same value the LoRA and trainer configs use,
# so a fresh kernel reproduces the same train/eval partition and eval losses
# stay comparable between runs.
dataset = create_chat_dataset(data_path)
split = dataset.train_test_split(test_size=0.1, seed=3407)
train_dataset = split["train"]
eval_dataset = split["test"]


In [None]:
def simple_formatting_func(example):
    """Render chat conversations as plain training strings.

    ``example["messages"]`` may hold either one conversation (a list of
    ``{"role", "content"}`` dicts) or a batch (a list of such lists). Each
    message is rendered as a ``<|role|>`` line followed by its content line,
    and every conversation is closed with ``<|endoftext|>``.

    Args:
        example: Mapping with a ``"messages"`` entry as described above.

    Returns:
        A list of strings: one element for a single conversation, one element
        per conversation for a batch, and an empty list when ``messages`` is
        empty.
    """
    # NOTE(review): the role tags and the end marker are hard-coded here
    # rather than taken from the tokenizer (the notebook stores
    # tokenizer.eos_token in EOS_TOKEN, but this function does not use it).
    # Confirm this format is what inference expects.
    def format_message(msgs):
        rendered = "".join(f"<|{msg['role']}|>\n{msg['content']}\n" for msg in msgs)
        return rendered + "<|endoftext|>"

    messages = example["messages"]

    # The emptiness check keeps `messages[0]` from raising IndexError on an
    # empty list; an empty input then falls through and yields [].
    if isinstance(messages, list) and messages and isinstance(messages[0], dict):
        return [format_message(messages)]
    return [format_message(msgs) for msgs in messages]

from trl import SFTConfig, SFTTrainer
# Supervised fine-tuning trainer.
# `task_type` is intentionally not passed: it is a PEFT LoraConfig field, not
# an SFTTrainer argument.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = True,
    formatting_func = simple_formatting_func,
    args = SFTConfig(
        # 2 sequences x 8 accumulation steps = 16 sequences per optimizer
        # step on each device.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 8,
        warmup_ratio = 0.1,
        # Training length is controlled by max_steps. A positive max_steps
        # takes precedence over num_train_epochs, so the previous
        # `num_train_epochs = 2` had no effect and was dropped. To train by
        # epochs instead, set max_steps = -1 and pass num_train_epochs.
        max_steps = 200,
        learning_rate = 3e-4,
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.005,
        lr_scheduler_type = "cosine",
        max_grad_norm = 0.5,
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
        # Evaluate and checkpoint on the same 25-step cadence so that
        # load_best_model_at_end always has a checkpoint for each evaluation.
        eval_strategy = "steps",
        eval_steps = 25,
        save_steps = 25,
        load_best_model_at_end = True,
        save_total_limit = 2,
    ),
)


In [None]:
# Folder that receives the saved model once training is done.
output_dir = "qwen3_14b (category_3_social_humanities_qwen)"

# Run the fine-tuning loop and keep whatever train() returns for inspection.
trainer_stats = trainer.train()

# Persist the result and report where it went.
print("Сохранение обученных адаптеров...")
trainer.save_model(output_dir)
print(f"Модель сохранена в папку: {output_dir}")