In [None]:
!pip install bitsandbytes

In [None]:
import torch
from datasets import load_dataset, Dataset, DatasetDict
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [None]:
# Constants
RANDOM_STATE: int = 42  # seed used for the train/eval split
MODEL_NAME: str = 'Qwen/Qwen2.5-Math-1.5B-Instruct'  # checkpoint passed to from_pretrained
DATASET_NAME: str = 'nlile/hendrycks-MATH-benchmark'  # dataset id passed to load_dataset

In [None]:
# Load the dataset and drop the columns that preprocessing does not read
data = load_dataset(DATASET_NAME).remove_columns(['subject', 'level', 'unique_id'])
print(data)

In [None]:
# Load the tokenizer and the 4-bit quantized base model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Quantization options are passed through `quantization_config`; handing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype='auto',
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

In [None]:
# Configure the LoRA adapter and wrap the base model with it
lora_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=['q_proj', 'v_proj', 'o_proj'],  # modules that receive adapters
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
)
# NOTE(review): re-running this cell wraps the already-wrapped model again;
# reload the base model first when iterating on the config.
model = get_peft_model(model, lora_config)

In [None]:
# System prompt placed at the start of every training example
SYSTEM_PROMPT = (
    'You are an AI assistant skilled in mathematical reasoning. '
    'Please solve the problem using concise step-by-step reasoning process.'
    ' Put your final answer within \\boxed{}.'
)

# Tokenization and preparation of a single training example
def preprocess_function(example, max_length=1024):
    """Convert one dataset record into a tokenized supervised example.

    The prompt is rendered with the tokenizer's own chat template (when it
    defines one) so the training text uses the role markers that tokenizer
    expects rather than hand-written tags; the hand-written layout is kept
    only as a fallback. The target is the reference solution, with a boxed
    final answer appended on its own line when the solution has none,
    followed by the EOS token so that the end of an answer is part of the
    supervised signal.

    Prompt and target are tokenized separately and concatenated, which makes
    the label mask line up exactly with the prompt tokens regardless of how
    the tokenizer would merge characters across the boundary.

    NOTE(review): generation-time prompts must be built the same way
    (`tokenizer.apply_chat_template(..., add_generation_prompt=True)`).

    Args:
        example: Mapping with 'problem', 'solution' and 'answer' entries.
        max_length: Maximum number of tokens kept per example; longer
            sequences are cut from the right.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels' as lists of
        ints. Prompt positions in 'labels' are set to -100; if the prompt
        alone fills `max_length`, every label is -100.
    """
    problem = example['problem']
    solution = example['solution']
    answer = str(example['answer']).strip()

    # Build the prompt up to (and including) the assistant turn header
    if getattr(tokenizer, 'chat_template', None):
        prompt = tokenizer.apply_chat_template(
            [
                {'role': 'system', 'content': SYSTEM_PROMPT},
                {'role': 'user', 'content': problem},
            ],
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        prompt = f'<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{problem}\n<|assistant|>\n'

    # Make sure the target always ends with a boxed answer, then with EOS
    completion = solution
    if '\\boxed{' not in solution:
        completion = f'{solution.rstrip()}\nThe final answer is \\boxed{{{answer}}}'
    completion += tokenizer.eos_token or ''

    # The rendered text already contains every special token it needs
    prompt_ids = tokenizer(prompt, add_special_tokens=False)['input_ids']
    completion_ids = tokenizer(completion, add_special_tokens=False)['input_ids']

    # Only the completion contributes to the loss; the prompt is masked out
    input_ids = (prompt_ids + completion_ids)[:max_length]
    labels = ([-100] * len(prompt_ids) + completion_ids)[:max_length]

    return {
        'input_ids': input_ids,
        'attention_mask': [1] * len(input_ids),
        'labels': labels
    }

In [None]:
# Tokenize every problem, then carve an eval split out of the training set
tokenized_dataset = data.map(
    preprocess_function,
    remove_columns=data['train'].column_names,
    batched=False,
    desc='Tokenize dataset',
)
split = tokenized_dataset['train'].train_test_split(seed=RANDOM_STATE, test_size=0.2)
tokenized_dataset = DatasetDict(
    train=split['train'],
    eval=split['test'],
    test=tokenized_dataset['test'],
)
print(tokenized_dataset)

In [None]:
class CustomDataCollator:
    """Pad pre-tokenized examples to a common length and stack them into tensors.

    Each feature is a dict with 'input_ids', 'attention_mask' and 'labels'
    lists. Sequences are cut to `max_length`, then right-padded to the longest
    sequence in the batch: input ids with the tokenizer's pad id, the attention
    mask with 0 and the labels with -100.
    """

    def __init__(self, tokenizer, max_length=1024):
        """Store the tokenizer (source of the pad id) and the length cap."""
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, features):
        """Collate a list of feature dicts into a batch of `torch.long` tensors.

        Args:
            features: Non-empty list of dicts with 'input_ids',
                'attention_mask' and 'labels' lists.

        Returns:
            dict with 'input_ids', 'attention_mask' and 'labels' tensors of
            shape (len(features), batch_length).

        Raises:
            ValueError: If `features` is empty, or if padding is required but
                the tokenizer defines neither a pad nor an EOS token id.
        """
        # Longest sequence in the batch, capped at the configured maximum
        max_len = min(max(len(f['input_ids']) for f in features), self.max_length)

        # Fall back to EOS for tokenizers that ship without a pad token
        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = getattr(self.tokenizer, 'eos_token_id', None)

        padded_input_ids = []
        padded_attention_mask = []
        padded_labels = []

        for feature in features:
            # Slicing copies, so the dataset rows themselves are never mutated
            ids = list(feature['input_ids'][:max_len])
            mask = list(feature['attention_mask'][:max_len])
            # Labels must come from the labels column (not the attention mask),
            # otherwise the prompt masking and the real targets are lost
            label = list(feature['labels'][:max_len])

            pad_length = max_len - len(ids)
            if pad_length > 0:
                if pad_id is None:
                    raise ValueError('Tokenizer has no pad_token_id or eos_token_id to pad with')
                ids += [pad_id] * pad_length
                mask += [0] * pad_length
                label += [-100] * pad_length

            padded_input_ids.append(ids)
            padded_attention_mask.append(mask)
            padded_labels.append(label)

        return {
            'input_ids': torch.tensor(padded_input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(padded_attention_mask, dtype=torch.long),
            'labels': torch.tensor(padded_labels, dtype=torch.long)
        }

In [None]:
# Batch collator that pads with the tokenizer's pad id (default length cap)
data_collator = CustomDataCollator(tokenizer=tokenizer)

In [None]:
# Training hyperparameters
training_args = TrainingArguments(
    # Explicit checkpoint directory; needed now that checkpoints are written
    output_dir='checkpoints',

    per_device_train_batch_size=1,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=8,
    num_train_epochs=2,
    learning_rate=1e-5,
    weight_decay=0.01,
    lr_scheduler_type='cosine',
    warmup_ratio=0.1,
    bf16=True,
    label_names=['labels'],
    seed=RANDOM_STATE,

    report_to='none',

    do_eval=True,
    eval_strategy='steps',
    eval_steps=100,

    # `load_best_model_at_end` reloads a saved checkpoint, so checkpoints must
    # be written on the same schedule as evaluation; `save_strategy='no'`
    # is rejected when combined with it.
    save_strategy='steps',
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

In [None]:
# Assemble the trainer from the model, hyperparameters, datasets and collator
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset['eval'],
    train_dataset=tokenized_dataset['train'],
    # NOTE(review): newer transformers releases rename this argument to
    # `processing_class` - confirm against the installed version.
    tokenizer=tokenizer,
)

In [None]:
# Run training
trainer.train()

In [None]:
# Save the LoRA weights to the 'lora_adapter' directory
trainer.model.save_pretrained('lora_adapter')