In [1]:
%pip install unsloth

Defaulting to user installation because normal site-packages is not writeable
Collecting unsloth
  Downloading unsloth-2025.7.6-py3-none-any.whl.metadata (47 kB)
Collecting unsloth_zoo>=2025.7.8 (from unsloth)
  Downloading unsloth_zoo-2025.7.8-py3-none-any.whl.metadata (8.1 kB)
Collecting torch>=2.4.0 (from unsloth)
  Downloading torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.26-py3-none-any.whl.metadata (12 kB)
Collecting transformers!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,>=4.51.3 (from unsloth)

In [3]:
print("goyda")

goyda


In [None]:
# -*- coding: utf-8 -*-
from unsloth import FastLanguageModel
import json
import re
import torch
from peft import PeftModel
from datasets import load_dataset
from tqdm import tqdm
from transformers import TextStreamer, GenerationConfig

class InferenceModelLoader:
    def __init__(self, base_model_name, lora_adapter_path, load_in_4bit=True):
        self.base_model_name = base_model_name
        self.lora_adapter_path = lora_adapter_path
        self.load_in_4bit = load_in_4bit
        self.model = None
        self.tokenizer = None
        self._load_model()

    def _load_model(self):
        print("Загрузка базовой модели...")
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.base_model_name,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=self.load_in_4bit,
        )

        print(f"Применение LoRA-адаптера из '{self.lora_adapter_path}'...")
        self.model = PeftModel.from_pretrained(model, self.lora_adapter_path)
        self.tokenizer = tokenizer

        print("Подготовка модели для инференса...")
        FastLanguageModel.for_inference(self.model)

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "left"
        self.model.config.pad_token_id = self.tokenizer.pad_token_id


class DatasetProcessor:
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path

    def load_and_prepare_data(self):
        print(f"Загрузка и подготовка данных из '{self.dataset_path}'...")
        dataset = load_dataset("json", data_files=self.dataset_path, split="train")
        dataset = dataset.filter(lambda x: 'ground_truth_answer' in x and x['ground_truth_answer'] is not None)
        dataset = dataset.map(lambda x: {"model_answer": x["model_extracted_answer"]})
        dataset = dataset.map(self.build_prompt)
        return dataset

    @staticmethod
    def build_prompt(sample):
        original_question = sample.get("original_question") or sample.get("text")
        cleaned_reasoning = sample.get("cleaned_reasoning")
        model_answer = sample.get("model_extracted_answer")

        if not original_question or not cleaned_reasoning or not model_answer:
            return sample 

        system_prompt = (
            "Реши задачу пошагово на русском языке, объясняя каждое действие. "
            "Покажи все размышления, вычисления, логические выводы и проверь расчеты перед окончательным ответом.\n"
            "Твой ответ должен быть кратким, точным и содержать только следующие блоки:\n\n"
            "Рассуждение:\n"
            "[Шаг 1 – объяснение и вычисления]\n"
            "[Шаг 2 – проверка расчетов]\n"
            "Ответ: [Только одна буква: A, B, C или D]."
        )

        user_prompt = f"Задача, которую нужно решить: {original_question}"

        assistant_response = f"**Рассуждение:**\n{cleaned_reasoning}\n**Ответ: {model_answer}**"

        sample["system_prompt"] = system_prompt
        sample["user_prompt"] = user_prompt
        sample["messages"] = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": assistant_response}
        ]
        sample["original_question"] = original_question
        # print(sample)
        return sample


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-07-24 09:51:17.938674: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
class ModelEvaluator:
    def __init__(self, model_loader, data_processor, batch_size=8, output_path="evaluation_results.jsonl"):
        self.model_loader = model_loader
        self.data_processor = data_processor
        self.stats = {"correct": 0, "incorrect": 0, "no_answer": 0}
        self.batch_size = batch_size
        self.output_path = output_path

    @staticmethod
    def extract_answer(generated_text):
        match = re.search(r"\*\*Ответ:\s*([A-D])", generated_text, re.IGNORECASE)
        return match.group(1).upper() if match else None

    @staticmethod
    def extract_reasoning(generated_text):
        match = re.search(r"\*\*Ответ:", generated_text, re.IGNORECASE)
        if match:
            return generated_text[:match.start()].strip()
        return generated_text.strip()

    def run(self, verbose=False):
        model = self.model_loader.model
        tokenizer = self.model_loader.tokenizer
        dataset = self.data_processor.load_and_prepare_data()

        with open(self.output_path, 'w', encoding='utf-8') as log_file:
            print(f"Результаты будут сохранены в: {self.output_path}")
            progress_bar = tqdm(dataset.iter(batch_size=self.batch_size), desc="Оценка модели", total=len(dataset) // self.batch_size)

            for batch in progress_bar:
                messages_batch = batch["messages"]

                chat_templates = [
                    tokenizer.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
                    for msg in messages_batch
                ]

                encoded = tokenizer(
                    chat_templates,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_attention_mask=True
                )

                input_ids = encoded["input_ids"].to("cuda")
                attention_mask = encoded["attention_mask"].to("cuda")


                outputs = model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=2048,
                    use_cache=False,
                    do_sample=False,
                    eos_token_id=tokenizer.eos_token_id,
                    pad_token_id=tokenizer.pad_token_id,
                    repetition_penalty=1.1
                )

                generated_texts = tokenizer.batch_decode(outputs[:, input_ids.shape[1]:], skip_special_tokens=True)

                for i in range(len(generated_texts)):
                    ground_truth = batch['model_answer'][i]
                    full_generated_text = generated_texts[i]
                    model_answer = self.extract_answer(full_generated_text)
                    model_reasoning = self.extract_reasoning(full_generated_text)

                    if model_answer is None:
                        self.stats["no_answer"] += 1
                        result = "NO_ANSWER"
                    elif model_answer == ground_truth:
                        self.stats["correct"] += 1
                        result = "CORRECT"
                    else:
                        self.stats["incorrect"] += 1
                        result = "INCORRECT"

                    log_entry = {
                        "id": batch.get('id', ['N/A']*len(generated_texts))[i],
                        "original_question": batch['original_question'][i],
                        "ground_truth": ground_truth,
                        "model_reasoning": model_reasoning,
                        "model_answer": model_answer,
                        "result": result,
                        "full_generated_text": full_generated_text
                    }
                    log_file.write(json.dumps(log_entry, ensure_ascii=False) + '\n')

                progress_bar.set_postfix({
                    '✅': self.stats['correct'],
                    '❌': self.stats['incorrect'],
                    '❓': self.stats['no_answer']
                })

        self.print_summary()

    def print_summary(self):
        total = sum(self.stats.values())
        if total == 0:
            print("Не было обработано ни одного сэмпла.")
            return

        print("\n" + "#" * 20 + " Итоги оценки " + "#" * 20)
        print(f"Всего обработано: {total}")
        print(f"✅ Верных: {self.stats['correct']} ({self.stats['correct']/total:.2%})")
        print(f"❌ Неверных: {self.stats['incorrect']} ({self.stats['incorrect']/total:.2%})")
        print(f"❓ Без ответа: {self.stats['no_answer']} ({self.stats['no_answer']/total:.2%})")
        print(f"Результаты в: {self.output_path}")
        print("#" * 60)


if __name__ == "__main__":
    BASE_MODEL_NAME = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit"
    ADAPTER_PATH = "/home/jupyter/datasphere/project/Qwen3-1.7B-unsloth-bnb-4bit"
    DATASET_PATH = "/home/jupyter/datasphere/project/proc_eval_results_merged_qwen_14b.jsonl"
    OUTPUT_LOG_PATH = "qwen_1_7_check.jsonl"
    BATCH_SIZE = 64

    try:
        model_loader = InferenceModelLoader(BASE_MODEL_NAME, ADAPTER_PATH)
        data_processor = DatasetProcessor(DATASET_PATH)
        evaluator = ModelEvaluator(model_loader, data_processor, batch_size=BATCH_SIZE, output_path=OUTPUT_LOG_PATH)
        evaluator.run(verbose=False)

    except Exception as e:
        import traceback
        print(f"Ошибка: {e}")
        traceback.print_exc()
        print("Проверьте пути, LoRA и BATCH_SIZE. При ошибке CUDA OOM — уменьшите батч.")

Загрузка базовой модели...
==((====))==  Unsloth 2025.7.6: Fast Qwen3 patching. Transformers: 4.53.3.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.325 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Применение LoRA-адаптера из '/home/jupyter/datasphere/project/Qwen3-1.7B-unsloth-bnb-4bit'...
Подготовка модели для инференса...
Загрузка и подготовка данных из '/home/jupyter/datasphere/project/proc_eval_results_merged_qwen_14b.jsonl'...


Generating train split: 6739 examples [00:00, 9248.07 examples/s] 
Filter: 100%|██████████| 6739/6739 [00:00<00:00, 31219.26 examples/s]
Map: 100%|██████████| 6739/6739 [00:00<00:00, 7122.49 examples/s]
Map: 100%|██████████| 6739/6739 [00:01<00:00, 4183.68 examples/s]


Результаты будут сохранены в: qwen_1_7_check.jsonl


Оценка модели:   0%|          | 0/105 [00:00<?, ?it/s]