In [None]:
!pip install unsloth



In [None]:
import os
import pandas as pd
import numpy as np
import unsloth
import torch
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, load_dataset
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import FastLanguageModel
from trl import SFTTrainer
from unsloth.chat_templates import get_chat_template

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# --- Run configuration -------------------------------------------------------
# Expose only GPU 0 to CUDA.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

VER = 1          # run/version tag; names the output directory below
EPOCHS = 1       # NOTE(review): not referenced by any cell visible in this notebook
MAX_LEN = 2048   # maximum sequence length given to the model loader and the trainer
model_name = "unsloth/Qwen2.5-Math-7B-bnb-4bit"

# Everything this run writes goes under ./ver_<VER>; create it up front.
DIR = "ver_" + str(VER)
os.makedirs(DIR, exist_ok=True)

In [None]:
# Load the training table. Lines pandas cannot parse are skipped instead of raising.
# Alternative (Kaggle) location of the same file:
#   /kaggle/input/map-charting-student-math-misunderstandings/train.csv
train = pd.read_csv('/content/train.csv', on_bad_lines='skip')

# Missing misconceptions become the literal string 'NA', so every row gets a
# complete "<Category>:<Misconception>" target string.
train['Misconception'] = train['Misconception'].fillna('NA')
train['target'] = train['Category'] + ":" + train['Misconception']

# Encode each distinct target string as an integer id in 0 .. n_classes - 1.
le = LabelEncoder()
le.fit(train['target'])
train['label'] = le.transform(train['target'])
n_classes = len(le.classes_)
print("Train shape: {} with {} target classes".format(train.shape, n_classes))

Train shape: (36696, 9) with 65 target classes


In [None]:
# Work out, per question, which multiple-choice answer is the correct one, then
# flag every training row with is_correct = "yes" / "no".

# Re-run safety: if a previous execution already added `is_correct`, drop it.
# Otherwise the merge below would create is_correct_x / is_correct_y columns
# and the final assignment would fail.
train = train.drop(columns='is_correct', errors='ignore')

# Rows whose Category reads "True" before the first underscore.
# Vectorised string ops replace the original row-wise apply(axis=1).
is_true_row = train['Category'].str.split('_').str[0] == 'True'
correct = train.loc[is_true_row].copy()

# For each question keep the answer that most often appears on such rows:
# count rows per (QuestionId, MC_Answer), sort by that count, keep the first
# row seen for each QuestionId.
correct['c'] = correct.groupby(['QuestionId', 'MC_Answer']).MC_Answer.transform('count')
correct = correct.sort_values('c', ascending=False)
correct = correct.drop_duplicates(['QuestionId'])
correct = correct[['QuestionId', 'MC_Answer']].assign(is_correct=1)

# Left merge keeps every training row; rows whose (QuestionId, MC_Answer) pair
# is not the chosen correct answer get NaN, which maps to "no" below.
train = train.merge(correct, on=['QuestionId', 'MC_Answer'], how='left')
train['is_correct'] = np.where(train['is_correct'].eq(1), "yes", "no")

In [None]:
# One distinct single-character symbol per target class: position i is the label
# the model is trained to emit for integer class i.
#
# The entries must be genuine single Unicode characters. They had been stored as
# mis-decoded multi-character sequences (UTF-8 bytes read with a legacy 8-bit
# encoding), many sharing the same prefix. That contradicts the system prompt's
# "exactly one character" instruction and makes the labels hard to tell apart.
special_character_list = [
    '■', '□', '▲', '△', '▼', '▽', '◆', '◇', '○', '●', '★', '☆', '♦', '♥', '♠', '♣',
    '§', '†', '‡', '※', '∞', '±', '≠', '≈', '√', '∑', '∏', '∆', 'Ω', 'μ', '∂', '→',
    '←', '↑', '↓', '↔', '↕', '〈', '〉', '『', '』', '│', '─', '┌', '┐', '└', '┘', '┼',
    '█', '▓', '▒', '£', '¥', '€', '₩', '©', '®', '™', '♪', '♫', '☀', '☁', '☂', '☃', '☎'
]

# Fail fast if this file is ever saved or loaded with the wrong encoding again.
assert all(len(symbol) == 1 for symbol in special_character_list), \
    "special_character_list entries must be single characters (check file encoding)"
assert len(set(special_character_list)) == len(special_character_list), \
    "special_character_list entries must be unique"

# Give every row the symbol that stands for its integer class label.
train['special_label'] = [special_character_list[int(code)] for code in train['label']]

In [None]:
# Load the pre-quantised 4-bit base model together with its tokenizer.
_load_kwargs = dict(
    model_name=model_name,
    max_seq_length=MAX_LEN,
    dtype=None,          # no explicit dtype is requested here
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)

# Wrap the model with LoRA adapters on the attention and MLP projections.
_lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
    "gate_proj", "up_proj", "down_proj",      # MLP projections
]
_lora_kwargs = dict(
    r=8,                                   # LoRA rank
    lora_alpha=64,                         # with r=8 the alpha / r ratio is 8
    lora_dropout=0,
    bias="none",
    target_modules=_lora_targets,
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=42,
)
model = FastLanguageModel.get_peft_model(model, **_lora_kwargs)

==((====))==  Unsloth 2026.2.1: Fast Qwen2 patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.563 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.35. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2026.2.1 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:
# Attach the "qwen2.5" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template="qwen2.5")

# Legend of "<symbol>: <class name>" entries, one per class in label-encoder order.
# It is interpolated into the system prompt so the model can see what each output
# symbol stands for.
class_mappings = []
for class_idx in range(n_classes):
    class_mappings.append(f"{special_character_list[class_idx]}: {le.classes_[class_idx]}")

In [None]:
# System prompt shared by every example. This is an f-string, so the legend built
# from `class_mappings` is interpolated once, when this cell runs; re-run the cell
# if `class_mappings` changes. Because of the triple-quote layout the text starts
# and ends with a newline.
SYS_PROMPT = f"""
You are a math education expert specializing in analyzing student reasoning.

Your task is to classify a student's explanation into exactly ONE of the predefined Misconception categories.

Instructions:
- Carefully read the question, selected answer, correctness flag, and student explanation.
- Determine which Misconception category best describes the student's reasoning.
- Output ONLY the single classification label (one character).
- Do NOT include explanations, punctuation, or any additional text.
- Do NOT repeat the prompt.
- Your response must be exactly one character.

Available Misconception Categories:
{', '.join(class_mappings)}
"""


# Per-example user message. A plain (non-f) string filled later with str.format;
# it expects the keyword arguments QuestionText, MC_Answer, CorrectFlag and
# StudentExplanation.
user_prompt_template = """Question: {QuestionText}
Answer: {MC_Answer}
Correct? {CorrectFlag}
Student Explanation: {StudentExplanation}
"""

In [None]:
def _build_conversation(row):
    """Return the [system, user, assistant] message list for one training row.

    The user message is `user_prompt_template` filled from the row's question,
    chosen answer, yes/no correctness flag and explanation; the assistant message
    is the row's single-symbol class label.
    """
    user_text = user_prompt_template.format(
        QuestionText=row["QuestionText"],
        MC_Answer=row["MC_Answer"],
        CorrectFlag=row["is_correct"],
        StudentExplanation=row["StudentExplanation"],
    )
    return [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": row["special_label"]},
    ]


# One three-message conversation per training row, in row order.
dataset_chat = [_build_conversation(row) for _, row in train.iterrows()]

# Render conversations to plain text with the tokenizer's chat template.
def formatting_prompts_func(dataset):
    """Render each conversation to a single chat-template string.

    Args:
        dataset: iterable of conversations, each a list of role/content
            message dicts.

    Returns:
        dict with one key, "text", holding one rendered string per
        conversation in input order. Nothing is tokenized here
        (tokenize=False) and no generation prompt is appended.
    """
    rendered = []
    for conversation in dataset:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize=False, add_generation_prompt=False
            )
        )
    return {"text": rendered}

# Render every conversation to text. Despite the variable name, nothing is
# tokenized at this point: the result is a {"text": [...]} dict of strings.
dataset_tokenized = formatting_prompts_func(dataset_chat)

In [None]:
# Wrap the {"text": [...]} dict in a Hugging Face Dataset. The same name is rebound
# from dict to Dataset, so the conversion is guarded: re-running this cell would
# otherwise pass an already-built Dataset back into Dataset.from_dict.
if not isinstance(dataset_tokenized, Dataset):
    dataset_tokenized = Dataset.from_dict(dataset_tokenized)

# Preview the first rendered training example.
dataset_tokenized[0]

{'text': "<|im_start|>system\n\nYou are a math education expert specializing in analyzing student reasoning.\n\nYour task is to classify a student's explanation into exactly ONE of the predefined Misconception categories.\n\nInstructions:\n- Carefully read the question, selected answer, correctness flag, and student explanation.\n- Determine which Misconception category best describes the student's reasoning.\n- Output ONLY the single classification label (one character).\n- Do NOT include explanations, punctuation, or any additional text.\n- Do NOT repeat the prompt.\n- Your response must be exactly one character.\n\nAvailable Misconception Categories:\n‚ñ†: False_Correct:NA, ‚ñ°: False_Misconception:Adding_across, ‚ñ≤: False_Misconception:Adding_terms, ‚ñ≥: False_Misconception:Additive, ‚ñº: False_Misconception:Base_rate, ‚ñΩ: False_Misconception:Certainty, ‚óÜ: False_Misconception:Definition, ‚óá: False_Misconception:Denominator-only_change, ‚óã: False_Misconception:Division, ‚óè: F

In [None]:
from unsloth import is_bfloat16_supported

# Mixed precision: bf16 when the GPU supports it, fp16 otherwise (never both).
_use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    # --- output and checkpoints ---
    output_dir=f"./{DIR}",
    save_strategy="steps",
    # NOTE(review): a float below 1 is presumably read as a fraction of the total
    # training steps (every 10 steps here) - confirm against the Transformers docs.
    save_steps=0.10,
    save_total_limit=5,
    report_to="none",
    logging_steps=1,
    # --- run length and batching ---
    max_steps=100,                    # run is capped by step count
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,    # 2 x 2 = 4 examples per optimizer step
    # --- optimisation ---
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    weight_decay=0.01,
    optim="paged_adamw_8bit",
    bf16=_use_bf16,
    fp16=not _use_bf16,
    seed=42,
    # torch.compile stays off to avoid the graph break error.
    torch_compile=False,
)

In [None]:
from unsloth.chat_templates import train_on_responses_only

# Build the SFT trainer over the rendered "text" column and wrap it right away
# with train_on_responses_only. The two marker strings are the chat-template
# headers that open a user turn and an assistant turn respectively.
trainer = train_on_responses_only(
    SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=dataset_tokenized,
        dataset_text_field="text",
        dataset_num_proc=2,
        max_seq_length=MAX_LEN,
        packing=False,  # Important for classification tasks
    ),
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/36696 [00:00<?, ? examples/s]

Map (num_proc=6):   0%|          | 0/36696 [00:00<?, ? examples/s]

Filter (num_proc=6):   0%|          | 0/36696 [00:00<?, ? examples/s]

Unsloth: Removed 329 out of 36696 samples from train_dataset where all labels were -100 (no response found after truncation). This prevents NaN loss during training.


In [None]:
# Run fine-tuning with the configured trainer; the value returned by train() is
# kept in trainer_stats for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 36,367 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 2 x 1) = 4
 "-____-"     Trainable parameters = 20,185,088 of 7,635,801,600 (0.26% trained)


Step,Training Loss
1,10.831
2,11.165
3,10.3957
4,10.9613
5,10.5659
6,9.8164
7,10.0721
8,9.8711
9,8.3554
10,6.694
