In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load.
import os
# Both environment variables are assigned before vllm and torch are imported below.
# NOTE(review): VLLM_USE_V1=0 presumably selects vLLM's older (V0) engine; confirm
# against the vLLM version installed in this image.
os.environ["VLLM_USE_V1"] = "0"
# Expose only CUDA device index 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import json
import vllm
import torch

In [None]:
# Location of the AWQ-quantized Qwen3 32B checkpoint attached to this notebook.
model_path = "/kaggle/input/qwen-3-32b-awq"

# Engine options, gathered in one place and forwarded unchanged to vllm.LLM.
engine_options = dict(
    quantization='awq',
    # One tensor-parallel shard per GPU that torch can see.
    tensor_parallel_size=torch.cuda.device_count(),
    gpu_memory_utilization=0.9,
    trust_remote_code=True,
    dtype="half",
    enforce_eager=True,
    max_model_len=2048,
    disable_log_stats=True,
    enable_prefix_caching=True,
)

llm = vllm.LLM(model_path, **engine_options)

# The engine's own tokenizer is reused later to render chat-template prompts.
tokenizer = llm.get_tokenizer()

In [None]:
# Load the competition training set. Later cells read its QuestionId,
# QuestionText, MC_Answer and Category columns.
trn = pd.read_csv(
    "/kaggle/input/map-charting-student-math-misunderstandings/train.csv"
)

In [None]:
from string import ascii_uppercase

# Identify the correct answers: the label is the part of Category that
# precedes the first underscore (compared against 'True' further down).
trn['Correct'] = trn['Category'].apply(lambda x: x.split("_")[0].strip())

# One row per distinct (question, answer option, correctness label).
trn_qa = (
    trn[['QuestionId', 'QuestionText', 'MC_Answer', 'Correct']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Create standardized MCQ choices: letter each question's options A, B, C, ...
# in order of first appearance. cumcount() is aligned on the row index, so the
# letters land on the right rows even when a question's rows are not
# contiguous or the frame is not sorted by QuestionId. (Collecting letters
# while iterating groupby() and assigning them positionally only works when
# the frame happens to be in sorted, grouped order.)
choice_rank = trn_qa.groupby("QuestionId").cumcount()

if (choice_rank >= len(ascii_uppercase)).any():
    raise ValueError(
        f"A question has more than {len(ascii_uppercase)} distinct answer rows; "
        "cannot assign single-letter choices."
    )

trn_qa['Choice'] = choice_rank.map(lambda rank: ascii_uppercase[rank])

# Kept for backward compatibility: the flat list of letters, in row order.
trn_qa_choices = trn_qa['Choice'].tolist()

In [None]:
# Index every question by its id: the question text (taken from the group's
# first row) plus its lettered answer options as a list of
# {"Choice": ..., "MC_Answer": ...} records.
questions = {
    question_id: {
        "question": rows.QuestionText.iloc[0],
        "choices": rows[['Choice', 'MC_Answer']].to_dict(orient='records'),
    }
    for question_id, rows in trn_qa.groupby("QuestionId")
}

In [None]:
# System prompt for the answer-selection task. It is sent as the "system"
# message of every chat prompt and tells the model to pick the correct choice
# without adding any explanation.
SYS_PROMPT_MATH_ANSWER = """
You are an expert at assessing student performance on multiple choice math questions.

Your task is given a math question to choose the correct answer from a list of choices.

Do NOT include any other text or explanation.
"""


def create_question_prompt(qid:str, math_question:str, math_choices:list[dict]) -> str:
    """Build the user-turn prompt for one multiple-choice math question.

    Args:
        qid: Question identifier; interpolated as-is into the <QuestionId> tag.
        math_question: Question text placed inside the <Question> tag.
        math_choices: Answer options, JSON-serialized into the <Choices> tag.
            This notebook passes a list of {"Choice", "MC_Answer"} records.

    Returns:
        The prompt text: the tagged question id, question and choices,
        followed by a fixed instruction block.
    """
    # ensure_ascii=False keeps non-ASCII characters in the answer options
    # (currency signs, division signs, ...) as written, rather than turning
    # them into \uXXXX escapes that the model would have to decode.
    choices_json = json.dumps(math_choices, ensure_ascii=False)

    # NOTE: the "\n" inside the instruction text is a real line break in the
    # rendered prompt, because this is a regular (non-raw) f-string.
    prompt = f"""<QuestionId>{qid}</QuestionId>

<Question>
{math_question}
</Question>

<Choices>
{choices_json}
</Choices>

<Instruction>
Solve the math question and choose the correct answer from the choices.
When thinking do not repeat say "\nOkay, let's see.".
Be as succinct in your thinking to save time.
These are easy questions, you can solve this simply.
Return only the choice letter.
</Instruction>
"""

    return prompt

In [None]:
# One user-turn prompt per question, in the iteration order of `questions`.
question_prompts = [
    create_question_prompt(question_id, entry['question'], entry['choices'])
    for question_id, entry in questions.items()
]

In [None]:
# Render each question as a chat-template string (system + user message),
# ending with the generation prompt and with thinking mode switched on.
prompts = [
    tokenizer.apply_chat_template(
        [
            {"role": "system", "content": SYS_PROMPT_MATH_ANSWER},
            {"role": "user", "content": user_content},
        ],
        add_generation_prompt=True,
        enable_thinking=True,
        tokenize=False,
    )
    for user_content in question_prompts
]

# NOTE(review): max_tokens equals the max_model_len=2048 given when the engine
# was built, so a long reasoning trace may presumably be cut off before the
# final answer is produced. Confirm how the installed vLLM handles this.
sampling_params = vllm.SamplingParams(
    seed=0,
    skip_special_tokens=True,
    max_tokens=2048,
    temperature=0,
)

# Run all prompts as a single batch, with a progress bar.
outputs = llm.generate(prompts, sampling_params, use_tqdm=True)

In [None]:
# Collect the text of the first completion for every prompt, trimmed of
# surrounding whitespace.
responses = []
for request_output in outputs:
    first_completion = request_output.outputs[0]
    responses.append(first_completion.text.strip())

In [None]:
def extract_final_answer(response: str, end_marker: str = "</think>") -> str:
    """Return the text that follows the model's reasoning block.

    Args:
        response: Full generated text, reasoning included.
        end_marker: Tag that closes the reasoning block.

    Returns:
        The stripped text right after the first ``end_marker`` (up to the next
        one, if the marker appears again). An empty string when the marker is
        missing, e.g. when generation stopped before the reasoning was closed,
        so one bad response no longer raises IndexError for the whole batch.
    """
    pieces = response.split(end_marker)
    if len(pieces) < 2:
        return ""
    return pieces[1].strip()


# One extracted answer per response; "" marks a response with no final answer.
answers = [extract_final_answer(x) for x in responses]

In [None]:
# Show the extracted answers (one entry per prompt) for manual inspection.
answers

In [None]:
# Show the answer options whose label marks them as correct.
# Caveat: row 17 carries a "correct" label even though that answer is not
# actually correct.
is_labeled_correct = trn_qa['Correct'].eq('True')
trn_qa[is_labeled_correct]