In [6]:
# BARE MINIMUM TEST - Exact Model Configs from vllm_model.py

from gsm8 import GSM8KLoader, Math500Loader, TriviaQALoader, MMLULoader
from math_trainer import MathTrainer
from math_500_trainer import Math500Trainer
from trivia_trainer import TriviaTrainer
from mmlu_trainer import MMLUTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [7]:
# Registry of the models this notebook can test.
# Each key is a short alias; each value holds the Hugging Face repo id
# ("model_id") plus the sampling settings ("temp", "top_p") and the
# generation budget ("max_tokens") stored for that model.
# NOTE(review): the max_tokens values for "qwen-instruct" (100) and
# "deepseek-distilled" (10) are much smaller than the others; they look like
# smoke-test values. Confirm against vllm_model.py before a real evaluation.
MODEL_CONFIGS = {
    "qwen-base": {
        "model_id": "Qwen/Qwen2.5-7B",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 1000
    },
    "qwen-instruct": {
        "model_id": "Qwen/Qwen2.5-7B-Instruct",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 100
    },
    "deepseek-distilled": {
        # Repo id uses the canonical "DeepSeek" casing, matching the
        # "llama-distilled" entry below.
        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 10
    },
    "llama-base": {
        "model_id": "meta-llama/Llama-3.1-8B",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 1000
    },
    "llama-instruct": {
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 256
    },
    "llama-distilled": {
        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 1000
    },
    "open-reasoner": {
        "model_id": "Open-Reasoner-Zero/Open-Reasoner-Zero-7B",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 1000
    },
    "llama-rl": {
        "model_id": "hkust-nlp/Llama-3.1-8B-SimpleRL-Zoo",
        "temp": 0.7, "top_p": 0.9, "max_tokens": 256
    }
}

In [8]:
# CHANGE MODEL HERE - EXACT MATCH TO YOUR VLLM CODE
MODEL_NAME = "qwen-instruct"  # Change to any key above

# Look up the selected entry and announce which checkpoint is about to load.
config = MODEL_CONFIGS[MODEL_NAME]
print(f"Loading {MODEL_NAME}: {config['model_id']}")

# Load model with CPU settings: float32 weights, everything placed on the CPU.
# NOTE(review): trust_remote_code=True lets the hub repo run its own Python
# code at load time; confirm it is actually required for these checkpoints.
_checkpoint = config["model_id"]
tokenizer = AutoTokenizer.from_pretrained(_checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    _checkpoint,
    torch_dtype=torch.float32,
    device_map="cpu",
    trust_remote_code=True,
)

Loading qwen-instruct: Qwen/Qwen2.5-7B-Instruct


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Maps each benchmark name to the trainer class used to evaluate it.
# The test loop looks these up by dataset name.
trainers = dict(
    (
        ('GSM8K', MathTrainer),
        ('Math500', Math500Trainer),
        ('TriviaQA', TriviaTrainer),
        ('MMLU', MMLUTrainer),
    )
)

Convert the point $(0,3)$ in rectangular coordinates to polar coordinates.  Enter your answer in the form $(r,\theta),$ where $r > 0$ and $0 \le \theta < 2 \pi.$


In [6]:
# 6. TEST LOOP
# Runs the first two items of every dataset through the loaded model and
# prints the prompt, the model response, the extracted answer and whether it
# matches the ground truth.
# NOTE(review): `datasets` is not defined in any cell visible in this notebook;
# it must be built (presumably with the loaders imported at the top) before
# this cell is run - TODO add that cell so Restart & Run All works.


def generate_response(prompt):
    """Generate a text completion for ``prompt`` with the loaded HF model.

    ``model`` is a Hugging Face causal LM, whose ``generate`` works on token-id
    tensors rather than raw text, so the prompt is tokenized first and the
    generated ids are decoded back to a string afterwards. Sampling uses the
    ``temp`` / ``top_p`` / ``max_tokens`` values of the selected ``config``.

    Args:
        prompt: Prompt text. Assumed to be a plain string, since the loop
            below slices and prints it - TODO confirm against the trainers.

    Returns:
        The decoded continuation only (prompt tokens are stripped).
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = encoded["input_ids"].shape[1]
    with torch.no_grad():  # inference only; no gradients needed
        output_ids = model.generate(
            **encoded,
            do_sample=True,
            temperature=config["temp"],
            top_p=config["top_p"],
            max_new_tokens=config["max_tokens"],
        )
    # generate() returns prompt + continuation; keep only the new tokens.
    return tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)


for name, dataset in datasets.items():
    print(f"\n{'='*50}")
    print(f"TESTING {name}")
    print(f"{'='*50}")
    
    trainer = trainers[name](model, dataset, model_name="test")
    
    for i in range(2):  # Only 2 problems
        item = dataset[i]
        prompt = trainer.prepare_prompt(item)
        response = generate_response(prompt)
        answer = trainer.extract_answer(response)
        truth = trainer.get_ground_truth(item)
        correct = trainer.check_correctness(answer, truth)
        
        print(f"\nPROBLEM {i+1}:")
        print(f"Raw Item: {item}")
        print(f"Prompt: {prompt[:200]}...")
        print(f"Response: {response[:200]}...")
        print(f"Extracted: {answer}")
        print(f"Truth: {truth}")
        print(f"Correct: {correct}")
        print("-" * 50)

We have that $r = \sqrt{0^2 + 3^2} = 3.$  Also, if we draw the line connecting the origin and $(0,3),$ this line makes an angle of $\frac{\pi}{2}$ with the positive $x$-axis.

[asy]
unitsize(0.8 cm);

draw((-0.5,0)--(3.5,0));
draw((0,-0.5)--(0,3.5));
draw(arc((0,0),3,0,90),red,Arrow(6));

dot((0,3), red);
label("$(0,3)$", (0,3), W);
dot((3,0), red);
[/asy]

Therefore, the polar coordinates are $\boxed{\left( 3, \frac{\pi}{2} \right)}.$


\left( 3, \frac{\pi}{2} \right)
