In [None]:
pip install transformers datasets openai accelerate

In [None]:
!pip install --upgrade datasets fsspec huggingface_hub

# STEP 2: Load Dataset – GSM8K
from datasets import load_dataset

# Fetch the "main" configuration of GSM8K.
# NOTE(review): download_mode="force_redownload" presumably bypasses the local
# cache on every run — confirm this is still needed once the cache is healthy.
gsm8k = load_dataset(
    "gsm8k",
    "main",
    download_mode="force_redownload",
)

# Keep only the first ten training problems so the experiment stays small.
train_split = gsm8k['train']
sample_problems = train_split.select(range(10))

print(sample_problems)

# STEP 3: Load Models
from transformers import pipeline

# Both models are wrapped in a text-generation pipeline; device_map="auto"
# delegates GPU/CPU placement to the library.
qwen = pipeline(
    task="text-generation",
    model="Qwen/Qwen1.5-7B-Chat",
    device_map="auto",
)
deepseek = pipeline(
    task="text-generation",
    model="deepseek-ai/deepseek-math-7b-base",
    device_map="auto",
)

# STEP 4: Prompting Functions

def direct_prompt(question):
    """Return a plain question/answer prompt with no reasoning cue.

    The question is placed after a ``Q: `` prefix and followed by a bare
    ``A:`` line for the model to complete.
    """
    prompt_lines = (f"Q: {question}", "A:")
    return "\n".join(prompt_lines)

def cot_prompt(question):
    """Return a chain-of-thought prompt for *question*.

    Same layout as the direct prompt, but with a "Let's think step by step."
    line inserted between the question and the trailing ``A:`` line.
    """
    prompt_lines = (
        f"Q: {question}",
        "Let's think step by step.",
        "A:",
    )
    return "\n".join(prompt_lines)

# STEP 5: Run Experiments

def run_agent(model_pipeline, question, use_cot=True, *, max_new_tokens=256):
    """Prompt one model with a single question and return its generated text.

    Args:
        model_pipeline: Callable invoked as
            ``model_pipeline(prompt, max_new_tokens=...)``; it must return a
            sequence whose first element is a mapping with a
            ``'generated_text'`` key.
        question: The problem statement to embed in the prompt.
        use_cot: When true, build the prompt with ``cot_prompt``; otherwise
            use ``direct_prompt``.
        max_new_tokens: Generation budget forwarded to the pipeline.
            Defaults to 256, the value that was previously hard-coded.

    Returns:
        The ``'generated_text'`` field of the first generation.
    """
    build_prompt = cot_prompt if use_cot else direct_prompt
    prompt = build_prompt(question)
    generations = model_pipeline(prompt, max_new_tokens=max_new_tokens)
    # NOTE(review): Hugging Face text-generation pipelines appear to echo the
    # prompt inside 'generated_text' by default — confirm that downstream
    # scoring expects the prompt to be included.
    return generations[0]['generated_text']

# STEP 6: Log Results for Evaluation

# One record per (problem, model, prompting mode) combination, appended in
# that nesting order.
results = []

models_under_test = (("Qwen", qwen), ("DeepSeek", deepseek))
prompt_modes = ("direct", "cot")

for item in sample_problems:
    q, a = item['question'], item['answer']
    for model_name, model_pipe in models_under_test:
        for mode in prompt_modes:
            # "cot" selects the step-by-step prompt; anything else is direct.
            out = run_agent(model_pipe, q, use_cot=(mode == "cot"))
            record = {
                "model": model_name,
                "mode": mode,
                "question": q,
                "expected": a,
                "response": out,
            }
            results.append(record)
