<a href="https://colab.research.google.com/github/Mechanics-Mechatronics-and-Robotics/CV-2025/blob/main/Week_14/Hands_on_CoT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Step 1: Install Dependencies
# Notebook shell commands: quietly (-q) install the libraries the cells below import.
!pip install -q torch transformers accelerate
# NOTE(review): bitsandbytes is upgraded here, but no code visible in this
# notebook references it -- confirm whether it is still required.
!pip install -q -U bitsandbytes  # Critical fix

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

In [None]:
# @title Test Cases
# Three arithmetic word problems used as sample inputs for the model.
problems = [
    (
        "A pizza is cut into 8 slices. "
        "If 3 people share equally, how many slices per person?"
    ),
    (
        "If a recipe calls for 3/4 cup of flour and you want to make half "
        "the recipe, how much flour do you need?"
    ),
    "A car travels 240 km in 3 hours. What's its speed in km/h?",
]

In [None]:
# @title Step 2: Load Quantized Model (Guaranteed to work)
# NOTE(review): despite the title, no quantization config is passed below; the
# weights are requested in float16. Confirm whether 4-/8-bit loading was intended.


# Using Microsoft's Phi-2 (2.7B params but fits in T4 GPU with quantization)
model_name = "microsoft/phi-2"

# Both objects are module-level globals that the answer functions in later
# cells read directly.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # device placement is left to the library
    torch_dtype=torch.float16,  # half-precision weights
    trust_remote_code=True  # NOTE(review): allows code shipped with the checkpoint to run -- confirm still needed
)

In [None]:
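# NOTE: the Step 3 helper below (generate_cot_response) is commented out, so
# it is not defined at runtime; it is kept here for reference only.
# # @title Step 3: Optimized CoT Function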
# def generate_cot_response(prompt):
#     cot_prompt = f"""Instruction: {prompt}
#     Response: Let's think step by step:"""  # Force CoT

#     inputs = tokenizer(cot_prompt, return_tensors="pt").to("cuda")
#     outputs = model.generate(**inputs, max_new_tokens=500)
#     return tokenizer.decode(outputs[0], skip_special_tokens=True)
# @title Phase 1: Direct Answering (No CoT)
def direct_answer(prompt):
    """Ask the model for a short final answer without step-by-step reasoning.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to send to the model.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # Force ultra-short, no-explanation answers
    inputs = tokenizer(
        f"{prompt} Answer ONLY with the final number or value, no text or explanation:",
        return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of assuming CUDA

    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        num_beams=1,
        # Greedy decoding is what makes the output deterministic; a
        # temperature value is only consulted when sampling is enabled.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# @title Phase 2: Explicit Chain-of-Thought
def cot_answer(prompt):
    """Prompt the model with a numbered-steps scaffold and return its output.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to solve.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # Demand structured step-by-step working
    inputs = tokenizer(
        f"PROBLEM: {prompt}\n"
        "SOLUTION STEPS:\n"
        "1. ",
        return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of assuming CUDA

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,  # sampling must be on for temperature to have any effect
        temperature=0.3,  # Slight creativity
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# @title Step 4: Test with Math Problems
# NOTE: this rebinds the notebook-level `problems` name, so cells run after
# this one iterate over these two problems.
problems = [
    "If a train travels 400 km in 4 hours, what's its speed?",
    "You have 5 books, give 2 to a friend and buy 3 more. How many do you have?"
]

for problem in problems:
    print(f"\nProblem: {problem}")
    # generate_cot_response exists only as commented-out code, so calling it
    # raises NameError; use the chain-of-thought helper that is defined.
    print("Solution:", cot_answer(problem))

In [None]:
# @title Direct Answering
def direct_answer(prompt):
    """Ask the model for a short final number without any explanation.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to send to the model.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # Force ultra-short answers with output constraints
    inputs = tokenizer(
        f"{prompt} Answer ONLY with the final number, no explanation:",
        return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of assuming CUDA

    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        num_beams=1,  # Disable creative exploration
        # Greedy decoding gives the deterministic output; a temperature
        # value is only consulted when sampling is enabled.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test
# Pass the complete problem statements so the model sees the actual question.
print(
    "Pizza problem:",
    direct_answer(
        "A pizza is cut into 8 slices. If 3 people share equally, "
        "how many slices per person?"
    ),
)
print(
    "Flour problem:",
    direct_answer(
        "If a recipe calls for 3/4 cup of flour and you want to make half "
        "the recipe, how much flour do you need?"
    ),
)

In [None]:
def cot_explicit(prompt):
    """Generate a sampled, step-by-step solution for a problem statement.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to solve.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # Demand structured working
    inputs = tokenizer(
        f"PROBLEM: {prompt}\nSOLUTION STEPS:\n1.",
        return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of assuming CUDA

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,  # Allow some creativity
        temperature=0.3,
        # Set explicitly, matching the later CoT helper in this notebook.
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test
# Pass the complete problem statement so the model sees the actual question.
print("Pizza problem with CoT:")
print(
    cot_explicit(
        "A pizza is cut into 8 slices. If 3 people share equally, "
        "how many slices per person?"
    )
)

In [None]:
# @title Run Comparisons
# First pass: short answers from direct_answer for every problem.
print("DIRECT ANSWERS (No CoT):")
for problem in problems:
    print(f"\nQ: {problem}")
    direct_result = direct_answer(problem)
    print(f"A: {direct_result}")

# Second pass: the same problems through cot_answer, printed as returned.
print("\n\nCHAIN-OF-THOUGHT ANSWERS:")
for problem in problems:
    print(f"\nQ: {problem}")
    cot_result = cot_answer(problem)
    print(cot_result)

In [None]:

# @title Load Model
# NOTE(review): this repeats the tokenizer/model load from the earlier
# "Step 2" cell with the same arguments; running both rebinds `tokenizer`
# and `model` -- confirm the second load is intended.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # device placement is left to the library
    torch_dtype=torch.float16,  # half-precision weights
    trust_remote_code=True  # NOTE(review): allows code shipped with the checkpoint to run -- confirm still needed
)

# @title Improved Direct Answer Function
def direct_answer(prompt):
    """Return only the number the model generates for a problem.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to send to the model.

    Returns:
        The first ``x/y`` fraction or (decimal) number found in the newly
        generated text. If the generated text contains no number, the
        stripped generated text is returned instead.
    """
    # More constrained prompt
    full_prompt = f"""Question: {prompt}
    Rules:
    1. You must respond with ONLY the final numerical answer
    2. No explanations or additional text
    3. If the answer is a fraction, write it as x/y

    Answer:"""

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        num_beams=1,
        do_sample=False,  # greedy decoding; temperature is unused in this mode
        pad_token_id=tokenizer.eos_token_id
    )
    # Decode only the tokens produced after the prompt. Searching the full
    # sequence would match digits from the question (or the numbered rules)
    # before ever reaching the model's answer.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    # The fraction alternative must come first: alternation is tried left to
    # right, so a leading plain-number pattern would stop at "3" in "3/8".
    match = re.search(r'\d+/\d+|\d+\.?\d*', completion)
    if match is None:
        # No number generated; return the raw text rather than crashing.
        return completion.strip()
    return match.group()

# @title Improved CoT Function
def cot_answer(prompt):
    """Generate a sampled, step-by-step solution for a problem statement.

    Reads the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: The problem statement to solve.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # Structured prompt template
    full_prompt = f"""Solve this problem step-by-step:

    Problem: {prompt}

    Steps:
    1."""

    # Follow the model's device instead of assuming CUDA is available.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.3,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# @title Test Cases
# Three arithmetic word problems used as sample inputs for the model.
problems = [
    (
        "A pizza is cut into 8 slices. "
        "If 3 people share equally, how many slices per person?"
    ),
    (
        "If a recipe calls for 3/4 cup of flour and you want to make half "
        "the recipe, how much flour do you need?"
    ),
    "A car travels 240 km in 3 hours. What's its speed in km/h?",
]

# @title Run Tests
# First pass: extracted numeric answers from direct_answer.
print("=== DIRECT ANSWERS ===")
for p in problems:
    print(f"\nQ: {p}")
    direct_result = direct_answer(p)
    print(f"A: {direct_result}")

# Second pass: the same problems through cot_answer, printed as returned.
print("\n\n=== CHAIN-OF-THOUGHT ===")
for p in problems:
    print(f"\nQ: {p}")
    cot_result = cot_answer(p)
    print(cot_result)