<a href="https://colab.research.google.com/github/Mechanics-Mechatronics-and-Robotics/CV-2025/blob/main/Week_14/Hands_on_CoT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hands-on Chain-of-Thought Prompting
Implementation Note:

This notebook was developed using methodologies suggested by the DeepSeek-V3 (DeepSeek, 2024) language model.

In [None]:
# @title Step 1: Install Dependencies
!pip install -q torch transformers accelerate

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

In [None]:
# @title Test Cases
# Prompts passed to both direct_answer() and cot_answer() in the test cell:
# two short arithmetic word problems in English and one riddle in Russian.
problems = [
    "A pizza is cut into 8 slices. If 3 people share equally, how many slices per person?",
    # Russian, roughly: "The letters a and b sat on a pipe. A fell, b vanished;
    # which letter is left on the pipe?"
    # NOTE(review): presumably a wordplay riddle (the conjunction "и" = "and"
    # is itself a letter) — confirm the intended answer.
    "Буквы а и б сидели на трубе. А упала, б пропала, какая буква осталась на трубе?",
    "A car travels 240 km in 3 hours. What's its speed in km/h?"
]

In [None]:
# @title Load Model (float16)
# Microsoft's Phi-2 (2.7B params). No quantization config is passed below: the
# weights are loaded in half precision (torch.float16).
# NOTE(review): intended to fit on a Colab T4 GPU — confirm memory headroom.
model_name = "microsoft/phi-2"

# NOTE(review): trust_remote_code=True permits code shipped with the model
# repository to run locally — confirm it is still needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # automatic placement of the weights on available devices
    torch_dtype=torch.float16,  # half-precision weights
    trust_remote_code=True
)

In [None]:
# Device of the model's first parameter; the answer functions move their
# tokenized inputs here before calling generate().
# NOTE(review): with device_map="auto" the weights could be spread over several
# devices; this assumes the first parameter's device is where inputs belong.
device = next(model.parameters()).device  # e.g. cuda:0

In [None]:
# @title Direct Answer Function
def direct_answer(prompt: str, max_new_tokens: int = 10) -> str:
    """Ask the model for a bare final answer, with no intermediate reasoning.

    The question is followed by an instruction to reply with the final value
    only, the continuation is decoded with 5-beam search, and the first
    whitespace-delimited token of that continuation is returned.

    Args:
        prompt: The question text; surrounding whitespace is stripped.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first whitespace-delimited token of the generated continuation,
        or an empty string when the continuation is empty or whitespace-only.
    """
    template = (
        prompt.strip()
        + "\n\nAnswer only with the final value (no explanations):"
    )
    inputs = tokenizer(template, return_tensors="pt").to(device)
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=5,
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The output sequence starts with the prompt tokens; keep only what follows.
    prompt_len = inputs["input_ids"].shape[-1]
    gen_text = tokenizer.decode(out[0, prompt_len:], skip_special_tokens=True)
    # split() with no arguments ignores leading/trailing whitespace and yields
    # [] for an empty continuation; fall back to "" instead of raising
    # IndexError on [0].
    tokens = gen_text.split()
    return tokens[0] if tokens else ""

In [None]:
def cot_answer(prompt: str, max_new_tokens: int = 200, temperature: float = 0.3) -> str:
    """Generate a step-by-step (chain-of-thought) answer to a question.

    The question is wrapped as "Problem: ..." followed by the cue
    "Let's think step by step:", and the continuation is sampled from the
    model. Because sampling is used, repeated calls may return different text.

    Args:
        prompt: The question text; surrounding whitespace is stripped.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to generate().

    Returns:
        The decoded continuation (prompt tokens removed), stripped of
        leading and trailing whitespace.
    """
    template = (
        "Problem: " + prompt.strip()
        + "\n\nLet's think step by step:"
    )
    inputs = tokenizer(template, return_tensors="pt").to(device)
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # Single-sequence sampling. `early_stopping` is a beam-search-only
        # option, so it is deliberately not passed alongside num_beams=1.
        num_beams=1,
        do_sample=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The output sequence starts with the prompt tokens; keep only what follows.
    prompt_len = inputs["input_ids"].shape[-1]
    gen_ids = out[0, prompt_len:]
    return tokenizer.decode(gen_ids, skip_special_tokens=True).strip()

In [None]:
# @title Run Tests
# First pass: query every problem for a bare final value.
print("=== DIRECT ANSWERS ===")
for q in problems:
    short_reply = direct_answer(q)
    print(f"Q: {q}\nA: {short_reply}\n")

# Second pass: same problems, this time with step-by-step reasoning, each
# followed by a 40-dash rule.
rule = "-" * 40
print("=== CHAIN-OF-THOUGHT ===")
for q in problems:
    reasoning = cot_answer(q)
    print(f"Q: {q}\n{reasoning}\n{rule}\n")