# L02: Prompting Strategies Lab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Digital-AI-Finance/agentic-artificial-intelligence/blob/main/L02_LLM_Foundations_Agents/notebooks/L02_prompting_strategies.ipynb)

**Week 2 - LLM Foundations for Agents**

## Learning Objectives
- Implement Chain-of-Thought, Tree-of-Thoughts, and Self-Consistency
- Compare prompting strategies on reasoning tasks
- Measure accuracy vs. cost trade-offs

In [None]:
# Colab setup: this branch only runs when the notebook executes inside
# Google Colab, detected by the google.colab module being loaded.
import sys
if 'google.colab' in sys.modules:
    # IPython shell escape: install the third-party packages imported below.
    !pip install -q openai python-dotenv
    from google.colab import userdata
    import os
    # Copy the key from Colab's userdata store into the process environment
    # (presumably so the OpenAI client created below can pick it up).
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

import json
from collections import Counter
from typing import Optional

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if one exists) before the client
# is constructed.
load_dotenv()
# Shared client used by every prompting function in this notebook.
client = OpenAI()
print("Ready")

## 1. Chain-of-Thought Prompting

In [None]:
def zero_shot_cot(question: str) -> str:
    """Answer ``question`` with zero-shot Chain-of-Thought prompting.

    The question is sent as a single user message with the cue
    "Let's think step by step:" appended, at temperature 0 and capped at
    500 completion tokens. The text of the first returned choice is
    handed back unchanged.
    """
    cot_messages = [
        {
            "role": "user",
            "content": f"{question}\n\nLet's think step by step:",
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=cot_messages,
        temperature=0,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Smoke test: run zero-shot CoT on a simple arithmetic word problem.
# `question` is kept as a module-level name because later cells reuse it.
question = (
    "Roger has 5 tennis balls. He buys 2 cans of 3 balls each. "
    "How many tennis balls does he have now?"
)
print("Question:", question)
print("\nChain-of-Thought Response:")
print(zero_shot_cot(question))

## 2. Self-Consistency Decoding

In [None]:
import re

# Unsigned number with optional thousands separators and an optional
# decimal part, e.g. "16", "1,500", "14.00". Word boundaries keep digits
# glued to letters (such as "x2") from matching.
_NUMBER_PATTERN = re.compile(r'\b\d+(?:,\d{3})*(?:\.\d+)?\b')


def extract_answer(response: str) -> Optional[str]:
    """Extract the final numerical answer from a model response.

    The last number appearing in ``response`` is treated as the answer.
    Decimals and thousands separators are read as one number, and the
    result is normalized so equivalent spellings compare equal:
    commas are removed and a trailing all-zero fraction is dropped
    ("$14.00" -> "14", "1,500" -> "1500", "2.50" -> "2.5").

    Args:
        response: Free-form text returned by the model. ``None`` or an
            empty string is tolerated.

    Returns:
        The normalized number as a string, or ``None`` when the text
        contains no number.
    """
    if not response:
        return None

    matches = _NUMBER_PATTERN.findall(response)
    if not matches:
        return None

    number = matches[-1].replace(",", "")
    if "." in number:
        # Only strip zeros after a decimal point, so "100" is left alone.
        number = number.rstrip("0").rstrip(".")
    return number

def self_consistency(question: str, n_samples: int = 5) -> Optional[str]:
    """Answer ``question`` by majority vote over sampled reasoning paths.

    A single request asks the API for ``n_samples`` independent
    completions (via the ``n`` parameter) of the Chain-of-Thought prompt
    at temperature 0.7. This replaces ``n_samples`` separate round trips
    and sends the prompt only once. The numerical answer is extracted
    from each completion and the most frequent one wins; ties go to the
    answer seen first.

    Args:
        question: The problem statement to solve.
        n_samples: Number of reasoning paths to sample. Values below 1
            produce no samples.

    Returns:
        The most common extracted answer, or ``None`` when no sample
        yielded a number.
    """
    if n_samples < 1:
        # Nothing to sample; same result as an empty vote.
        return None

    prompt = f"{question}\n\nLet's think step by step:"
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,  # Higher temperature for diversity
        max_tokens=300,
        n=n_samples,
    )

    answers = []
    for choice in response.choices:
        # content can be None (e.g. a refusal); treat it as "no answer".
        answer = extract_answer(choice.message.content or "")
        if answer:
            answers.append(answer)

    if not answers:
        return None

    # Majority vote
    return Counter(answers).most_common(1)[0][0]

# Smoke test: vote over sampled answers for the shared sample question,
# using the function's default sample count.
print("Self-Consistency (5 samples):")
result = self_consistency(question)
print("Final Answer:", result)

## 3. Tree-of-Thoughts (Simplified)

In [None]:
def generate_thoughts(question: str, n_thoughts: int = 3) -> str:
    """Ask the model for several distinct initial approaches to a problem.

    Args:
        question: The problem statement to explore.
        n_thoughts: How many approaches to request. The format example in
            the prompt lists exactly this many "Approach i:" lines.

    Returns:
        The model's raw text, expected to contain one "Approach i: ..."
        entry per requested thought. The text is not parsed into a list.
    """
    # Build the format example from n_thoughts so it always agrees with
    # the requested count (the default of 3 yields the original prompt).
    format_lines = "\n".join(
        f"Approach {i}: ..." for i in range(1, n_thoughts + 1)
    )
    prompt = f"""Question: {question}

Generate {n_thoughts} different initial approaches to solve this problem.
Format as:
{format_lines}"""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=400
    )
    return response.choices[0].message.content

def evaluate_thought(question: str, thought: str) -> float:
    """Score how promising a proposed approach is, on a 0-1 scale.

    The model is asked to reply with only a number between 0.0 and 1.0.

    Args:
        question: The problem statement.
        thought: The proposed approach to rate.

    Returns:
        The model's rating clamped to ``[0.0, 1.0]``, or the neutral
        score ``0.5`` when the reply is missing, not a number, or NaN.
    """
    prompt = f"""Question: {question}
Proposed approach: {thought}

Rate this approach's likelihood of success (0.0 to 1.0).
Respond with only the number."""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=10
    )
    raw_reply = response.choices[0].message.content
    try:
        # A None reply becomes "", which float() rejects with ValueError.
        score = float((raw_reply or "").strip())
    except ValueError:
        return 0.5

    if score != score:  # NaN is the only value unequal to itself
        return 0.5
    # Enforce the documented range even if the model strays outside it.
    return min(1.0, max(0.0, score))

# Smoke test: request candidate approaches for the shared sample question
# and show the text exactly as the model returned it.
print("Tree-of-Thoughts Exploration:")
thoughts = generate_thoughts(question=question)
print(thoughts)

## 4. Comparison Experiment

Compare strategies on GSM8K-style problems.

In [None]:
# Small word-problem benchmark: "q" is the question text, "a" the expected
# answer as a string (compared against the extracted answer by equality).
test_problems = [
    dict(
        q="A baker has 24 muffins. He sells 1/3 of them in the morning. How many are left?",
        a="16",
    ),
    dict(
        q="Lisa has $50. She buys 3 books at $12 each. How much money does she have left?",
        a="14",
    ),
    dict(
        q="A train travels 60 mph for 2.5 hours. How far does it go?",
        a="150",
    ),
]

def evaluate_strategy(strategy_fn, problems, strategy_name):
    """Evaluate a prompting strategy on test problems.

    Args:
        strategy_fn: Callable taking a question string. If it returns a
            string, the final number is extracted from it; any other
            return value is compared to the expected answer as-is.
        problems: Sequence of dicts with a question under ``"q"`` and the
            expected answer under ``"a"``.
        strategy_name: Label used in the printed summary line.

    Returns:
        The fraction of problems answered correctly, or ``0.0`` when
        ``problems`` is empty.
    """
    correct = 0
    for p in problems:
        result = strategy_fn(p["q"])
        answer = extract_answer(result) if isinstance(result, str) else result
        if answer == p["a"]:
            correct += 1

    total = len(problems)
    # Guard the division so an empty problem set reports 0% instead of
    # raising ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print(f"{strategy_name}: {correct}/{total} = {accuracy:.1%}")
    return accuracy

# Score each strategy on the shared problem set. The final call is left as
# the cell's last expression so the notebook still displays its value.
print("Strategy Comparison:")
print("=" * 40)
evaluate_strategy(
    strategy_fn=zero_shot_cot,
    problems=test_problems,
    strategy_name="Zero-shot CoT",
)
evaluate_strategy(
    strategy_fn=self_consistency,
    problems=test_problems,
    strategy_name="Self-Consistency",
)

## 5. Key Takeaways

- **Zero-shot CoT**: Simple, low cost, good for many tasks
- **Self-Consistency**: Higher accuracy, but cost grows with the number of samples (~5x with the 5 samples used here)
- **Tree-of-Thoughts**: Best for complex planning, highest cost

For agents: Use CoT by default, Self-Consistency for critical decisions.