<a href="https://colab.research.google.com/github/201524495/201524495/blob/main/llm_reasoning_cot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# LLM Reasoning Lab (CoT · GRPO)
실습 내용:
- **Chain-of-Thought (CoT)**: zero-shot / few-shot / CoT 프롬프트 사용 및 모델 성능 비교




In [None]:
# 필요한 것만 설치하세요
# !pip install -q openai
# !pip install -q huggingface_hub

import os, random, numpy as np
# Provider selector; overridable through the USE_PROVIDER environment variable.
USE_PROVIDER = os.getenv("USE_PROVIDER", "openai")

# === TODO: set your API key ===
# Replace the right-hand side of `_api_key` with your own key string.
# While the placeholder is left untouched nothing is written to the
# environment, so an OPENAI_API_KEY that is already exported is not
# overwritten by the dummy value.
_API_KEY_PLACEHOLDER = "YOUR OPENAI API KEY"  # sentinel: do not edit this line
_api_key = _API_KEY_PLACEHOLDER
if _api_key != _API_KEY_PLACEHOLDER:
    os.environ["OPENAI_API_KEY"] = _api_key

# Default model
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# Sampling settings; each can be overridden through the environment.
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "1000"))

# Seed both Python's and NumPy's global random generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)


## 1) LLM 호출 래퍼 (OpenAI API)
로그인 후 API key 발급

https://openai.com/ko-KR/index/openai-api/

In [None]:
from typing import List, Dict

def format_messages(messages: List[Dict[str, str]]) -> str:
    """Render a chat ``messages`` list as a human-readable string.

    Each message becomes a block ``"ROLE:\\n<content>\\n"``; blocks are joined
    with a newline, so consecutive messages are separated by one blank line.

    Args:
        messages: Chat messages, each a dict with ``"role"`` and ``"content"``.
            ``content`` may be a string, missing/``None`` (rendered as an
            empty body), or a list of content parts. From a list, only the
            ``"text"`` of text-typed parts (``text``, ``input_text``,
            ``output_text``) is kept, one part per line.

    Returns:
        The formatted transcript; an empty string for an empty list.
    """
    text_part_types = ("text", "input_text", "output_text")
    formatted = []
    for m in messages:
        # `or ""` also covers an explicit None value, which `.get(key, "")`
        # would pass through and then fail on `.upper()` / `.strip()`.
        role = m.get("role") or ""
        content = m.get("content") or ""
        # Multi-part content: keep only the textual parts.
        if isinstance(content, list):
            content = "\n".join(
                part.get("text") or ""
                for part in content
                if isinstance(part, dict) and part.get("type") in text_part_types
            )
        formatted.append(f"{str(role).upper()}:\n{str(content).strip()}\n")
    return "\n".join(formatted)


def run_model(messages: List[Dict[str,str]],
              temperature: float = TEMPERATURE,
              max_tokens: int = MAX_TOKENS,
              provider: str = USE_PROVIDER, debug=False) -> str:
    """Send ``messages`` to the OpenAI model and return the reply text.

    The Responses API is tried first; if that call (or reading its result)
    raises, the request is retried through Chat Completions.

    Args:
        messages: Chat messages (dicts with ``"role"`` and ``"content"``).
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Output token limit forwarded to the API.
        provider: Accepted for interface compatibility; not used by this
            function, which always calls OpenAI.
        debug: When true, print the formatted prompt and, on the Responses
            API path, the reply text.

    Returns:
        The first ``output_text`` of the Responses API reply, or
        ``str(resp)`` if the reply has none; on fallback, the first Chat
        Completions choice's content. If the client cannot be created or the
        fallback also fails, the string ``"[OpenAI error] <exception>"`` is
        returned instead of raising.
    """
    if debug:
        print("=== Model Input ===")
        print(format_messages(messages))
        print("===================\n")

    try:
        # OpenAI SDK (>=1.0). Imported inside the try so that a missing
        # package is reported through the error string below.
        from openai import OpenAI
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        # Prefer Responses API; fallback to Chat Completions
        try:
            resp = client.responses.create(
                model=OPENAI_MODEL,
                input=messages,
                temperature=temperature,
                max_output_tokens=max_tokens,
            )

            # Extract plain text: first output_text part of a message item.
            text = next(
                (
                    part.text
                    for item in resp.output
                    if item.type == "message"
                    for part in item.content
                    if part.type == "output_text"
                ),
                None,
            )
            if text is None:
                return str(resp)

            if debug:
                print("=== Model Output (OpenAI Responses API) ===")
                print(text)
                print("===========================================\n")
            # Return the text in debug mode as well, so the declared
            # `-> str` contract holds regardless of `debug`.
            return text
        except Exception:
            chat = client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            return chat.choices[0].message.content
    except Exception as e:
        return f"[OpenAI error] {e}"

## 2) 미니 데이터셋 (10문제)


In [None]:
# Mini benchmark: ten arithmetic word problems. Each record has an integer
# "id" (1-10), the "question" text and the gold "answer" as a digit string.
DATA = [
    {"id": idx, "question": question, "answer": answer}
    for idx, (question, answer) in enumerate(
        [
            ("3 red apples and 5 green apples. You eat 2 green apples. How many apples are left?", "6"),
            ("Tom has 12 candies and gives 3 to Sara and 4 to Jim. How many candies now?", "5"),
            ("A train travels 60 km in 1 hour. How far in 3 hours?", "180"),
            ("If a dozen eggs cost $6, how much do 3 dozens cost?", "18"),
            ("Jenny read 15 pages on Mon and 23 on Tue. She wants 50 total. Pages left?", "12"),
            ("There are 10 oranges. You buy 7 more and give away 5. How many now?", "12"),
            (
                "An item costs 15,000 won. A 10% discount is applied, then a 10% tax is added on the discounted price. What is the final price (won)?",
                "14850",
            ),
            (
                "A student’s average after 4 tests is 82. What score is needed on the 5th test to raise the average to 85?",
                "97",
            ),
            (
                "Worker A can finish a job in 10 hours, and B in 5 hours. They work together for 2 hours, then B leaves. How many more hours does A need to finish?",
                "4",
            ),
            (
                "You have 6 liters of a 25% acid solution. How many liters of water must be added to make it a 15% solution?",
                "4",
            ),
        ],
        start=1,
    )
]

## 3) Zero-shot / Few-shot / CoT


In [None]:
# System instruction asking the model for a bare numeric answer.
SYSTEM_PROMPT = "You are a helpful reasoning assistant. Always answer with the final numeric answer only."

# Example prompt built from the first dataset question.
q = DATA[0]["question"]
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": q},
]

# Bare expression: as the last line of a notebook cell it displays the list.
messages

In [None]:
# Show the example prompt in the readable "ROLE:\ncontent" layout.
print(format_messages(messages))

In [None]:
def ask_zero_shot(q: str, debug=False):
    """Ask the model ``q`` with no examples: the system prompt plus the question.

    ``debug`` is forwarded to ``run_model``; its result is returned as is.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": q}
    return run_model([system_turn, user_turn], debug=debug)

# In-context demonstrations: two solved question/answer exchanges, stored as
# alternating user and assistant turns.
FEW_SHOTS = [
    {"role": role, "content": content}
    for role, content in (
        ("user", "There are 2 apples and 3 oranges. Total?"),
        ("assistant", "5"),
        ("user", "You had 10 candies and gave 4 away. How many now?"),
        ("assistant", "6"),
    )
]

def ask_few_shot(q: str, debug=False):
    """Exercise stub: ask ``q`` with a few-shot prompt (not implemented yet).

    As written, ``msgs`` is ``None`` and is passed to ``run_model`` unchanged;
    building the prompt is the TODO below.
    """
    ## ToDo: implement the few-shot prompt
    msgs = None
    return run_model(msgs, debug=debug)


In [None]:
import re
# Chain-of-thought instruction. It asks for the answer inside
# <final>...</final>; the text starts and ends with a newline.
COT_PROMPT = (
    "\n"
    "Solve the problem step by step. Show your reasoning.\n"
    "Return the final numeric answer after the tag <final> like: <final>ANSWER</final>.\n"
)
def extract_final(text: str) -> str:
    """Return the text inside the first ``<final>...</final>`` pair.

    The tag match is case-insensitive and may span several lines. When no
    such pair is present, the whole input is returned. Surrounding whitespace
    is stripped in both cases.
    """
    match = re.search(
        r"<final>\s*(.*?)\s*</final>",
        text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if match is None:
        return text.strip()
    return match.group(1).strip()

def ask_cot(q: str, debug=False):
    """Exercise stub: ask ``q`` with a chain-of-thought prompt (not implemented yet).

    As written, ``msgs`` is ``None``; building the prompt is the TODO below.
    The value of ``msgs`` is what gets sent to ``run_model``.
    """
    ## ToDo: implement the CoT prompt
    msgs = None
    # Send the prompt built here, not the module-level `messages` example,
    # so that the reply actually depends on `q`.
    return run_model(msgs, debug=debug)


In [None]:
# Print the text of the first dataset question.
print(DATA[0]['question'])

In [None]:
# Zero-shot response; debug=True also prints the formatted prompt.
ask_zero_shot(DATA[0]['question'], debug=True)

In [None]:
# Few-shot response (in-context learning); debug=True also prints the formatted prompt.
ask_few_shot(DATA[0]['question'], debug=True)

In [None]:
# Chain-of-thought response; debug=True also prints the formatted prompt.
ask_cot(DATA[0]['question'], debug=True)

In [None]:
from statistics import mean
def evaluate(fn):
    """Score ``fn`` on every example in ``DATA`` by exact string match.

    Args:
        fn: Callable that takes a question string and returns the model's
            reply. For callables named ``ask_cot``, ``ask_structured`` or
            ``ask_react`` the reply is first passed through ``extract_final``.

    Returns:
        ``(accuracy, rows)``: the mean of the per-example matches, and a list
        of ``(id, prediction, gold)`` tuples in dataset order.
    """
    # getattr: callables such as functools.partial objects have no __name__.
    uses_final_tag = getattr(fn, "__name__", "") in ("ask_cot", "ask_structured", "ask_react")

    preds, golds = [], []
    for ex in DATA:
        out = fn(ex["question"])
        # A reply without text (None) is scored as an empty, wrong answer
        # instead of aborting the whole evaluation run.
        out = "" if out is None else str(out)
        if uses_final_tag:
            out = extract_final(out)
        preds.append(out.strip())
        golds.append(ex["answer"].strip())

    acc = mean([p == g for p, g in zip(preds, golds)])
    return acc, list(zip([d["id"] for d in DATA], preds, golds))


In [None]:
# Example run: evaluate each prompting function on DATA and print the three accuracies.
acc0, _ = evaluate(ask_zero_shot)
acc1, _ = evaluate(ask_few_shot)
acc2, _ = evaluate(ask_cot)
print(acc0, acc1, acc2)


### 정답

In [None]:
def ask_few_shot(q: str, debug=False):
    """Ask ``q`` after the worked examples in ``FEW_SHOTS``.

    The prompt is the system message, then the ``FEW_SHOTS`` turns, then the
    new question as the last user turn. ``debug`` is forwarded to
    ``run_model``; its result is returned as is.
    """
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *FEW_SHOTS,
        {"role": "user", "content": q},
    ]
    return run_model(msgs, debug=debug)


def ask_cot(q: str, debug=False):
    """Ask ``q`` with the chain-of-thought instruction appended.

    The user turn is the question, a blank line, then ``COT_PROMPT``. The
    system message here does not include the "numeric answer only" sentence.
    ``debug`` is forwarded to ``run_model``; its result is returned as is.
    """
    system_turn = {"role": "system", "content": "You are a helpful reasoning assistant."}
    user_turn = {"role": "user", "content": "\n\n".join((q, COT_PROMPT))}
    return run_model([system_turn, user_turn], debug=debug)
