# Qwen3(4B) 메뉴 조합 점수화 노트북 (Unsloth 스타일)

이 노트북은 **Qwen3-4B-Instruct**를 사용해 아래 4개 카테고리에서 1개씩 뽑은 메뉴 조합의 점수를 계산합니다.

- Appetizer
- Main Dish
- Drink
- Dessert

> Colab에서 바로 실행 가능한 형태로 구성했습니다.


In [None]:
%%capture
# Environment setup cell: installs Unsloth and its dependencies.
# The %%capture cell magic on the first line suppresses this cell's output.
import os, re
# True when no environment variable name contains "COLAB_"; that case is
# handled as a non-Colab (local / cloud) setup.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth  # Do this in local & cloud setups
else:
    # Colab path: extract torch's "major.minor" version and look up the
    # xformers pin listed for it; versions not in the table fall back to 0.0.34.
    import torch; v = re.match(r'[\d]{1,}\.[\d]{1,}', str(torch.__version__)).group(0)
    xformers = 'xformers==' + {'2.10':'0.0.34','2.9':'0.0.33.post1','2.8':'0.0.32.post2'}.get(v, "0.0.34")
    # The second command installs with --no-deps, so pip does not resolve
    # (or replace) the dependencies of those packages.
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth_zoo bitsandbytes accelerate {xformers} peft trl triton unsloth
# Executed on both paths: pin transformers and trl to fixed versions.
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2

In [None]:
import json
import random
import itertools
from typing import Dict, List

from unsloth import FastLanguageModel
import torch


In [None]:
# 1) User-defined dataset: candidate menu items for each of the four courses.
DATASET = {
    "appetizer": [
        "salad",
        "corn soup",
        "miso soup",
        "house bread",
        "cheese",
        "cracker",
        "Scotch Egg",
        "mashed potato",
        "nachos",
        "pasta",
    ],
    "main_dish": [
        "ramen",
        "pizza",
        "fried chicken",
        "sandwich",
        "T-bone steak",
        "sushi",
        "taco",
        "grilled tofu",
        "fish and chips",
        "paella",
    ],
    "drink": [
        "coca-cola",
        "red wine",
        "white wine",
        "sake",
        "green tea",
        "orange juice",
        "coke zero",
        "modelo beer",
        "coffee",
        "water",
    ],
    "dessert": [
        "orange",
        "grape",
        "pudding",
        "ice cream",
        "tart",
        "cheesecake",
        "macaron",
        "dango",
        "muffin",
        "churro",
    ],
}

# Report the number of items per category, then the size of the full
# cartesian product (one item from each of the four categories).
_category_sizes = {category: len(items) for category, items in DATASET.items()}
print(_category_sizes)

_n_appetizers, _n_mains, _n_drinks, _n_desserts = (
    len(DATASET[category])
    for category in ("appetizer", "main_dish", "drink", "dessert")
)
print("총 가능한 조합 수:", _n_appetizers * _n_mains * _n_drinks * _n_desserts)


In [None]:
# 2) Load Qwen3 (Unsloth template)
max_seq_length = 2048
# Checkpoints are tried in list order; the first one that loads is used.
# NOTE(review): confirm that each repo ID below exists on the Hugging Face Hub;
# a wrong name is only reported as a "[skip]" line and the next one is tried.
candidate_models = [
    "unsloth/Qwen3-4B-Instruct-bnb-4bit",
    "unsloth/Qwen3-4B-unsloth-bnb-4bit",
    "Qwen/Qwen3-4B-Instruct",
]

last_error = None
model = None
tokenizer = None
model_name = None

for candidate in candidate_models:
    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = candidate,
            max_seq_length = max_seq_length,
            dtype = None,      # auto-select
            load_in_4bit = True,
        )
        model_name = candidate
        break
    except Exception as e:
        # Deliberate best-effort fallback: remember the failure so it can be
        # chained into the final error, then move on to the next candidate.
        last_error = e
        print(f"[skip] {candidate}: {e}")

if model is None:
    # The two literals are concatenated; the trailing space keeps the
    # sentences from running together in the rendered message.
    raise RuntimeError(
        "Qwen3 모델 로드 실패. candidate_models의 이름을 확인하거나, "
        "허깅페이스 접근 권한/네트워크 상태를 확인하세요."
    ) from last_error

FastLanguageModel.for_inference(model)
print("Loaded:", model_name)


In [None]:
# System prompt used for every scoring request. It fixes the evaluator role,
# the reply schema (integer `score` 0-100 plus a short Korean `reason`), and
# the scoring rubric, whose four weights (40 + 20 + 20 + 20) add up to 100.
SYSTEM_PROMPT = """You are a food pairing evaluator.
Given exactly one appetizer, one main dish, one drink, and one dessert,
return a strict JSON object with fields:
- score: integer from 0 to 100
- reason: short explanation in Korean
Scoring criteria:
1) Flavor harmony (40)
2) Texture balance (20)
3) Temperature/course flow (20)
4) Overall coherence (20)
Do NOT output any analysis, chain-of-thought, or <think> tags.
Return ONLY one-line JSON like: {"score": 82, "reason": "맛의 밸런스가 안정적입니다."}
"""


def build_user_prompt(combo: Dict[str, str]) -> str:
    """Build the user-turn prompt for one menu combination.

    Args:
        combo: Mapping with the keys ``appetizer``, ``main_dish``, ``drink``
            and ``dessert``; a missing key raises ``KeyError``.

    Returns:
        The Korean instruction header followed by one ``key: value`` line per
        course, with no trailing newline.
    """
    instructions = (
        "아래 4개 메뉴 조합을 0~100점으로 평가하세요.\n"
        "반드시 JSON 1줄만 출력하세요. 다른 텍스트/태그 금지.\n"
        '형식: {"score": 0~100 정수, "reason": "한국어 한 줄"}\n\n'
    )
    menu_lines = [
        f"appetizer: {combo['appetizer']}\n",
        f"main_dish: {combo['main_dish']}\n",
        f"drink: {combo['drink']}\n",
        f"dessert: {combo['dessert']}",
    ]
    return instructions + "".join(menu_lines)


def _normalize_score_payload(data):
    """Validate a decoded JSON value and normalize its ``score`` field.

    Returns ``data`` itself, with ``score`` converted to an ``int`` clamped to
    the 0-100 range, when it is a dict holding both ``score`` and ``reason``.
    Returns ``None`` when the shape is wrong or the score is not a finite
    number.
    """
    if not isinstance(data, dict) or "score" not in data or "reason" not in data:
        return None
    try:
        score = int(float(data["score"]))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite score: let the caller try the next strategy.
        return None
    data["score"] = max(0, min(100, score))
    return data


def parse_json_from_text(text: str):
    """Extract a ``{"score": int, "reason": str}`` result from model output.

    Strategies, in order:
      1. parse the whole text (after removing ``<think>...</think>`` spans)
         as JSON;
      2. parse the span from the first ``{`` to the last ``}``, first as-is
         and then with single quotes replaced by double quotes;
      3. pull ``score`` and ``reason`` out of semi-structured text with
         regular expressions.

    Args:
        text: Raw model output; ``None`` or an empty string is accepted.

    Returns:
        A dict containing at least ``score`` (an ``int`` clamped to 0-100 in
        every strategy) and ``reason``, or ``None`` when nothing usable is
        found.
    """
    import re

    if not text:
        return None

    cleaned = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()

    # Strategies 1 and 2 share the same decode-and-validate step, so collect
    # the candidate strings first and try them in priority order.
    candidates = [cleaned]
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start != -1 and end > start:
        chunk = cleaned[start:end + 1]
        candidates.extend((chunk, chunk.replace("'", '"')))

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass.
            continue
        payload = _normalize_score_payload(data)
        if payload is not None:
            return payload

    # Strategy 3: heuristic extraction for semi-structured text.
    score_m = re.search(r'"?score"?\s*[:=]\s*"?(\d{1,3})"?', cleaned, flags=re.IGNORECASE)
    # In a raw string, "\n" inside the character class means a newline; a
    # doubled backslash would instead exclude the literal characters "\" and "n".
    reason_m = re.search(r'"?reason"?\s*[:=]\s*"([^"\n]+)"', cleaned, flags=re.IGNORECASE)
    if not reason_m:
        reason_m = re.search(r'reason\s*[:=]\s*(.+)', cleaned, flags=re.IGNORECASE)

    if score_m and reason_m:
        score = max(0, min(100, int(score_m.group(1))))
        reason = reason_m.group(1).strip()
        return {"score": score, "reason": reason}

    return None


def repair_output_to_json(combo: Dict[str, str], raw_text: str, max_new_tokens: int = 128):
    """Ask the model to rewrite an unparseable evaluation as strict JSON.

    Used after parsing has failed: the model is only asked to normalize the
    earlier output into the expected one-line JSON, not to re-evaluate.

    Args:
        combo: The menu combination that was evaluated (embedded in the
            request via its ``str()`` form).
        raw_text: The model output that could not be parsed.
        max_new_tokens: Generation budget for the repaired answer.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded,
        special tokens skipped). The text is returned unparsed.
    """
    repair_messages = [
        {
            "role": "system",
            "content": (
                "Convert the following evaluation to strict JSON only. "
                "Output exactly one line: {\"score\": int(0-100), \"reason\": \"한국어 한 줄\"}."
            ),
        },
        {
            "role": "user",
            "content": (
                f"combo={combo}\n"
                f"raw_output={raw_text}"
            ),
        },
    ]

    # enable_thinking is forwarded to the chat template. Qwen3 hybrid
    # checkpoints use it to switch off the <think> block, which would
    # otherwise consume the small token budget before any JSON is produced.
    # Templates that do not reference the variable are unaffected by it.
    repair_inputs = tokenizer.apply_chat_template(
        repair_messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        enable_thinking=False,
    ).to(model.device)

    with torch.inference_mode():
        repair_outputs = model.generate(
            input_ids=repair_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding for a deterministic answer
            repetition_penalty=1.05,
        )

    # Slice off the prompt tokens so only the newly generated part is decoded.
    repaired = tokenizer.decode(
        repair_outputs[0][repair_inputs.shape[-1]:], skip_special_tokens=True
    )
    return repaired


def evaluate_combo_with_qwen3(combo: Dict[str, str], max_new_tokens: int = 512):
    """Score one menu combination with the loaded model.

    Sends ``SYSTEM_PROMPT`` plus the prompt built from ``combo``, decodes the
    generated tokens, and parses them with ``parse_json_from_text``. When
    parsing fails, one repair round via ``repair_output_to_json`` is attempted.

    Args:
        combo: Mapping with the four course keys expected by
            ``build_user_prompt``.
        max_new_tokens: Generation budget for the evaluation answer.

    Returns:
        A ``(parsed, raw)`` tuple. ``parsed`` is the result dict, or ``None``
        when both the first parse and the repair round fail. ``raw`` is the
        generated text; when the repair round succeeded it is the original
        text followed by a ``[REPAIRED]`` line with the repaired text.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(combo)},
    ]

    # enable_thinking is forwarded to the chat template. Qwen3 hybrid
    # checkpoints use it to switch off the <think> block, matching the
    # system prompt's "no <think> tags" rule and keeping the token budget for
    # the JSON answer. Templates that do not reference it are unaffected.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        enable_thinking=False,
    ).to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding for a deterministic score
            repetition_penalty=1.05,
        )

    # Slice off the prompt tokens so only the newly generated part is decoded.
    generated = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    parsed = parse_json_from_text(generated)

    # If the first parse fails, request a JSON-only rewrite (improves result stability).
    if parsed is None:
        repaired = repair_output_to_json(combo, generated)
        reparsed = parse_json_from_text(repaired)
        if reparsed is not None:
            return reparsed, f"{generated}\n[REPAIRED] {repaired}"

    return parsed, generated



In [None]:
# 3) Draw one random item per category and score the resulting combination

def sample_combo(seed: int = None) -> Dict[str, str]:
    """Pick one random item from each of the four ``DATASET`` categories.

    Args:
        seed: When not ``None``, the module-level ``random`` generator is
            re-seeded with it before drawing, which also affects later draws
            made through ``random``.

    Returns:
        A dict with the keys ``appetizer``, ``main_dish``, ``drink`` and
        ``dessert``, drawn in that order.
    """
    if seed is not None:
        random.seed(seed)

    course_order = ("appetizer", "main_dish", "drink", "dessert")
    return {course: random.choice(DATASET[course]) for course in course_order}

# Evaluate one reproducible combination (fixed seed).
combo = sample_combo(seed=42)
result, raw = evaluate_combo_with_qwen3(combo)

# Print each heading and its value on separate lines: the sampled
# combination, the raw model text, then the parsed result.
print("선택된 조합:", combo, sep="\n")
print("\n모델 원문 출력:", raw, sep="\n")
print("\n파싱 결과:", result, sep="\n")


In [None]:
# (Optional) Evaluate only three random combinations and list them by score, highest first

def evaluate_three_random(seed: int = 0) -> List[Dict]:
    """Score three randomly sampled combinations and rank them.

    Args:
        seed: Seed applied to the module-level ``random`` generator before
            sampling.

    Returns:
        Three dicts with the keys ``combo``, ``score`` and ``reason``, sorted
        by score in descending order. A combination whose output could not be
        parsed gets ``score=None`` (ranked as -1) and a reason holding the
        first 160 characters of the raw output.
    """
    random.seed(seed)

    def _score_one_sample() -> Dict:
        picked = sample_combo()
        parsed, raw = evaluate_combo_with_qwen3(picked)
        if parsed:
            return {
                "combo": picked,
                "score": parsed.get("score"),
                "reason": parsed.get("reason"),
            }
        return {"combo": picked, "score": None, "reason": f"파싱 실패: {raw[:160]}"}

    def _rank_key(entry: Dict):
        # Unparsed entries sort below every valid 0-100 score.
        return -1 if entry["score"] is None else entry["score"]

    samples = [_score_one_sample() for _ in range(3)]
    return sorted(samples, key=_rank_key, reverse=True)

# Run the three-combination evaluation with a fixed seed and print each entry
# with its 1-based rank, score, combination and reason.
ranked = evaluate_three_random(seed=7)
for i, row in enumerate(ranked, 1):
    print(f"#{i} | score={row['score']} | {row['combo']}\n  reason={row['reason']}\n")
