Devpick-Org · suheon98 · Apr 24, 2026 · Apr 24, 2026
diff --git a/app/core/prompts/trend_collection.py b/app/core/prompts/trend_collection.py
@@ -0,0 +1,113 @@
+"""수집 동향 서사 요약 프롬프트 (DP-384)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+# System prompt (Korean) for the collection-trend narrative summary.
+# CollectionSummaryGenerator.generate sends it as the `system` text of the
+# Bedrock Converse request.
+# Rules it gives the model: write 1-3 paragraphs of roughly 200-400 characters,
+# cite the statistics naturally, map tag state new/up/down to trend wording,
+# never surface technical terms such as "TF-IDF", avoid promotional wording,
+# direct references to individual posts and recommended actions, and compare
+# against the previous period only when a previous summary is supplied.
+SYSTEM_PROMPT = """\
+당신은 개발자 커뮤니티 수집 동향 분석 전문가입니다.
+
+주어진 기간의 수집 통계(태그 빈도·증감, 주요 키워드, 글 수)를 바탕으로
+개발자 커뮤니티가 어떤 주제를 다루고 있는지 1~3 문단의 서사 요약을 작성합니다.
+
+## 작성 원칙
+- 수집 글 수, 태그 빈도, 증감률 등 수치를 자연스럽게 인용해 전문성을 높입니다
+  - 예: "47편의 글 수집", "전주 대비 +23.7%", "Kubernetes(+50%)"
+- 태그 증감/신규를 흐름으로 연결합니다
+  - state=new → "이번 기간 새롭게 등장"
+  - state=up  → "관심 급증", "전 기간 대비 X% 성장"
+  - state=down → "관심 감소"
+- 핵심 키워드는 "이번 기간 수집 글 전반에서 자주 등장한 표현"으로 자연스럽게 서술합니다
+  - "TF-IDF", "통계 분석" 같은 기술 명칭은 출력에 포함하지 않습니다
+  - 예: "이번 주 수집 글에서 자주 언급된 키워드로는 ..."
+  - 키워드를 흐름 서술에 녹여 씁니다 — 단순 나열 금지
+- 광고성·추천성 표현을 사용하지 않습니다
+- 특정 글을 직접 언급하지 않습니다
+- 200~400자 내외로 작성합니다
+- 추천 액션은 출력하지 않습니다 (Insight/주간 리포트와 역할 분리)
+- 이전 기간 요약이 제공된 경우, 마지막 문단에서 이전 기간과의 차이점을 자연스럽게 서술합니다
+- 이전 기간 요약이 없으면 현재 기간 서술만 합니다
+"""
+
+# Tool definition the model is asked to call with its summary.
+# "input_schema" is a JSON-Schema object with a single required string field,
+# "collection_summary"; CollectionSummaryGenerator.generate reads that field
+# back out of the toolUse block of the response.
+# The dict is converted with to_tool_config() before being sent to Bedrock.
+TOOL_SAVE_COLLECTION_SUMMARY = {
+    "name": "save_collection_summary",
+    "description": "수집 동향 서사 요약을 저장한다.",
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "collection_summary": {
+                "type": "string",
+                "description": "수집 동향 서사 요약 (200~400자, 1~3 문단, 수치 포함)",
+            }
+        },
+        "required": ["collection_summary"],
+    },
+}
+
+
+def build_user_prompt(signals: Any) -> str:
+    """Render a TrendSignals-like object into the LLM user prompt.
+
+    The prompt is a newline-joined sequence of sections, each emitted only
+    when it has data: period header and post counts (always), count delta
+    (only when the previous count is positive), the tag table, newly seen
+    tags, the five fastest-rising tags, the first 15 keywords, and the
+    previous period's summary.
+
+    Args:
+        signals: Object exposing ``unit``, ``period_start``, ``period_end``,
+            ``cur_content_count``, ``prev_content_count``, ``top_tags``,
+            ``tfidf_keywords`` and ``prev_summary``.
+
+    Returns:
+        The prompt text with trailing whitespace removed.
+    """
+    label = {"weekly": "이번 주", "monthly": "이번 달"}.get(
+        signals.unit, "이번 기간"
+    )
+    current = signals.cur_content_count
+    previous = signals.prev_content_count
+
+    out: list[str] = [
+        f"기간: {label} ({signals.period_start} ~ {signals.period_end})",
+        "",
+        "## 수집 현황",
+        f"- 이번 기간: {current}편",
+        f"- 이전 기간: {previous}편",
+    ]
+    # A growth rate is only meaningful against a non-empty previous period.
+    if previous > 0:
+        diff = current - previous
+        out.append(f"- 증감: {diff:+d}편 ({diff / previous * 100:+.1f}%)")
+    out.append("")
+
+    tags = signals.top_tags
+    if tags:
+        out += ["## Top 10 태그 (이번 기간 빈도 순)", "태그 | 이번 | 이전 | 증감 | 상태"]
+        for tag in tags:
+            # Zero is printed unsigned; every other delta carries its sign.
+            signed = "0" if tag.delta == 0 else f"{tag.delta:+d}"
+            out.append(
+                f"{tag.keyword} | {tag.cur_count} | {tag.prev_count}"
+                f" | {signed} | {tag.state}"
+            )
+        out.append("")
+
+        fresh = [tag.keyword for tag in tags if tag.state == "new"]
+        if fresh:
+            out += ["## 신규 등장 태그", ", ".join(fresh), ""]
+
+        # Stable descending sort: ties keep their order from ``top_tags``.
+        rising = [tag for tag in tags if tag.state == "up"]
+        rising.sort(key=lambda tag: tag.delta, reverse=True)
+        if rising:
+            out.append("## 급증 태그 Top 5 (state=up, delta 상위)")
+            for tag in rising[:5]:
+                if tag.growth_rate is None:
+                    pct = "신규"
+                else:
+                    pct = f"+{tag.growth_rate:.1f}%"
+                out.append(f"- {tag.keyword}: +{tag.delta}건 ({pct})")
+            out.append("")
+
+    keywords = signals.tfidf_keywords
+    if keywords:
+        out += [
+            "## 이번 기간 주요 키워드 (수집 글 전반 빈도 기반, 상위 15)",
+            ", ".join(keywords[:15]),
+            "",
+        ]
+
+    previous_summary = signals.prev_summary
+    if previous_summary:
+        out += ["## 이전 기간 collection_summary (참고용)", previous_summary, ""]
+
+    return "\n".join(out).rstrip()
diff --git a/app/services/trend/collection_summary.py b/app/services/trend/collection_summary.py
@@ -0,0 +1,95 @@
+"""수집 동향 서사 요약 생성 서비스 (DP-384)."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+
+import boto3
+from botocore.config import Config
+from botocore.exceptions import ClientError, EndpointConnectionError, ReadTimeoutError
+
+from app.core.bedrock import to_tool_config
+from app.core.exceptions import AIInternalError
+from app.core.prompts.trend_collection import (
+    SYSTEM_PROMPT,
+    TOOL_SAVE_COLLECTION_SUMMARY,
+    build_user_prompt,
+)
+from app.services.trend.frequency import TagFrequency
+
+logger = logging.getLogger(__name__)
+
+# Tool name handed to to_tool_config() together with the tool definition.
+# It repeats the "name" value of TOOL_SAVE_COLLECTION_SUMMARY, so the two
+# literals must be kept in sync.
+_TOOL_NAME = "save_collection_summary"
+
+
+@dataclass
+class TrendSignals:
+    """Input data for CollectionSummaryGenerator, assembled by the orchestrator (DP-386)."""
+
+    # Aggregation unit. "weekly" and "monthly" get their own period label in
+    # the prompt, any other value a generic one; "daily" makes generate()
+    # return None without calling the model.
+    unit: str
+    # Period bounds, interpolated verbatim into the prompt header.
+    # NOTE(review): the tests pass ISO dates such as "2026-04-14" - confirm format.
+    period_start: str
+    period_end: str
+    # Number of collected posts in the current period.
+    cur_content_count: int
+    # Number of collected posts in the previous period; the delta/rate line is
+    # only added to the prompt when this is greater than zero.
+    prev_content_count: int = 0
+    # Tag statistics; the prompt reads keyword, cur_count, prev_count, delta,
+    # growth_rate and state from each entry.
+    top_tags: list[TagFrequency] = field(default_factory=list)
+    # Keywords for the period; only the first 15 are placed in the prompt.
+    tfidf_keywords: list[str] = field(default_factory=list)
+    # Summary of the previous period, appended to the prompt as reference
+    # material when it is non-empty.
+    prev_summary: str | None = None
+
+
+class CollectionSummaryGenerator:
+    """Ask an LLM (Bedrock Converse) to narrate a period's collection statistics."""
+
+    def __init__(
+        self,
+        aws_region: str = "ap-northeast-2",
+        model: str = "global.anthropic.claude-sonnet-4-6",
+    ) -> None:
+        """Create the Bedrock runtime client.
+
+        Args:
+            aws_region: Region used for the ``bedrock-runtime`` client.
+            model: Identifier passed as ``modelId`` on every request.
+        """
+        # Reads time out after 120 seconds and botocore-level retries are off.
+        self._client = boto3.client(
+            "bedrock-runtime",
+            region_name=aws_region,
+            config=Config(read_timeout=120, retries={"max_attempts": 0}),
+        )
+        self._model = model
+
+    def generate(self, signals: TrendSignals) -> str | None:
+        """Generate the narrative collection summary for ``signals``.
+
+        Args:
+            signals: Period statistics that are rendered into the user prompt.
+
+        Returns:
+            The ``collection_summary`` text produced by the model, or ``None``
+            when:
+
+            * ``signals.unit`` is ``"daily"`` (summaries are weekly/monthly only);
+            * the Bedrock call fails with a read timeout, a connection error
+              or a ``ClientError``, so the caller can keep saving the snapshot;
+            * the tool input carries no usable (non-blank string) summary.
+
+        Raises:
+            AIInternalError: The response contains no ``toolUse`` block.
+        """
+        if signals.unit == "daily":
+            return None
+
+        user_prompt = build_user_prompt(signals)
+
+        try:
+            response = self._client.converse(
+                modelId=self._model,
+                # The Converse API takes a cache checkpoint as its own system
+                # content block; a "cacheControl" key on the text block is not
+                # part of the request shape and fails parameter validation.
+                system=[
+                    {"text": SYSTEM_PROMPT},
+                    {"cachePoint": {"type": "default"}},
+                ],
+                messages=[{"role": "user", "content": [{"text": user_prompt}]}],
+                toolConfig=to_tool_config(TOOL_SAVE_COLLECTION_SUMMARY, _TOOL_NAME),
+                inferenceConfig={"maxTokens": 1024, "temperature": 0.4},
+            )
+        except ReadTimeoutError as exc:
+            logger.warning("collection_summary 타임아웃 — skip: %s", exc)
+            return None
+        except EndpointConnectionError as exc:
+            logger.warning("collection_summary 연결 실패 — skip: %s", exc)
+            return None
+        except ClientError as exc:
+            logger.warning("collection_summary LLM 에러 — skip: %s", exc)
+            return None
+
+        # Walk the response defensively so a malformed payload surfaces as the
+        # documented AIInternalError instead of a bare KeyError.
+        message = (response.get("output") or {}).get("message") or {}
+        content_blocks = message.get("content") or []
+        tool_use_block = next((b for b in content_blocks if "toolUse" in b), None)
+        if not tool_use_block:
+            raise AIInternalError("tool_use 블록 없음")
+
+        tool_input = tool_use_block["toolUse"].get("input") or {}
+        summary = tool_input.get("collection_summary")
+        if not isinstance(summary, str) or not summary.strip():
+            # The tool was called without usable text: treat it like any other
+            # skipped summary, but leave a trace in the logs.
+            logger.warning("collection_summary 필드 없음/빈 값 — skip")
+            return None
+        return summary
diff --git a/tests/test_trend_collection_summary.py b/tests/test_trend_collection_summary.py
@@ -0,0 +1,154 @@
+"""CollectionSummaryGenerator 단위 테스트 — mock 기반, 실제 API 호출 없음 (DP-384)."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from botocore.exceptions import EndpointConnectionError
+
+from app.core.exceptions import AIInternalError
+from app.services.trend.collection_summary import (
+    CollectionSummaryGenerator,
+    TrendSignals,
+)
+from app.services.trend.frequency import TagFrequency
+
+
+def _make_generator(
+    summary_text: str = "이번 주 수집 동향 요약",
+) -> tuple[CollectionSummaryGenerator, MagicMock]:
+    """Build a generator backed by a mocked Bedrock client.
+
+    The mock's ``converse`` returns a single toolUse block whose input holds
+    ``summary_text``. Returns the generator and the mock client.
+    """
+    tool_use = {
+        "toolUseId": "tool-1",
+        "name": "save_collection_summary",
+        "input": {"collection_summary": summary_text},
+    }
+    stub = MagicMock()
+    stub.converse.return_value = {
+        "output": {"message": {"content": [{"toolUse": tool_use}]}}
+    }
+
+    # boto3.client is patched only while the constructor runs, so no real
+    # client is created; the stub then replaces whatever was stored.
+    with patch("boto3.client"):
+        generator = CollectionSummaryGenerator(aws_region="us-east-1")
+    generator._client = stub
+    return generator, stub
+
+
+def _make_signals(unit: str = "weekly", **kwargs) -> TrendSignals:
+    """Return sample TrendSignals; keyword arguments override the defaults."""
+    kubernetes = TagFrequency(
+        keyword="Kubernetes",
+        cur_count=12,
+        prev_count=8,
+        delta=4,
+        growth_rate=50.0,
+        state="up",
+    )
+    params = {
+        "unit": unit,
+        "period_start": "2026-04-14",
+        "period_end": "2026-04-21",
+        "cur_content_count": 47,
+        "prev_content_count": 38,
+        "top_tags": [kubernetes],
+        "tfidf_keywords": ["서비스 메시", "무중단 배포", "헬스 체크"],
+        **kwargs,
+    }
+    return TrendSignals(**params)
+
+
+# ── 정상 흐름 ─────────────────────────────────────────────────────────────────
+
+
+def test_generate_returns_summary_on_success() -> None:
+    """generate() returns the collection_summary text found in the toolUse input."""
+    expected = "이번 주 수집 동향 요약"
+    generator, _ = _make_generator(expected)
+
+    assert generator.generate(_make_signals()) == expected
+
+
+# ── daily skip ────────────────────────────────────────────────────────────────
+
+
+def test_generate_returns_none_for_daily() -> None:
+    """A daily unit yields None and never reaches the Bedrock client."""
+    generator, client = _make_generator()
+    daily_signals = _make_signals(unit="daily")
+
+    assert generator.generate(daily_signals) is None
+    client.converse.assert_not_called()
+
+
+# ── LLM 실패 → None ──────────────────────────────────────────────────────────
+
+
+def test_generate_returns_none_on_llm_failure() -> None:
+    """A connection failure raised by converse() is turned into a None result."""
+    generator, client = _make_generator()
+    failure = EndpointConnectionError(endpoint_url="http://test")
+    client.converse.side_effect = failure
+
+    assert generator.generate(_make_signals()) is None
+
+
+# ── 프롬프트 검증 ─────────────────────────────────────────────────────────────
+
+
+def test_generate_prompt_includes_content_count() -> None:
+    """The user prompt sent to the model mentions both period post counts."""
+    generator, client = _make_generator()
+
+    generator.generate(_make_signals(cur_content_count=47, prev_content_count=38))
+
+    first_message = client.converse.call_args.kwargs["messages"][0]
+    prompt = first_message["content"][0]["text"]
+    for count in ("47", "38"):
+        assert count in prompt
+
+
+def test_generate_prompt_includes_tag_stats() -> None:
+    """The user prompt sent to the model carries the tag keyword and its count."""
+    generator, client = _make_generator()
+
+    generator.generate(_make_signals())
+
+    first_message = client.converse.call_args.kwargs["messages"][0]
+    prompt = first_message["content"][0]["text"]
+    for fragment in ("Kubernetes", "12"):
+        assert fragment in prompt
+
+
+def test_generate_with_prev_summary_includes_in_prompt() -> None:
+    """A previous summary is passed to the model under its own section header."""
+    previous = "지난 주에는 React 중심의 프론트엔드 글이 많았습니다."
+    generator, client = _make_generator()
+
+    generator.generate(_make_signals(prev_summary=previous))
+
+    first_message = client.converse.call_args.kwargs["messages"][0]
+    prompt = first_message["content"][0]["text"]
+    for fragment in ("이전 기간 collection_summary", previous):
+        assert fragment in prompt
+
+
+# ── tool_use 없음 → AIInternalError ──────────────────────────────────────────
+
+
+def test_generate_raises_on_missing_tool_use() -> None:
+    """A response holding only a text block makes generate() raise AIInternalError."""
+    generator, client = _make_generator()
+    text_only = {"output": {"message": {"content": [{"text": "일반 텍스트 응답"}]}}}
+    client.converse.return_value = text_only
+
+    with pytest.raises(AIInternalError):
+        generator.generate(_make_signals())