python3: can't open file '/content/risk-light-model-v2.py': [Errno 2] No such file or directory


## 250810 0.3ver(최종)

In [88]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import annotations
import os, io, sys, json, zipfile, time, hashlib, threading, queue
import requests
from datetime import datetime, timedelta, timezone
from typing import List, Dict, Any, Optional, Tuple
import xml.etree.ElementTree as ET
import argparse

# OpenAI (키는 환경변수 OPENAI_API_KEY 로 주입)
try:
    from openai import OpenAI
except Exception:
    OpenAI = None  # 라이브러리 미설치 환경 대비

# -----------------------------
# 설정
# -----------------------------
# Timezone used for every "now"/recency computation in this script (UTC+9).
KST = timezone(timedelta(hours=9))
# Timeout in seconds passed to every outbound `requests` call.
HTTP_TIMEOUT = 15
# Look-back window in days used when filtering news by publication date.
DEFAULT_DAYS = 50
# Default upper bound on the number of news items collected per query.
DEFAULT_MAX_ITEMS = 40
USER_AGENT = "CorpSentimentCLI/1.0 (+https://example.local)"

# Credentials and switches are injected through environment variables.
DART_API_KEY = os.getenv("DART_API_KEY", "")
NAVER_ID = os.getenv("NAVER_CLIENT_ID", "")
NAVER_SECRET = os.getenv("NAVER_CLIENT_SECRET", "")
NO_FINBERT = os.getenv("NO_FINBERT", "0") == "1"  # set NO_FINBERT=1 to skip the model
FINBERT_MAX_LOAD_SEC = int(os.getenv("FINBERT_MAX_LOAD_SEC", "10"))
# Module-level OpenAI client. The guarded import above leaves `OpenAI = None`
# when the library is missing, so calling it unconditionally would raise
# TypeError at import time. The key comes from OPENAI_API_KEY (empty string
# when unset, which matches the previous hard-coded value).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "")) if OpenAI is not None else None



In [91]:


# -----------------------------
# 공통 유틸
# -----------------------------
def _sha1(text: str) -> str:
    import hashlib
    return hashlib.sha1((text or "").encode("utf-8", errors="ignore")).hexdigest()

def to_kst(dt: datetime) -> datetime:
    """Convert *dt* to the module's KST timezone.

    A naive datetime is interpreted as UTC before conversion; an aware
    datetime is converted from whatever zone it already carries.
    """
    if dt.tzinfo is None:
        aware = dt.replace(tzinfo=timezone.utc)
    else:
        aware = dt
    return aware.astimezone(KST)

def parse_rfc2822_date(s: str) -> Optional[datetime]:
    """Parse an RFC 2822 date string with ``email.utils``.

    Returns the parsed datetime, or ``None`` when parsing fails for any
    reason (empty string, unrecognised format, wrong argument type).
    """
    parsed = None
    try:
        from email import utils as _email_utils
        parsed = _email_utils.parsedate_to_datetime(s)
    except Exception:
        # Best-effort parser: callers substitute their own fallback date.
        pass
    return parsed

def clip_days(dt: datetime, days: int = DEFAULT_DAYS) -> bool:
    """Return True when *dt* is no older than *days* days relative to now.

    *dt* is normalised through ``to_kst`` (naive values are treated as UTC).
    Timestamps in the future also return True because their age is negative.
    """
    # An aware "now" replaces datetime.utcnow(), which is deprecated since
    # Python 3.12; the resulting instant is the same.
    now_kst = datetime.now(KST)
    return (now_kst - to_kst(dt)) <= timedelta(days=days)

def _is_spc_like(name: str) -> bool:
    """Return True if *name* contains any entry of ``SPC_PATTERNS``.

    Matching is a case-insensitive substring test; falsy *name* is treated
    as the empty string.
    """
    lowered = (name or "").lower()
    for pattern in SPC_PATTERNS:
        if pattern.lower() in lowered:
            return True
    return False

def _reason_from_metrics(corp_name: str, neg_ratio: float, red_hits: int, label: str, news_count: int) -> str:
    """OPENAI_API_KEY 없거나 GPT 실패 시 폴백 한 줄 설명."""
    if news_count == 0:
        return f"최근 기사 신호가 없어 불확실성이 커 {label} 판단이며, 추이를 관망하는 편이 안전합니다."
    if label == "긍정":
        return f"최근 보도에서 부정 신호가 드물어({neg_ratio*100:.1f}%·레드키워드 {red_hits}건) 전반적으로 {label}입니다."
    if label == "중립":
        return f"보도 내 긍·부정 신호가 혼재해({neg_ratio*100:.1f}%·레드키워드 {red_hits}건) {label}으로 판단됩니다."
    return f"부정 신호 비중이 높아({neg_ratio*100:.1f}%·레드키워드 {red_hits}건) {label}입니다."

# -----------------------------
# 1) DART corpCode 로딩/검색
# -----------------------------
def load_corp_list_from_dart(DART_API_KEY: str) -> List[Dict[str, str]]:
    """Download and parse the DART corporation code list.

    Args:
        DART_API_KEY: OpenDART API key sent as the ``crtfc_key`` parameter.

    Returns:
        One dict per ``<list>`` element with the stripped ``corp_code``,
        ``corp_name``, ``stock_code`` and ``modify_date`` texts (missing
        elements become empty strings).

    Raises:
        RuntimeError: if the key is empty, or the response body is not a ZIP
            archive.
        requests.HTTPError: on a non-2xx HTTP status.
    """
    if not DART_API_KEY:
        raise RuntimeError("❌ DART_API_KEY 환경변수가 필요합니다.")
    print("⏳ DART 기업목록 로딩 중...", flush=True)
    # Let requests build the query string so the key is URL-encoded properly.
    res = requests.get(
        "https://opendart.fss.or.kr/api/corpCode.xml",
        params={"crtfc_key": DART_API_KEY},
        headers={"User-Agent": USER_AGENT},
        timeout=HTTP_TIMEOUT,
    )
    res.raise_for_status()
    try:
        # Context managers close both the archive and the member handle.
        with zipfile.ZipFile(io.BytesIO(res.content)) as zf, zf.open("CORPCODE.xml") as f:
            root = ET.parse(f).getroot()
    except zipfile.BadZipFile as err:
        # NOTE(review): a non-ZIP body presumably means the API answered with
        # an error document (bad key, quota) — confirm against the DART guide.
        raise RuntimeError("❌ DART corpCode 응답이 ZIP 형식이 아닙니다(API 키/호출 한도를 확인하세요).") from err

    fields = ("corp_code", "corp_name", "stock_code", "modify_date")
    out = [
        {field: (el.findtext(field) or "").strip() for field in fields}
        for el in root.findall("list")
    ]
    print(f"✅ 기업목록 {len(out):,}건 로드 완료", flush=True)
    return out

def find_corp_candidates(corps: List[Dict[str, str]], keyword: str, limit: int = 20) -> List[Dict[str, str]]:
    """Return up to *limit* corporations whose name contains *keyword*.

    Matching is a case-insensitive substring test on ``corp_name`` using the
    whitespace-stripped keyword. Candidates are ranked by a relevance score:
    exact name match (+100), name starts with the keyword (+50), has a stock
    code (+10), plus up to 20 points for a name length close to the keyword
    length. Ties keep the input order.

    Scoring uses the same normalised keyword as filtering. Previously the
    raw keyword was used for scoring, so padded or differently-cased input
    passed the filter but never earned the exact/prefix bonus.
    """
    needle = keyword.strip()
    kw = needle.lower()
    print(f"🔎 후보 검색: '{keyword}'", flush=True)

    def relevance(corp: Dict[str, str]) -> int:
        name = corp["corp_name"]
        name_l = name.lower()
        points = 0
        if name_l == kw:
            points += 100
        if name_l.startswith(kw):
            points += 50
        if corp.get("stock_code"):
            points += 10
        points += max(0, 20 - abs(len(name) - len(needle)))
        return points

    cand = [c for c in corps if kw in c["corp_name"].lower()]
    # list.sort is stable, also with reverse=True, so ties keep input order.
    cand.sort(key=relevance, reverse=True)
    print(f"✅ 후보 {len(cand)}건", flush=True)
    return cand[:limit]

# -----------------------------
# 2) 뉴스 수집
# -----------------------------
def fetch_news_naver(query: str, display: int = DEFAULT_MAX_ITEMS) -> List[Dict[str, Any]]:
    """Fetch recent news for *query* from the Naver news search API.

    Args:
        query: Search phrase.
        display: Maximum number of items to request and return. It is
            clamped to 1..100 (the upper bound was already enforced; the
            lower bound keeps non-positive values from being sent).

    Returns:
        Dicts with ``title``, ``description``, ``link`` and ISO-formatted
        ``date``, newest first (the request uses ``sort=date``), limited to
        items published within ``DEFAULT_DAYS`` days.

    Raises:
        RuntimeError: if the Naver client id/secret are not configured.
        requests.HTTPError: on a non-2xx HTTP status.
    """
    import html  # function-scoped: the only new stdlib name this block needs

    if not (NAVER_ID and NAVER_SECRET):
        raise RuntimeError("NAVER 키 없음")
    print(f"⏳ NAVER 뉴스 수집: '{query}'", flush=True)
    wanted = max(1, min(100, display))
    headers = {
        "X-Naver-Client-Id": NAVER_ID,
        "X-Naver-Client-Secret": NAVER_SECRET,
        "User-Agent": USER_AGENT,
    }
    params = {"query": query, "display": wanted, "start": 1, "sort": "date"}
    url = "https://openapi.naver.com/v1/search/news.json"
    r = requests.get(url, headers=headers, params=params, timeout=HTTP_TIMEOUT)
    r.raise_for_status()
    data = r.json()

    def _plain(raw: Optional[str]) -> str:
        # Strip the <b> highlight tags first, then decode entities such as
        # &quot; so downstream keyword/sentiment matching sees real text.
        return html.unescape((raw or "").replace("<b>", "").replace("</b>", ""))

    items: List[Dict[str, Any]] = []
    for it in data.get("items", []):
        # An unparseable pubDate falls back to "now", so the item is kept.
        dt = parse_rfc2822_date(it.get("pubDate", "")) or datetime.now(KST)
        if not clip_days(dt, DEFAULT_DAYS):
            continue
        items.append({
            "title": _plain(it.get("title")),
            "description": _plain(it.get("description")),
            "link": it.get("link") or it.get("originallink") or "",
            "date": dt.isoformat(),
        })
        # Honour the caller's `display`; the cap used to be hard-coded to
        # DEFAULT_MAX_ITEMS regardless of the argument.
        if len(items) >= wanted:
            break
    print(f"✅ NAVER {len(items)}건", flush=True)
    return items

def fetch_news_google_rss(query: str, max_items: int = DEFAULT_MAX_ITEMS) -> List[Dict[str, Any]]:
    """Fetch recent news for *query* from the Google News RSS search feed.

    Used as the backup source when Naver is unavailable.

    Args:
        query: Search phrase (URL-quoted before being placed in the URL).
        max_items: Maximum number of items to return; values <= 0 yield an
            empty list.

    Returns:
        Dicts with ``title``, ``description``, ``link`` and ISO-formatted
        ``date`` for items published within ``DEFAULT_DAYS`` days.

    Raises:
        requests.HTTPError: on a non-2xx HTTP status.
        xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    print(f"⏳ Google RSS 수집(백업): '{query}'", flush=True)
    q = requests.utils.quote(query)
    url = f"https://news.google.com/rss/search?q={q}+when:{DEFAULT_DAYS}d&hl=ko&gl=KR&ceid=KR:ko"
    headers = {"User-Agent": USER_AGENT}
    r = requests.get(url, headers=headers, timeout=HTTP_TIMEOUT)
    r.raise_for_status()
    # Parse the raw bytes so the XML declaration decides the encoding rather
    # than requests' guess from the HTTP headers.
    root = ET.fromstring(r.content)
    ch = root.find("channel")
    out: List[Dict[str, Any]] = []
    if ch is None:
        return out

    def _parse_iso8601(text: str) -> Optional[datetime]:
        # dc:date is not RFC 2822 — presumably ISO 8601; confirm against the
        # feed. Without this fallback such dates silently became "now".
        try:
            return datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            return None

    for item in ch.findall("item"):
        if len(out) >= max_items:
            break
        title = item.findtext("title") or ""
        desc = item.findtext("description") or ""
        link = item.findtext("link") or ""
        pub = (item.findtext("{http://purl.org/dc/elements/1.1/}date") or item.findtext("pubDate") or "").strip()
        dt = parse_rfc2822_date(pub) or _parse_iso8601(pub) or datetime.now(KST)
        if not clip_days(dt, DEFAULT_DAYS):
            continue
        out.append({"title": title, "description": desc, "link": link, "date": dt.isoformat()})
    print(f"✅ Google RSS {len(out)}건", flush=True)
    return out

def fetch_news(query: str) -> List[Dict[str, Any]]:
    """Collect news for *query*, preferring Naver and falling back to Google RSS.

    Naver's result is returned when it is non-empty. If Naver raises or
    returns nothing, Google RSS is tried; if that raises too, an empty list
    is returned. Failures are reported on stdout, never raised.
    """
    try:
        primary = fetch_news_naver(query, display=DEFAULT_MAX_ITEMS)
    except Exception as exc:
        print(f"⚠️ NAVER 실패: {exc}", flush=True)
    else:
        if primary:
            return primary

    try:
        backup = fetch_news_google_rss(query, max_items=DEFAULT_MAX_ITEMS)
    except Exception as exc:
        print(f"⚠️ Google RSS 실패: {exc}", flush=True)
        return []
    return backup

# ---------------------------
# 3) 펀더멘털 리스크 점수 (기존 함수 유지)
# ---------------------------
def _dart_number(value: Any) -> Optional[float]:
    """Parse a DART numeric cell; return None for blank, "-" or non-numeric text."""
    text = str(value).replace(",", "").strip()
    if text in ("", "-"):
        return None
    try:
        return float(text)
    except ValueError:
        return None


def score_fundamental(corp_code: str, prefer_year: Optional[int] = None) -> dict:
    """Count fundamental risk flags for a corporation from OpenDART disclosures.

    The function first looks for the most recent (year, report code) pair
    that has any data, trying *prefer_year* (or the current year) and then
    the year before, with report codes in the order 11014, 11012, 11013
    (the original note reads "3Q > 2Q > 1Q"). It then raises one flag for
    each of the following:

      * a non-empty ``irdsSttus`` response (reported as "감자 공시"),
      * a non-empty ``hyslrChgSttus`` response (largest-shareholder change),
      * a minority-shareholder holding rate of 70% or more,
      * a year-over-year head-count change of -10% or worse,
      * more than 50% of the outstanding balance in ``cprndNrdmpBlce`` due
        within one year,
      * any invested company with a negative recent net income.

    Args:
        corp_code: DART corporation code.
        prefer_year: Business year to try first; defaults to the current year.

    Returns:
        ``{"year", "reprt_code", "flag", "reasons"}``. When the API key is
        missing or no report has data, ``flag`` is 0, ``reasons`` is empty
        and ``reprt_code`` is "11011". API errors are treated as "no data";
        the function itself does not raise for them.
    """
    base_url = "https://opendart.fss.or.kr"
    report_priority = ("11014", "11012", "11013")
    probe_paths = (
        "/api/empSttus.json",
        "/api/mrhlSttus.json",
        "/api/hyslrChgSttus.json",
        "/api/cprndNrdmpBlce.json",
    )
    api_key = os.getenv("DART_API_KEY", "")
    first_year = prefer_year or datetime.now().year

    def _no_data() -> dict:
        return {"year": first_year, "reprt_code": "11011", "flag": 0, "reasons": []}

    if not api_key:
        return _no_data()

    # Successful responses are memoised so the probe phase and the scoring
    # phase do not request the same endpoint twice.
    fetched: Dict[Tuple[str, int, str], list] = {}

    with requests.Session() as session:

        def _rows(path: str, year: int, reprt_code: str) -> list:
            key = (path, year, reprt_code)
            if key in fetched:
                return fetched[key]
            params = {
                "crtfc_key": api_key,
                "corp_code": corp_code,
                "bsns_year": str(year),
                "reprt_code": reprt_code,
            }
            try:
                resp = session.get(f"{base_url}{path}", params=params, timeout=HTTP_TIMEOUT)
                resp.raise_for_status()
                data = resp.json()
                if str(data.get("status", "000")) != "000":
                    rows = []
                else:
                    rows = data.get("list", []) or []
            except Exception:
                # Best effort: a failed call counts as "no data" and is not
                # cached, so a later request for the same key can retry.
                return []
            fetched[key] = rows
            return rows

        chosen = None
        for year in (first_year, first_year - 1):
            for rc in report_priority:
                # The generator stops at the first endpoint that has rows.
                if any(_rows(path, year, rc) for path in probe_paths):
                    chosen = (year, rc)
                    break
            if chosen:
                break
        if chosen is None:
            return _no_data()
        year, rc = chosen

        reasons: List[str] = []

        if _rows("/api/irdsSttus.json", year, rc):
            reasons.append("감자 공시")

        if _rows("/api/hyslrChgSttus.json", year, rc):
            reasons.append("최대주주 변동")

        # Minority-shareholder holding rate; the last matching row wins.
        hold = None
        for row in _rows("/api/mrhlSttus.json", year, rc):
            if (row.get("se") or "").strip() == "소액주주":
                hold = _dart_number(row.get("hold_stock_rate"))
        if hold is not None and hold >= 70:
            reasons.append(f"소액주주 {hold:.1f}%")

        def _headcount(rows: list) -> Optional[float]:
            values = [v for v in (_dart_number(r.get("sm")) for r in rows) if v is not None]
            return sum(values) if values else None

        tcur = _headcount(_rows("/api/empSttus.json", year, rc))
        tprv = _headcount(_rows("/api/empSttus.json", year - 1, rc))
        # `is not None` rather than truthiness: a current head-count of 0 is
        # a -100% change and must still be evaluated.
        if tcur is not None and tprv is not None and tprv > 0:
            yoy = (tcur - tprv) / tprv * 100
            if yoy <= -10:
                reasons.append(f"직원수 YoY {yoy:.1f}%")

        bd = _rows("/api/cprndNrdmpBlce.json", year, rc)
        if bd:
            sm = _dart_number(bd[0].get("sm"))
            s1 = _dart_number(bd[0].get("yy1_below"))
            if sm and s1 and sm > 0 and s1 / sm > 0.5:
                reasons.append(f"1년내 상환 {s1/sm*100:.1f}%")

        ot = _rows("/api/otrCprInvstmntSttus.json", year, rc)
        if any((_dart_number(r.get("recent_bsns_year_fnnr_sttus_thstrm_ntpf")) or 0) < 0 for r in ot):
            reasons.append("적자 피투자법인 존재")

    # Each triggered rule contributes exactly one reason, so flag == len(reasons).
    return {"year": year, "reprt_code": rc, "flag": len(reasons), "reasons": reasons}

# -----------------------------
# 4) FinBERT (타임아웃/옵션) + 백업룰
# -----------------------------
# Lazily populated FinBERT handles: tokenizer ("tok"), model ("mdl") and the
# device string the model was moved to. "tok" is None until a load succeeds.
_FINBERT = {"tok": None, "mdl": None, "device": "cpu"}
# Memo of sentiment scores keyed by the SHA-1 hex digest of the scored text.
_SENT_CACHE: Dict[str, float] = {}

def _load_finbert_worker(out_q: "queue.Queue"):
    """Load the FinBERT tokenizer and model and report the outcome on *out_q*.

    Intended to run in a background thread. Exactly one 4-tuple is put on
    the queue: ``(tokenizer, model, device, None)`` on success, or
    ``(None, None, None, exception)`` if importing or loading fails.
    """
    model_id = "yiyanghkust/finbert-tone"
    try:
        # Heavy imports stay inside the worker so a missing or slow
        # dependency only affects this thread.
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        target = "cuda" if torch.cuda.is_available() else "cpu"
        out_q.put((tokenizer, model.to(target), target, None))
    except Exception as load_err:
        out_q.put((None, None, None, load_err))

def _ensure_finbert_loaded(timeout_sec: int = FINBERT_MAX_LOAD_SEC) -> bool:
    """Make sure FinBERT is available, waiting at most *timeout_sec* seconds.

    Returns True once the tokenizer and model are stored in ``_FINBERT``,
    and False when FinBERT is disabled (``NO_FINBERT``), still loading, or
    failed to load; callers then use the keyword-based backup rule.

    The first call starts a daemon loader thread and waits up to
    *timeout_sec*. If that wait times out, the loader keeps running and its
    queue is remembered in ``_FINBERT["load_queue"]``; later calls poll that
    queue without blocking. Previously every call after a timeout started
    another full model load and blocked for the whole timeout again. A load
    that ends in an error clears the remembered queue, so the next call
    retries from scratch as before.
    """
    if _FINBERT["tok"] is not None:
        return True
    if NO_FINBERT:
        return False

    out_q = _FINBERT.get("load_queue")
    first_attempt = out_q is None
    if first_attempt:
        print(f"⏳ FinBERT 로딩 시도(최대 {timeout_sec}s)...", flush=True)
        out_q = queue.Queue(maxsize=1)
        _FINBERT["load_queue"] = out_q
        threading.Thread(target=_load_finbert_worker, args=(out_q,), daemon=True).start()

    try:
        if first_attempt:
            tok, mdl, device, err = out_q.get(timeout=timeout_sec)
        else:
            # A loader is already running: just check whether it finished.
            tok, mdl, device, err = out_q.get_nowait()
    except queue.Empty:
        if first_attempt:
            print("⚠️ FinBERT 로딩 타임아웃 → 백업룰로 전환", flush=True)
        return False

    # The loader delivered its single result; forget the consumed queue.
    _FINBERT["load_queue"] = None
    if err is not None or tok is None or mdl is None:
        print(f"⚠️ FinBERT 로딩 실패 → 백업룰로 전환 ({err})", flush=True)
        return False
    _FINBERT["tok"], _FINBERT["mdl"], _FINBERT["device"] = tok, mdl, device
    print("✅ FinBERT 로딩 완료", flush=True)
    return True

def analyze_sentiment(texts: List[str]) -> List[float]:
    """Score each text with a negative-sentiment value, in input order.

    Scores are memoised in ``_SENT_CACHE`` by the SHA-1 of the text. Texts
    not in the cache are scored with FinBERT when it is loaded: the softmax
    probability of class index 2 is used (treated as the negative class —
    presumably the finbert-tone label order; confirm). Anything still
    unscored falls back to a lexicon rule: the number of
    ``BACKUP_NEG_LEXICON`` entries found in the lower-cased text, divided by
    3 and capped at 1.0. An empty input returns an empty list.
    """
    if not texts:
        return []

    digests = [_sha1(t) for t in texts]
    scores: List[Optional[float]] = []
    todo: List[int] = []
    for pos, digest in enumerate(digests):
        if digest in _SENT_CACHE:
            scores.append(_SENT_CACHE[digest])
        else:
            scores.append(None)
            todo.append(pos)

    # The loader is consulted even when nothing is pending, as before.
    if _ensure_finbert_loaded() and todo:
        import torch
        tokenizer = _FINBERT["tok"]
        model = _FINBERT["mdl"]
        device = _FINBERT["device"]
        batch_size = 16
        for start in range(0, len(todo), batch_size):
            chunk = todo[start:start + batch_size]
            # Character pre-trim before token-level truncation.
            snippets = [(texts[i] or "")[:1024] for i in chunk]
            encoded = tokenizer(snippets, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
            with torch.no_grad():
                logits = model(**encoded).logits
            neg_column = torch.softmax(logits, dim=1)[:, 2].detach().cpu().numpy().tolist()
            for i, prob in zip(chunk, neg_column):
                scores[i] = float(prob)
                _SENT_CACHE[digests[i]] = scores[i]

    # Backup rule for everything the model did not score.
    for i, current in enumerate(scores):
        if current is not None:
            continue
        lowered = (texts[i] or "").lower()
        hit_count = sum(1 for w in BACKUP_NEG_LEXICON if w in lowered)
        scores[i] = min(1.0, hit_count / 3.0)
        _SENT_CACHE[digests[i]] = scores[i]

    return [float(value or 0.0) for value in scores]

# -----------------------------
# 5) 베이지안 스무딩 및 라벨
# -----------------------------
def smoothed_neg_ratio(neg_probs: List[float], thr: float=0.5, alpha: float=1.0, beta: float=3.0) -> float:
    """Return the smoothed share of items whose negative score is >= *thr*.

    Computed as ``(hits + alpha) / (n + alpha + beta)``; with no items the
    result is the prior ``alpha / (alpha + beta)``.
    """
    if not neg_probs:
        return alpha / (alpha + beta)

    flagged = 0
    for prob in neg_probs:
        if prob >= thr:
            flagged += 1
    total = len(neg_probs)
    return (flagged + alpha) / (total + alpha + beta)

def judge_sentiment_label(neg_ratio: float) -> str:
    """Map a negative ratio to a label: >= 0.60 "부정", >= 0.30 "중립", else "긍정"."""
    # Thresholds are checked from the strictest down; the first match wins.
    for floor, label in ((0.60, "부정"), (0.30, "중립")):
        if neg_ratio >= floor:
            return label
    return "긍정"

def summarize_company(c: Dict[str, str]) -> str:
    """Format a corporation record as a one-line label.

    The label is "<name> (<listed|unlisted>[, stock code], corp_code <code>)";
    a record counts as listed when its ``stock_code`` is non-empty.
    """
    name = c["corp_name"]
    ticker = c.get("stock_code")
    details = ["상장" if ticker else "비상장"]
    if ticker:
        details.append(f"종목코드 {ticker}")
    details.append(f"corp_code {c['corp_code']}")
    return f"{name} ({', '.join(details)})"

# -----------------------------
# 6) 종합 평가(뉴스+펀더멘털+개황) 헬퍼
# -----------------------------
def _build_prior_context(corp: Dict[str,str], news_count: int) -> dict:
    """Assemble static "prior" hints about a corporation for the GPT prompts.

    Includes the name and code, listed/unlisted status (from ``stock_code``),
    an SPC-like name flag (via ``_is_spc_like``), a parent hint that only
    recognises names containing "쿠팡", a placeholder age hint, and the
    number of collected news items.
    """
    corp_name = corp.get("corp_name", "")
    listing = "상장" if corp.get("stock_code") else "비상장"
    pattern = "SPC 의심" if _is_spc_like(corp_name) else "일반법인"
    parent = "쿠팡 관련 사명" if "쿠팡" in corp_name else "정보없음"

    context = {
        "corp_name": corp_name,
        "corp_code": corp.get("corp_code", ""),
        "is_listed": listing,
        "name_pattern": pattern,
        "parent_hint": parent,
        "age_hint": "정보없음",  # placeholder until founding-year data is wired in
        "news_count": news_count,
    }
    return context

def _compute_news_metrics(items: List[Dict[str,Any]]) -> Tuple[float, str, int, List[Tuple[float,Dict[str,Any]]]]:
    """Derive sentiment metrics from collected news items.

    Returns a 4-tuple:
      * smoothed negative ratio over all items,
      * the news label derived from that ratio,
      * total red-keyword hits (each ``RED_KEYWORDS`` entry counts once per
        text it appears in, case-insensitively),
      * up to three ``(score, item)`` pairs with the highest negative score,
        keeping only those scoring at least 0.5.
    """
    texts = [f"{it.get('title','')} {it.get('description','')}".strip() for it in items]
    neg_probs = analyze_sentiment(texts) if texts else []
    neg_ratio = smoothed_neg_ratio(neg_probs, thr=0.5, alpha=1.0, beta=3.0)
    label_news = judge_sentiment_label(neg_ratio)

    red_hits = sum(
        1
        for text in texts
        for word in RED_KEYWORDS
        if word.lower() in (text or "").lower()
    )

    ranked = [(prob, it) for it, prob in zip(items, neg_probs)]
    # Sort on the score only; stable, so equal scores keep input order.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    top_neg = [pair for pair in ranked[:3] if pair[0] >= 0.5]
    return neg_ratio, label_news, red_hits, top_neg

def _combine_risk_label(fund_flag: int, label_news: str, news_count: int) -> Tuple[str,int]:
    """
    간단 합산 규칙:
      - 뉴스 라벨 점수: 긍정0 / 중립1 / 부정2
      - total_score = fund_flag + news_score (+ 뉴스없음이면 news_score=1로 관망 반영)
      - 최종라벨: total>=4 부정, total>=2 중립, else 긍정
    """
    news_score = {"긍정":0, "중립":1, "부정":2}.get(label_news, 1)
    if news_count == 0:
        news_score = 1  # 관망 페널티
    total = fund_flag + news_score
    if total >= 4: return "부정", total
    if total >= 2: return "중립", total
    return "긍정", total

# --- 총평: 규칙 & GPT ---
def _overall_comment_fallback(combined: Dict[str,Any],
                              news: Dict[str,Any],
                              fundamental: Dict[str,Any],
                              prior: Dict[str,Any]) -> str:
    """
    위험도 %와 신호에 기반한 규칙형 총평. 1~2문장.
    """
    pct = float(combined.get("risk_pct", 0.0))
    label = combined.get("final_label", "중립")
    fund_flag = int(fundamental.get("flag", 0) or 0)
    red = int(news.get("red_hits", 0) or 0)
    ncnt = int(news.get("news_count", 0) or 0)

    # 버킷
    if pct >= 75:
        bucket = "매우 높음(경계)"
        advice = "단기 노출 축소·관망 권고"
    elif pct >= 50:
        bucket = "높음(주의)"
        advice = "보수적 접근 및 모니터링 강화"
    elif pct >= 25:
        bucket = "보통"
        advice = "중립 유지, 이벤트 체크"
    else:
        bucket = "낮음"
        advice = "기본 유지, 이슈 발생 감시"

    # 보정 힌트
    hint = []
    if ncnt == 0:
        hint.append("최근 뉴스 부재")
    if red > 0:
        hint.append(f"레드키워드 {red}건")
    if fund_flag >= 2:
        hint.append(f"펀더멘털 플래그 {fund_flag}건")

    tail = f"({', '.join(hint)})" if hint else ""
    return f"위험도 {pct:.1f}%({bucket})로 {label} 판단. {advice}{(' ' + tail) if tail else ''}."

def _gpt_overall_comment(company: Dict[str,str],
                         fundamental: Dict[str,Any],
                         news: Dict[str,Any],
                         prior: Dict[str,Any],
                         combined: Dict[str,Any]) -> str:
    """Ask GPT for a short overall comment; fall back to the rule-based one.

    The rule-based ``_overall_comment_fallback`` is returned whenever the
    OpenAI library is missing, ``OPENAI_API_KEY`` is unset, the request
    raises, or the model returns an empty message.

    Args:
        company: Company identity dict, forwarded verbatim into the prompt.
        fundamental: Output of the fundamental scoring (``flag``, ``reasons``).
        news: News metrics (``news_count``, ``neg_ratio``, ``label_news``,
            ``red_hits``).
        prior: Prior-context hints (``is_listed``, ``name_pattern``,
            ``parent_hint``, ``age_hint``).
        combined: Final verdict (``final_label``, ``total_score``, ``risk_pct``).
    """
    def _fallback() -> str:
        return _overall_comment_fallback(combined, news, fundamental, prior)

    # The variable name must be a string literal: the bare identifier used
    # before raised NameError (or looked up whatever that global held)
    # outside the try block, so the fallback never ran.
    api_key = os.getenv("OPENAI_API_KEY")
    if not (OpenAI and api_key):
        return _fallback()
    try:
        client = OpenAI(api_key=api_key)
        sys_prompt = (
            "너는 한국 기업 리스크를 종합 분석 후 요약하는 전문 애널리스트다. "
            "입력 JSON을 바탕으로 1~2문장, 40자 이내로 총평을 작성하라. "
            "과장 표현은 피하고, 위험도%·라벨·핵심 근거(1개 내외)·간단 권고를 포함하라."
        )
        payload = {
            "company": company,
            "fundamental": {
                "flag": fundamental.get("flag", 0),
                "reasons": fundamental.get("reasons", []),
            },
            "news": {
                "news_count": news.get("news_count", 0),
                "neg_ratio": round(news.get("neg_ratio", 0.0), 4),
                "label_news": news.get("label_news", "중립"),
                "red_hits": news.get("red_hits", 0),
            },
            "prior": {
                "is_listed": prior.get("is_listed"),
                "name_pattern": prior.get("name_pattern"),
                "parent_hint": prior.get("parent_hint"),
                "age_hint": prior.get("age_hint"),
            },
            "combined": {
                "final_label": combined.get("final_label", "중립"),
                "total_score": combined.get("total_score", 0),
                "risk_pct": combined.get("risk_pct", 0.0),
            }
        }
        user_prompt = "다음 JSON을 요약해 총평을 작성:\n\n" + json.dumps(payload, ensure_ascii=False, indent=2)
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role":"system","content":sys_prompt},
                      {"role":"user","content":user_prompt}],
            max_tokens=120,
            temperature=0.3
        )
        msg = (resp.choices[0].message.content or "").strip()
        return msg or _fallback()
    except Exception:
        # Deliberate best effort: any API or parsing problem degrades to the
        # deterministic comment instead of failing the whole analysis.
        return _fallback()

# --- 기존 이유(GPT) ---
def _gpt_reason(company: Dict[str,str],
                fundamental: Dict[str,Any],
                news: Dict[str,Any],
                prior: Dict[str,Any],
                combined: Dict[str,Any]) -> str:
    """Ask GPT to explain the final label; fall back to a rule-based sentence.

    ``_reason_from_metrics`` is used whenever the OpenAI library is missing,
    ``OPENAI_API_KEY`` is unset, the request raises, or the model returns an
    empty message.

    Keys read from the inputs: ``fundamental.flag`` / ``.reasons``;
    ``news.news_count`` / ``.neg_ratio`` / ``.red_hits`` / ``.label_news``;
    ``prior.is_listed`` / ``.name_pattern`` / ``.parent_hint`` / ``.age_hint``;
    ``combined.final_label`` / ``.total_score`` / ``.risk_pct``. *company* is
    forwarded verbatim into the prompt.
    """
    def _fallback() -> str:
        # Single definition of the fallback that was previously repeated
        # three times with identical arguments.
        return _reason_from_metrics(company.get("corp_name",""), news.get("neg_ratio",0.25),
                                    news.get("red_hits",0), combined.get("final_label","중립"),
                                    news.get("news_count",0))

    api_key = os.getenv("OPENAI_API_KEY")
    if not (OpenAI and api_key):
        return _fallback()
    try:
        client = OpenAI(api_key=api_key)
        sys_prompt = (
            # Role word repaired: it was the garbled "널리스트".
            "너는 한국 기업의 애널리스트다. "
            "단정 대신 근거 기반으로 요약하되, 최대 2문장(각 40자 내외)으로 간결히 쓰고, "
            "필요시 ‘관망’ 권고 톤을 사용하라."
        )
        user_payload = {
            "company": company,
            "fundamental": {
                "flag": fundamental.get("flag", 0),
                "reasons": fundamental.get("reasons", []),
            },
            "news": {
                "news_count": news.get("news_count", 0),
                "neg_ratio": round(news.get("neg_ratio", 0.0), 4),
                "label_news": news.get("label_news", "중립"),
                "red_hits": news.get("red_hits", 0),
            },
            "prior": {
                "is_listed": prior.get("is_listed"),
                "name_pattern": prior.get("name_pattern"),
                "parent_hint": prior.get("parent_hint"),
                "age_hint": prior.get("age_hint"),
            },
            "combined": {
                "final_label": combined.get("final_label", "중립"),
                "total_score": combined.get("total_score", 0),
                "risk_pct": combined.get("risk_pct", 0.0),
            }
        }
        user_prompt = (
            "아래 JSON을 종합해 최종 라벨의 이유를 요약해줘. "
            "핵심 근거 1~2개만 언급하고 과장 표현은 피하라.\n\n"
            + json.dumps(user_payload, ensure_ascii=False, indent=2)
        )
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role":"system","content":sys_prompt},
                      {"role":"user","content":user_prompt}],
            max_tokens=120,
            temperature=0.3
        )
        msg = (resp.choices[0].message.content or "").strip()
        return msg or _fallback()
    except Exception:
        # Deliberate best effort: degrade to the deterministic explanation.
        return _fallback()

# -----------------------------
# 7) 서비스 엔트리포인트들
# -----------------------------
def run_once(keyword: str, choice_idx: Optional[int] = None) -> dict:
    """Search for *keyword*, pick a candidate and run the full analysis.

    Single entry point for backend callers. ``main()`` does not use it; the
    CLI calls ``run_once_with_corp`` with the corporation the user selected.

    Args:
        keyword: Part of a corporation name.
        choice_idx: Index into the candidate list; ``None`` or an
            out-of-range value selects the first candidate.

    Returns:
        The result of ``run_once_with_corp``, or
        ``{"matches": [], "message": ...}`` when nothing matches.
    """
    corps = load_corp_list_from_dart(DART_API_KEY)
    cand = find_corp_candidates(corps, keyword, limit=20)
    if not cand:
        return {"matches": [], "message": "해당 키워드로 기업을 찾지 못했습니다."}

    picked = 0
    if choice_idx is not None and 0 <= choice_idx < len(cand):
        picked = choice_idx
    return run_once_with_corp(cand[picked], cand=cand)

def run_once_with_corp(corp: dict, cand: Optional[List[dict]] = None) -> dict:
    """Run the full analysis for an already-selected corporation.

    Steps: collect news and compute news metrics, score fundamentals, build
    the prior context, combine both into a final label and a normalised risk
    percentage, then produce the reason and the overall comment (GPT or
    rule-based).

    Args:
        corp: Corporation record; ``corp_name`` and ``corp_code`` are
            required, ``stock_code`` is optional.
        cand: Candidate list the corporation was chosen from; echoed back
            under ``matches``.

    Returns:
        Dict with ``company``, ``fundamental``, ``news``, ``prior``,
        ``combined``, ``reason``, ``overall_comment``, ``top_neg``,
        ``items`` and ``matches``.
    """
    # News collection and metrics
    items = fetch_news(corp["corp_name"])
    item_count = len(items)
    neg_ratio, label_news, red_hits, top_neg = _compute_news_metrics(items)
    news = {
        "news_count": item_count,
        "neg_ratio": neg_ratio,
        "label_news": label_news,
        "red_hits": red_hits,
    }

    # Fundamentals and prior context
    fundamental = score_fundamental(corp["corp_code"])
    prior = _build_prior_context(corp, news_count=item_count)

    # Final label plus the score normalised to a percentage
    final_label, total_score = _combine_risk_label(fundamental.get("flag",0), label_news, item_count)
    try:
        if MAX_TOTAL_SCORE > 0:
            risk_pct = round((total_score / MAX_TOTAL_SCORE) * 100.0, 1)
        else:
            risk_pct = 0.0
    except Exception:
        risk_pct = 0.0
    combined = {"final_label": final_label, "total_score": total_score, "risk_pct": risk_pct}

    # Identity dict shared by both GPT prompts
    company = {"corp_name": corp["corp_name"], "corp_code": corp["corp_code"], "stock_code": corp.get("stock_code","")}
    reason = _gpt_reason(
        company=company, fundamental=fundamental, news=news, prior=prior, combined=combined
    )
    overall_comment = _gpt_overall_comment(
        company=dict(company), fundamental=fundamental, news=news, prior=prior, combined=combined
    )

    result = {
        "company": corp,
        "fundamental": fundamental,
        "news": news,
        "prior": prior,
        "combined": combined,
        "reason": reason,
        "overall_comment": overall_comment,
        "top_neg": top_neg,
        "items": items,
        "matches": cand or [],
    }
    return result

# -----------------------------
# 8) CLI (입력/후보선택/출력 전담)
# -----------------------------
def main():
    """Interactive CLI: prompt for a keyword, let the user pick a company, print the analysis.

    Exits with status 1 when stdin is closed, the keyword is empty, the DART
    lookup fails or the analysis raises; exits with status 0 when no company
    matches the keyword.
    """
    print("🔎 검색할 기업명 일부를 입력하세요:", flush=True)
    try:
        keyword = input("> ").strip()
    except EOFError:
        print("❌ 표준입력이 없어 대기 중이었을 수 있습니다. 터미널에서 실행하거나 파이프라인 입력을 제공하세요.")
        sys.exit(1)

    if not keyword:
        print("❌ 기업명 키워드를 입력하세요.")
        sys.exit(1)

    # Candidate lookup
    try:
        corps = load_corp_list_from_dart(DART_API_KEY)
        cand = find_corp_candidates(corps, keyword, limit=20)
    except Exception as e:
        print("❌ DART 기업목록/후보 조회 실패:", e)
        sys.exit(1)

    if not cand:
        print("❌ 해당 키워드로 기업을 찾지 못했습니다.")
        sys.exit(0)

    if len(cand) == 1:
        choice_idx = 0
        print(f"✅ 자동 선택: {summarize_company(cand[0])}", flush=True)
    else:
        print("\n다음 중 기업을 선택하세요:")
        for i, c in enumerate(cand):
            print(f"[{i}] {summarize_company(c)}")
        while True:
            try:
                _in = input("번호 입력: ").strip()
            except EOFError:
                # A closed stdin raises EOFError on every read. The previous
                # blanket `except Exception: pass` swallowed it, so the loop
                # never terminated.
                print("❌ 표준입력이 종료되어 기업을 선택할 수 없습니다.")
                sys.exit(1)
            try:
                choice_idx = int(_in)
            except ValueError:
                choice_idx = -1  # not a number: fall through to the re-prompt
            if 0 <= choice_idx < len(cand):
                break
            print("유효한 번호를 입력하세요.", flush=True)

    corp = cand[choice_idx]

    # Analyse exactly the corporation the user selected
    try:
        result = run_once_with_corp(corp, cand=cand)
    except Exception as e:
        print("❌ 실행 실패:", e)
        sys.exit(1)

    if not result.get("company"):
        print(result.get("message","기업을 찾지 못했습니다."))
        sys.exit(0)

    # Report
    corp = result["company"]
    news = result["news"]
    fundamental = result["fundamental"]
    combined = result["combined"]

    print(f"\n📰 수집된 뉴스: {news['news_count']}건 (최근 {DEFAULT_DAYS}일)")
    print("\n===== 결과 =====")
    print("기업:", summarize_company(corp))
    print(f"[뉴스] 부정비율(스무딩): {news['neg_ratio']*100:.1f}% | 레드키워드: {news['red_hits']}")
    print(f"[펀더멘털] flag: {fundamental.get('flag',0)} | 사유: {', '.join(fundamental.get('reasons',[])) or '없음'}")
    print(f"[종합판정] 라벨: {combined['final_label']} | 총점: {combined['total_score']}/{MAX_TOTAL_SCORE} | 위험도(정규화): {combined['risk_pct']:.1f}%")
    print(f"이유: {result['reason']}")
    print(f"총평: {result['overall_comment']}")

    top_neg = result.get("top_neg", [])
    if top_neg:
        print("\n⚠️ 부정 확률 높은 기사 Top 3")
        for p, it in top_neg:
            # Trim the ISO timestamp to "YYYY-MM-DD HH:MM:SS".
            dt = it.get("date","")[:19].replace("T"," ")
            print(f"- {p*100:5.1f}% | {dt} | {it.get('title','').strip()}")

    print("\n(메모) NAVER 실패 시 Google RSS 백업, FinBERT 지연/실패 시 백업룰 즉시 사용.")
    if NO_FINBERT:
        print("(메모) NO_FINBERT=1 환경변수로 FinBERT 비활성화 상태입니다.")

# Start the interactive CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


🔎 검색할 기업명 일부를 입력하세요:
> 스마일게이트
⏳ DART 기업목록 로딩 중...
✅ 기업목록 113,057건 로드 완료
🔎 후보 검색: '스마일게이트'
✅ 후보 8건

다음 중 기업을 선택하세요:
[0] 스마일게이트게임즈 (비상장, corp_code 00934275)
[1] 스마일게이트스토브 (비상장, corp_code 01205107)
[2] 스마일게이트알피지 (비상장, corp_code 00961756)
[3] 스마일게이트홀딩스 (비상장, corp_code 00868194)
[4] 스마일게이트메가포트 (비상장, corp_code 01015911)
[5] 스마일게이트자산운용 (비상장, corp_code 01314104)
[6] 스마일게이트인베스트먼트 (비상장, corp_code 00432272)
[7] 스마일게이트엔터테인먼트 (비상장, corp_code 00809049)
번호 입력: 3
⏳ NAVER 뉴스 수집: '스마일게이트홀딩스'
✅ NAVER 40건
⏳ FinBERT 로딩 시도(최대 10s)...
✅ FinBERT 로딩 완료

📰 수집된 뉴스: 40건 (최근 50일)

===== 결과 =====
기업: 스마일게이트홀딩스 (비상장, corp_code 00868194)
[뉴스] 부정비율(스무딩): 2.3% | 레드키워드: 0
[펀더멘털] flag: 0 | 사유: 없음
[종합판정] 라벨: 긍정 | 총점: 0/8 | 위험도(정규화): 0.0%
이유: 최근 보도에서 부정 신호가 드물어(2.3%·레드키워드 0건) 전반적으로 긍정입니다.
총평: 위험도 0.0%(낮음)로 긍정 판단. 기본 유지, 이슈 발생 감시.

(메모) NAVER 실패 시 Google RSS 백업, FinBERT 지연/실패 시 백업룰 즉시 사용.
