In [1]:
# pip install -U langchain langgraph langchain-openai tiktoken transformers torch rank-bm25 joblib scikit-learn
import os
import json
import re
import requests
import joblib
import numpy as np
import torch
from typing import TypedDict, List, Optional, Dict, Any, Tuple
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModel
from rank_bm25 import BM25Okapi

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage

# ---------- 0) Config ----------
load_dotenv()
MODEL_NAME = "gpt-4o-mini"
naver_client_id = os.getenv("NAVER_CLIENT_ID")
naver_client_secret = os.getenv("NAVER_CLIENT_SECRET")
openai_api_key = os.getenv("OPENAI_API_KEY")

# ---------- 1) State ----------
class AgentState(TypedDict):
    messages: List[BaseMessage]
    next: Optional[str]
    router_json: Optional[Dict[str, Any]]
    parsed: Optional[Dict[str, Any]]  # LLM_parser 결과
    filtered: Optional[Dict[str, Any]]  # 메타필터링 결과
    recommendations: Optional[List[Dict]]  # ML_agent 결과
    price_info: Optional[str]  # price_agent 결과

# ---------- 2) Meta Filtering Functions ----------
def filter_brand(brand_value):
    """브랜드 필터링 함수"""
    valid_brands = [
        '겔랑', '구찌', '끌로에', '나르시소 로드리게즈', '니샤네', '도르세', '디올', '딥티크', '랑콤',
        '로라 메르시에', '로에베', '록시땅', '르 라보', '메모', '메종 마르지엘라', '메종 프란시스 커정',
        '멜린앤게츠', '미우미우', '바이레도', '반클리프 아펠', '버버리', '베르사체', '불가리', '비디케이',
        '산타 마리아 노벨라', '샤넬', '세르주 루텐', '시슬리 코스메틱', '아쿠아 디 파르마', '에따 리브르 도량쥬',
        '에르메스', '에스티 로더', '엑스 니힐로', '이니시오 퍼퓸', '이솝', '입생로랑', '제르조프', '조 말론',
        '조르지오 아르마니', '줄리엣 헤즈 어 건', '지방시', '질 스튜어트', '크리드', '킬리안', '톰 포드',
        '티파니앤코', '퍼퓸 드 말리', '펜할리곤스', '프라다', '프레데릭 말'
    ]
    
    if brand_value is None:
        return None
    
    return brand_value if brand_value in valid_brands else None


def filter_concentration(concentration_value):
    """농도 필터링 함수"""
    valid_concentrations = ['솔리드 퍼퓸', '엑스트레 드 퍼퓸', '오 드 뚜왈렛', '오 드 코롱', '오 드 퍼퓸', '퍼퓸']
    
    if concentration_value is None:
        return None
    
    return concentration_value if concentration_value in valid_concentrations else None


def filter_day_night_score(day_night_value):
    """사용시간 필터링 함수"""
    valid_day_night = ["day", "night"]
    
    if day_night_value is None:
        return None
    
    # 쉼표로 분리된 값들 처리
    if isinstance(day_night_value, str) and ',' in day_night_value:
        values = [v.strip() for v in day_night_value.split(',')]
        filtered_values = [v for v in values if v in valid_day_night]
        return ','.join(filtered_values) if filtered_values else None
    
    return day_night_value if day_night_value in valid_day_night else None


def filter_gender(gender_value):
    """성별 필터링 함수"""
    valid_genders = ['Female', 'Male', 'Unisex', 'unisex ']
    
    if gender_value is None:
        return None
    
    return gender_value if gender_value in valid_genders else None


def filter_season_score(season_value):
    """계절 필터링 함수"""
    valid_seasons = ['winter', 'spring', 'summer', 'fall']
    
    if season_value is None:
        return None
    
    return season_value if season_value in valid_seasons else None


def filter_sizes(sizes_value):
    """용량 필터링 함수"""
    valid_sizes = ['50', '75']
    
    if sizes_value is None:
        return None
    
    # 숫자만 추출 (예: "50ml" -> "50")
    if isinstance(sizes_value, str):
        import re
        numbers = re.findall(r'\d+', sizes_value)
        for num in numbers:
            if num in valid_sizes:
                return num
    
    return sizes_value if str(sizes_value) in valid_sizes else None


def apply_meta_filters(parsed_json):
    """전체 JSON에 메타필터링 적용"""
    if not parsed_json or "error" in parsed_json:
        return parsed_json
    
    filtered_result = {
        'brand': filter_brand(parsed_json.get('brand')),
        'concentration': filter_concentration(parsed_json.get('concentration')),
        'day_night_score': filter_day_night_score(parsed_json.get('day_night_score')),
        'gender': filter_gender(parsed_json.get('gender')),
        'season_score': filter_season_score(parsed_json.get('season_score')),
        'sizes': filter_sizes(parsed_json.get('sizes'))
    }
    
    return filtered_result

# ---------- 3) PerfumeRecommender Class ----------
class PerfumeRecommender:
    """향수 추천 시스템 클래스"""
    
    def __init__(self, 
                 model_pkl_path: str = "./models.pkl", 
                 perfume_json_path: str = "perfumes.json",
                 model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
                 max_len: int = 256):
        
        self.model_name = model_name
        self.max_len = max_len
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"[Device] {self.device}")
        
        # 모델 및 데이터 로드
        self._load_ml_model(model_pkl_path)
        self._load_transformer_model()
        self._load_perfume_data(perfume_json_path)
        self._build_bm25_index()
    
    def _load_ml_model(self, pkl_path: str):
        """저장된 ML 모델 불러오기"""
        try:
            data = joblib.load(pkl_path)
            self.clf = data["classifier"]
            self.mlb = data["mlb"]
            self.thresholds = data["thresholds"]
            print(f"[Loaded model from {pkl_path}]")
            print(f"Labels: {list(self.mlb.classes_)}")
        except Exception as e:
            print(f"[Warning] Could not load ML model: {e}")
            self.clf = None
            self.mlb = None
            self.thresholds = None
    
    def _load_transformer_model(self):
        """Transformer 모델 로드"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.base_model = AutoModel.from_pretrained(self.model_name).to(self.device)
            self.base_model.eval()
        except Exception as e:
            print(f"[Warning] Could not load transformer model: {e}")
            self.tokenizer = None
            self.base_model = None
    
    def _load_perfume_data(self, json_path: str):
        """향수 데이터 로드"""
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                self.perfumes = json.load(f)
            print(f"[Loaded {len(self.perfumes)} perfumes from {json_path}]")
        except Exception as e:
            print(f"[Warning] Could not load perfume data: {e}")
            self.perfumes = []
    
    def _build_bm25_index(self):
        """BM25 인덱스 구축"""
        if not self.perfumes:
            self.bm25 = None
            return
        
        self.corpus = [item.get("fragrances", "") for item in self.perfumes]
        tokenized_corpus = [doc.lower().split() for doc in self.corpus]
        self.bm25 = BM25Okapi(tokenized_corpus)
        print("[BM25 index built]")
    
    def encode_texts(self, texts: List[str], batch_size: int = 32) -> np.ndarray:
        """텍스트를 임베딩으로 변환"""
        if not self.tokenizer or not self.base_model:
            return np.array([])
            
        all_embeddings = []
        
        for i in range(0, len(texts), batch_size):
            batch = texts[i:i+batch_size]
            enc = self.tokenizer(
                batch, 
                padding=True, 
                truncation=True, 
                max_length=self.max_len, 
                return_tensors="pt"
            ).to(self.device)
            
            with torch.no_grad():
                model_out = self.base_model(**enc)
                emb = model_out.last_hidden_state.mean(dim=1)
            
            all_embeddings.append(emb.cpu().numpy())
        
        return np.vstack(all_embeddings)
    
    def predict_labels(self, text: str, topk: int = 3, use_thresholds: bool = True) -> List[str]:
        """텍스트에서 향수 라벨 예측"""
        if not self.clf or not self.mlb:
            return []
            
        emb = self.encode_texts([text], batch_size=1)
        if emb.size == 0:
            return []
            
        proba = self.clf.predict_proba(emb)[0]
        
        if use_thresholds and self.thresholds:
            pick = [
                i for i, p in enumerate(proba) 
                if p >= self.thresholds.get(self.mlb.classes_[i], 0.5)
            ]
            if not pick:
                pick = np.argsort(-proba)[:topk]
        else:
            pick = np.argsort(-proba)[:topk]
        
        return [self.mlb.classes_[i] for i in pick]
    
    def search_perfumes(self, labels: List[str], top_n: int = 5) -> List[Tuple[int, float, Dict]]:
        """BM25를 사용해 향수 검색"""
        if not self.bm25 or not labels:
            return []
            
        query = " ".join(labels)
        tokenized_query = query.lower().split()
        scores = self.bm25.get_scores(tokenized_query)
        
        top_idx = np.argsort(scores)[-top_n:][::-1]
        
        results = []
        for idx in top_idx:
            results.append((idx, scores[idx], self.perfumes[idx]))
        
        return results
    
    def filter_perfumes_by_meta(self, perfumes: List[Dict], filters: Dict[str, Any]) -> List[Dict]:
        """메타 필터로 향수 리스트 필터링"""
        if not filters or not perfumes:
            return perfumes
            
        filtered = []
        for perfume in perfumes:
            include = True
            
            # 각 필터 조건 확인
            if filters.get('brand') and perfume.get('brand') != filters['brand']:
                include = False
            if filters.get('concentration') and perfume.get('concentration') != filters['concentration']:
                include = False
            if filters.get('gender') and perfume.get('gender') != filters['gender']:
                include = False
            if filters.get('season_score') and perfume.get('season_score') != filters['season_score']:
                include = False
            if filters.get('sizes') and str(perfume.get('sizes', '')) != str(filters['sizes']):
                include = False
            if filters.get('day_night_score'):
                perfume_day_night = perfume.get('day_night_score', '')
                if filters['day_night_score'] not in str(perfume_day_night):
                    include = False
            
            if include:
                filtered.append(perfume)
                
        return filtered
    
    
    def recommend(self, 
                  user_text: str, 
                  meta_filters: Optional[Dict[str, Any]] = None,
                  topk_labels: int = 4, 
                  top_n_perfumes: int = 5,
                  use_thresholds: bool = True,
                  use_vector_search: bool = True) -> Dict:
        """전체 추천 파이프라인 (Vector DB + BM25 하이브리드)"""
        
        # 1. ML 모델로 라벨 예측
        predicted_labels = self.predict_labels(
            user_text, 
            topk=topk_labels, 
            use_thresholds=use_thresholds
        )
        
        candidate_perfumes = []
        
        # 2. Vector 검색 (Pinecone)
        if use_vector_search and self.pinecone_index:
            query_embedding = self.encode_texts([user_text])
            if query_embedding.size > 0:
                vector_results = self.search_perfumes_vector(query_embedding, top_k=top_n_perfumes * 2)
                # Vector 검색 결과를 perfumes.json과 매칭
                for result in vector_results:
                    # metadata에서 향수 정보 추출 또는 ID로 매칭
                    # 여기서 어떻게 vector 결과를 perfumes.json과 연결할지 확인이 필요합니다
                    pass
        
        # 3. BM25 검색 (Fallback 또는 보완)
        bm25_results = self.search_perfumes_bm25(predicted_labels, top_n=top_n_perfumes * 2)
        bm25_perfumes = [perfume for idx, score, perfume in bm25_results]
        candidate_perfumes.extend(bm25_perfumes)
        
        # 4. 메타필터링 적용
        if meta_filters:
            active_filters = {k: v for k, v in meta_filters.items() if v is not None}
            if active_filters:
                candidate_perfumes = self.filter_perfumes_by_meta(candidate_perfumes, active_filters)
        
        # 5. 최종 결과 생성
        final_recommendations = candidate_perfumes[:top_n_perfumes]
        
        return {
            "user_input": user_text,
            "predicted_labels": predicted_labels,
            "meta_filters_applied": meta_filters or {},
            "total_candidates": len(candidate_perfumes),
            "search_method": "hybrid" if use_vector_search else "bm25_only",
            "recommendations": [
                {
                    "rank": rank,
                    "brand": perfume.get('brand', 'N/A'),
                    "name": perfume.get('name_perfume', 'N/A'),
                    "fragrances": perfume.get('fragrances', 'N/A'),
                    "gender": perfume.get('gender', 'N/A'),
                    "concentration": perfume.get('concentration', 'N/A'),
                    "sizes": perfume.get('sizes', 'N/A'),
                    "season_score": perfume.get('season_score', 'N/A'),
                    "day_night_score": perfume.get('day_night_score', 'N/A'),
                    "perfume_data": perfume
                }
                for rank, perfume in enumerate(final_recommendations, 1)
            ]
        }

# ---------- 4) Global Instances ----------
# 전역 인스턴스 (실제 사용시에는 더 나은 방법으로 관리)
perfume_recommender = None

def initialize_recommender():
    """추천 시스템 초기화"""
    global perfume_recommender
    if perfume_recommender is None:
        perfume_recommender = PerfumeRecommender()

# ---------- 5) Tools ----------
@tool
def price_tool(user_query: str) -> str:
    """A tool that uses the Naver Shopping API to look up perfume prices"""
    
    url = "https://openapi.naver.com/v1/search/shop.json"
    headers = {
        "X-Naver-Client-Id": naver_client_id,
        "X-Naver-Client-Secret": naver_client_secret
    }
    params = {"query": user_query, "display": 5, "sort": "sim"}
    
    try:
        response = requests.get(url, headers=headers, params=params)
    except Exception as e:
        return f"❌ 요청 오류: {e}"
    
    if response.status_code != 200:
        return f"❌ API 오류: {response.status_code}"
    
    data = response.json()
    if not data or "items" not in data or len(data["items"]) == 0:
        return f"😔 '{user_query}'에 대한 검색 결과가 없습니다."
    
    def remove_html_tags(text: str) -> str:
        return re.sub(r"<[^>]+>", "", text)
    
    products = data["items"][:3]
    output = f"🔍 '{user_query}' 검색 결과:\n\n"
    for i, item in enumerate(products, 1):
        title = remove_html_tags(item.get("title", ""))
        lprice = item.get("lprice", "0")
        mall = item.get("mallName", "정보 없음")
        link = item.get("link", "정보 없음")
        
        output += f"📦 {i}. {title}\n"
        if lprice != "0":
            output += f"   💰 가격: {int(lprice):,}원\n"
        output += f"   🏪 판매처: {mall}\n"
        output += f"   🔗 링크: {link}\n\n"
    
    return output

# ---------- 6) Supervisor (Router) ----------
SUPERVISOR_SYSTEM_PROMPT = """
You are the "Perfume Recommendation Supervisor (Router)". Analyze the user's query (Korean or English) and route to exactly ONE agent below.

[Agents]
- LLM_parser         : Parses/normalizes multi-facet queries (2+ product facets).
- FAQ_agent          : Perfume knowledge / definitions / differences / general questions.
- human_fallback     : Non-perfume or off-topic queries.
- price_agent        : Price-only intents (cheapest, price, buy, discount, etc.).
- ML_agent           : Single-preference recommendations (mood/season vibe like "fresh summer", "sweet", etc.).

[Facets to detect ("product facets")]
- brand            (e.g., Chanel, Dior, Creed)
- season           (spring/summer/fall/winter; "for summer/winter")
- gender           (male/female/unisex)
- sizes            (volume in ml: 30/50/100 ml)
- day_night_score  (day/night/daily/office/club, etc.)
- concentration    (EDT/EDP/Extrait/Parfum/Cologne)

[Price intent keywords (not exhaustive)]
- Korean: 가격, 최저가, 얼마, 가격대, 구매, 판매, 할인, 어디서 사, 배송비
- English: price, cost, cheapest, buy, purchase, discount

[FAQ examples]
- Differences between EDP vs EDT, note definitions, longevity/projection, brand/line info.

[Single-preference (ML_agent) examples]
- "Recommend a cool perfume for summer", "Recommend a sweet scent", "One citrusy fresh pick"
  (= 0–1 of the above facets mentioned; primarily taste/mood/situation).

[Routing rules (priority)]
1) Non-perfume / off-topic → human_fallback
2) Clear price-only intent (even if one facet is present as context) → price_agent
   e.g., "Chanel No. 5 50ml cheapest price?" → price_agent
3) Count product facets in the query:
   - If facets ≥ 2 → LLM_parser
4) Otherwise (single-topic queries):
   - Perfume knowledge/definitions → FAQ_agent
   - Single taste/mood recommendation → ML_agent
5) Tie-breakers:
   - If price intent is clear → price_agent
   - If facets ≥ 2 → LLM_parser
   - Else: knowledge → FAQ_agent, taste → ML_agent

[Output format]
Return ONLY this JSON (no extra text):
{{
  "next": "<LLM_parser|FAQ_agent|human_fallback|price_agent|ML_agent>",
  "reason": "<one short English sentence>",
  "facet_count": <integer>,
  "facets": {{
    "brand": "<value or null>",
    "season": "<value or null>",
    "gender": "<value or null>",
    "sizes": "<value or null>",
    "day_night_score": "<value or null>",
    "concentration": "<value or null>"
  }},
  "scent_vibe": "<value if detected, else null>",
  "query_intent": "<price|faq|scent_pref|non_perfume|other>"
}}
""".strip()

llm = ChatOpenAI(model=MODEL_NAME, temperature=0)
router_prompt = ChatPromptTemplate.from_messages([
    ("system", SUPERVISOR_SYSTEM_PROMPT),
    ("user", "{query}")
])

def supervisor_node(state: AgentState) -> AgentState:
    """Call the router LLM and return parsed JSON + routing target."""
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    if not user_query:
        user_query = "(empty)"

    chain = router_prompt | llm
    ai = chain.invoke({"query": user_query})
    text = ai.content

    chosen = "human_fallback"
    parsed: Dict[str, Any] = {}
    try:
        parsed = json.loads(text)
        maybe = parsed.get("next")
        if isinstance(maybe, str) and maybe in {"LLM_parser","FAQ_agent","human_fallback","price_agent","ML_agent"}:
            chosen = maybe
    except Exception:
        parsed = {"error": "invalid_json", "raw": text}

    msgs = state["messages"] + [AIMessage(content=text)]
    return {
        "messages": msgs,
        "next": chosen,
        "router_json": parsed,
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": state.get("recommendations"),
        "price_info": state.get("price_info")
    }

# ---------- 7) Agent Nodes ----------
def llm_parser_node(state: AgentState) -> AgentState:
    """복합 쿼리 파싱 및 메타필터링"""
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    
    if not user_query:
        user_query = "(empty)"

    # LangChain LLM 사용으로 변경
    parse_prompt = ChatPromptTemplate.from_messages([
        ("system", """너는 향수 쿼리 파서야.
사용자의 질문에서 다음 정보를 JSON 형식으로 추출해줘:
- brand: 브랜드명 (예: 샤넬, 디올, 입생로랑 등)
- concentration: 농도 (오 드 퍼퓸, 오 드 뚜왈렛, 퍼퓸, 코롱 등)
- day_night_score: 사용시간 (day, night 등)
- gender: 성별 (Female, Male, Unisex 등)
- season_score: 계절 (spring, summer, fall, winter)
- sizes: 용량 (50, 75 등 숫자만)

없는 값은 null로 두고, 반드시 유효한 JSON 형식으로만 응답해줘.

예시:
{{"brand": "샤넬", "gender": "Female", "sizes": "50", "season_score": "winter", "concentration": null, "day_night_score": null}}"""),
        ("user", "{query}")
    ])
    
    try:
        chain = parse_prompt | llm
        ai_response = chain.invoke({"query": user_query})
        
        # JSON 파싱 시도
        response_text = ai_response.content.strip()
        
        # JSON 부분만 추출 (마크다운 코드블록 제거)
        if "```json" in response_text:
            response_text = response_text.split("```json")[1].split("```")[0].strip()
        elif "```" in response_text:
            response_text = response_text.split("```")[1].strip()
        
        parsed_result = json.loads(response_text)
        
        # 필드 검증
        expected_fields = ["brand", "concentration", "day_night_score", "gender", "season_score", "sizes"]
        for field in expected_fields:
            if field not in parsed_result:
                parsed_result[field] = None
                
    except json.JSONDecodeError as e:
        parsed_result = {"error": f"JSON 파싱 오류: {str(e)}", "raw_response": response_text}
    except Exception as e:
        parsed_result = {"error": f"파싱 중 오류: {str(e)}"}
    
    # 메타필터링 적용
    if "error" not in parsed_result:
        filtered_result = apply_meta_filters(parsed_result)
    else:
        filtered_result = parsed_result
    
    # 결과 포맷팅 - 파싱만 하고 추천은 안함
    result_text = f"🔍 **쿼리 분석 완료**\n\n"
    if "error" not in parsed_result:
        # 원본 파싱 결과 표시
        result_text += "**🎯 파싱된 조건:**\n"
        found_original = False
        for key, value in parsed_result.items():
            if value and value != "null":
                korean_labels = {
                    "brand": "브랜드",
                    "concentration": "농도", 
                    "day_night_score": "사용시간",
                    "gender": "성별",
                    "season_score": "계절", 
                    "sizes": "용량"
                }
                result_text += f"• {korean_labels.get(key, key)}: {value}\n"
                found_original = True
        
        if not found_original:
            result_text += "• 구체적인 조건이 감지되지 않았습니다.\n"
        
        # 필터링된 결과 표시
        result_text += "\n**✅ 유효한 조건 (메타필터링 적용):**\n"
        found_filtered = False
        for key, value in filtered_result.items():
            if value and value != "null":
                korean_labels = {
                    "brand": "브랜드",
                    "concentration": "농도", 
                    "day_night_score": "사용시간",
                    "gender": "성별",
                    "season_score": "계절", 
                    "sizes": "용량"
                }
                result_text += f"• {korean_labels.get(key, key)}: {value}\n"
                found_filtered = True
        
        if not found_filtered:
            result_text += "• 유효한 조건이 없습니다.\n"
            
    else:
        result_text += f"❌ 파싱 오류: {parsed_result['error']}"
    
    msgs = state["messages"] + [AIMessage(content=result_text)]
    return {
        "messages": msgs,
        "next": None,  # LLM_parser는 독립적으로 종료
        "router_json": state.get("router_json"),
        "parsed": parsed_result,
        "filtered": filtered_result,
        "recommendations": state.get("recommendations"),
        "price_info": state.get("price_info")
    }

def faq_agent_node(state: AgentState) -> AgentState:
    """향수 FAQ 처리"""
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    
    # 간단한 FAQ 매칭
    faq_responses = {
        "edp": "🌟 **EDP vs EDT 차이점**\n\nEDP (Eau de Parfum): 향료 농도 15-20%, 지속시간 6-8시간\nEDT (Eau de Toilette): 향료 농도 5-15%, 지속시간 3-5시간\n\nEDP가 더 진하고 오래가지만, EDT가 더 가볍고 상쾌해요!",
        "노트": "🎵 **향수 노트 구성**\n\n• **탑노트**: 첫 인상 (5-15분)\n• **미들노트**: 메인 향 (30분-2시간)\n• **베이스노트**: 마무리 향 (2시간 이상)\n\n시간이 지나면서 향이 변화하는 것이 향수의 매력이에요!",
        "지속": "⏰ **향수 지속시간**\n\n• Parfum: 8시간 이상\n• EDP: 6-8시간\n• EDT: 3-5시간\n• EDC: 2-3시간\n\n피부타입과 보관상태에 따라 차이가 있어요!"
    }
    
    response = "❓ **향수 관련 질문**\n\n구체적인 질문을 해주시면 더 정확한 답변을 드릴 수 있어요!\n\n💡 예시: EDP와 EDT 차이, 노트 구성, 지속시간 등"
    
    query_lower = user_query.lower()
    for keyword, answer in faq_responses.items():
        if keyword in query_lower:
            response = answer
            break
    
    msgs = state["messages"] + [AIMessage(content=response)]
    return {
        "messages": msgs,
        "next": None,
        "router_json": state.get("router_json"),
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": state.get("recommendations"),
        "price_info": state.get("price_info")
    }

def human_fallback_node(state: AgentState) -> AgentState:
    """일반적인 폴백"""
    response = "❓ 향수와 관련된 질문을 해주세요!\n\n💡 예시:\n• 향수 추천\n• 향수 가격 문의\n• 향수 지식 문답"
    
    msgs = state["messages"] + [AIMessage(content=response)]
    return {
        "messages": msgs,
        "next": None,
        "router_json": state.get("router_json"),
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": state.get("recommendations"),
        "price_info": state.get("price_info")
    }

def price_agent_node(state: AgentState) -> AgentState:
    """가격 조회"""
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    
    if not user_query:
        user_query = "향수"
    
    if not naver_client_id or not naver_client_secret:
        price_result = "❌ 네이버 API 설정이 필요합니다."
    else:
        price_result = price_tool.invoke({"user_query": user_query})
    
    msgs = state["messages"] + [AIMessage(content=price_result)]
    return {
        "messages": msgs,
        "next": None,
        "router_json": state.get("router_json"),
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": state.get("recommendations"),
        "price_info": price_result
    }

def ml_agent_node(state: AgentState) -> AgentState:
    """ML 기반 향수 추천 (메타필터링 포함)"""
    initialize_recommender()
    
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    
    if not user_query:
        user_query = "향수 추천"
    
    # 이전 단계에서 파싱된 필터 정보 가져오기 (LLM_parser를 거쳐온 경우)
    meta_filters = state.get("filtered")
    
    if perfume_recommender is None:
        response = "❌ 추천 시스템을 로드할 수 없습니다."
        recommendations = []
    else:
        try:
            result = perfume_recommender.recommend(
                user_text=user_query,
                meta_filters=meta_filters,
                topk_labels=4,
                top_n_perfumes=5,
                use_thresholds=True
            )
            
            recommendations = result.get("recommendations", [])
            predicted_labels = result.get("predicted_labels", [])
            meta_filters_applied = result.get("meta_filters_applied", {})
            total_candidates = result.get("total_candidates", 0)
            
            if recommendations:
                response = f"🎯 **'{user_query}' 추천 결과**\n\n"
                response += f"🏷️ **예측된 향 특성**: {', '.join(predicted_labels)}\n"
                
                # 적용된 메타필터 표시
                if meta_filters_applied and any(v for v in meta_filters_applied.values() if v is not None):
                    response += f"🔍 **적용된 필터**: "
                    active_filters = []
                    korean_labels = {
                        "brand": "브랜드",
                        "concentration": "농도",
                        "day_night_score": "사용시간", 
                        "gender": "성별",
                        "season_score": "계절",
                        "sizes": "용량"
                    }
                    for k, v in meta_filters_applied.items():
                        if v is not None:
                            active_filters.append(f"{korean_labels.get(k, k)}={v}")
                    response += ", ".join(active_filters) + "\n"
                
                response += f"📊 **총 후보**: {total_candidates}개 향수\n\n"
                
                for rec in recommendations:
                    response += f"🌟 **{rec['rank']}위**\n"
                    response += f"   브랜드: {rec['brand']}\n"
                    response += f"   제품명: {rec['name']}\n"
                    response += f"   향 특성: {rec['fragrances']}\n"
                    response += f"   성별: {rec['gender']}, 농도: {rec['concentration']}\n"
                    response += f"   계절: {rec['season_score']}, 용량: {rec['sizes']}ml\n"
                    response += f"   사용시간: {rec['day_night_score']}\n\n"
            else:
                response = "😔 조건에 맞는 향수를 찾지 못했습니다."
                if meta_filters_applied:
                    response += "\n💡 필터 조건을 완화해 보시거나, 다른 키워드로 검색해보세요."
                
        except Exception as e:
            response = f"❌ 추천 중 오류가 발생했습니다: {str(e)}"
            recommendations = []
    
    msgs = state["messages"] + [AIMessage(content=response)]
    return {
        "messages": msgs,
        "next": None,
        "router_json": state.get("router_json"),
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": recommendations,
        "price_info": state.get("price_info")
    }

# ---------- 8) Final Answer Node ----------
def final_answer_node(state: AgentState) -> AgentState:
    """최종 답변 통합"""
    user_query = None
    for m in reversed(state["messages"]):
        if isinstance(m, HumanMessage):
            user_query = m.content
            break
    
    # 이전 단계들의 결과를 통합
    filtered_data = state.get("filtered")
    recommendations = state.get("recommendations")
    router_decision = state.get("router_json", {}).get("next", "unknown")
    
    # 파싱 결과가 있고 추천이 필요한 경우
    if filtered_data and "error" not in filtered_data and not recommendations:
        # ML 추천 실행
        initialize_recommender()
        
        if perfume_recommender is not None:
            try:
                result = perfume_recommender.recommend(
                    user_text=user_query,
                    meta_filters=filtered_data,
                    topk_labels=4,
                    top_n_perfumes=5,
                    use_thresholds=True
                )
                
                recommendations = result.get("recommendations", [])
                predicted_labels = result.get("predicted_labels", [])
                meta_filters_applied = result.get("meta_filters_applied", {})
                total_candidates = result.get("total_candidates", 0)
                
                if recommendations:
                    final_response = f"🎯 **'{user_query}' 최종 추천 결과**\n\n"
                    final_response += f"🏷️ **예측된 향 특성**: {', '.join(predicted_labels)}\n"
                    
                    # 적용된 메타필터 표시
                    if meta_filters_applied and any(v for v in meta_filters_applied.values() if v is not None):
                        final_response += f"🔍 **적용된 필터**: "
                        active_filters = []
                        korean_labels = {
                            "brand": "브랜드",
                            "concentration": "농도",
                            "day_night_score": "사용시간", 
                            "gender": "성별",
                            "season_score": "계절",
                            "sizes": "용량"
                        }
                        for k, v in meta_filters_applied.items():
                            if v is not None:
                                active_filters.append(f"{korean_labels.get(k, k)}={v}")
                        final_response += ", ".join(active_filters) + "\n"
                    
                    final_response += f"📊 **총 후보**: {total_candidates}개 향수\n\n"
                    
                    for rec in recommendations:
                        final_response += f"🌟 **{rec['rank']}위**\n"
                        final_response += f"   브랜드: {rec['brand']}\n"
                        final_response += f"   제품명: {rec['name']}\n"
                        final_response += f"   향 특성: {rec['fragrances']}\n"
                        final_response += f"   성별: {rec['gender']}, 농도: {rec['concentration']}\n"
                        final_response += f"   계절: {rec['season_score']}, 용량: {rec['sizes']}ml\n"
                        final_response += f"   사용시간: {rec['day_night_score']}\n\n"
                        
                    final_response += "💡 **추가 정보가 필요하시면 언제든 말씀해 주세요!**"
                else:
                    final_response = "😔 조건에 맞는 향수를 찾지 못했습니다.\n💡 필터 조건을 완화해 보시거나, 다른 키워드로 검색해보세요."
                
            except Exception as e:
                final_response = f"❌ 추천 중 오류가 발생했습니다: {str(e)}"
        else:
            final_response = "❌ 추천 시스템을 로드할 수 없습니다."
    else:
        # 기본적으로 마지막 메시지를 최종 답변으로 사용
        final_response = ""
        for m in reversed(state["messages"]):
            if isinstance(m, AIMessage) and m.content.strip():
                final_response = m.content
                break
        
        if not final_response:
            final_response = "죄송합니다. 적절한 답변을 생성할 수 없었습니다."
    
    msgs = state["messages"] + [AIMessage(content=final_response)]
    return {
        "messages": msgs,
        "next": None,
        "router_json": state.get("router_json"),
        "parsed": state.get("parsed"),
        "filtered": state.get("filtered"),
        "recommendations": recommendations,
        "price_info": state.get("price_info")
    }

# ---------- 9) Build Graph ----------
graph = StateGraph(AgentState)

# 모든 노드 추가
graph.add_node("supervisor", supervisor_node)
graph.add_node("LLM_parser", llm_parser_node)
graph.add_node("FAQ_agent", faq_agent_node)
graph.add_node("human_fallback", human_fallback_node)
graph.add_node("price_agent", price_agent_node)
graph.add_node("ML_agent", ml_agent_node)
graph.add_node("final_answer", final_answer_node)

# 시작점 설정
graph.set_entry_point("supervisor")

# supervisor에서 각 에이전트로 라우팅
def router_edge(state: AgentState) -> str:
    return state["next"] or "human_fallback"

graph.add_conditional_edges(
    "supervisor",
    router_edge,
    {
        "LLM_parser": "LLM_parser",
        "FAQ_agent": "FAQ_agent", 
        "human_fallback": "human_fallback",
        "price_agent": "price_agent",
        "ML_agent": "ML_agent",
    },
)

# LLM_parser는 final_answer로 이동 (추천 로직은 final_answer에서 처리)
graph.add_edge("LLM_parser", "final_answer")

# 다른 에이전트들은 바로 종료
graph.add_edge("FAQ_agent", END)
graph.add_edge("human_fallback", END) 
graph.add_edge("price_agent", END)
graph.add_edge("ML_agent", END)

# final_answer도 종료
graph.add_edge("final_answer", END)

app = graph.compile()

# ---------- 10) Main Function ----------
def process_query(query: str) -> Dict[str, Any]:
    """쿼리를 처리하고 결과를 반환"""
    init: AgentState = {
        "messages": [HumanMessage(content=query)],
        "next": None,
        "router_json": None,
        "parsed": None,
        "filtered": None,
        "recommendations": None,
        "price_info": None
    }
    
    result = app.invoke(init)
    
    # 결과 정리
    final_response = ""
    for msg in reversed(result["messages"]):
        if isinstance(msg, AIMessage):
            final_response = msg.content
            break
    
    return {
        "query": query,
        "response": final_response,
        "router_decision": result.get("router_json", {}),
        "parsed_data": result.get("parsed"),
        "filtered_data": result.get("filtered"),
        "recommendations": result.get("recommendations"),
        "price_info": result.get("price_info")
    }

# ---------- 11) Test Function ----------
def run_tests():
    """테스트 실행"""
    TEST_QUERIES = [
        "입생로랑 여성용 50ml 겨울용 향수 추천해줘.",
        "디올 EDP로 가을 밤(야간)에 쓸 만한 향수 있어?",
        "EDP랑 EDT 차이가 뭐야?",
        "탑노트·미들노트·베이스노트가 각각 무슨 뜻이야?",
        "오늘 점심 뭐 먹을까?",
        "샤넬 넘버5 50ml 최저가 알려줘.",
        "여름에 시원한 향수 추천해줘.",
        "달달한 향 추천해줘.",
        "샤넬 여성용 오 드 퍼퓸 추천해줘.",  # 메타필터링 테스트
        "구찌 남성용 향수 중에 봄에 쓸 만한 거 있어?",  # 복합 조건 테스트
    ]
    
    for query in TEST_QUERIES:
        print("=" * 80)
        print(f"Query: {query}")
        result = process_query(query)
        print(f"Router: {result['router_decision'].get('next', 'unknown')}")
        
        if result.get('parsed_data'):
            print(f"Parsed: {result['parsed_data']}")
        if result.get('filtered_data'):
            print(f"Filtered: {result['filtered_data']}")
            
        print(f"Response: {result['response']}")
        print()

if __name__ == "__main__":
    run_tests()

  from .autonotebook import tqdm as notebook_tqdm


Query: 입생로랑 여성용 50ml 겨울용 향수 추천해줘.
[Device] cpu


  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
  machine. Consider using `save_model/load_model` instead. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.  Changing `tree_method` to `hist`.
  setstate(state)
  setstate(state)
  setstate(state)
  setstate(state)


[Loaded model from ./models.pkl]
Labels: ['Amber', 'Aromatic', 'Blossom', 'Bouquet', 'Citrus', 'Classical', 'Crisp', 'Dry', 'Floral', 'Flower', 'Fougère', 'Fresh', 'Fresher', 'Fruity', 'Gourmand', 'Green', 'Iris', 'Jasmine', 'Lily', 'Mossy', 'Musk', 'Orange', 'Rich', 'Richer', 'Rose', 'Soft', 'Spicy', 'Tuberose', 'Valley', 'Violet', 'Water', 'White', 'Woods', 'Woody']
[Loaded 26319 perfumes from perfumes.json]
[BM25 index built]
Router: LLM_parser
Parsed: {'brand': '입생로랑', 'gender': 'Female', 'sizes': 50, 'season_score': 'winter', 'concentration': None, 'day_night_score': None}
Filtered: {'brand': '입생로랑', 'concentration': None, 'day_night_score': None, 'gender': 'Female', 'season_score': 'winter', 'sizes': 50}
Response: ❌ 추천 중 오류가 발생했습니다: 'PerfumeRecommender' object has no attribute 'pinecone_index'

Query: 디올 EDP로 가을 밤(야간)에 쓸 만한 향수 있어?
Router: LLM_parser
Parsed: {'brand': '디올', 'gender': None, 'sizes': None, 'season_score': 'fall', 'concentration': '오 드 퍼퓸', 'day_night_score': 'night'