In [None]:
!pip install -q --upgrade llama-index transformers accelerate bitsandbytes
!pip install -q llama-index-llms-huggingface llama-index-embeddings-huggingface
!pip install -q chromadb llama-index-vector-stores-chroma
!pip install -q sentence-transformers

In [None]:
!pip -q install llama-index-retrievers-bm25
!pip -q install llama-index-core

In [None]:
from google.colab import drive
drive.mount('/content/drive')


In [None]:
!hf auth login


In [None]:
# Source document on Google Drive.
# NOTE(review): the constant is named DOCX_PATH but the path ends in .pdf, and the file is
# spelled "DMS-5" while the rest of the notebook says "DSM-5" — confirm both are intended.
DOCX_PATH = "/content/drive/MyDrive/DTCNTT/DMS-5.pdf"
# JSON cache location; not referenced by the cells shown in this notebook section.
CACHE_FILE = "/content/drive/MyDrive/DTCNTT/data/cache/pipeline_cache.json"
# Directory passed to chromadb.PersistentClient(path=...) by the cells below.
INDEX_STORAGE = "/content/drive/MyDrive/DTCNTT/data/index_store_V6_maximalist"
# Name of the Chroma collection opened with get_collection(...) by the cells below.
COLLECTION_NAME = "dsm5_V6_store"

In [None]:
# --- System libraries & utilities ---
import traceback
import gc
import torch
import chromadb
torch.cuda.empty_cache()
# --- Hugging Face & Sentence Transformers ---
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sentence_transformers import CrossEncoder

# --- LlamaIndex Core & LLM ---
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode
from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

# --- LlamaIndex Retrievers & Storage ---
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import VectorIndexRetriever, QueryFusionRetriever

In [None]:
!nvidia-smi


In [None]:
def setup_llm_and_embedding():
    """Load the embedding model and the 4-bit quantized LLM and register both on ``Settings``.

    Side effects:
        * Deletes any existing ``llm``, ``model`` and ``embed_model`` globals, runs the
          garbage collector and empties the CUDA cache before loading.
        * Rebinds the module-level globals ``embed_model``, ``llm`` and ``model``.
        * Sets ``Settings.embed_model`` and ``Settings.llm``.

    Returns:
        None.
    """
    global embed_model, llm, model

    # Free VRAM held by objects from a previous run of this cell.
    print("üßπ Clearing VRAM...")
    for obj in ["llm", "model", "embed_model"]:
        if obj in globals():
            del globals()[obj]
    gc.collect()
    torch.cuda.empty_cache()
    print("‚úÖ VRAM cleared.\n")

    # ------------------------------
    # 1. Load the embedding model (kept on CPU; the GPU is left to the LLM)
    # ------------------------------
    print("‚è≥ Loading embedding...")
    embed_model = HuggingFaceEmbedding(
        model_name="AITeamVN/Vietnamese_Embedding",
        device="cpu"
    )
    Settings.embed_model = embed_model
    print("‚úÖ Embedding ready.\n")

    # ------------------------------
    # 2. Load the tokenizer from the Hugging Face Hub
    # ------------------------------
    REPO_ID = "letri345/llama3-8b-merge"  # <- change the repo name here

    print(f"‚è≥ Loading tokenizer from {REPO_ID}...")
    # NOTE(review): trust_remote_code executes code shipped with the repo; only use with trusted repos.
    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, trust_remote_code=True)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Keep the chat format used for fine-tuning. The trailing block honours
    # `add_generation_prompt`: without it the prompt ends right after the last
    # <|eot_id|>, so the model has to emit the assistant header itself and the
    # decoded reply starts with a stray "assistant".
    tokenizer.chat_template = (
        "<|begin_of_text|>"
        "{% for message in messages %}"
        "{% if message['role'] == 'user' %}"
            "<|start_header_id|>user<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>"
        "{% elif message['role'] == 'system' %}"
            "<|start_header_id|>system<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>"
        "{% elif message['role'] == 'assistant' %}"
            "<|start_header_id|>assistant<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        "{% endif %}"
    )

    # ------------------------------
    # 3. Load the model in 4-bit from the Hugging Face Hub
    # ------------------------------
    print("‚ö° Loading 4-bit model from HuggingFace Hub...")

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        REPO_ID,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True,
    )

    print("‚úÖ Model loaded successfully (4-bit).")

    # ------------------------------
    # 4. Wrap for LlamaIndex
    # ------------------------------
    eos_id = tokenizer.eos_token_id
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    # Drop missing ids and duplicates (eos and eot can be the same token) while keeping order.
    stopping_ids = [
        token_id for token_id in dict.fromkeys([eos_id, eot_id]) if token_id is not None
    ]

    llm = HuggingFaceLLM(
        model=model,
        tokenizer=tokenizer,
        context_window=8192,
        max_new_tokens=512,
        # temperature only takes effect when sampling is enabled; without
        # do_sample=True generation is greedy and the value is ignored.
        generate_kwargs={"temperature": 0.3, "do_sample": True},
        stopping_ids=stopping_ids,
    )

    Settings.llm = llm
    print("\nüéâ Llama 3 8B ƒë√£ s·∫µn s√†ng s·ª≠ d·ª•ng t·ª´ HuggingFace Hub!\n")


In [None]:
setup_llm_and_embedding()



In [None]:
!nvidia-smi

In [None]:
# Sanity check: open the persisted Chroma store at INDEX_STORAGE and print how many
# entries the COLLECTION_NAME collection reports.
# NOTE(review): get_collection is used without a create fallback, so this presumably
# fails when the collection does not exist yet — confirm for the chromadb version in use.
db = chromadb.PersistentClient(path=INDEX_STORAGE)
col = db.get_collection(COLLECTION_NAME)
print("üì¶ S·ªë l∆∞·ª£ng vectors trong Chroma:", col.count())


In [None]:
def print_page(page_number):
    """Print the first stored entry whose ``page_label`` metadata equals ``page_number``.

    The Chroma collection is opened on every call. ``page_number`` is converted to a
    string for the metadata filter. Only the first matching document is shown: its
    first 1500 characters, every metadata key/value pair, and its character and
    whitespace-separated word counts. When nothing matches, a notice is printed.

    Args:
        page_number: Page label to look up; compared as ``str(page_number)``.

    Returns:
        None.
    """
    print(f"\n==================== PAGE {page_number} ====================\n")

    collection = chromadb.PersistentClient(path=INDEX_STORAGE).get_collection(COLLECTION_NAME)
    hits = collection.get(
        where={"page_label": str(page_number)},
        include=["documents", "metadatas"],
    )

    # Guard clause: no chunk carries this page label.
    if not hits["documents"]:
        print("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y trang n√†y trong Chroma.")
        return

    page_text, page_meta = hits["documents"][0], hits["metadatas"][0]
    rule = "-" * 60

    # Page content (truncated preview)
    print("üìÑ **N·ªòI DUNG TRANG:**")
    print(rule)
    print(page_text[:1500])
    print(rule)

    # Metadata, one pair per line
    print("\nüîñ **METADATA:**")
    for field in page_meta:
        print(f"  ‚Ä¢ {field}: {page_meta[field]}")

    # Length statistics of the full (untruncated) text
    char_count = len(page_text)
    word_count = len(page_text.split())
    print("\nüî¢ **ƒê·ªò D√ÄI DOCUMENT:**")
    print(f"  ‚Ä¢ S·ªë k√Ω t·ª±: {char_count}")
    print(f"  ‚Ä¢ S·ªë t·ª´: {word_count}")

    print("\n============================================================\n")


In [None]:
print_page(40)

In [None]:
import re


def _keyword_matches(keyword: str, text: str) -> bool:
    """Return True when ``keyword`` occurs in ``text`` (both expected lower-case).

    Short pure-ASCII acronyms (at most 4 letters, e.g. "ocd", "asd", "tik") must not be
    glued to other ASCII letters, so "tik" no longer fires inside "tiktok". Digits and
    punctuation are still allowed next to them ("dsm5", "icd-10"). Every other keyword
    keeps plain substring matching.
    """
    if len(keyword) <= 4 and keyword.isascii() and keyword.isalpha():
        pattern = rf"(?<![a-z]){re.escape(keyword)}(?![a-z])"
        return re.search(pattern, text) is not None
    return keyword in text


def should_use_rag(message: str, last_user_message: str = "") -> bool:
    """Keyword gate deciding whether a user message should trigger DSM-5 retrieval.

    Decision order:
        1. Any crisis keyword in ``message`` -> True (plain substring match, kept
           deliberately permissive).
        2. Any disorder / symptom / diagnostic-intent keyword in ``message`` -> True.
        3. If ``last_user_message`` contains a disorder or symptom keyword, the message is
           treated as a follow-up -> True, unless the whole message is a stop phrase
           (ignoring surrounding ASCII punctuation and whitespace) -> False.
        4. Otherwise -> False.

    Args:
        message: Current user message.
        last_user_message: Previous user message, used only for the follow-up rule.

    Returns:
        True when retrieval should run, False otherwise. The decision is also printed.
    """
    print("üß† [Gate] ƒêang ph√¢n t√≠ch √Ω ƒë·ªãnh (Expanded)...")

    msg = message.lower().strip()
    last_msg = last_user_message.lower().strip()

    # =================================================================
    # GROUP 0: CRISIS & SAFETY
    # These keywords signal danger -> must be handled right away (force RAG or alert)
    # =================================================================
    crisis_keywords = [
        "t·ª± t·ª≠", "t·ª± s√°t", "suicidal", "mu·ªën ch·∫øt", "k·∫øt th√∫c cu·ªôc ƒë·ªùi",
        "r·∫°ch tay", "t·ª± h·∫°i", "self-harm", "nh·∫£y l·∫ßu", "u·ªëng thu·ªëc ng·ªß",
        "gi·∫øt ng∆∞·ªùi", "h·∫°i ng∆∞·ªùi", "kh√¥ng mu·ªën s·ªëng n·ªØa", "tuy·ªát v·ªçng"
    ]

    if any(k in msg for k in crisis_keywords):
        print("üß† [Gate] ‚ö†Ô∏è PH√ÅT HI·ªÜN T√çN HI·ªÜU KH·∫®N C·∫§P.")
        return True

    # =================================================================
    # GROUP 1: DISORDER NAMES - expanded following the DSM-5 chapters
    # =================================================================
    disorders = [
        # --- Mood & anxiety ---
        "tr·∫ßm c·∫£m", "depression", "u u·∫•t", "kh√≠ s·∫Øc",
        "l∆∞·ª°ng c·ª±c", "bipolar", "h∆∞ng c·∫£m", "mania", "hypomania",
        "lo √¢u", "anxiety", "ho·∫£ng lo·∫°n", "panic", "s·ª£ x√£ h·ªôi", "√°m ·∫£nh s·ª£",

        # --- OCD & stress ---
        "ocd", "√°m ·∫£nh c∆∞·ª°ng b·ª©c", "nghi th·ª©c", "t√≠ch tr·ªØ", "hoarding",
        "ptsd", "sang ch·∫•n", "h·∫≠u ch·∫•n th∆∞∆°ng", "stress c·∫•p t√≠nh", "th√≠ch ·ª©ng",

        # --- Psychotic ---
        "t√¢m th·∫ßn ph√¢n li·ªát", "schizophrenia", "hoang t∆∞·ªüng", "·∫£o gi√°c",
        "nghe ti·∫øng n√≥i", "lo·∫°n th·∫ßn", "psychosis", "catatonia", "cƒÉng tr∆∞∆°ng l·ª±c",

        # --- Neurodevelopmental ---
        "t·ª± k·ª∑", "autism", "asd", "tƒÉng ƒë·ªông", "gi·∫£m ch√∫ √Ω", "adhd",
        "khi·∫øm khuy·∫øt tr√≠ tu·ªá", "ch·∫≠m ph√°t tri·ªÉn", "tik", "tourette",

        # --- Feeding & eating ---
        "ch√°n ƒÉn", "anorexia", "ƒÉn v√¥ ƒë·ªô", "bulimia", "binge eating", "pica",

        # --- Personality ---
        "r·ªëi lo·∫°n nh√¢n c√°ch", "borderline", "ranh gi·ªõi", "√°i k·ª∑", "narcissistic",
        "ch·ªëng ƒë·ªëi x√£ h·ªôi", "antisocial", "tr√°nh n√©", "ph·ª• thu·ªôc", "ƒëa nh√¢n c√°ch",

        # --- Sleep & other ---
        "m·∫•t ng·ªß", "insomnia", "ng·ªß r≈©", "narcolepsy", "√°c m·ªông",
        "nghi·ªán", "cai nghi·ªán", "ma t√∫y", "r∆∞·ª£u", "ch·∫•t k√≠ch th√≠ch",
        "r·ªëi lo·∫°n t√¨nh d·ª•c", "lo·∫°n d·ª•c", "gi·ªõi t√≠nh", "dysphoria",
        "m·∫•t tr√≠ nh·ªõ", "alzheimer", "gi·∫£ b·ªánh"
    ]

    # =================================================================
    # GROUP 2: SYMPTOMS & PRESENTATIONS
    # Words describing specific pathological states
    # =================================================================
    symptoms = [
        "·∫£o thanh", "·∫£o ·∫£nh", "m·∫•t ki·ªÉm so√°t", "b·ªëc ƒë·ªìng", "v√¥ c·∫£m",
        "k√≠ch ƒë·ªông", "g√¢y g·ªï", "thu m√¨nh", "s·ª£ ƒë√°m ƒë√¥ng", "r·ª≠a tay li√™n t·ª•c",
        "ki·ªÉm tra li√™n t·ª•c", "nh·ªõ l·∫°i", "flashback", "√°c m·ªông",
        "m·ªát m·ªèi kinh ni√™n", "s·ª•t c√¢n", "tƒÉng c√¢n", "m·∫•t ng·ªß k√©o d√†i",
        "tim ƒë·∫≠p nhanh", "kh√≥ th·ªü", "ng·∫•t", "run tay", "v√£ m·ªì h√¥i",
        "tr·ªëng r·ªóng", "b·ªè r∆°i", "ƒëa nghi", "ghen tu√¥ng hoang t∆∞·ªüng"
    ]

    # =================================================================
    # GROUP 3: DIAGNOSTIC / LOOK-UP INTENT
    # Keywords showing the user wants to look up knowledge
    # =================================================================
    diagnostic_intent = [
        "ti√™u chu·∫©n", "d·∫•u hi·ªáu", "bi·ªÉu hi·ªán", "tri·ªáu ch·ª©ng",
        "ch·∫©n ƒëo√°n", "diagnose", "criteria", "x√©t nghi·ªám",
        "dsm", "dsm-5", "icd", "m√£ b·ªánh",
        "c√≥ ph·∫£i l√†", "c√≥ ph·∫£i b·ªã", "t√¥i b·ªã g√¨", "b·ªánh g√¨",
        "ph√¢n bi·ªát", "kh√°c nhau", "nguy√™n nh√¢n", "y·∫øu t·ªë nguy c∆°",
        "th·ªùi gian k√©o d√†i", "bao l√¢u th√¨", "ti√™n l∆∞·ª£ng"
    ]

    # --- CHECK LOGIC ---

    # 1. Look for any keyword directly in the current message.
    all_keywords = disorders + symptoms + diagnostic_intent

    for kw in all_keywords:
        if _keyword_matches(kw, msg):
            print(f"üß† [Gate] C·∫¶N RAG (Keyword: '{kw}').")
            return True

    # 2. Context check: if the previous user message mentioned a disorder or symptom,
    #    the current message is most likely a follow-up. Intent words are ignored here.
    context_keywords = disorders + symptoms
    has_medical_context = any(_keyword_matches(kw, last_msg) for kw in context_keywords)

    if has_medical_context:
        # Phrases that end the look-up when they are the entire message.
        stop_words = ["c·∫£m ∆°n", "ok", "ƒë∆∞·ª£c r·ªìi", "hi·ªÉu r·ªìi", "bye", "t·∫°m bi·ªát", "kh√¥ng sao"]
        # Ignore surrounding ASCII punctuation/whitespace so "ok!" or "bye." still count.
        ascii_punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
        msg_core = msg.strip(ascii_punctuation + " \t\r\n")
        if msg in stop_words or msg_core in stop_words:
            print("üß† [Gate] Context c√≥ b·ªánh, nh∆∞ng User d·ª´ng -> KH√îNG RAG.")
            return False

        # Not a stop phrase and the conversation is about a condition -> keep retrieving.
        print("üß† [Gate] FORCE RAG (Theo ng·ªØ c·∫£nh h·ªôi tho·∫°i c≈©).")
        return True

    # 3. No keyword hit at all -> small talk / general chat.
    print("üß† [Gate] Kh√¥ng t√¨m th·∫•y y·∫øu t·ªë chuy√™n m√¥n -> KH√îNG RAG.")
    return False

In [None]:
# traceback is needed by the error handler below; imported here in case the import cell was skipped.
import traceback

def setup_chatbot_environment():
    """Wire up the hybrid RAG chatbot: Chroma index, BM25 + vector retrievers, reranker, memory.

    Requires ``setup_llm_and_embedding()`` to have populated the ``embed_model`` and
    ``llm`` globals. On success the following globals are (re)bound: ``memory``,
    ``reranker_model``, ``vector_index_chat``, ``all_nodes``, ``bm25_retriever``,
    ``semantic_retriever``, ``fusion_retriever`` and ``llm_chat``; ``Settings.embed_model``
    and ``Settings.llm`` are set as well.

    Returns:
        True when every component was created, False when a prerequisite is missing or
        any step raised (the traceback is printed, nothing is re-raised).
    """
    global memory, reranker_model, vector_index_chat, all_nodes
    global bm25_retriever, semantic_retriever, fusion_retriever, llm_chat
    global embed_model, llm  # make the phase-1 objects reachable

    try:
        print("\n" + "="*60)
        print("üîπ GIAI ƒêO·∫†N 2: SETUP CHATBOT HYBRID (sau reset kernel)")
        print("="*60 + "\n")

        # ---------------------------------------------------------
        # CHECK - has phase 1 been run?
        # ---------------------------------------------------------
        if 'embed_model' not in globals() or embed_model is None:
            print("‚ùó embed_model ch∆∞a ƒë∆∞·ª£c load. H√£y ch·∫°y setup_llm_and_embedding() tr∆∞·ªõc!")
            return False

        if 'llm' not in globals() or llm is None:
            print("‚ùó llm ch∆∞a ƒë∆∞·ª£c load. H√£y ch·∫°y setup_llm_and_embedding() tr∆∞·ªõc!")
            return False

        # 1) Embedding model used for queries
        Settings.embed_model = embed_model

        # 2) The local LLaMA model is the chat LLM
        llm_chat = llm
        Settings.llm = llm_chat

        # ---------------------------------------------------------
        # 3) Connect to ChromaDB
        # ---------------------------------------------------------
        print(f"‚è≥ ƒêang k·∫øt n·ªëi ChromaDB t·∫°i: {INDEX_STORAGE}")
        db = chromadb.PersistentClient(path=INDEX_STORAGE)
        chroma_collection = db.get_collection(COLLECTION_NAME)

        print("üì¶ S·ªë l∆∞·ª£ng vectors trong Chroma:", chroma_collection.count())

        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

        # ---------------------------------------------------------
        # 4) Load the VectorStoreIndex from Chroma
        # ---------------------------------------------------------
        vector_index_chat = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            embed_model=embed_model,
        )
        print("‚úÖ Vector Index ƒë√£ loaqud t·ª´ Chroma.\n")

        # ---------------------------------------------------------
        # 5) Rebuild nodes straight from Chroma (the docstore is not used)
        # ---------------------------------------------------------
        print("‚è≥ ƒêang l·∫•y nodes t·ª´ Chroma collection...")
        results = chroma_collection.get(
            include=["metadatas", "documents"]
        )

        all_nodes = []
        for doc, meta, _id in zip(results["documents"], results["metadatas"], results["ids"]):
            if doc is None:
                # Entries stored without text cannot become a TextNode; skip them
                # instead of failing the whole setup.
                continue
            node = TextNode(
                text=doc,
                id_=_id,
                metadata=meta or {},
            )
            all_nodes.append(node)

        print(f"üì¶ T·ªïng s·ªë nodes reconstruct t·ª´ Chroma: {len(all_nodes)}\n")

        if not all_nodes:
            # Surfaced through the handler below with a clear reason.
            raise ValueError(
                f"Chroma collection '{COLLECTION_NAME}' returned no documents; "
                "cannot build the BM25 index."
            )

        # ---------------------------------------------------------
        # 6) BM25 retriever (prefers the 'bm25_text' metadata field when present)
        # ---------------------------------------------------------
        bm25_nodes = []
        for n in all_nodes:
            bm25_text = n.metadata.get("bm25_text", n.text)
            n_bm25 = TextNode(
                text=bm25_text,
                id_=n.node_id,
                metadata=n.metadata,
            )
            bm25_nodes.append(n_bm25)

        print("‚è≥ T·∫°o BM25Retriever (lexical)...")
        bm25_retriever = BM25Retriever.from_defaults(
            nodes=bm25_nodes,
            # Never ask BM25 for more hits than there are nodes.
            similarity_top_k=min(5, len(bm25_nodes)),
        )
        print("‚úÖ BM25Retriever s·∫µn s√†ng.\n")

        # ---------------------------------------------------------
        # 7) Semantic retriever (vector)
        # ---------------------------------------------------------
        print("‚è≥ T·∫°o Semantic Retriever (vector)...")
        semantic_retriever = VectorIndexRetriever(
            index=vector_index_chat,
            similarity_top_k=5,
        )
        print("‚úÖ Semantic Retriever s·∫µn s√†ng.\n")

        # ---------------------------------------------------------
        # 8) Fusion retriever
        # ---------------------------------------------------------
        print("‚è≥ T·∫°o QueryFusionRetriever (Hybrid)...")
        fusion_retriever = QueryFusionRetriever(
            retrievers=[bm25_retriever, semantic_retriever],
            similarity_top_k=5,
            num_queries=2,
            mode="reciprocal_rerank",
            # Run the sub-queries synchronously: the async path needs a nested event
            # loop inside Jupyter, and nest_asyncio is only applied in a later cell.
            use_async=False,
        )
        print("‚úÖ Hybrid Fusion Retriever s·∫µn s√†ng.\n")

        # ---------------------------------------------------------
        # 9) Reranker
        # ---------------------------------------------------------
        print("‚è≥ T·∫£i Cross-Encoder Reranker...")
        # No device is passed, so CrossEncoder picks its own default device.
        # NOTE(review): this checkpoint is trained on MS MARCO; confirm it ranks
        # Vietnamese text acceptably.
        reranker_model = CrossEncoder(
            "cross-encoder/ms-marco-MiniLM-L6-v2",
        )
        print("‚úÖ Reranker ƒë√£ s·∫µn s√†ng.\n")


        # ---------------------------------------------------------
        # 10) Memory + system prompt
        # ---------------------------------------------------------
        SYSTEM_PROMPT = """\
B·∫°n l√† m·ªôt **chuy√™n gia t√¢m l√Ω AI** (Tr·ª£ l√Ω) ƒë∆∞·ª£c ph√°t tri·ªÉn b·ªüi **AI VIETNAM**.
Nhi·ªám v·ª• c·ªßa b·∫°n l√† tr√≤ chuy·ªán, theo d√µi v√† t∆∞ v·∫•n cho ng∆∞·ªùi d√πng (User) v·ªÅ s·ª©c kh·ªèe t√¢m th·∫ßn.
Lu√¥n lu√¥n h√†nh ƒë·ªông v·ªõi t∆∞ c√°ch l√† Tr·ª£ l√Ω, th·ªÉ hi·ªán s·ª± ƒë·ªìng c·∫£m v√† chuy√™n nghi·ªáp.

QUY T·∫ÆC RAG:
- N·∫øu User h·ªèi v·ªÅ th√¥ng tin chuy√™n m√¥n (tri·ªáu ch·ª©ng, DSM-5, r·ªëi lo·∫°n...), b·∫°n S·∫º nh·∫≠n ƒë∆∞·ª£c th√¥ng tin tham kh·∫£o trong m·ªôt tin nh·∫Øn System.
- H√£y D·ª∞A V√ÄN ho√†n to√†n v√†o th√¥ng tin ƒë√≥ ƒë·ªÉ tr·∫£ l·ªùi.
- N·∫øu User ch·ªâ tr√≤ chuy·ªán, b·∫°n s·∫Ω kh√¥ng nh·∫≠n ƒë∆∞·ª£c th√¥ng tin tham kh·∫£o, h√£y c·ª© tr√≤ chuy·ªán b√¨nh th∆∞·ªùng.

QUY T·∫ÆC B·∫¢O V·ªÜ (R·∫§T QUAN TR·ªåNG):
- B·∫°n **TUY·ªÜT ƒê·ªêI KH√îNG ƒê∆Ø·ª¢C** tr·∫£ l·ªùi c√°c c√¢u h·ªèi kh√¥ng li√™n quan ƒë·∫øn t√¢m l√Ω ho·∫∑c s·ª©c kh·ªèe t√¢m th·∫ßn.
- C√°c ch·ªß ƒë·ªÅ C·∫§M bao g·ªìm (nh∆∞ng kh√¥ng gi·ªõi h·∫°n): th·ªùi ti·∫øt, n·∫•u ƒÉn, ch√≠nh tr·ªã, th·ªÉ thao, tin t·ª©c, to√°n h·ªçc, l·∫≠p tr√¨nh...
- N·∫øu b·ªã h·ªèi nh·ªØng ch·ªß ƒë·ªÅ n√†y, h√£y l·ªãch s·ª± t·ª´ ch·ªëi v√† l√°i cu·ªôc tr√≤ chuy·ªán quay l·∫°i ch·ªß ƒë·ªÅ t√¢m l√Ω.
  (V√≠ d·ª•: "T√¥i xin l·ªói, t√¥i ch·ªâ ƒë∆∞·ª£c ƒë√†o t·∫°o v·ªÅ s·ª©c kh·ªèe t√¢m th·∫ßn. Ch√∫ng ta c√≥ th·ªÉ quay l·∫°i ch·ªß ƒë·ªÅ b·∫°n ƒëang quan t√¢m kh√¥ng?")
"""

        memory = ChatMemoryBuffer.from_defaults(token_limit=4096)
        memory.put(ChatMessage(role="system", content=SYSTEM_PROMPT))

        print("üéâ Giai ƒëo·∫°n 2: Setup HYBRID ho√†n t·∫•t.\n")
        return True

    except Exception as e:
        print(f"‚ùå L·ªñI GIAI ƒêO·∫†N 2 (SETUP): {e}")
        traceback.print_exc()
        return False


In [None]:
def rag_retrieve_and_rerank(message: str, top_k=3, final_top=2) -> str:
    """Retrieve DSM-5 passages for ``message``, rerank them and return a context block.

    Pipeline: hybrid retrieval through the global ``fusion_retriever`` -> keep at most 6
    candidates -> score (query, passage) pairs with the global ``reranker_model`` ->
    keep the ``final_top`` best -> if the joined text exceeds 700 whitespace-separated
    words, summarise it with the global ``llm_chat``.

    Fallbacks: a reranker failure keeps the retriever's order; a summarisation failure
    (including an empty or missing summary) truncates the context to its first 700 words.

    Args:
        message: User query.
        top_k: Currently not used by this function; kept so existing callers that pass
            it keep working.
        final_top: Number of reranked passages to keep.

    Returns:
        The context wrapped in the header/footer marker lines, or ``""`` when the
        retriever returns nothing.
    """
    max_rerank_candidates = 6   # cap reranker work to keep latency down
    max_context_words = 700     # above this the context is summarised

    print(f"üîç [RAG HYBRID] Truy v·∫•n: '{message}'")

    # 1) Hybrid retrieve
    retrieved_docs = fusion_retriever.retrieve(message)
    if not retrieved_docs:
        print("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y t√†i li·ªáu li√™n quan.")
        return ""

    print(f"üîç [RAG HYBRID] Fusion tr·∫£ v·ªÅ {len(retrieved_docs)} docs. ƒêang rerank...")

    retrieved_docs = retrieved_docs[:max_rerank_candidates]

    # 2) Build (query, passage) pairs for the cross-encoder
    doc_texts = [doc.get_content() for doc in retrieved_docs]
    pairs = [(message, text) for text in doc_texts]

    # 3) Rerank; on failure keep the fusion order
    try:
        scores = reranker_model.predict(pairs)
        ranked = sorted(
            zip(scores, retrieved_docs),
            key=lambda x: x[0],
            reverse=True,
        )
        ranked_docs = [doc for _, doc in ranked]
    except Exception as e:
        print(f"‚ùå Rerank l·ªói: {e}")
        ranked_docs = retrieved_docs

    # 4) Final top N
    top_docs = ranked_docs[:final_top]
    context_text = "\n\n---\n\n".join([doc.get_content() for doc in top_docs])

    # 5) Summarise when too long
    if len(context_text.split()) > max_context_words:
        print("‚úÇÔ∏è Context qu√° d√†i ‚Äì ƒëang t√≥m t·∫Øt (d√πng llm_chat)...")
        try:
            summary_messages = [
                ChatMessage(
                    role="system",
                    content=(
                        "B·∫°n l√† AI t√≥m t·∫Øt t√†i li·ªáu DSM-5. "
                        "T√≥m t·∫Øt ng·∫Øn g·ªçn, ch√≠nh x√°c, ch·ªâ gi·ªØ ph·∫ßn li√™n quan t·ªõi c√¢u h·ªèi."
                    ),
                ),
                ChatMessage(
                    role="user",
                    content=f"C√¢u h·ªèi: {message}\n\nN·ªôi dung t√†i li·ªáu:\n{context_text}",
                ),
            ]
            summary_response = llm_chat.chat(summary_messages)
            summary = (summary_response.message.content or "").strip()
            if not summary:
                # An empty summary would wipe out the retrieved evidence; treat it
                # like any other summarisation failure and fall back to truncation.
                raise ValueError("LLM returned an empty summary")
            context_text = summary
        except Exception as e:
            print(f"‚ùå L·ªói t√≥m t·∫Øt: {e}")
            context_text = " ".join(context_text.split()[:max_context_words])

    return f"--- Th√¥ng tin tham kh·∫£o t·ª´ DSM-5 ---\n{context_text}\n--- H·∫øt th√¥ng tin tham kh·∫£o ---"


In [None]:
setup_chatbot_environment()

In [None]:
# 7. MAIN CHAT LOOP
# =====================================================
def _history_for_prompt(chat_memory):
    """Return a fresh list: the stored system prompt (if any) + the token-limited history.

    A new list is always returned so that per-turn additions (retrieved context, the
    pending user message) never end up in the stored history.
    """
    stored = chat_memory.get_all()
    recent = list(chat_memory.get())
    has_system_prompt = bool(stored) and stored[0].role == "system"
    if has_system_prompt and not (recent and recent[0].role == "system"):
        # Trimming dropped the system prompt; put it back at the front.
        recent.insert(0, stored[0])
    return recent


def chat_conversation():
    """Run the interactive console chat until the user types 'quit'/'exit' or interrupts.

    Each turn: read a line, ask ``should_use_rag`` (with the previous user message so the
    follow-up rule can apply), optionally add retrieved DSM-5 context as a system
    message, call ``llm_chat`` and store the user/assistant pair in ``memory``.
    Retrieved context is sent for the current turn only and is not stored in memory.

    Relies on the globals created by ``setup_chatbot_environment()``: ``memory`` and
    ``llm_chat``. Any unexpected exception prints a traceback and ends the loop.

    Returns:
        None.
    """
    print("\n" + "="*60)
    print("ü§ñ Chatbot DSM-5 HYBRID ƒë√£ s·∫µn s√†ng. G√µ 'quit' ƒë·ªÉ tho√°t.")
    print("="*60 + "\n")

    last_user_message = ""  # previous user turn, fed to the RAG gate

    while True:
        try:
            user_message = input("B·∫°n: ").strip()
            if not user_message:
                continue

            if user_message.lower() in ["quit", "exit"]:
                print("Chatbot: C·∫£m ∆°n b·∫°n ƒë√£ tr√≤ chuy·ªán. H·∫πn g·∫∑p l·∫°i üå∑")
                break

            new_user_message = ChatMessage(role="user", content=user_message)
            messages_to_send = _history_for_prompt(memory)

            # When retrieval is needed, attach the DSM-5 context as a system message
            if should_use_rag(user_message, last_user_message):
                context = rag_retrieve_and_rerank(user_message)
                if context:
                    messages_to_send.append(
                        ChatMessage(role="system", content=context)
                    )
                else:
                    messages_to_send.append(
                        ChatMessage(
                            role="system",
                            content=(
                                "--- Th√¥ng tin tham kh·∫£o t·ª´ DSM-5 ---\n"
                                "Kh√¥ng t√¨m th·∫•y ƒëo·∫°n n√†o th·ª±c s·ª± ph√π h·ª£p.\n"
                                "--- H·∫øt th√¥ng tin tham kh·∫£o ---"
                            ),
                        )
                    )

            messages_to_send.append(new_user_message)

            # Ask the LLM for the reply
            response = llm_chat.chat(messages_to_send)
            response_text = (response.message.content or "").strip()

            # Strip a leading role label when the model answers like 'assistant: ...'
            if response_text.lower().startswith("assistant"):
                response_text = (
                    response_text[len("assistant"):].lstrip(":").strip()
                )

            if not response_text:
                response_text = (
                    "M√¨nh ch∆∞a ch·∫Øc ch·∫Øn l·∫Øm v·ªÅ ƒëi·ªÅu n√†y, "
                    "b·∫°n c√≥ th·ªÉ n√≥i r√µ h∆°n c·∫£m gi√°c ho·∫∑c c√¢u h·ªèi c·ªßa b·∫°n kh√¥ng?"
                )

            # Store the turn in memory
            memory.put(new_user_message)
            memory.put(ChatMessage(role="assistant", content=response_text))
            last_user_message = user_message

            print(f"Chatbot: {response_text}\n")

        except KeyboardInterrupt:
            print("\nüõë ƒê√£ d·ª´ng cu·ªôc tr√≤ chuy·ªán.")
            break
        except Exception as e:
            print(f"‚ùå L·ªói kh√¥ng mong mu·ªën trong v√≤ng l·∫∑p chat: {e}")
            traceback.print_exc()
            break

In [None]:
chat_conversation()

In [None]:

import os
import openai

# Raw OpenAI SDK client. The API key is read from the OPENAI_API_KEY environment
# variable instead of being hard-coded; os.environ.get yields None when it is unset.
# `client` is not referenced by the other cells shown in this notebook section.
client = openai.OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

# ----------------------------
# Load GPT-4o-mini
# ----------------------------
llm_gptmini = OpenAI(
    model="gpt-4o-mini",
    temperature=0.2,   # lower creativity, keep answers fact-based
    max_tokens=512
)

# Register on Settings for use in ingest + transformation.
# From here on, code that calls Settings.llm (question generation, answer_with_gpt,
# evaluate_with_gpt) talks to GPT-4o-mini; the separately bound `llm_chat` global
# set by setup_chatbot_environment() is not changed by this assignment.
Settings.llm = llm_gptmini

# To also use GPT for grading the questions, reuse this same llm


In [None]:
import json
import pandas as pd

In [None]:
def _parse_question_list(raw_reply: str) -> list:
    """Extract the JSON array of question strings from an LLM reply.

    Tolerates Markdown fences or prose around the array by parsing the span from the
    first '[' to the last ']'. Non-string and blank items are dropped.

    Raises:
        ValueError: when no JSON array can be found or decoded.
    """
    start, end = raw_reply.find("["), raw_reply.rfind("]")
    if start == -1 or end <= start:
        raise ValueError("no JSON array found in reply")
    parsed = json.loads(raw_reply[start:end + 1])  # JSONDecodeError is a ValueError
    if not isinstance(parsed, list):
        raise ValueError("reply is not a JSON array")
    return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]


def generate_questions_from_nodes(nodes, max_q_per_node=1):
    """Ask ``Settings.llm`` to write essay questions for each node.

    For every node the first 1000 characters of ``node.text`` are placed in the prompt
    and the reply is parsed as a JSON list of strings. At most ``max_q_per_node``
    questions are kept per node. Nodes whose reply cannot be parsed, or yields no usable
    question, are reported and skipped.

    Args:
        nodes: Iterable of objects exposing ``.text`` and ``.node_id``.
        max_q_per_node: Number of questions requested from, and kept for, each node.

    Returns:
        pandas.DataFrame with one row per question and the columns ``question`` and
        ``reference_node_text`` (the full node text). Empty when nothing was parsed.
    """
    questions = []
    for node in nodes:
        prompt = f"""
B·∫°n l√† gi·∫£ng vi√™n t√¢m l√Ω. D·ª±a v√†o ƒëo·∫°n vƒÉn b·∫£n DSM-5:
"{node.text[:1000]}"

So·∫°n {max_q_per_node} c√¢u h·ªèi t·ª± lu·∫≠n (Essay):
- Kh√¥ng Yes/No
- Ng·∫Øn g·ªçn, t·ªïng h·ª£p th√¥ng tin
- Tr·∫£ v·ªÅ JSON List: ["C√¢u h·ªèi 1", ...]
"""
        resp = Settings.llm.chat([ChatMessage(role="user", content=prompt)])
        content = (resp.message.content or "").strip()

        try:
            q_list = _parse_question_list(content)
            if not q_list:
                raise ValueError("reply contained no question strings")
        except (ValueError, TypeError):
            # Only parsing problems are swallowed; anything else should surface.
            print(f"‚ö†Ô∏è Node {node.node_id} kh√¥ng parse ƒë∆∞·ª£c JSON.")
            continue

        # The model may return more items than requested; honour the limit.
        for q in q_list[:max_q_per_node]:
            questions.append({"question": q, "reference_node_text": node.text})
    return pd.DataFrame(questions)

In [None]:
def answer_with_gpt(question, top_k=3, final_top=2):
    """Answer ``question`` with ``Settings.llm``, grounding on retrieved context when available.

    ``should_use_rag`` decides whether retrieval runs. When it does and
    ``rag_retrieve_and_rerank`` returns a non-empty context, that context is embedded in
    the system prompt. When retrieval is skipped or comes back empty, the plain
    direct-answer system prompt is used, so the model is never told to rely on an
    empty reference.

    Args:
        question: User question.
        top_k: Forwarded to ``rag_retrieve_and_rerank``.
        final_top: Forwarded to ``rag_retrieve_and_rerank``.

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    context = ""
    if should_use_rag(question):
        context = rag_retrieve_and_rerank(question, top_k=top_k, final_top=final_top)

    if context:
        system_prompt_content = f"B·∫°n l√† tr·ª£ l√Ω t√¢m l√Ω AI. Tr·∫£ l·ªùi d·ª±a tr√™n th√¥ng tin sau:\n{context}"
    else:
        system_prompt_content = "B·∫°n l√† tr·ª£ l√Ω t√¢m l√Ω AI. Tr·∫£ l·ªùi tr·ª±c ti·∫øp c√¢u h·ªèi m·ªôt c√°ch ƒë·ªìng c·∫£m v√† ch√≠nh x√°c."

    messages = [
        ChatMessage(role="system", content=system_prompt_content),
        ChatMessage(role="user", content=question)
    ]
    resp = Settings.llm.chat(messages)
    return (resp.message.content or "").strip()

# ===============================================



In [None]:
def evaluate_with_gpt(question, answer, reference_text=None):
    """Grade an answer with ``Settings.llm`` and return float scores.

    The judge prompt contains the question and answer and, when ``reference_text`` is
    given, its first 1000 characters as the reference to grade against. The reply is
    expected to contain a JSON object with the keys ``correctness`` (0.0-5.0),
    ``faithfulness`` (0.0-1.0) and ``relevancy`` (0.0-1.0). The object is located
    anywhere in the reply, so Markdown fences or prose around it are tolerated.

    Args:
        question: The question that was asked.
        answer: The answer to grade.
        reference_text: Optional source text the answer should be faithful to.

    Returns:
        dict with float values for ``correctness``, ``faithfulness`` and ``relevancy``.
        When the reply cannot be parsed, all three are 0.0 and an extra
        ``"reasoning": "Parse error"`` entry marks the failure.
    """

    # Limit the reference to the first 1000 characters when one is provided
    ref_text_snippet = reference_text[:1000] if reference_text else None

    eval_prompt = f"""
B·∫°n l√† gi√°m kh·∫£o t√¢m l√Ω AI. H√£y ƒë√°nh gi√° c√¢u tr·∫£ l·ªùi sau:

Question: "{question}"
Answer: "{answer}"
"""
    if ref_text_snippet:
        eval_prompt += f"""
VƒÉn b·∫£n tham chi·∫øu (reference):
"{ref_text_snippet}"

H√£y ch·∫•m ƒëi·ªÉm d·ª±a tr√™n vi·ªác c√¢u tr·∫£ l·ªùi c√≥ ƒë√∫ng v√† trung th·ª±c v·ªõi reference_text.
"""

    eval_prompt += """
TI√äU CH√ç (d·∫°ng FLOAT):
1. Correctness: 0.0‚Äì5.0
2. Faithfulness: 0.0‚Äì1.0
3. Relevancy: 0.0‚Äì1.0

Tr·∫£ v·ªÅ JSON:
{"correctness": float, "faithfulness": float, "relevancy": float}
"""

    # Ask the judge model
    resp = Settings.llm.chat([ChatMessage(role="user", content=eval_prompt)])
    content = (resp.message.content or "").strip()

    # Parse the JSON object wherever it sits in the reply
    try:
        start, end = content.find("{"), content.rfind("}")
        if start == -1 or end <= start:
            raise ValueError("no JSON object found in reply")
        data = json.loads(content[start:end + 1])

        # Convert everything to float; a missing key counts as 0
        result = {
            "correctness": float(data.get("correctness", 0)),
            "faithfulness": float(data.get("faithfulness", 0)),
            "relevancy": float(data.get("relevancy", 0))
        }
        return result

    except Exception:
        # Best-effort by design: one unparsable reply must not abort a whole run.
        return {
            "correctness": 0.0,
            "faithfulness": 0.0,
            "relevancy": 0.0,
            "reasoning": "Parse error"
        }


In [None]:
# ===============================================
# Step 5: report the mean scores
# ===============================================
def print_average_scores(df_eval):
    """Print the column means of the three evaluation metrics, two decimals each.

    Args:
        df_eval: DataFrame with the numeric columns ``correctness``, ``faithfulness``
            and ``relevancy``.

    Returns:
        None; the header and one line per metric go to stdout.
    """
    print("üìä ƒêi·ªÉm trung b√¨nh:")
    # (display label, column name, maximum of the scale shown after the slash)
    metric_rows = (
        ("Correctness", "correctness", 5),
        ("Faithfulness", "faithfulness", 1),
        ("Relevancy", "relevancy", 1),
    )
    for label, column, scale_max in metric_rows:
        mean_score = df_eval[column].mean()
        print(f"{label}: {mean_score:.2f} / {scale_max}")


In [None]:
# Step 5: write the results to CSV
# ===============================================
def save_eval_results(df_eval, filename="eval_results.csv"):
    """Write the evaluation table to ``filename`` as CSV and print a confirmation.

    The row index is omitted and the file is encoded as ``utf-8-sig``.

    Args:
        df_eval: DataFrame holding the evaluation results.
        filename: Destination path; defaults to ``eval_results.csv``.

    Returns:
        None.
    """
    csv_options = {"index": False, "encoding": "utf-8-sig"}
    df_eval.to_csv(filename, **csv_options)
    print(f"üíæ K·∫øt qu·∫£ ƒë√°nh gi√° ƒë√£ l∆∞u: {filename}")

In [None]:
def run_full_pipeline(nodes, max_questions=100):
    """End-to-end automatic evaluation of the RAG/GPT system.

    Steps:
        1. Generate one question per node with ``generate_questions_from_nodes``.
        2. If more than ``max_questions`` were produced, draw a random sample of that
           size (fixed ``random_state=42``); otherwise keep them all.
        3. Answer each question with ``answer_with_gpt`` and grade it against the
           originating node text with ``evaluate_with_gpt``.
        4. Print the mean scores (skipped when there are no results) and save the
           table through ``save_eval_results``.

    Args:
        nodes: Nodes to generate questions from.
        max_questions: Upper bound on the number of questions evaluated.

    Returns:
        Tuple ``(df_result, df_questions)``: the per-question evaluation table and the
        questions that were evaluated.
    """
    print("\nüöÄ B·∫Øt ƒë·∫ßu pipeline ƒë√°nh gi√° t·ª± ƒë·ªông...")

    # Step 1: build the question pool
    question_pool = generate_questions_from_nodes(nodes, max_q_per_node=1)

    # Step 2: down-sample only when the pool exceeds the budget
    pool_exceeds_budget = len(question_pool) > max_questions
    df_questions = (
        question_pool.sample(n=max_questions, random_state=42).reset_index(drop=True)
        if pool_exceeds_budget
        else question_pool.copy()
    )

    print(f"üì¶ Ch·ªçn ng·∫´u nhi√™n {len(df_questions)} c√¢u h·ªèi ƒë·ªÉ ƒë√°nh gi√°.")

    def _answer_and_grade(question, reference_text):
        """Answer one question, grade it, and return the scores plus the inputs."""
        answer = answer_with_gpt(question)
        record = evaluate_with_gpt(
            question=question,
            answer=answer,
            reference_text=reference_text,  # grade against the source node
        )
        record.update({
            "question": question,
            "answer": answer,
            "reference_text": reference_text
        })
        return record

    # Step 3: one record per selected question, in table order
    records = [
        _answer_and_grade(row.question, row.reference_node_text)
        for row in df_questions.itertuples(index=False)
    ]
    df_result = pd.DataFrame(records)

    # Step 4: summary (only when something was evaluated)
    if not df_result.empty:
        print_average_scores(df_result)

    # Step 5: persist
    save_eval_results(df_result)

    print("üéâ Pipeline ƒë√°nh gi√° ho√†n t·∫•t!")
    return df_result, df_questions


In [None]:
!pip install -q nest_asyncio

In [None]:
import nest_asyncio
nest_asyncio.apply()

In [None]:
df_result, df_questions = run_full_pipeline(all_nodes)

In [None]:
print("\nüîπ Sample k·∫øt qu·∫£:")
print(df_result.head())

In [None]:
print_average_scores(df_result)

In [None]:
# 1Ô∏è‚É£ Th√¥ng tin t·ªïng quan
print("=== Th√¥ng tin DataFrame ===")
print(df_result.info())
print("\n=== Th·ªëng k√™ m√¥ t·∫£ c√°c c·ªôt s·ªë ===")
print(df_result.describe())

In [None]:
# Show up to 10 random question/answer rows with their scores.
# The sample size is capped at the number of available rows, because sampling more
# rows than exist (without replacement) raises a ValueError.
sample_df = df_result[['question', 'answer', 'correctness', 'faithfulness', 'relevancy']].sample(min(10, len(df_result)), random_state=42)
sample_df
