In [1]:
import os
import pickle
from typing import List, Optional
from langchain.schema import Document
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from groq import Groq
from IPython.display import display, Markdown

In [2]:
class DocumentLoader:
    @staticmethod
    def load_docs(file_paths: List[str]) -> List[Document]:
        """
        ໂຫລດ PDF documents ໂດຍໃຊ້ LangChain PyPDFLoader ເພາະຍັງແອັດຈັງມັກເພາະມັນສ້າງ Metadata ໃຫ້ Auto 

        Metadata ຄືຍັງ ? 
        Metadata ຄືຂໍ້ມູນເພີ່ມເຕີມກ່ຽວກັບເອກະສານ
        ແຕ່ລະ Document ຈະມີ 2 ສ່ວນຫຼັກ:
        1. page_content: ເນື້ອໃນຂໍ້ຄວາມຈິງໆ
        2. metadata: ຂໍ້ມູນລາຍລະອຽດກ່ຽວກັບເອກະສານ ເພື່ອບົງບອກວ່າ ເວລາເຮົາ ຄົ້ນຫາຂໍ້ມູນ ແຫຼ່ງຂໍ້ມູນນັ້ນມາຈາກໄສ
        
        Args:
            file_paths (list): List of PDF file paths
        
        Returns:
            List[Document]: List of LangChain Document objects
        """
        
        all_docs = []
        
        for file_path in file_paths:
            if not os.path.exists(file_path):
                print(f"Warning: File {file_path} not found. Skipping...")
                continue
                
            try:
                file_extension = os.path.splitext(file_path)[1].lower()
                
                # Check if file is PDF
                if file_extension != '.pdf':
                    print(f"Warning: {file_path} is not a PDF file. Skipping...")
                    continue
                
                # Load PDF using LangChain PyPDFLoader
                loader = PyPDFLoader(file_path)
                documents = loader.load()
                
                # Add enhanced metadata to all documents
                for doc in documents:
                    if doc.metadata is None:
                        doc.metadata = {}
                        
                    doc.metadata.update({
                        'source_file': os.path.basename(file_path),
                        'file_type': file_extension,
                        'file_path': file_path,
                        'file_size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
                    })
                
                all_docs.extend(documents)
                print(f"✅ Processed PDF: {file_path} ({len(documents)} pages)")
                
            except Exception as e:
                print(f"❌ Error processing {file_path}: {str(e)}")
                continue
        
        print(f"📚 Total PDF documents loaded: {len(all_docs)}")
        return all_docs
    
    @staticmethod
    def chunk_documents_standard(
        docs: List[Document], 
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        tokenizer_model: str = "D:/model/BAAI-bge-m3",
        max_token_limit: int = 8192
    ) -> List[Document]:
        """
        ໃຊ້ Lanchain ໃນການເຮັດ chunking ຂໍ້ມູນ ເພື່ອການຄົ້ນຫາຂໍ້ມູນດ້ວຍ FAISS  

        Chunk_size: ແມ່ນຈຳນວນຂໍ້ມູນທີ່ຈະເຮັດ chunking ຕໍ່ຫນ່ວຍ ເພາະຍັງ ເຮົາບໍ່ສາມາດເອົາເອກະສານທັ້ງໝົດໃຫ້ AI ຕອບໄດ້ ເນື່ອງຈາກບາງເອກະສານມີຫລາຍຫນ້າ
        Chunk_overlap: ແມ່ນຈຳນວນຂໍ້ມູນທີ່ຈະເຮັດ chunking ຕໍ່ຫນ່ວຍ ເພາະຍັງ ເຮົາບໍ່ສາມາດເອົາເອກະສານທັ້ງໝົດໃຫ້ AI ຕອບໄດ້ ເນື່ອງຈາກບາງເອກະສານມີຫລາຍຫນ້າ
        Tokenizer_model: ແມ່ນ Model ທີ່ເຮົາຈະໃຊ້ໃນການເຮັດ chunking ຂໍ້ມູນ ເພື່ອການຄົ້ນຫາຂໍ້ມູນດ້ວຍ FAISS
        Max_token_limit: ແມ່ນການແບ່ງສັດສ່ວນໃຫ້ເຫມາະສົມກັບ chunk_size
        
        Args:
            docs: List of LangChain Document objects
            chunk_size: Target size for each chunk in tokens
            chunk_overlap: Number of overlapping tokens between chunks
            tokenizer_model: Path to tokenizer model
            max_token_limit: Maximum tokens allowed
            
        Returns:
            List of chunked LangChain Document objects
        """
        
        if not docs:
            print("⚠️  No documents provided for chunking")
            return []
        
        # Load tokenizer
        try:
            tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
            print(f"✅ Loaded tokenizer: {tokenizer_model}")
        except Exception as e:
            print(f"❌ Error loading tokenizer: {e}") 
        
        # Validate parameters
        if chunk_size >= max_token_limit:
            chunk_size = max_token_limit - 500  # Safe buffer
            print(f"⚠️  Adjusted chunk_size to {chunk_size} for safety")
        
        if chunk_overlap >= chunk_size:
            chunk_overlap = chunk_size // 5  # 20% overlap
            print(f"⚠️  Adjusted chunk_overlap to {chunk_overlap}")
        
        # Create tokenizer-aware text splitter
        text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
            tokenizer=tokenizer,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            strip_whitespace=True,
            separators=[
                "\n\n",      # Paragraph breaks
                "\n",        # Line breaks
                ". ",        # Sentence endings
                "! ",        # Exclamation endings  
                "? ",        # Question endings
                "; ",        # Semicolon breaks
                ", ",        # Comma breaks
                " ",         # Word breaks
                ""           # Character level
            ]
        )
        
        # Split documents
        print(f"🔄 Chunking {len(docs)} documents...")
        chunked_docs = text_splitter.split_documents(docs)
        
        # Validate token counts and add metadata
        validated_chunks = []
        max_tokens_found = 0
        
        for i, chunk in enumerate(chunked_docs):
            # Count actual tokens
            token_count = len(tokenizer.encode(chunk.page_content))
            max_tokens_found = max(max_tokens_found, token_count)
            
            # Add chunk metadata
            if chunk.metadata is None:
                chunk.metadata = {}
                
            chunk.metadata.update({
                'chunk_id': i,
                'token_count': token_count,
                'char_count': len(chunk.page_content),
                'chunk_method': 'tokenizer_based'
            })
            
            # Skip if too large
            if token_count > max_token_limit:
                print(f"⚠️  Skipping oversized chunk {i}: {token_count} tokens")
                continue
                
            validated_chunks.append(chunk)
        
        # Print summary
        print(f"✅ Created {len(validated_chunks)} chunks")
        print(f"📊 Max tokens in any chunk: {max_tokens_found}")
        
        return validated_chunks
    
    @staticmethod
    def create_vector_store(
        chunked_docs: List[Document],
        embedding_model: str = "D:/model/BAAI-bge-m3",
        index_name: str = "pdf_documents",
        persist_directory: str = "./faiss_db"
    ) -> FAISS:
        """
        ສ້າງ Vector Store ດ້ວຍ FAISS ຈາກ chunked documents
        
        Embedding_model: ແມ່ນ Model ທີ່ໃຊ້ໃນການເຮັດ Embedding ເພື່ອປ່ຽນຂໍ້ຄວາມເປັນ Vector
        Index_name: ແມ່ນຊື່ຂອງ FAISS index ສາມາດສ້າງຕາມໃຈ
        Persist_directory: ແມ່ນໂຟລເດີຢຸ້ບັນທຶກ FAISS index
        
        Args:
            chunked_docs: List of chunked Document objects
            embedding_model: Path to embedding model
            index_name: Name for FAISS index
            persist_directory: Directory to save FAISS index
            
        Returns:
            FAISS vector store object
        """
        
        if not chunked_docs:
            print("⚠️  No chunked documents provided")
            return None
        
        # Create embeddings
        try:
            print(f"🔄 Loading embedding model: {embedding_model}")
            embeddings = HuggingFaceEmbeddings(
                model_name=embedding_model,
                model_kwargs={'device': 'cpu'},  # ປ່ຽນເປັນ 'cuda' ຖ້າມີ GPU
                encode_kwargs={'normalize_embeddings': True}
            )
            print(f"✅ Loaded embedding model successfully")
        except Exception as e:
            print(f"❌ Error loading embedding model: {e}")
            return None
        
        # Create FAISS vector store
        try:
            # ສ້າງໂຟລເດີຖ້າຍັງບໍ່ມີ
            os.makedirs(persist_directory, exist_ok=True)
            
            print(f"🔄 Creating FAISS vector store with {len(chunked_docs)} documents...")
            
            # ສ້າງ FAISS vector store
            vector_store = FAISS.from_documents(
                documents=chunked_docs,
                embedding=embeddings
            )
            
            # ບັນທຶກ FAISS index
            faiss_path = os.path.join(persist_directory, index_name)
            vector_store.save_local(faiss_path)
            print(f"💾 FAISS index saved to: {faiss_path}")
            
            # ສະແດງສະຖິຕິ
            print(f"✅ Created FAISS vector store")
            print(f"📊 Total vectors: {len(chunked_docs)}")
            print(f"📚 Index name: {index_name}")
            
            return vector_store
                
        except Exception as e:
            print(f"❌ Error creating FAISS vector store: {e}")
            return None
        
    @staticmethod
    def load_existing_vector_store(
        embedding_model: str = "D:/model/BAAI-bge-m3",
        index_name: str = "pdf_documents", 
        persist_directory: str = "./faiss_db"
    ) -> Optional[FAISS]:
        """
        ໂຫຼດ Vector Store ທີ່ມີຢູ່ແລ້ວຈາກ FAISS
        
        Args:
            embedding_model: Path to embedding model
            index_name: Name of FAISS index
            persist_directory: Directory where FAISS index is saved
            
        Returns:
            FAISS vector store object or None
        """
        
        try:
            # ກວດສອບວ່າມີໂຟລເດີຫຼືບໍ່
            faiss_path = os.path.join(persist_directory, index_name)
            if not os.path.exists(faiss_path):
                print(f"❌ FAISS index not found: {faiss_path}")
                return None
            
            # ກວດສອບວ່າມີໄຟລ໌ທີ່ຈຳເປັນຫຼືບໍ່
            index_file = os.path.join(faiss_path, "index.faiss")
            pkl_file = os.path.join(faiss_path, "index.pkl")
            
            if not os.path.exists(index_file) or not os.path.exists(pkl_file):
                print(f"❌ FAISS files not found in: {faiss_path}")
                return None
            
            # ໂຫຼດ embedding model
            embeddings = HuggingFaceEmbeddings(
                model_name=embedding_model,
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )
            
            # ໂຫຼດ FAISS vector store
            vector_store = FAISS.load_local(
                faiss_path, 
                embeddings,
                allow_dangerous_deserialization=True
            )
            
            # ກວດສອບວ່າມີຂໍ້ມູນຫຼືບໍ່
            if hasattr(vector_store, 'index') and vector_store.index.ntotal > 0:
                print(f"✅ Loaded existing FAISS vector store: {index_name}")
                print(f"📊 Total vectors: {vector_store.index.ntotal}")
                return vector_store
            else:
                print(f"⚠️  FAISS index '{index_name}' is empty")
                return None
                
        except Exception as e:
            print(f"❌ Error loading FAISS vector store: {e}")
            return None
        
    @staticmethod
    def search_similar_documents(
        vector_store: FAISS,
        query: str,
        k: int = 5
    ) -> List[tuple]:
        """
        ຄົ້ນຫາເອກະສານທີ່ຄ້າຍຄືກັນ
        vector_store: ແມ່ນຂໍ້ມູເຮົາເຄີຍສ້າງ Vector Store ໃນ ./faiss_db
        query: ຄຳຖາມທີ່ຕ້ອງການຄົ້ນຫາ
        k: ຈຳນວນຜົນລັບທີ່ຕ້ອງການ
            
        Returns:
            List of tuples (document, score)
        """
        
        try:
            print(f"🔍 Searching for: {query}")
            
            # ຄົ້ນຫາດ້ວຍ score
            results = vector_store.similarity_search_with_score(
                query=query,
                k=k
            )
            
            return results
            
        except Exception as e:
            print(f"❌ Error during search: {e}")
            return []

In [6]:
class GroqRAGSystem:
    """
    ລະບົບ RAG ປະສົມກັບ Groq LLM ເພື່ອຕອບຄຳຖາມອ້າງອີງຈາກເອກະສານ
    """
    
    def __init__(self, groq_api_key: str, model_name: str = "openai/gpt-oss-120b"):
        """
        ເລີ່ມຕົ້ນ GroqRAGSystem
        
        Args:
            groq_api_key: Groq API key (ຕ້ອງໄປສະໝັກທີ່ https://console.groq.com)
            model_name: ຊື່ Model ທີ່ຈະໃຊ້ (ຍົກຕົວຢ່າງ: openai/gpt-oss-120b)
        """
        self.client = Groq(api_key=groq_api_key)
        self.model_name = model_name
        
    def create_context_from_documents(self, search_results: List[tuple]) -> str:
        """
        ສ້າງ context ຈາກຜົນການຄົ້ນຫາເອກະສານ
        
        Args:
            search_results: List of tuples (document, score) ຈາກ vector search
            
        Returns:
            ຂໍ້ຄວາມ context ສຳລັບ LLM
        """
        if not search_results:
            return "ບໍ່ພົບເອກະສານທີ່ກ່ຽວຂ້ອງ"
            
        context_parts = []
        for i, (doc, score) in enumerate(search_results):
            # FAISS ໃຊ້ cosine distance, ຄ່າຕ່ຳໝາຍຄວາມວ່າຄ້າຍຄືກັນຫຼາຍ
            similarity = 1 - score  # ປ່ຽນເປັນ similarity
            source_info = f"ແຫຼ່ງ: {doc.metadata.get('source_file', 'Unknown')} (ໜ້າ {doc.metadata.get('page', 'Unknown')})"
            content = doc.page_content.strip()
            
            context_parts.append(f"ເອກະສານ {i+1} (ຄວາມຄ້າຍຄື: {similarity:.3f}):\n{source_info}\n{content}\n")
            
        return "\n---\n".join(context_parts)
    
    def generate_answer(self, query: str, context: str) -> str:
        """
        ສ້າງຄຳຕອບໂດຍໃຊ້ Groq LLM ພ້ອມ context ຈາກເອກະສານ
        
        Args:
            query: ຄຳຖາມຂອງຜູ້ໃຊ້
            context: Context ຈາກເອກະສານ
            
        Returns:
            ຄຳຕອບຈາກ LLM
        """
        
        # ສ້າງ prompt ສຳລັບ RAG
        prompt = f"""ທ່ານເປັນ AI Assistant ທີ່ຊ່ຽວຊານໃນການຕອບຄຳຖາມໂດຍອ້າງອີງຈາກເອກະສານທີ່ໃຫ້ມາ.

ຄຳແນະນຳ:
1. ຕອບຄຳຖາມໂດຍອ້າງອີງຈາກເອກະສານທີ່ໃຫ້ມາເທົ່ານັ້ນ
2. ຖ້າບໍ່ພົບຄຳຕອບໃນເອກະສານ, ໃຫ້ບອກວ່າບໍ່ພົບຂໍ້ມູນທີ່ກ່ຽວຂ້ອງ
3. ລະບຸແຫຼ່ງຂໍ້ມູນທີ່ໃຊ້ໃນການຕອບ
4. ຕອບເປັນພາສາລາວ ແລະ ໃຫ້ຄຳຕອບທີ່ຊັດເຈນ, ລະອຽດ
5. ຕອບໃຫ້ເປັນ Format markdown

ເອກະສານອ້າງອີງ:
{context}

ຄຳຖາມ: {query}

ຄຳຕອບ:"""

        try:
            # ສົ່ງ request ໄປ Groq
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                model=self.model_name,
                temperature=0.1,  # ຄວາມສ້າງສັນຕ່ຳ ເພື່ອຄວາມແມ່ນຍຳ  ຂຶ້ນນຳ Model ເພາະຄ່າ temperature ແຕ່ລະເຈົ້າມັນຕ່າງກັນ
                max_tokens=2000,  # ຈຳນວນ tokens ສູງສຸດ 
            )
            
            answer = chat_completion.choices[0].message.content
            return answer
            
        except Exception as e:
            return f"❌ ເກີດຂໍ້ຜິດພາດໃນການສ້າງຄຳຕອບ: {str(e)}"
    
    def query_documents(self, vector_store: FAISS, query: str, k: int = 5) -> dict:
        """
        ຄຳຖາມແບບສົມບູນຈາກການຄົ້ນຫາເອກະສານຈົນເຖີງການສ້າງຄຳຕອບ
        
        Args:
            vector_store: FAISS vector store
            query: ຄຳຖາມຂອງຜູ້ໃຊ້
            k: ຈຳນວນເອກະສານທີ່ຈະຄົ້ນຫາ
            
        Returns:
            dict ທີ່ປະກອບດ້ວຍ answer, context, ແລະ sources
        """
        
        print(f"\n🤖 Processing query: {query}")
        
        # 1. ຄົ້ນຫາເອກະສານທີ່ກ່ຽວຂ້ອງ
        search_results = DocumentLoader.search_similar_documents(
            vector_store=vector_store,
            query=query,
            k=k
        )
        
        if not search_results:
            return {
                "answer": "❌ ບໍ່ພົບເອກະສານທີ່ກ່ຽວຂ້ອງກັບຄຳຖາມຂອງທ່ານ",
                "context": "",
                "sources": []
            }
        
        # 2. ສ້າງ context ຈາກຜົນການຄົ້ນຫາ
        context = self.create_context_from_documents(search_results)
        
        # 3. ສ້າງຄຳຕອບດ້ວຍ LLM
        print("🧠 Generating answer with Groq LLM...")
        answer = self.generate_answer(query, context)
        
        # 4. ສ້າງລາຍຊື່ແຫຼ່ງຂໍ້ມູນ
        sources = []
        for doc, score in search_results:
            similarity = 1 - score
            sources.append({
                "source_file": doc.metadata.get('source_file', 'Unknown'),
                "page": doc.metadata.get('page', 'Unknown'),
                "similarity": f"{similarity:.3f}",
                "content_preview": doc.page_content
            })
        
        return {
            "answer": answer,
            "context": context,
            "sources": sources
        }

In [7]:
def main():
    """
    ຟັງຊັ່ນຫຼັກສຳລັບການທົດສອບລະບົບ RAG ກັບ Groq ແລະ FAISS
    """
    
    # ການຕັ້ງຄ່າ
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # ແທນຄ່າດ້ວຍ API key ຈິງ
    
    # ລາຍຊື່ໄຟລ໌ PDF (ຖ້າຕ້ອງການສ້າງ vector store ໃໝ່)
    pdf_files = [ 
        "C:/Users/Dell/Desktop/Finetuing vs RAG.pdf"
    ]
    
    # ກວດສອບວ່າມີ vector store ຢູ່ແລ້ວຫຼືບໍ່
    
    display(Markdown("## 🔍 ກວດສອບ FAISS Vector Store")) 
    loaded_vectorstore = DocumentLoader.load_existing_vector_store(
        embedding_model="D:/model/BAAI-bge-m3", 
        index_name="pdf_documents", 
        persist_directory="./faiss_db"
    )
    
    # ຖ້າບໍ່ມີ vector store, ສ້າງໃໝ່
    if loaded_vectorstore is None:
        display(Markdown("## 📚 Creating new FAISS vector store..."))  
        
        # 1. ໂຫຼດເອກະສານ
        documents = DocumentLoader.load_docs(pdf_files) 
        
        if not documents:
            print("❌ No documents found. Please check your PDF file paths.")
            return
            
        # 2. ເຮັດ chunking ຂໍ້ມູນ 
        display(Markdown("## ✂️ Chunking documents...")) 
        # ເຮັດ chunking ຂໍ້ມູນ
        # ໃຊ້ Model ຂອງ BAAI-bge-m3 ເພື່ອຮັບຄ່າການເຮັດ chunking ຂໍ້ມູນ ເຊີ່ງຜູ້ໃຊ້ແມ່ນສາມາດເລືອກໄດ້ຕາມໃຈເລີຍວ່າຈະ ໃຊ້ Model ຍັງໃນການເຮັດ Embedding ສາມາດໂຫລດຜ່ານ Hugginface ໄດ້ ໂດຍກຳນົດ path ເອງ ສາມາດ ເຂົ້າໄປໃນ Folder Download Model/download-model.ipynb ເພື່ອດາວໂຫລດ Model ຍັງ
        # ກຳນັດຄ່າຕ່າງໆຂອງ chunking ໂດຍ Base on ຈາກເອກະສານ ຖ້າ ມີເອກະສານຫລາຍຫນ້າ ແນະນຳໃຫ້ລອງເພິ່ມຄ່າ chunk_size ແລະ chunk_overlap ເພື່ອຮັບຄ່າທີ່ດີກວ່າ
        chunk_documents = DocumentLoader.chunk_documents_standard(
            documents, 
            chunk_size=500, 
            chunk_overlap=50, 
            tokenizer_model="D:/model/BAAI-bge-m3", 
            max_token_limit=1000
        )
        
        if not chunk_documents:
            print("❌ Failed to chunk documents.")
            return
            
        # 3. ສ້າງ vector store ດ້ວຍ FAISS
        display(Markdown("## 🔄 Creating FAISS vector store...")) 
        # ເຮັດ Embedding ຂໍ້ມູນ
        # ກໍລະນີນີ້ຈະຖ້າດົນແນ່ ເນື່ອງຈາກວ່າ ຈະມີການເອົາ ເອກະສານທີ່ເຮົາ Chunking ມາແປງເປັນ Vector ເພື່ອບັນທືກໃນ FAISS ຖ້າຢາກໃຫ້ໄວ້ ໃຜມີ GPU ແນະນຳໃຫ້ໃຊ້ cuda ແທນ cpu
        loaded_vectorstore = DocumentLoader.create_vector_store(chunk_documents)
        
        if loaded_vectorstore is None:
            print("❌ Failed to create FAISS vector store.")
            return
    
    # ເລີ່ມຕົ້ນລະບົບ RAG ກັບ Groq 
    display(Markdown("## 🚀 Initializing Groq RAG System...")) 
    
    if GROQ_API_KEY == "ໃສ່ Groq API Key ຂອງເຈົ້າທີ່ນີ້":
        print("❌ ກະລຸນາໃສ່ Groq API Key ຂອງເຈົ້າໃນຕົວແປ GROQ_API_KEY")
        print("💡 ສາມາດໄດ້ API key ຟຣີທີ່: https://console.groq.com")
        return
    
    try:
        rag_system = GroqRAGSystem(
            groq_api_key=GROQ_API_KEY,
            model_name="meta-llama/llama-4-maverick-17b-128e-instruct" 
        )
        display(Markdown("## ✅ Groq RAG System initialized successfully !"))  
        
    except Exception as e:
        print(f"❌ Error initializing Groq system: {e}")
        return
    
    # ທົດສອບລະບົບດ້ວຍຄຳຖາມຕົວຢ່າງ
    test_queries = [
        "RAG ກັບ Fine-tuning ມີຄວາມແຕກຕ່າງກັນແນວໃດ ?"
    ]
    
    display(Markdown("## 🧪 ການທົດສອບລະບົບ RAG"))
    display(Markdown("ທົດສອບດ້ວຍຄຳຖາມຕົວຢ່າງ"))
    display(Markdown("---"))
    
    for i, query in enumerate(test_queries, 1):
        print(f"\n📝 ຄຳຖາມທີ່ {i}: {query}")
        print("-" * 40)
        display(Markdown(f"### 📝 ຄຳຖາມທີ່ {i}: {query}"))
        display(Markdown("---"))
        
        # ສົ່ງຄຳຖາມໄປລະບົບ RAG
        result = rag_system.query_documents(
            vector_store=loaded_vectorstore,
            query=query,
            k=5  # ຄົ້ນຫາ 5 ເອກະສານທີ່ກ່ຽວຂ້ອງ
        )
        
        # ສະແດງຜົນລັບ
        display(Markdown("#### 🤖 ຄຳຕອບ:"))
        display(Markdown(f"""
            ```
            {result['answer']}
            ```
        """))
        
        if result['sources']:
            display(Markdown("#### 📚 ແຫຼ່ງຂໍ້ມູນອ້າງອີງ:"))
                
            sources_md = ""
            for j, source in enumerate(result['sources'], 1):
                    sources_md += f"""
                **{j}.** `{source['source_file']}` (ໜ້າ {source['page']}) - ຄວາມຄ້າຍຄື: `{source['similarity']}`
                > {source['content_preview']}...

            """
            display(Markdown(sources_md))
                    
        display(Markdown("---"))
    
    # ໂໝດ interactive ສຳລັບຜູ້ໃຊ້ສາມາດຖາມຄຳຖາມເອງ
    display(Markdown("## 💬 ໂໝດ Interactive - ພິມຄຳຖາມຂອງທ່ານ (ພິມ 'quit' ເພື່ອອອກ")) 
    display(Markdown("---"))
    
    while True:
        try:
            user_query = input("\n❓ ຄຳຖາມຂອງທ່ານ: ").strip()
            
            if user_query.lower() in ['quit', 'exit', 'ອອກ']:
                print("👋 ຂອບໃຈທີ່ໃຊ້ລະບົບ RAG!")
                break
                
            if not user_query:
                print("⚠️ ກະລຸນາໃສ່ຄຳຖາມ")
                continue
            
            display(Markdown(f"### ❓ ຄຳຖາມ: `{user_query}`"))
            
            # ສົ່ງຄຳຖາມໄປລະບົບ RAG
            result = rag_system.query_documents(
                vector_store=loaded_vectorstore,
                query=user_query,
                k=5
            )
            
            # ສະແດງຜົນລັບ
            display(Markdown("#### 🤖 ຄຳຕອບ:"))
            display(Markdown(f"""
                ```
                {result['answer']}
                ```
            """))
            
            # ສະແດງແຫຼ່ງຂໍ້ມູນ (ແບບຫຍໍ້)
            if result['sources']: 
                display(Markdown("#### 📚 ແຫຼ່ງຂໍ້ມູນອ້າງອີງ:"))
                for source in result['sources'][:3]:  # ສະແດງ 3 ແຫຼ່ງທຳອິດ
                    display(Markdown(f"#### • {source['source_file']} (ໜ້າ {source['page']})")) 
            
        except KeyboardInterrupt:
            print("\n\n👋 ຂອບໃຈທີ່ໃຊ້ລະບົບ RAG!")
            break
        except Exception as e:
            print(f"❌ ເກີດຂໍ້ຜິດພາດ: {e}")

In [8]:
if __name__ == "__main__":
    main()

## 🔍 ກວດສອບ FAISS Vector Store

✅ Loaded existing FAISS vector store: pdf_documents
📊 Total vectors: 2


## 🚀 Initializing Groq RAG System...

## ✅ Groq RAG System initialized successfully !

## 🧪 ການທົດສອບລະບົບ RAG

ທົດສອບດ້ວຍຄຳຖາມຕົວຢ່າງ

---


📝 ຄຳຖາມທີ່ 1: RAG ກັບ Fine-tuning ມີຄວາມແຕກຕ່າງກັນແນວໃດ ?
----------------------------------------


### 📝 ຄຳຖາມທີ່ 1: RAG ກັບ Fine-tuning ມີຄວາມແຕກຕ່າງກັນແນວໃດ ?

---


🤖 Processing query: RAG ກັບ Fine-tuning ມີຄວາມແຕກຕ່າງກັນແນວໃດ ?
🔍 Searching for: RAG ກັບ Fine-tuning ມີຄວາມແຕກຕ່າງກັນແນວໃດ ?
🧠 Generating answer with Groq LLM...


#### 🤖 ຄຳຕອບ:


            ```
            ## ຄວາມແຕກຕ່າງລະຫວ່າງ RAG ແລະ Fine-tuning

RAG (Retrieval-Augmented Generation) ແລະ Fine-tuning ແມ່ນສອງວິທີທີ່ໃຊ້ໃນການພັດທະນາແບບຈໍາລອງພາສາໃຫຍ່ (LLM) ສໍາລັບການນໍາໃຊ້ສະເພາະດ້ານ. ຄວາມແຕກຕ່າງຫຼັກລະຫວ່າງພວກມັນຢູ່ໃນວິທີການທີ່ພວກເຂົາຈັດການກັບຂໍ້ມູນແລະການປັບປຸງແບບຈໍາລອງ.

### Fine-tuning

*   **ການປະຕິບັດ**: ການເກັບກຳຂໍ້ມູນຈໍານວນຫຼວງຫຼາຍ, ການເລືອກແບບຈໍາລອງທີ່ເຫມາະສົມ, ການຝຶກອົບຮົມແບບຈໍາລອງ, ແລະການນໍາໄປໃຊ້.
*   **ຂໍ້ດີ**: ການສ້າງຄໍາຕອບແບບ end-to-end.
*   **ຂໍ້ເສຍ**: ຄ່າໃຊ້ຈ່າຍໃນການຝຶກອົບຮົມສູງ, ຄວາມຍາກລໍາໃນການປັບປຸງຄວາມຮູ້.

### RAG

*   **ການປະຕິບັດ**: ການເກັບກຳເອກະສານທີ່ກ່ຽວຂ້ອງ, ການສ້າງຖານຄວາມຮູ້, ການນໍາໃຊ້ແບບຈໍາລອງ, ແລະການພັດທະນາແອັບພລິເຄຊັນ RAG.
*   **ຂໍ້ດີ**: ການປັບປຸງຄວາມຮູ້ແບບຍືດຫຍຸ່ນ, ການລວມຂໍ້ມູນຫຼ້າສຸດ, ຄວາມສາມາດໃນການຂະຫຍາຍ.
*   **ຂໍ້ເສຍ**: ຄວາມຊັກຊ້າໃນການສ້າງຄໍາຕອບ.

### ຄວາມແຕກຕ່າງຫຼັກ

*   Fine-tuning ແມ່ນເຫມາະສົມກັບຂົງເຂດທີ່ຕ້ອງການຄວາມຖືກຕ້ອງແລະຄວາມສອດຄ່ອງຂອງຄວາມຮູ້ສູງ ແລະການປັບປຸງຄວາມຮູ້ແມ່ນຂ້ອນຂ້າງຊ້າ.
*   RAG ແມ່ນເຫມາະສົມກັບຂົງເຂດທີ່ມີການປັບປຸງຄວາມຮູ້ຢ່າງໄວວາແລະຕ້ອງການການຕອບສະຫນອງໄວ.

ອ້າງອີງຈາກເອກະສານ: Finetuing vs RAG.pdf (ໜ້າ0 ແລະ ໜ້າ1) [1](https://example.com/finetuing-vs-rag.pdf)
            ```
        

#### 📚 ແຫຼ່ງຂໍ້ມູນອ້າງອີງ:


                **1.** `Finetuing vs RAG.pdf` (ໜ້າ 0) - ຄວາມຄ້າຍຄື: `0.197`
                > 1 Huawei Confidential
How to Develop a Professional Domain Knowledge Q&A Assistant？
Implementation
Data Preparation: Collect a large amount of text data. These 
data need to be cleaned and annotated to ensure quality.
Model Selection: Choose a suitable pre-trained model.
Model Training: Input the prepared data into the model and 
adjust its parameters.
Model Deployment: Deploy the trained model for inference.
Advantages
End-to-End Generation: The model can directly generate 
answers based on input questions. 
Disadvantages
High Training Costs: Time-consuming and costly by 
requiring a large amount of data and computational resources.
Difficulty in Knowledge Update: Once the model is trained, 
updating the knowledge requires heavy re -fine-tuning.
Fine-tuning RAG
Implementation
Data Preparation: Gather a wide range of documents relevant 
to the professional domain and clean them.
Knowledge Base Construction: Build a structured knowledge 
base that can be efficiently queried by the retrieval module.
Model Deployment: Deploy a LLM as expected (deepseek).
RAG Application Development: Develop a RAG application 
that integrates retrieval and generation to answer user queries.
Advantages
Flexible Knowledge Update: The knowledge base can be 
updated at any time without retraining the model.
Integration of Latest Information: The system can generate 
more accurate and up -to-date answers.
Scalability: It is easy to expand the content and scope of the 
knowledge base.
Disadvantages
Generation Delay: The added retrieval step may slow down 
the system's response time.
The Fine-tuning approach is suitable for fields where knowledge accuracy and consistency are highly valued and knowledge updates
are relatively slow, such as some traditional academic research areas. 
In contrast, the RAG approach is more suitable for fields where knowledge updates rapidly and quick responses are required, such as 
finance, healthcare, and technology....

            
                **2.** `Finetuing vs RAG.pdf` (ໜ້າ 1) - ຄວາມຄ້າຍຄື: `-0.168`
                > 2 Huawei Confidential
ModelEngine: AI Toolchain that Accelerates the Implementation of AI Applications
40% NPU pooling usage
60%+ faster multimodal data cleansing
1.5x online concurrency
1.6x offline throughput
Lower costs Fast application rollout Good inference performance
95% accuracy
50% shorter development latency
Operator 
ecosystem
ModelEngine
Full-process AI 
toolchain
Application enablement
High-precision RAG application 
development and optimization
Open toolchain Open-source framework, supporting third-party operators
Model enablement
Lightweight model inference 
toolchain
Data enablement
Automatic data processing and 
knowledge generation
API API
Training and inference offload acceleration
Cache offload acceleration | Ascend+Kunpeng
heterogeneous computing
Retrieval acceleration
Converged retrieval of multimodal data | Vector 
retrieval acceleration
Low-code toolchain
Self-orchestrated data processing | Modular 
RAG
NPU basic software Container platform Knowledge base storage
Model ecosystem OpenMind HuggingFace Blue Whale Market LangChain LlamaIndex
GPU processor | Ascend NPU processor
Resource enablement
AI task scheduling and XPU pooling...

            

---

## 💬 ໂໝດ Interactive - ພິມຄຳຖາມຂອງທ່ານ (ພິມ 'quit' ເພື່ອອອກ

---

⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃສ່ຄຳຖາມ
⚠️ ກະລຸນາໃ