In [21]:
import os
from typing import List, Optional
from langchain.schema import Document
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer
from langchain.vectorstores import Chroma 
from langchain.embeddings import HuggingFaceEmbeddings
import chromadb
from groq import Groq
from IPython.display import Image, display, Markdown


In [22]:
class DocumentLoader:
    @staticmethod
    def load_docs(file_paths: List[str]) -> List[Document]:
        """
        ‡ªÇ‡∫´‡∫•‡∫î PDF documents ‡ªÇ‡∫î‡∫ç‡ªÉ‡∫ä‡ªâ LangChain PyPDFLoader ‡ªÄ‡∫û‡∫≤‡∫∞‡∫ç‡∫±‡∫á‡ªÅ‡∫≠‡∫±‡∫î‡∫à‡∫±‡∫á‡∫°‡∫±‡∫Å‡ªÄ‡∫û‡∫≤‡∫∞‡∫°‡∫±‡∫ô‡∫™‡ªâ‡∫≤‡∫á Metadata ‡ªÉ‡∫´‡ªâ Auto 

        Metadata ‡∫Ñ‡∫∑‡∫ç‡∫±‡∫á ? 
        Metadata ‡∫Ñ‡∫∑‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡ªÄ‡∫û‡∫µ‡ªà‡∫°‡ªÄ‡∫ï‡∫µ‡∫°‡∫Å‡ªà‡∫Ω‡∫ß‡∫Å‡∫±‡∫ö‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
        ‡ªÅ‡∫ï‡ªà‡∫•‡∫∞ Document ‡∫à‡∫∞‡∫°‡∫µ 2 ‡∫™‡ªà‡∫ß‡∫ô‡∫´‡∫º‡∫±‡∫Å:
        1. page_content: ‡ªÄ‡∫ô‡∫∑‡ªâ‡∫≠‡ªÉ‡∫ô‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫à‡∫¥‡∫á‡ªÜ
        2. metadata: ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫•‡∫≤‡∫ç‡∫•‡∫∞‡∫≠‡∫Ω‡∫î‡∫Å‡ªà‡∫Ω‡∫ß‡∫Å‡∫±‡∫ö‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫ö‡∫ª‡∫á‡∫ö‡∫≠‡∫Å‡∫ß‡ªà‡∫≤ ‡ªÄ‡∫ß‡∫•‡∫≤‡ªÄ‡∫Æ‡∫ª‡∫≤ ‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô ‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ô‡∫±‡ªâ‡∫ô‡∫°‡∫≤‡∫à‡∫≤‡∫Å‡ªÑ‡∫™
        
        Args:
            file_paths (list): List of PDF file paths
        
        Returns:
            List[Document]: List of LangChain Document objects
        """
        
        all_docs = []
        
        for file_path in file_paths:
            if not os.path.exists(file_path):
                print(f"Warning: File {file_path} not found. Skipping...")
                continue
                
            try:
                file_extension = os.path.splitext(file_path)[1].lower()
                
                # Check if file is PDF
                if file_extension != '.pdf':
                    print(f"Warning: {file_path} is not a PDF file. Skipping...")
                    continue
                
                # Load PDF using LangChain PyPDFLoader
                loader = PyPDFLoader(file_path)
                documents = loader.load()
                
                # Add enhanced metadata to all documents
                for doc in documents:
                    if doc.metadata is None:
                        doc.metadata = {}
                        
                    doc.metadata.update({
                        'source_file': os.path.basename(file_path),
                        'file_type': file_extension,
                        'file_path': file_path,
                        'file_size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
                    })
                
                all_docs.extend(documents)
                print(f"‚úÖ Processed PDF: {file_path} ({len(documents)} pages)")
                
            except Exception as e:
                print(f"‚ùå Error processing {file_path}: {str(e)}")
                continue
        
        print(f"üìö Total PDF documents loaded: {len(all_docs)}")
        return all_docs
    
    @staticmethod
    def chunk_documents_standard(
        docs: List[Document], 
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        tokenizer_model: str = "D:/model/BAAI-bge-m3",
        max_token_limit: int = 8192
    ) -> List[Document]:
        """
        ‡ªÉ‡∫ä‡ªâ Lanchain ‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫î‡ªâ‡∫ß‡∫ç ChromaDB  

        Chunk_size: ‡ªÅ‡∫°‡ªà‡∫ô‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡∫à‡∫∞‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫ï‡ªç‡ªà‡∫´‡∫ô‡ªà‡∫ß‡∫ç ‡ªÄ‡∫û‡∫≤‡∫∞‡∫ç‡∫±‡∫á ‡ªÄ‡∫Æ‡∫ª‡∫≤‡∫ö‡ªç‡ªà‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÄ‡∫≠‡∫ª‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫±‡ªâ‡∫á‡ªù‡∫ª‡∫î‡ªÉ‡∫´‡ªâ AI ‡∫ï‡∫≠‡∫ö‡ªÑ‡∫î‡ªâ ‡ªÄ‡∫ô‡∫∑‡ªà‡∫≠‡∫á‡∫à‡∫≤‡∫Å‡∫ö‡∫≤‡∫á‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫°‡∫µ‡∫´‡∫•‡∫≤‡∫ç‡∫´‡∫ô‡ªâ‡∫≤
        Chunk_overlap: ‡ªÅ‡∫°‡ªà‡∫ô‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡∫à‡∫∞‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫ï‡ªç‡ªà‡∫´‡∫ô‡ªà‡∫ß‡∫ç ‡ªÄ‡∫û‡∫≤‡∫∞‡∫ç‡∫±‡∫á ‡ªÄ‡∫Æ‡∫ª‡∫≤‡∫ö‡ªç‡ªà‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÄ‡∫≠‡∫ª‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫±‡ªâ‡∫á‡ªù‡∫ª‡∫î‡ªÉ‡∫´‡ªâ AI ‡∫ï‡∫≠‡∫ö‡ªÑ‡∫î‡ªâ ‡ªÄ‡∫ô‡∫∑‡ªà‡∫≠‡∫á‡∫à‡∫≤‡∫Å‡∫ö‡∫≤‡∫á‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫°‡∫µ‡∫´‡∫•‡∫≤‡∫ç‡∫´‡∫ô‡ªâ‡∫≤
        Tokenizer_model: ‡ªÅ‡∫°‡ªà‡∫ô Model ‡∫ó‡∫µ‡ªà‡ªÄ‡∫Æ‡∫ª‡∫≤‡∫à‡∫∞‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫î‡ªâ‡∫ß‡∫ç ChromaDB
        Max_token_limit: ‡ªÅ‡∫°‡ªà‡∫ô‡∫Å‡∫≤‡∫ô‡ªÅ‡∫ö‡ªà‡∫á‡∫™‡∫±‡∫î‡∫™‡ªà‡∫ß‡∫ô‡ªÉ‡∫´‡ªâ‡ªÄ‡∫´‡∫°‡∫≤‡∫∞‡∫™‡∫ª‡∫°‡∫Å‡∫±‡∫ö chunk_size
        
        Args:
            docs: List of LangChain Document objects
            chunk_size: Target size for each chunk in tokens
            chunk_overlap: Number of overlapping tokens between chunks
            tokenizer_model: Path to tokenizer model
            max_token_limit: Maximum tokens allowed
            
        Returns:
            List of chunked LangChain Document objects
        """
        
        if not docs:
            print("‚ö†Ô∏è  No documents provided for chunking")
            return []
        
        # Load tokenizer
        try:
            tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
            print(f"‚úÖ Loaded tokenizer: {tokenizer_model}")
        except Exception as e:
            print(f"‚ùå Error loading tokenizer: {e}") 
        
        # Validate parameters
        if chunk_size >= max_token_limit:
            chunk_size = max_token_limit - 500  # Safe buffer
            print(f"‚ö†Ô∏è  Adjusted chunk_size to {chunk_size} for safety")
        
        if chunk_overlap >= chunk_size:
            chunk_overlap = chunk_size // 5  # 20% overlap
            print(f"‚ö†Ô∏è  Adjusted chunk_overlap to {chunk_overlap}")
        
        # Create tokenizer-aware text splitter
        text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
            tokenizer=tokenizer,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            strip_whitespace=True,
            separators=[
                "\n\n",      # Paragraph breaks
                "\n",        # Line breaks
                ". ",        # Sentence endings
                "! ",        # Exclamation endings  
                "? ",        # Question endings
                "; ",        # Semicolon breaks
                ", ",        # Comma breaks
                " ",         # Word breaks
                ""           # Character level
            ]
        )
        
        # Split documents
        print(f"üîÑ Chunking {len(docs)} documents...")
        chunked_docs = text_splitter.split_documents(docs)
        
        # Validate token counts and add metadata
        validated_chunks = []
        max_tokens_found = 0
        
        for i, chunk in enumerate(chunked_docs):
            # Count actual tokens
            token_count = len(tokenizer.encode(chunk.page_content))
            max_tokens_found = max(max_tokens_found, token_count)
            
            # Add chunk metadata
            if chunk.metadata is None:
                chunk.metadata = {}
                
            chunk.metadata.update({
                'chunk_id': i,
                'token_count': token_count,
                'char_count': len(chunk.page_content),
                'chunk_method': 'tokenizer_based'
            })
            
            # Skip if too large
            if token_count > max_token_limit:
                print(f"‚ö†Ô∏è  Skipping oversized chunk {i}: {token_count} tokens")
                continue
                
            validated_chunks.append(chunk)
        
        # Print summary
        print(f"‚úÖ Created {len(validated_chunks)} chunks")
        print(f"üìä Max tokens in any chunk: {max_tokens_found}")
        
        return validated_chunks
    
    @staticmethod
    def create_vector_store(
        chunked_docs: List[Document],
        embedding_model: str = "D:/model/BAAI-bge-m3",
        collection_name: str = "pdf_documents",
        persist_directory: str = "./chroma_db",
        batch_size: int = 32
    ) -> Chroma:
        """
        ‡∫™‡ªâ‡∫≤‡∫á Vector Store ‡∫î‡ªâ‡∫ß‡∫ç ChromaDB ‡∫à‡∫≤‡∫Å chunked documents
        
        Embedding_model: ‡ªÅ‡∫°‡ªà‡∫ô Model ‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Æ‡∫±‡∫î Embedding ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫õ‡ªà‡∫Ω‡∫ô‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÄ‡∫õ‡∫±‡∫ô Vector
        Collection_name: ‡ªÅ‡∫°‡ªà‡∫ô‡∫ä‡∫∑‡ªà‡∫Ç‡∫≠‡∫á Collection ‡ªÉ‡∫ô ChromaDB ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫™‡ªâ‡∫≤‡∫á‡∫ï‡∫≤‡∫°‡ªÉ‡∫à ‡∫ó‡∫µ‡ªà‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô ‡ªÅ‡∫ô‡∫∞‡∫ô‡∫≥‡ªÉ‡∫´‡ªâ‡∫™‡ªâ‡∫≤‡∫á‡ªÄ‡∫õ‡∫±‡∫ô Folder ‡∫Ç‡∫≠‡∫á‡ªÉ‡∫ú‡∫°‡∫±‡∫ô ‡ªÅ‡∫•‡∫∞ point ‡ªÑ‡∫õ Folder ‡∫ô‡∫±‡ªà‡∫ô ‡ªÄ‡∫û‡∫≤‡∫∞‡∫ß‡ªà‡∫≤ ChromaDB ‡ªÄ‡∫ß‡∫•‡∫≤‡∫°‡∫±‡∫ô‡∫ö‡∫±‡∫ô‡∫ó‡∫∑‡∫Å‡∫°‡∫±‡∫ô‡∫à‡∫∞‡∫ö‡∫±‡∫ô‡∫ó‡∫∑‡∫Å unique key ‡ªÄ‡∫ä‡∫µ‡ªà‡∫á‡∫°‡∫±‡∫ô‡∫à‡∫∞‡ªÄ‡∫Æ‡∫±‡∫î‡ªÉ‡∫´‡ªâ‡ªÄ‡∫Æ‡∫ª‡∫≤‡∫à‡∫≥‡ªÅ‡∫ô‡∫Å‡∫¢‡∫≤‡∫Å
        Persist_directory: ‡ªÅ‡∫°‡ªà‡∫ô‡ªÇ‡∫ü‡∫•‡ªÄ‡∫î‡∫µ‡∫¢‡∫∏‡ªâ‡∫ö‡∫±‡∫ô‡∫ó‡∫∂‡∫Å ChromaDB
        Batch_size: ‡ªÅ‡∫°‡ªà‡∫ô‡∫à‡∫≥‡∫ô‡∫ß‡∫ô chunks ‡∫ó‡∫µ‡ªà‡ªÄ‡∫Æ‡∫±‡∫î embedding ‡∫ï‡ªç‡ªà‡∫Ñ‡∫±‡ªâ‡∫á ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫õ‡ªâ‡∫≠‡∫á‡∫Å‡∫±‡∫ô memory overflow
        
        Args:
            chunked_docs: List of chunked Document objects
            embedding_model: Path to embedding model
            collection_name: Name for ChromaDB collection
            persist_directory: Directory to save ChromaDB
            batch_size: Number of documents to process at once
            
        Returns:
            Chroma vector store object
        """
        
        if not chunked_docs:
            print("‚ö†Ô∏è  No chunked documents provided")
            return None
        
        # Create embeddings
        try:
            print(f"üîÑ Loading embedding model: {embedding_model}")
            embeddings = HuggingFaceEmbeddings(
                model_name=embedding_model,
                model_kwargs={'device': 'cpu'},  # ‡∫õ‡ªà‡∫Ω‡∫ô‡ªÄ‡∫õ‡∫±‡∫ô 'cuda' ‡∫ñ‡ªâ‡∫≤‡∫°‡∫µ GPU
                encode_kwargs={'normalize_embeddings': True}
            )
            print(f"‚úÖ Loaded embedding model successfully")
        except Exception as e:
            print(f"‚ùå Error loading embedding model: {e}")
            return None
        
        # Create ChromaDB client and collection
        try:
            # ‡∫™‡ªâ‡∫≤‡∫á‡ªÇ‡∫ü‡∫•‡ªÄ‡∫î‡∫µ‡∫ñ‡ªâ‡∫≤‡∫ç‡∫±‡∫á‡∫ö‡ªç‡ªà‡∫°‡∫µ
            os.makedirs(persist_directory, exist_ok=True)
            
            print(f"üîÑ Creating ChromaDB collection: {collection_name}")
            
            # ‡∫•‡∫∑‡∫ö collection ‡ªÄ‡∫Å‡∫ª‡ªà‡∫≤‡∫ñ‡ªâ‡∫≤‡∫°‡∫µ (‡∫õ‡ªâ‡∫≠‡∫á‡∫Å‡∫±‡∫ô‡∫Ç‡ªç‡ªâ‡∫ú‡∫¥‡∫î‡∫û‡∫≤‡∫î)
            try:
                client = chromadb.PersistentClient(path=persist_directory)
                try:
                    client.delete_collection(collection_name)
                    print(f"üóëÔ∏è  Deleted existing collection: {collection_name}")
                except:
                    pass  # Collection ‡∫ö‡ªç‡ªà‡∫°‡∫µ‡∫¢‡∫π‡ªà‡ªÅ‡∫•‡ªâ‡∫ß
            except Exception as e:
                print(f"‚ö†Ô∏è  Warning during cleanup: {e}")
            
            # ‡∫™‡ªâ‡∫≤‡∫á vector store ‡ªÅ‡∫ö‡∫ö batch
            print(f"üîÑ Processing {len(chunked_docs)} documents in batches of {batch_size}")
            
            vector_store = None
            total_processed = 0
            
            for i in range(0, len(chunked_docs), batch_size):
                batch = chunked_docs[i:i + batch_size]
                batch_num = (i // batch_size) + 1
                total_batches = (len(chunked_docs) + batch_size - 1) // batch_size
                
                print(f"üì¶ Processing batch {batch_num}/{total_batches} ({len(batch)} documents)")
                
                try:
                    if vector_store is None:
                        # ‡∫™‡ªâ‡∫≤‡∫á vector store ‡∫ó‡∫≥‡∫≠‡∫¥‡∫î
                        vector_store = Chroma.from_documents(
                            documents=batch,
                            embedding=embeddings,
                            collection_name=collection_name,
                            persist_directory=persist_directory
                        )
                    else:
                        # ‡ªÄ‡∫û‡∫µ‡ªà‡∫° documents ‡ªÉ‡ªù‡ªà‡ªÄ‡∫Ç‡∫ª‡ªâ‡∫≤‡ªÑ‡∫õ
                        vector_store.add_documents(batch)
                    
                    total_processed += len(batch)
                    print(f"‚úÖ Batch {batch_num} completed. Total processed: {total_processed}")
                    
                except Exception as e:
                    print(f"‚ùå Error processing batch {batch_num}: {e}")
                    continue
            
            # ‡∫ö‡∫±‡∫ô‡∫ó‡∫∂‡∫Å‡∫Å‡∫≤‡∫ô‡∫õ‡ªà‡∫Ω‡∫ô‡ªÅ‡∫õ‡∫á
            if vector_store:
                vector_store.persist()
                print(f"üíæ Vector store saved to: {persist_directory}")
                
                collection_count = vector_store._collection.count()
                print(f"üìä Total vectors in collection: {collection_count}")
                print(f"üìö Collection name: {collection_name}")
                
                return vector_store
            else:
                print("‚ùå Failed to create vector store")
                return None
                
        except Exception as e:
            print(f"‚ùå Error creating vector store: {e}")
            return None
        
    @staticmethod
    def load_existing_vector_store(
        embedding_model: str = "D:/model/BAAI-bge-m3",
        collection_name: str = "pdf_documents", 
        persist_directory: str = "./chroma_db"
    ) -> Optional[Chroma]:
        """
        ‡ªÇ‡∫´‡∫º‡∫î Vector Store ‡∫ó‡∫µ‡ªà‡∫°‡∫µ‡∫¢‡∫π‡ªà‡ªÅ‡∫•‡ªâ‡∫ß‡∫à‡∫≤‡∫Å ChromaDB
        
        Args:
            embedding_model: Path to embedding model
            collection_name: Name of ChromaDB collection
            persist_directory: Directory where ChromaDB is saved
            
        Returns:
            Chroma vector store object or None
        """
        
        try:
            # ‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö‡∫ß‡ªà‡∫≤‡∫°‡∫µ‡ªÇ‡∫ü‡∫•‡ªÄ‡∫î‡∫µ‡∫´‡∫º‡∫∑‡∫ö‡ªç‡ªà
            if not os.path.exists(persist_directory):
                print(f"‚ùå Directory not found: {persist_directory}")
                return None
            
            # ‡ªÇ‡∫´‡∫º‡∫î embedding model
            embeddings = HuggingFaceEmbeddings(
                model_name=embedding_model,
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )
            
            # ‡ªÇ‡∫´‡∫º‡∫î vector store
            vector_store = Chroma(
                collection_name=collection_name,
                embedding_function=embeddings,
                persist_directory=persist_directory
            )
            
            # ‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö‡∫ß‡ªà‡∫≤‡∫°‡∫µ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫´‡∫º‡∫∑‡∫ö‡ªç‡ªà
            collection_count = vector_store._collection.count()
            if collection_count > 0:
                print(f"‚úÖ Loaded existing vector store: {collection_name}")
                print(f"üìä Total vectors: {collection_count}")
                return vector_store
            else:
                print(f"‚ö†Ô∏è  Collection '{collection_name}' is empty")
                return None
                
        except Exception as e:
            print(f"‚ùå Error loading vector store: {e}")
            return None
        
    @staticmethod
    def search_similar_documents(
        vector_store: Chroma,
        query: str,
        k: int = 5
    ) -> List[tuple]:
        """
        ‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Ñ‡ªâ‡∫≤‡∫ç‡∫Ñ‡∫∑‡∫Å‡∫±‡∫ô
        vector_store: ‡ªÅ‡∫°‡ªà‡∫ô‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡ªÄ‡∫Æ‡∫ª‡∫≤‡ªÄ‡∫Ñ‡∫µ‡∫ç‡∫™‡ªâ‡∫≤‡∫á Vector Store ‡ªÉ‡∫ô ./chroma_db
        query: ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤
        k: ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫ú‡∫ª‡∫ô‡∫•‡∫±‡∫ö‡∫ó‡∫µ‡ªà‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô
            
        Returns:
            List of tuples (document, score)
        """
        
        try:
            print(f"üîç Searching for: {query}")
            
            # ‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡∫î‡ªâ‡∫ß‡∫ç score
            results = vector_store.similarity_search_with_score(
                query=query,
                k=k
            )
            
            # ‡∫™‡∫∞‡ªÅ‡∫î‡∫á‡∫ú‡∫ª‡∫ô‡∫•‡∫±‡∫ö
            # for i, (doc, score) in enumerate(results):
            #     similarity = 1 - score  # ‡∫õ‡ªà‡∫Ω‡∫ô distance ‡ªÄ‡∫õ‡∫±‡∫ô similarity
            #     print(f"\nüìÑ Result {i+1} (Similarity: {similarity:.3f}):")
            #     print(f"   üìÅ Source: {doc.metadata.get('source_file', 'Unknown')}")
            #     print(f"   üìÑ Page: {doc.metadata.get('page', 'Unknown')}")
            #     print(f"   üîñ Chunk: {doc.metadata.get('chunk_id', 'Unknown')}")
            #     print(f"   üìù Content preview: {doc.page_content[:100]}...")
            
            return results
            
        except Exception as e:
            print(f"‚ùå Error during search: {e}")
            return []

In [23]:
class GroqRAGSystem:
    """
    ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG ‡∫õ‡∫∞‡∫™‡∫ª‡∫°‡∫Å‡∫±‡∫ö Groq LLM ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫ï‡∫≠‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
    """
    
    def __init__(self, groq_api_key: str, model_name: str = "openai/gpt-oss-120b"):
        """
        ‡ªÄ‡∫•‡∫µ‡ªà‡∫°‡∫ï‡∫ª‡ªâ‡∫ô GroqRAGSystem
        
        Args:
            groq_api_key: Groq API key (‡∫ï‡ªâ‡∫≠‡∫á‡ªÑ‡∫õ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡∫ó‡∫µ‡ªà https://console.groq.com)
            model_name: ‡∫ä‡∫∑‡ªà Model ‡∫ó‡∫µ‡ªà‡∫à‡∫∞‡ªÉ‡∫ä‡ªâ (‡∫ç‡∫ª‡∫Å‡∫ï‡∫ª‡∫ß‡∫¢‡ªà‡∫≤‡∫á: openai/gpt-oss-120b)
        """
        self.client = Groq(api_key=groq_api_key)
        self.model_name = model_name
        
    def create_context_from_documents(self, search_results: List[tuple]) -> str:
        """
        ‡∫™‡ªâ‡∫≤‡∫á context ‡∫à‡∫≤‡∫Å‡∫ú‡∫ª‡∫ô‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
        
        Args:
            search_results: List of tuples (document, score) ‡∫à‡∫≤‡∫Å vector search
            
        Returns:
            ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° context ‡∫™‡∫≥‡∫•‡∫±‡∫ö LLM
        """
        if not search_results:
            return "‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á"
            
        context_parts = []
        for i, (doc, score) in enumerate(search_results):
            similarity = 1 - score
            source_info = f"‡ªÅ‡∫´‡∫º‡ªà‡∫á: {doc.metadata.get('source_file', 'Unknown')} (‡ªú‡ªâ‡∫≤ {doc.metadata.get('page', 'Unknown')})"
            content = doc.page_content.strip()
            
            context_parts.append(f"‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô {i+1} (‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Ñ‡ªâ‡∫≤‡∫ç‡∫Ñ‡∫∑: {similarity:.3f}):\n{source_info}\n{content}\n")
            
        return "\n---\n".join(context_parts)
    
    def generate_answer(self, query: str, context: str) -> str:
        """
        ‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡ªÇ‡∫î‡∫ç‡ªÉ‡∫ä‡ªâ Groq LLM ‡∫û‡ªâ‡∫≠‡∫° context ‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
        
        Args:
            query: ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ú‡∫π‡ªâ‡ªÉ‡∫ä‡ªâ
            context: Context ‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
            
        Returns:
            ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡∫à‡∫≤‡∫Å LLM
        """
        
        # ‡∫™‡ªâ‡∫≤‡∫á prompt ‡∫™‡∫≥‡∫•‡∫±‡∫ö RAG
        prompt = f"""‡∫ó‡ªà‡∫≤‡∫ô‡ªÄ‡∫õ‡∫±‡∫ô AI Assistant ‡∫ó‡∫µ‡ªà‡∫ä‡ªà‡∫Ω‡∫ß‡∫ä‡∫≤‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫ï‡∫≠‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÇ‡∫î‡∫ç‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫´‡ªâ‡∫°‡∫≤.

‡∫Ñ‡∫≥‡ªÅ‡∫ô‡∫∞‡∫ô‡∫≥:
1. ‡∫ï‡∫≠‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÇ‡∫î‡∫ç‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫´‡ªâ‡∫°‡∫≤‡ªÄ‡∫ó‡∫ª‡ªà‡∫≤‡∫ô‡∫±‡ªâ‡∫ô
2. ‡∫ñ‡ªâ‡∫≤‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡ªÉ‡∫ô‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô, ‡ªÉ‡∫´‡ªâ‡∫ö‡∫≠‡∫Å‡∫ß‡ªà‡∫≤‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á
3. ‡∫•‡∫∞‡∫ö‡∫∏‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫ï‡∫≠‡∫ö
4. ‡∫ï‡∫≠‡∫ö‡ªÄ‡∫õ‡∫±‡∫ô‡∫û‡∫≤‡∫™‡∫≤‡∫•‡∫≤‡∫ß ‡ªÅ‡∫•‡∫∞ ‡ªÉ‡∫´‡ªâ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡∫ó‡∫µ‡ªà‡∫ä‡∫±‡∫î‡ªÄ‡∫à‡∫ô, ‡∫•‡∫∞‡∫≠‡∫Ω‡∫î
5. ‡∫ï‡∫≠‡∫ö‡ªÉ‡∫´‡ªâ‡ªÄ‡∫õ‡∫±‡∫ô Format markdown

‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á:
{context}

‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°: {query}

‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö:"""

        try:
            # ‡∫™‡∫ª‡ªà‡∫á request ‡ªÑ‡∫õ Groq
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                model=self.model_name,
                temperature=0.1,  # ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫™‡ªâ‡∫≤‡∫á‡∫™‡∫±‡∫ô‡∫ï‡ªà‡∫≥ ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫°‡ªà‡∫ô‡∫ç‡∫≥  ‡∫Ç‡∫∂‡ªâ‡∫ô‡∫ô‡∫≥ Model ‡ªÄ‡∫û‡∫≤‡∫∞‡∫Ñ‡ªà‡∫≤ temperature ‡ªÅ‡∫ï‡ªà‡∫•‡∫∞‡ªÄ‡∫à‡∫ª‡ªâ‡∫≤‡∫°‡∫±‡∫ô‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô
                max_tokens=1500,  # ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô tokens ‡∫™‡∫π‡∫á‡∫™‡∫∏‡∫î 
            )
            
            answer = chat_completion.choices[0].message.content
            return answer
            
        except Exception as e:
            return f"‚ùå ‡ªÄ‡∫Å‡∫µ‡∫î‡∫Ç‡ªç‡ªâ‡∫ú‡∫¥‡∫î‡∫û‡∫≤‡∫î‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö: {str(e)}"
    
    def query_documents(self, vector_store: Chroma, query: str, k: int = 5) -> dict:
        """
        ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÅ‡∫ö‡∫ö‡∫™‡∫ª‡∫°‡∫ö‡∫π‡∫ô‡∫à‡∫≤‡∫Å‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫à‡∫ª‡∫ô‡ªÄ‡∫ñ‡∫µ‡∫á‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö
        
        Args:
            vector_store: ChromaDB vector store
            query: ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ú‡∫π‡ªâ‡ªÉ‡∫ä‡ªâ
            k: ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫à‡∫∞‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤
            
        Returns:
            dict ‡∫ó‡∫µ‡ªà‡∫õ‡∫∞‡∫Å‡∫≠‡∫ö‡∫î‡ªâ‡∫ß‡∫ç answer, context, ‡ªÅ‡∫•‡∫∞ sources
        """
        
        print(f"\nü§ñ Processing query: {query}")
        
        # 1. ‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á
        search_results = DocumentLoader.search_similar_documents(
            vector_store=vector_store,
            query=query,
            k=k
        )
        
        if not search_results:
            return {
                "answer": "‚ùå ‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á‡∫Å‡∫±‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ó‡ªà‡∫≤‡∫ô",
                "context": "",
                "sources": []
            }
        
        # 2. ‡∫™‡ªâ‡∫≤‡∫á context ‡∫à‡∫≤‡∫Å‡∫ú‡∫ª‡∫ô‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤
        context = self.create_context_from_documents(search_results)
        
        # 3. ‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡∫î‡ªâ‡∫ß‡∫ç LLM
        print("üß† Generating answer with Groq LLM...")
        answer = self.generate_answer(query, context)
        
        # 4. ‡∫™‡ªâ‡∫≤‡∫á‡∫•‡∫≤‡∫ç‡∫ä‡∫∑‡ªà‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô
        sources = []
        for doc, score in search_results:
            similarity = 1 - score
            sources.append({
                "source_file": doc.metadata.get('source_file', 'Unknown'),
                "page": doc.metadata.get('page', 'Unknown'),
                "similarity": f"{similarity:.3f}",
                "content_preview": doc.page_content
            })
        
        return {
            "answer": answer,
            "context": context,
            "sources": sources
        }

In [24]:
def main():
    """
    ‡∫ü‡∫±‡∫á‡∫ä‡∫±‡ªà‡∫ô‡∫´‡∫º‡∫±‡∫Å‡∫™‡∫≥‡∫•‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG ‡∫Å‡∫±‡∫ö Groq
    """
    
    # ‡∫Å‡∫≤‡∫ô‡∫ï‡∫±‡ªâ‡∫á‡∫Ñ‡ªà‡∫≤
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # ‡ªÅ‡∫ó‡∫ô‡∫Ñ‡ªà‡∫≤‡∫î‡ªâ‡∫ß‡∫ç API key ‡∫à‡∫¥‡∫á
    
    # ‡∫•‡∫≤‡∫ç‡∫ä‡∫∑‡ªà‡ªÑ‡∫ü‡∫•‡ªå PDF (‡∫ñ‡ªâ‡∫≤‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á vector store ‡ªÉ‡ªù‡ªà)
    pdf_files = [ 
        "C:/Users/Dell/Desktop/Finetuing vs RAG.pdf"
    ]
    
    # ‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö‡∫ß‡ªà‡∫≤‡∫°‡∫µ vector store ‡∫¢‡∫π‡ªà‡ªÅ‡∫•‡ªâ‡∫ß‡∫´‡∫º‡∫∑‡∫ö‡ªç‡ªà
    
    display(Markdown("## üîç ‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö Vector Store")) 
    loaded_vectorstore = DocumentLoader.load_existing_vector_store(
        embedding_model="D:/model/BAAI-bge-m3", 
        collection_name="pdf_documents", 
        persist_directory="./chroma_db"
    )
    
    # ‡∫ñ‡ªâ‡∫≤‡∫ö‡ªç‡ªà‡∫°‡∫µ vector store, ‡∫™‡ªâ‡∫≤‡∫á‡ªÉ‡ªù‡ªà
    if loaded_vectorstore is None:
        display(Markdown("## üìö Creating new vector store..."))  
        
        # 1. ‡ªÇ‡∫´‡∫º‡∫î‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô
        documents = DocumentLoader.load_docs(pdf_files) 
        
        if not documents:
            print("‚ùå No documents found. Please check your PDF file paths.")
            return
            
        # 2. ‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô 
        display(Markdown("## ‚úÇÔ∏è Chunking documents...")) 
        # ‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô
        # ‡ªÉ‡∫ä‡ªâ Model ‡∫Ç‡∫≠‡∫á BAAI-bge-m3 ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Æ‡∫±‡∫ö‡∫Ñ‡ªà‡∫≤‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Æ‡∫±‡∫î chunking ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô ‡ªÄ‡∫ä‡∫µ‡ªà‡∫á‡∫ú‡∫π‡ªâ‡ªÉ‡∫ä‡ªâ‡ªÅ‡∫°‡ªà‡∫ô‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÄ‡∫•‡∫∑‡∫≠‡∫Å‡ªÑ‡∫î‡ªâ‡∫ï‡∫≤‡∫°‡ªÉ‡∫à‡ªÄ‡∫•‡∫µ‡∫ç‡∫ß‡ªà‡∫≤‡∫à‡∫∞ ‡ªÉ‡∫ä‡ªâ Model ‡∫ç‡∫±‡∫á‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Æ‡∫±‡∫î Embedding ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÇ‡∫´‡∫•‡∫î‡∫ú‡ªà‡∫≤‡∫ô Hugginface ‡ªÑ‡∫î‡ªâ ‡ªÇ‡∫î‡∫ç‡∫Å‡∫≥‡∫ô‡∫ª‡∫î path ‡ªÄ‡∫≠‡∫á ‡∫™‡∫≤‡∫°‡∫≤‡∫î ‡ªÄ‡∫Ç‡∫ª‡ªâ‡∫≤‡ªÑ‡∫õ‡ªÉ‡∫ô Folder Download Model/download-model.ipynb ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫î‡∫≤‡∫ß‡ªÇ‡∫´‡∫•‡∫î Model ‡∫ç‡∫±‡∫á
        # ‡∫Å‡∫≥‡∫ô‡∫±‡∫î‡∫Ñ‡ªà‡∫≤‡∫ï‡ªà‡∫≤‡∫á‡ªÜ‡∫Ç‡∫≠‡∫á chunking ‡ªÇ‡∫î‡∫ç Base on ‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô ‡∫ñ‡ªâ‡∫≤ ‡∫°‡∫µ‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫´‡∫•‡∫≤‡∫ç‡∫´‡∫ô‡ªâ‡∫≤ ‡ªÅ‡∫ô‡∫∞‡∫ô‡∫≥‡ªÉ‡∫´‡ªâ‡∫•‡∫≠‡∫á‡ªÄ‡∫û‡∫¥‡ªà‡∫°‡∫Ñ‡ªà‡∫≤ chunk_size ‡ªÅ‡∫•‡∫∞ chunk_overlap ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Æ‡∫±‡∫ö‡∫Ñ‡ªà‡∫≤‡∫ó‡∫µ‡ªà‡∫î‡∫µ‡∫Å‡∫ß‡ªà‡∫≤
        chunk_documents = DocumentLoader.chunk_documents_standard(
            documents, 
            chunk_size=500, 
            chunk_overlap=50, 
            tokenizer_model="D:/model/BAAI-bge-m3", 
            max_token_limit=1000
        )
        
        if not chunk_documents:
            print("‚ùå Failed to chunk documents.")
            return
            
        # 3. ‡∫™‡ªâ‡∫≤‡∫á vector store 
        display(Markdown("## üîÑ Creating vector store...")) 
        # ‡ªÄ‡∫Æ‡∫±‡∫î Embedding ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô
        # ‡∫Å‡ªç‡∫•‡∫∞‡∫ô‡∫µ‡∫ô‡∫µ‡ªâ‡∫à‡∫∞‡∫ñ‡ªâ‡∫≤‡∫î‡∫ª‡∫ô‡ªÅ‡∫ô‡ªà ‡ªÄ‡∫ô‡∫∑‡ªà‡∫≠‡∫á‡∫à‡∫≤‡∫Å‡∫ß‡ªà‡∫≤ ‡∫à‡∫∞‡∫°‡∫µ‡∫Å‡∫≤‡∫ô‡ªÄ‡∫≠‡∫ª‡∫≤ ‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÄ‡∫Æ‡∫ª‡∫≤ Chunking ‡∫°‡∫≤‡ªÅ‡∫õ‡∫á‡ªÄ‡∫õ‡∫±‡∫ô Vector ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫ö‡∫±‡∫ô‡∫ó‡∫∑‡∫Å‡ªÉ‡∫ô ChromaDB ‡∫ñ‡ªâ‡∫≤‡∫¢‡∫≤‡∫Å‡ªÉ‡∫´‡ªâ‡ªÑ‡∫ß‡ªâ ‡ªÉ‡∫ú‡∫°‡∫µ GPU ‡ªÅ‡∫ô‡∫∞‡∫ô‡∫≥‡ªÉ‡∫´‡ªâ‡ªÉ‡∫ä‡ªâ cuda ‡ªÅ‡∫ó‡∫ô cpu
        loaded_vectorstore = DocumentLoader.create_vector_store(chunk_documents)
        
        if loaded_vectorstore is None:
            print("‚ùå Failed to create vector store.")
            return
    
    # ‡ªÄ‡∫•‡∫µ‡ªà‡∫°‡∫ï‡∫ª‡ªâ‡∫ô‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG ‡∫Å‡∫±‡∫ö Groq 
    display(Markdown("## üöÄ Initializing Groq RAG System...")) 
    
    if GROQ_API_KEY == "‡ªÉ‡∫™‡ªà Groq API Key ‡∫Ç‡∫≠‡∫á‡ªÄ‡∫à‡∫ª‡ªâ‡∫≤‡∫ó‡∫µ‡ªà‡∫ô‡∫µ‡ªâ":
        print("‚ùå ‡∫Å‡∫∞‡∫•‡∫∏‡∫ô‡∫≤‡ªÉ‡∫™‡ªà Groq API Key ‡∫Ç‡∫≠‡∫á‡ªÄ‡∫à‡∫ª‡ªâ‡∫≤‡ªÉ‡∫ô‡∫ï‡∫ª‡∫ß‡ªÅ‡∫õ GROQ_API_KEY")
        print("üí° ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÑ‡∫î‡ªâ API key ‡∫ü‡∫£‡∫µ‡∫ó‡∫µ‡ªà: https://console.groq.com")
        return
    
    try:
        rag_system = GroqRAGSystem(
            groq_api_key=GROQ_API_KEY,
            model_name="openai/gpt-oss-120b" 
        )
        display(Markdown("## ‚úÖ Groq RAG System initialized successfully !"))  
        
    except Exception as e:
        print(f"‚ùå Error initializing Groq system: {e}")
        return
    
    # ‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫•‡∫∞‡∫ö‡∫ª‡∫ö‡∫î‡ªâ‡∫ß‡∫ç‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ï‡∫ª‡∫ß‡∫¢‡ªà‡∫≤‡∫á
    test_queries = [
        "RAG ‡∫Å‡∫±‡∫ö Fine-tuning ‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ?"
    ]
    
    display(Markdown("## üß™ ‡∫Å‡∫≤‡∫ô‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG"))
    display(Markdown("‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫î‡ªâ‡∫ß‡∫ç‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ï‡∫ª‡∫ß‡∫¢‡ªà‡∫≤‡∫á 4 ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°"))
    display(Markdown("---"))
    
    for i, query in enumerate(test_queries, 1):
        print(f"\nüìù ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà {i}: {query}")
        print("-" * 40)
        display(Markdown(f"### üìù ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà {i}: {query}"))
        display(Markdown("---"))
        
        # ‡∫™‡∫ª‡ªà‡∫á‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÑ‡∫õ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG
        result = rag_system.query_documents(
            vector_store=loaded_vectorstore,
            query=query,
            k=5  # ‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤ 5 ‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á
        )
        
        # ‡∫™‡∫∞‡ªÅ‡∫î‡∫á‡∫ú‡∫ª‡∫ô‡∫•‡∫±‡∫ö
        display(Markdown("#### ü§ñ ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö:"))
        display(Markdown(f"""
            ```
            {result['answer']}
            ```
        """))
        
        if result['sources']:
            display(Markdown("#### üìö ‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á:"))
                
            sources_md = ""
            for j, source in enumerate(result['sources'], 1):
                    sources_md += f"""
                **{j}.** `{source['source_file']}` (‡ªú‡ªâ‡∫≤ {source['page']}) - ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Ñ‡ªâ‡∫≤‡∫ç‡∫Ñ‡∫∑: `{source['similarity']}`
                > {source['content_preview']}...

            """
            display(Markdown(sources_md))
                    
        display(Markdown("---"))
    
    # ‡ªÇ‡ªù‡∫î interactive ‡∫™‡∫≥‡∫•‡∫±‡∫ö‡∫ú‡∫π‡ªâ‡ªÉ‡∫ä‡ªâ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫ñ‡∫≤‡∫°‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÄ‡∫≠‡∫á
    display(Markdown("## üí¨ ‡ªÇ‡ªù‡∫î Interactive - ‡∫û‡∫¥‡∫°‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ó‡ªà‡∫≤‡∫ô (‡∫û‡∫¥‡∫° 'quit' ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫≠‡∫≠‡∫Å")) 
    display(Markdown("---"))
    
    while True:
        try:
            user_query = input("\n‚ùì ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ó‡ªà‡∫≤‡∫ô: ").strip()
            
            if user_query.lower() in ['quit', 'exit', '‡∫≠‡∫≠‡∫Å']:
                print("üëã ‡∫Ç‡∫≠‡∫ö‡ªÉ‡∫à‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG!")
                break
                
            if not user_query:
                print("‚ö†Ô∏è ‡∫Å‡∫∞‡∫•‡∫∏‡∫ô‡∫≤‡ªÉ‡∫™‡ªà‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°")
                continue
            
            display(Markdown(f"### ‚ùì ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°: `{user_query}`"))
            
            # ‡∫™‡∫ª‡ªà‡∫á‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÑ‡∫õ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG
            result = rag_system.query_documents(
                vector_store=loaded_vectorstore,
                query=user_query,
                k=5
            )
            
            # ‡∫™‡∫∞‡ªÅ‡∫î‡∫á‡∫ú‡∫ª‡∫ô‡∫•‡∫±‡∫ö
            display(Markdown("#### ü§ñ ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö:"))
            display(Markdown(f"""
                ```
                {result['answer']}
                ```
            """))
            
            # ‡∫™‡∫∞‡ªÅ‡∫î‡∫á‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô (‡ªÅ‡∫ö‡∫ö‡∫´‡∫ç‡ªç‡ªâ)
            if result['sources']: 
                display(Markdown("#### üìö ‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á:"))
                for source in result['sources'][:3]:  # ‡∫™‡∫∞‡ªÅ‡∫î‡∫á 3 ‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫ó‡∫≥‡∫≠‡∫¥‡∫î
                    display(Markdown(f"#### ‚Ä¢ {source['source_file']} (‡ªú‡ªâ‡∫≤ {source['page']})")) 
            
        except KeyboardInterrupt:
            print("\n\nüëã ‡∫Ç‡∫≠‡∫ö‡ªÉ‡∫à‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG!")
            break
        except Exception as e:
            print(f"‚ùå ‡ªÄ‡∫Å‡∫µ‡∫î‡∫Ç‡ªç‡ªâ‡∫ú‡∫¥‡∫î‡∫û‡∫≤‡∫î: {e}")

In [25]:
if __name__ == "__main__":
    main()

## üîç ‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö Vector Store

‚úÖ Loaded existing vector store: pdf_documents
üìä Total vectors: 2


## üöÄ Initializing Groq RAG System...

## ‚úÖ Groq RAG System initialized successfully !

## üß™ ‡∫Å‡∫≤‡∫ô‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG

‡∫ó‡∫ª‡∫î‡∫™‡∫≠‡∫ö‡∫î‡ªâ‡∫ß‡∫ç‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ï‡∫ª‡∫ß‡∫¢‡ªà‡∫≤‡∫á 4 ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°

---


üìù ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà 1: RAG ‡∫Å‡∫±‡∫ö Fine-tuning ‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ?
----------------------------------------


### üìù ‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà 1: RAG ‡∫Å‡∫±‡∫ö Fine-tuning ‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ?

---


ü§ñ Processing query: RAG ‡∫Å‡∫±‡∫ö Fine-tuning ‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ?
üîç Searching for: RAG ‡∫Å‡∫±‡∫ö Fine-tuning ‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫Å‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ?
üß† Generating answer with Groq LLM...


#### ü§ñ ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö:


            ```
            ## ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫ï‡∫Å‡∫ï‡ªà‡∫≤‡∫á‡∫•‡∫∞‡∫´‡∫ß‡ªà‡∫≤‡∫á **RAG (Retrieval‚ÄëAugmented Generation)** ‡ªÅ‡∫•‡∫∞ **Fine‚Äëtuning**  

| ‡∫à‡∫∏‡∫î‡∫™‡∫≥‡∫Ñ‡∫±‡∫ô | **Fine‚Äëtuning** | **RAG** |
|---|---|---|
| **‡∫ß‡∫¥‡∫ó‡∫µ‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Æ‡∫π‡ªâ** | ‡∫ù‡∫∂‡∫Å‡∫≠‡∫ª‡∫ö‡∫Æ‡∫ª‡∫°‡ªÇ‡∫ï‡ªÅ‡∫ö‡∫ö‡∫û‡∫Ω‡∫á‡∫î‡ªà‡∫ß‡∫ô‡∫î‡ªâ‡∫ß‡∫ç‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡∫ß‡∫î‡∫™‡∫≠‡∫ö‡ªÅ‡∫•‡∫∞‡∫≠‡∫±‡∫ô‡∫ï‡∫¥‡∫î‡∫ï‡∫≤‡∫° (end‚Äëto‚Äëend) | ‡∫™‡ªâ‡∫≤‡∫á **knowledge base** ‡∫ó‡∫µ‡ªà‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö (retrieval) ‡ªÅ‡∫•‡∫∞ ‡∫™‡ªâ‡∫≤‡∫á‡∫ú‡∫ª‡∫ô‡∫ú‡∫∞‡∫•‡∫¥‡∫î (generation) ‡∫û‡ªâ‡∫≠‡∫°‡∫Å‡∫±‡∫ô |
| **‡∫Å‡∫≤‡∫ô‡∫≠‡∫±‡∫ö‡ªÄ‡∫î‡∫î‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô** | ‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô **re‚Äëfine‚Äëtuning** ‡∫ó‡∫±‡∫á‡ªù‡∫ª‡∫î‡∫ï‡ªç‡ªà‡ªÄ‡∫°‡∫∑‡ªà‡∫≠‡∫°‡∫µ‡∫Å‡∫≤‡∫ô‡∫≠‡∫±‡∫ö‡ªÄ‡∫î‡∫î‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô (‡∫à‡ªà‡∫≤‡∫ç‡∫Ñ‡ªà‡∫≤‡∫™‡∫π‡∫á)** | ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫≠‡∫±‡∫ö‡ªÄ‡∫î‡∫î **knowledge base** ‡ªÇ‡∫î‡∫ç‡∫ö‡ªç‡ªà‡∫à‡ªç‡∫≤‡ªÄ‡∫õ‡∫±‡∫ô‡∫≠‡∫±‡∫ö‡ªÇ‡∫ï‡ªÅ‡∫ö‡∫ö‡∫≠‡∫±‡∫î‡∫ï‡∫∞‡∫û‡∫±‡∫ô‡∫≠‡∫±‡∫î‡∫ï‡∫∞‡∫û‡∫±‡∫ô (‡∫≠‡∫±‡∫ö‡ªÄ‡∫î‡∫î‡ªÑ‡∫î‡ªâ‡∫ó‡∫±‡∫ô‡∫ó‡∫µ) |
| **‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÅ‡∫à‡ªâ‡∫á‡∫Ç‡∫≠‡∫á‡∫Å‡∫≤‡∫ô‡∫ï‡∫≠‡∫ö** | ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡∫ó‡∫±‡∫ô‡∫ó‡∫µ‡ªà‡∫≠‡∫±‡∫î‡∫ï‡∫∞‡∫û‡∫±‡∫ô‡∫î‡ªà‡∫ß‡∫ô (‡∫ö‡ªç‡ªà‡∫°‡∫µ‡∫Ç‡∫±‡ªâ‡∫ô‡∫ï‡∫≠‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö) | ‡∫°‡∫µ **generation delay** ‡ªÄ‡∫û‡∫≤‡∫∞‡∫ï‡ªâ‡∫≠‡∫á‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫à‡∫≤‡∫Å knowledge base ‡∫Å‡ªà‡∫≠‡∫ô‡∫à‡∫∞‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö |
| **‡∫Ñ‡ªà‡∫≤‡ªÉ‡∫ä‡ªâ‡∫à‡ªà‡∫≤‡∫ç‡∫Å‡∫≤‡∫ô‡∫ù‡∫∂‡∫Å** | ‡∫ï‡ªâ‡∫≠‡∫á‡∫Å‡∫≤‡∫ô **‡∫ä‡ªà‡∫ß‡∫ç‡∫™‡∫∞‡∫´‡∫º‡∫∏‡∫ö‡∫Å‡∫≤‡∫ô‡∫ù‡∫∂‡∫Å** ‡∫ó‡∫µ‡ªà‡∫Å‡∫ß‡ªà‡∫≤‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫´‡∫º‡∫≤‡∫ç ‡ªÅ‡∫•‡∫∞ ‡∫Å‡∫≤‡∫ô‡∫Ñ‡∫¥‡∫î‡∫Ñ‡ªà‡∫≤‡∫™‡∫π‡∫á (‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö) | ‡∫ö‡ªç‡ªà‡∫à‡ªç‡∫≤‡ªÄ‡∫õ‡∫±‡∫ô‡∫Å‡∫≤‡∫ô‡∫ù‡∫∂‡∫Å‡∫≠‡∫±‡∫î‡∫ï‡∫∞‡∫û‡∫±‡∫ô‡∫ä‡ªà‡∫ß‡∫ç‡∫™‡∫∞‡∫´‡∫º‡∫∏‡∫ö‡∫Å‡∫≤‡∫ô‡∫≠‡∫±‡∫ö‡ªÄ‡∫î‡∫î‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫Å‡ªà‡∫≠‡∫ô‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å‡∫±‡∫ö‡∫Å‡∫≤‡∫ô‡∫î‡∫±‡∫ö‡∫Å
            ```
        

#### üìö ‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á:


                **1.** `Finetuing vs RAG.pdf` (‡ªú‡ªâ‡∫≤ 0) - ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Ñ‡ªâ‡∫≤‡∫ç‡∫Ñ‡∫∑: `0.197`
                > 1 Huawei Confidential
How to Develop a Professional Domain Knowledge Q&A AssistantÔºü
Implementation
Data Preparation: Collect a large amount of text data. These 
data need to be cleaned and annotated to ensure quality.
Model Selection: Choose a suitable pre-trained model.
Model Training: Input the prepared data into the model and 
adjust its parameters.
Model Deployment: Deploy the trained model for inference.
Advantages
End-to-End Generation: The model can directly generate 
answers based on input questions. 
Disadvantages
High Training Costs: Time-consuming and costly by 
requiring a large amount of data and computational resources.
Difficulty in Knowledge Update: Once the model is trained, 
updating the knowledge requires heavy re -fine-tuning.
Fine-tuning RAG
Implementation
Data Preparation: Gather a wide range of documents relevant 
to the professional domain and clean them.
Knowledge Base Construction: Build a structured knowledge 
base that can be efficiently queried by the retrieval module.
Model Deployment: Deploy a LLM as expected (deepseek).
RAG Application Development: Develop a RAG application 
that integrates retrieval and generation to answer user queries.
Advantages
Flexible Knowledge Update: The knowledge base can be 
updated at any time without retraining the model.
Integration of Latest Information: The system can generate 
more accurate and up -to-date answers.
Scalability: It is easy to expand the content and scope of the 
knowledge base.
Disadvantages
Generation Delay: The added retrieval step may slow down 
the system's response time.
The Fine-tuning approach is suitable for fields where knowledge accuracy and consistency are highly valued and knowledge updates
are relatively slow, such as some traditional academic research areas. 
In contrast, the RAG approach is more suitable for fields where knowledge updates rapidly and quick responses are required, such as 
finance, healthcare, and technology....

            
                **2.** `Finetuing vs RAG.pdf` (‡ªú‡ªâ‡∫≤ 1) - ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Ñ‡ªâ‡∫≤‡∫ç‡∫Ñ‡∫∑: `-0.168`
                > 2 Huawei Confidential
ModelEngine: AI Toolchain that Accelerates the Implementation of AI Applications
40% NPU pooling usage
60%+ faster multimodal data cleansing
1.5x online concurrency
1.6x offline throughput
Lower costs Fast application rollout Good inference performance
95% accuracy
50% shorter development latency
Operator 
ecosystem
ModelEngine
Full-process AI 
toolchain
Application enablement
High-precision RAG application 
development and optimization
Open toolchain Open-source framework, supporting third-party operators
Model enablement
Lightweight model inference 
toolchain
Data enablement
Automatic data processing and 
knowledge generation
API API
Training and inference offload acceleration
Cache offload acceleration | Ascend+Kunpeng
heterogeneous computing
Retrieval acceleration
Converged retrieval of multimodal data | Vector 
retrieval acceleration
Low-code toolchain
Self-orchestrated data processing | Modular 
RAG
NPU basic software Container platform Knowledge base storage
Model ecosystem OpenMind HuggingFace Blue Whale Market LangChain LlamaIndex
GPU processor | Ascend NPU processor
Resource enablement
AI task scheduling and XPU pooling...

            

---

## üí¨ ‡ªÇ‡ªù‡∫î Interactive - ‡∫û‡∫¥‡∫°‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡∫Ç‡∫≠‡∫á‡∫ó‡ªà‡∫≤‡∫ô (‡∫û‡∫¥‡∫° 'quit' ‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫≠‡∫≠‡∫Å

---

üëã ‡∫Ç‡∫≠‡∫ö‡ªÉ‡∫à‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡∫•‡∫∞‡∫ö‡∫ª‡∫ö RAG!
