In [51]:
import os
from typing import Dict, List, Optional

import chromadb
import numpy as np
from IPython.display import display, Markdown
from langchain_anthropic import ChatAnthropic
from sentence_transformers import SentenceTransformer, CrossEncoder

In [52]:
class StreamlinedAdvancedRAG:
    """
    Retrieval-augmented question answering over a Chroma collection.

    The search pipeline combines four steps:
    1. Dense retrieval (sentence-transformer embeddings + Chroma query)
    2. Query rewriting (LLM)
    3. HyDE - hypothetical document generation (LLM)
    4. Re-ranking (cross-encoder)
    """

    # Re-ranking is only attempted when MORE than this many documents were
    # retrieved (threshold kept from the original pipeline).
    _MIN_DOCS_TO_RERANK = 3

    def __init__(
        self,
        collection_name: str,
        anthropic_api_key: Optional[str] = None,
        *,
        db_path: str = "../Vector/chroma_db",
        embedding_model_path: str = 'D:/model/BAAI-bge-m3',
        rerank_model_name: str = 'cross-encoder/ms-marco-MiniLM-L-6-v2',
        embedding_device: str = 'cpu',
    ):
        """
        Open the vector store, load the local models and create the LLM clients.

        Args:
            collection_name: Name of an existing Chroma collection.
            anthropic_api_key: Anthropic key; falls back to the
                ANTHROPIC_API_KEY environment variable when omitted.
            db_path: Directory of the persistent Chroma database.
            embedding_model_path: Path or name of the sentence-transformer
                model used to embed queries.
            rerank_model_name: Path or name of the cross-encoder used for
                re-ranking.
            embedding_device: Device passed to the embedding model.

        Raises:
            ValueError: If no API key is available or the collection cannot
                be loaded.
        """
        # Validate the API key first so a missing key fails fast, before the
        # (slow) model loading below.
        api_key = anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("‡∫Å‡∫∞‡∫•‡∫∏‡∫ô‡∫≤‡ªÉ‡∫´‡ªâ ANTHROPIC_API_KEY")

        # Vector store
        self.client = chromadb.PersistentClient(path=db_path)
        self.collection = self._load_collection(collection_name)

        # Local models
        self.embedding_model = SentenceTransformer(embedding_model_path, device=embedding_device)
        # NOTE(review): the default ms-marco cross-encoder appears to be an
        # English-trained model - confirm it ranks non-English text acceptably.
        self.rerank_model = CrossEncoder(rerank_model_name)

        # Main LLM: used for HyDE generation and for the final answer.
        self.llm = ChatAnthropic(
            api_key=api_key,
            model="claude-opus-4-1-20250805",
            temperature=0.1,
            max_tokens=2000
        )

        # Smaller LLM with a short output budget, used for query rewriting.
        self.query_llm = ChatAnthropic(
            api_key=api_key,
            model="claude-3-7-sonnet-20250219",
            temperature=0.0,
            max_tokens=300
        )

        print(f"‚úÖ Streamlined Advanced RAG ready! ({self.collection.count()} documents)")

    def _load_collection(self, collection_name: str):
        """
        Return the named Chroma collection.

        Raises:
            ValueError: If the collection cannot be fetched; the underlying
                exception is chained as the cause.
        """
        try:
            return self.client.get_collection(name=collection_name)
        except Exception as e:
            raise ValueError(f"Cannot load collection '{collection_name}': {e}") from e

    # ==================== 1. QUERY REWRITING ====================

    def rewrite_query(self, query: str) -> str:
        """
        Ask the query LLM for a retrieval-friendly rewrite of ``query``.

        Returns:
            The rewritten query with surrounding whitespace and quotes
            removed. Falls back to the original ``query`` when the LLM call
            fails or the reply is empty after cleaning.
        """
        rewriting_prompt = f"""‡∫ó‡ªà‡∫≤‡∫ô‡ªÄ‡∫õ‡∫±‡∫ô‡∫ú‡∫π‡ªâ‡∫ä‡ªà‡∫ß‡∫ç‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫õ‡∫±‡∫ö‡∫õ‡∫∏‡∫á‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°‡∫™‡ªç‡∫≤‡∫•‡∫±‡∫ö‡∫•‡∫∞‡∫ö‡∫ª‡∫ö‡∫Ñ‡∫ª‡ªâ‡∫ô‡∫´‡∫≤‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô.

‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°‡∫ï‡∫ª‡ªâ‡∫ô‡∫™‡∫∞‡∫ö‡∫±‡∫ö: "{query}"

‡∫Å‡∫∞‡∫•‡∫∏‡∫ô‡∫≤‡∫õ‡∫±‡∫ö‡∫õ‡∫∏‡∫á‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°‡∫ô‡∫µ‡ªâ‡ªÉ‡∫´‡ªâ:
- ‡ªÄ‡∫û‡∫µ‡ªà‡∫°‡∫Ñ‡∫ß‡∫≤‡∫°‡ªù‡∫≤‡∫ç‡ªÉ‡∫´‡ªâ‡∫à‡∫∞‡ªÅ‡∫à‡ªâ‡∫á ‡ªÅ‡∫•‡∫∞ ‡∫•‡∫∞‡∫≠‡∫Ω‡∫î
- ‡ªÄ‡∫û‡∫µ‡ªà‡∫°‡∫Ñ‡ªç‡∫≤‡∫™‡ªç‡∫≤‡∫Ñ‡∫±‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á
- ‡∫õ‡∫±‡∫ö‡ªÉ‡∫ä‡ªâ‡ªÇ‡∫Ñ‡∫á‡∫™‡ªâ‡∫≤‡∫á‡∫ó‡∫µ‡ªà‡ªÄ‡∫´‡∫°‡∫≤‡∫∞‡∫™‡∫ª‡∫°
- ‡∫™‡∫±‡ªâ‡∫ô‡ªÅ‡∫•‡∫∞‡∫Å‡∫∞‡∫ä‡∫±‡∫ö

‡∫ï‡∫≠‡∫ö‡ªÅ‡∫ï‡ªà‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°‡∫ó‡∫µ‡ªà‡∫õ‡∫±‡∫ö‡∫õ‡∫∏‡∫á‡ªÅ‡∫•‡ªâ‡∫ß‡ªÄ‡∫ó‡∫ª‡ªà‡∫≤‡∫ô‡∫±‡ªâ‡∫ô:"""

        try:
            response = self.query_llm.invoke(rewriting_prompt)
            rewritten = response.content.strip()

            # Drop surrounding quotes if the model wrapped its answer in them.
            rewritten = rewritten.strip('"\'')
        except Exception as e:
            # Best effort: rewriting is optional, so keep the original query.
            print(f"‚ö†Ô∏è  Query rewriting failed: {e}")
            return query

        if not rewritten:
            # An empty rewrite would later be embedded as an empty search query.
            return query

        print(f"üîÑ Query rewritten: '{query}' ‚Üí '{rewritten}'")
        return rewritten

    # ==================== 2. HyDE GENERATION ====================

    def generate_hyde(self, query: str) -> str:
        """
        Generate a hypothetical answer document for ``query`` with the main LLM.

        Returns:
            The generated text, or the original ``query`` when the LLM call
            fails or returns only whitespace.
        """
        hyde_prompt = f"""‡∫ó‡ªà‡∫≤‡∫ô‡ªÄ‡∫õ‡∫±‡∫ô‡∫ú‡∫π‡ªâ‡∫ä‡ªà‡∫ß‡∫ç‡∫ó‡∫µ‡ªà‡∫ä‡ªà‡∫Ω‡∫ß‡∫ä‡∫≤‡∫ô‡ªÉ‡∫ô‡∫î‡ªâ‡∫≤‡∫ô‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫™‡∫ª‡∫°‡∫°‡∫∏‡∫î‡∫ï‡∫¥‡∫ñ‡∫≤‡∫ô

‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°: {query}

‡∫Ç‡∫Ω‡∫ô‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô:
- ‡∫ï‡∫≠‡∫ö‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°‡ªÇ‡∫î‡∫ç‡∫Å‡∫ª‡∫á‡ªÅ‡∫•‡∫∞‡∫ä‡∫±‡∫î‡ªÄ‡∫à‡∫ô
- ‡ªÉ‡∫ä‡ªâ‡∫û‡∫≤‡∫™‡∫≤‡ªÅ‡∫•‡∫∞‡∫Ñ‡ªç‡∫≤‡∫™‡∫±‡∫ö‡∫ó‡∫µ‡ªà‡∫°‡∫µ‡ªÇ‡∫≠‡∫Å‡∫≤‡∫î‡∫õ‡∫≤‡∫Å‡∫ª‡∫î‡ªÉ‡∫ô‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫à‡∫¥‡∫á
- ‡∫°‡∫µ‡ªÄ‡∫ô‡∫∑‡ªâ‡∫≠‡∫´‡∫≤‡∫ó‡∫µ‡ªà‡∫°‡∫µ‡∫Ñ‡∫∏‡∫ô‡∫ô‡∫∞‡∫û‡∫≤‡∫ö‡ªÅ‡∫•‡∫∞‡∫ñ‡∫∑‡∫Å‡∫ï‡ªâ‡∫≠‡∫á
- ‡∫ç‡∫≤‡∫ß‡∫õ‡∫∞‡∫°‡∫≤‡∫ô 100-150 ‡∫Ñ‡ªç‡∫≤
- ‡ªÄ‡∫Ç‡∫ª‡ªâ‡∫≤‡ªÄ‡∫•‡∫∑‡ªà‡∫≠‡∫á‡ªÇ‡∫î‡∫ç‡∫Å‡∫ª‡∫á ‡∫ö‡ªç‡ªà‡∫ï‡ªâ‡∫≠‡∫á‡∫°‡∫µ‡∫ö‡∫ª‡∫î‡∫ô‡ªç‡∫≤

‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô:"""

        try:
            response = self.llm.invoke(hyde_prompt)
            hyde_doc = response.content.strip()
        except Exception as e:
            # Best effort: HyDE is optional, so fall back to the raw query.
            print(f"‚ö†Ô∏è  HyDE generation failed: {e}")
            return query

        if not hyde_doc:
            return query

        print(f"üìù HyDE generated: {len(hyde_doc)} chars")
        return hyde_doc

    # ==================== 3. DENSE RETRIEVAL ====================

    def dense_retrieval(self, query: str, n_results: int = 10) -> List[Dict]:
        """
        Embed ``query`` and fetch the nearest documents from the collection.

        Returns:
            One dict per hit, in the order returned by the collection, with
            keys ``text``, ``score`` (``1 - distance``) and ``id``.
        """
        query_embedding = self.embedding_model.encode([query]).tolist()

        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=n_results
        )

        # A single query was sent, so only the first result row is relevant.
        # NOTE(review): ``1 - distance`` is only a bounded similarity for
        # cosine-style distances - confirm the collection's distance metric.
        return [
            {'text': text, 'score': 1 - distance, 'id': doc_id}
            for text, distance, doc_id in zip(
                results['documents'][0],
                results['distances'][0],
                results['ids'][0],
            )
        ]

    # ==================== 4. RE-RANKING ====================

    def rerank_documents(self, query: str, docs: List[Dict], top_k: int = 5) -> List[Dict]:
        """
        Re-order ``docs`` by cross-encoder relevance to ``query``.

        Every document is scored whenever ``docs`` is non-empty, including
        when ``len(docs) <= top_k``; otherwise a caller asking for as many
        results as were retrieved would silently get the un-reranked order.

        Each dict in ``docs`` is updated in place with ``rerank_score`` and
        ``original_rank`` (1-based position before re-ranking).

        Returns:
            The ``top_k`` highest-scoring documents, best first. If scoring
            fails, the first ``top_k`` documents in their incoming order.
        """
        if not docs:
            return []

        print(f"üîÑ Re-ranking {len(docs)} ‚Üí {top_k} documents...")

        try:
            # One (query, passage) pair per candidate document.
            pairs = [(query, doc['text']) for doc in docs]
            scores = self.rerank_model.predict(pairs)

            for rank, (doc, score) in enumerate(zip(docs, scores), 1):
                doc['rerank_score'] = float(score)
                doc['original_rank'] = rank

            reranked = sorted(docs, key=lambda x: x['rerank_score'], reverse=True)

            print(f"‚úÖ Re-ranking completed")
            return reranked[:top_k]

        except Exception as e:
            # Best effort: keep the retrieval order rather than failing the search.
            print(f"‚ö†Ô∏è  Re-ranking failed: {e}")
            return docs[:top_k]

    # ==================== MAIN SEARCH PIPELINE ====================

    def search_advanced(self, query: str,
                       n_results: int = 10, 
                       use_rewriting: bool = True,
                       use_hyde: bool = True,
                       use_reranking: bool = True,
                       top_k: int = 5
                       ) -> List[Dict]:
        """
        Run the full retrieval pipeline for ``query``.

        Args:
            query: The user's question.
            n_results: Hits requested per search query, and the cap on the
                merged candidate list.
            use_rewriting: Also search with an LLM-rewritten query.
            use_hyde: Also search with an LLM-generated hypothetical document.
            use_reranking: Re-rank the merged candidates with the cross-encoder
                (only when more than ``_MIN_DOCS_TO_RERANK`` were retrieved).
            top_k: Number of documents kept after re-ranking.

        Returns:
            Re-ranked top documents, or the merged candidates sorted by dense
            score when re-ranking is disabled or skipped.
        """
        print(f"üöÄ Advanced search: '{query}'")

        search_queries = [query]  # always search with the original question

        def add_query(candidate: str) -> None:
            # Skip empty strings and duplicates: rewrite/HyDE return the raw
            # query on failure, and searching it twice would only repeat work
            # and inflate ``query_count``.
            if candidate and candidate not in search_queries:
                search_queries.append(candidate)

        # 1. Query Rewriting
        if use_rewriting:
            add_query(self.rewrite_query(query))

        # 2. HyDE
        if use_hyde:
            add_query(self.generate_hyde(query))

        # 3. Dense retrieval for every search query, merged by document id
        all_docs = {}
        for i, sq in enumerate(search_queries):
            print(f"üîç Searching with query {i+1}/{len(search_queries)}")

            for doc in self.dense_retrieval(sq, n_results):
                doc_id = doc['id']
                if doc_id in all_docs:
                    # Seen before: keep the best score, count the extra hit.
                    known = all_docs[doc_id]
                    known['score'] = max(known['score'], doc['score'])
                    known['query_count'] = known.get('query_count', 1) + 1
                else:
                    doc['query_count'] = 1
                    all_docs[doc_id] = doc

        # Best dense score first, capped at n_results candidates
        retrieved_docs = sorted(all_docs.values(), key=lambda x: x['score'], reverse=True)
        retrieved_docs = retrieved_docs[:n_results]

        print(f"üìÑ Retrieved {len(retrieved_docs)} unique documents")

        # 4. Re-ranking
        if use_reranking and len(retrieved_docs) > self._MIN_DOCS_TO_RERANK:
            return self.rerank_documents(query, retrieved_docs,
                                         min(top_k, len(retrieved_docs)))

        return retrieved_docs

    # ==================== MAIN Q&A FUNCTION ====================

    def ask(self, question: str, n_results: int = 8, **kwargs) -> Dict:
        """
        Answer ``question`` from retrieved documents.

        Args:
            question: The user's question.
            n_results: Forwarded to ``search_advanced``.
            **kwargs: Extra keyword arguments forwarded to ``search_advanced``
                (``use_rewriting``, ``use_hyde``, ``use_reranking``, ``top_k``).

        Returns:
            On success a dict with ``question``, ``answer``, ``sources`` and
            ``metadata``; otherwise a dict with a single ``error`` key (no
            documents found, or the answer LLM call failed).
        """
        print(f"\n{'='*60}")
        print(f"‚ùì ‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°: {question}")
        print(f"{'='*60}")

        # Retrieve supporting documents
        docs = self.search_advanced(question, n_results, **kwargs)

        if not docs:
            return {"error": "‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á"}

        # Build the context block; prefer the rerank score when present
        context_parts = []
        for i, doc in enumerate(docs, 1):
            score_info = f"(Score: {doc.get('rerank_score', doc['score']):.3f})"
            context_parts.append(f"[Document {i}] {score_info}\n{doc['text']}")

        context = "\n\n".join(context_parts)

        # Answer prompt (runtime text, kept exactly as authored)
        prompt = f"""‡∫ó‡ªà‡∫≤‡∫ô‡ªÄ‡∫õ‡∫±‡∫ô‡∫ú‡∫π‡ªâ‡∫ä‡ªà‡∫ß‡∫ç AI ‡∫ó‡∫µ‡ªà‡∫ä‡ªà‡∫Ω‡∫ß‡∫ä‡∫≤‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫ï‡∫≠‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÇ‡∫î‡∫ç‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫´‡ªâ‡∫°‡∫≤.

        ‡∫Ñ‡∫≥‡ªÅ‡∫ô‡∫∞‡∫ô‡∫≥:
        1. ‡∫ï‡∫≠‡∫ö‡∫Ñ‡∫≥‡∫ñ‡∫≤‡∫°‡ªÇ‡∫î‡∫ç‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á‡∫à‡∫≤‡∫Å‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫´‡ªâ‡∫°‡∫≤‡ªÄ‡∫ó‡∫ª‡ªà‡∫≤‡∫ô‡∫±‡ªâ‡∫ô
        2. ‡∫ñ‡ªâ‡∫≤‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡ªÉ‡∫ô‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô, ‡ªÉ‡∫´‡ªâ‡∫ö‡∫≠‡∫Å‡∫ß‡ªà‡∫≤‡∫ö‡ªç‡ªà‡∫û‡∫ª‡∫ö‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡∫Å‡ªà‡∫Ω‡∫ß‡∫Ç‡ªâ‡∫≠‡∫á
        3. ‡∫•‡∫∞‡∫ö‡∫∏‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫ï‡∫≠‡∫ö
        4. ‡∫ï‡∫≠‡∫ö‡ªÄ‡∫õ‡∫±‡∫ô‡∫û‡∫≤‡∫™‡∫≤‡∫•‡∫≤‡∫ß‡ªÄ‡∫ó‡∫ª‡ªà‡∫≤‡∫ô‡∫±‡ªâ‡∫ô, ‡ªÉ‡∫´‡ªâ‡∫Ñ‡∫≥‡∫ï‡∫≠‡∫ö‡∫ó‡∫µ‡ªà‡∫ä‡∫±‡∫î‡ªÄ‡∫à‡∫ô ‡ªÅ‡∫•‡∫∞ ‡∫•‡∫∞‡∫≠‡∫Ω‡∫î
        5. ‡∫ï‡∫≠‡∫ö‡ªÉ‡∫´‡ªâ‡ªÄ‡∫õ‡∫±‡∫ô Format markdown
                
        ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô‡∫≠‡ªâ‡∫≤‡∫á‡∫≠‡∫µ‡∫á:
        {context}

        ‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°: {question}
        """

        # Generate the answer
        try:
            response = self.llm.invoke(prompt)
            answer = response.content.strip()

            return {
                'question': question,
                'answer': answer,
                'sources': docs,
                'metadata': {
                    'total_sources': len(docs),
                    'avg_score': np.mean([doc.get('rerank_score', doc['score']) for doc in docs]),
                    # Requested flags; defaults mirror search_advanced's.
                    'features_used': {
                        'query_rewriting': kwargs.get('use_rewriting', True),
                        'hyde': kwargs.get('use_hyde', True),
                        'reranking': kwargs.get('use_reranking', True)
                    }
                }
            }

        except Exception as e:
            return {"error": f"‡ªÄ‡∫Å‡∫µ‡∫î‡∫Ç‡ªç‡ªâ‡∫ú‡∫¥‡∫î‡∫û‡∫≤‡∫î‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫™‡ªâ‡∫≤‡∫á‡∫Ñ‡ªç‡∫≤‡∫ï‡∫≠‡∫ö: {str(e)}"}

In [53]:
# ==================== MAIN USAGE ====================

def main():
    """
    Demo: build the RAG system, ask one question and render the result.
    """
    # Build the RAG system on the "pdf_documents" collection
    api_key = os.getenv("ANTHROPIC_API_KEY")
    rag = StreamlinedAdvancedRAG(collection_name="pdf_documents", anthropic_api_key=api_key)

    # Pipeline switches: True = enabled, False = disabled
    search_options = {
        'n_results': 20,        # number of documents to retrieve
        'use_rewriting': True,  # LLM query rewriting
        'use_hyde': True,       # HyDE hypothetical document
        'use_reranking': True,  # cross-encoder re-ranking
        'top_k': 10,            # number of documents kept after re-ranking
    }
    result = rag.ask(
        question="SMS Banking Package ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫µ Package ‡∫ç‡∫±‡∫á‡ªÅ‡∫ô‡ªà?",
        **search_options,
    )

    # Failure path first: print the error and stop
    if 'error' in result:
        print(f"\n‚ùå {result['error']}")
        return

    # Success path: answer, then a short metadata summary
    display(Markdown(f"\n‚úÖ ‡∫Ñ‡ªç‡∫≤‡∫ï‡∫≠‡∫ö: {result['answer']}"))
    display(Markdown(f"\nüìä ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô:"))
    meta = result['metadata']
    display(Markdown(f"   Sources: {meta['total_sources']}"))
    display(Markdown(f"   Avg Score: {meta['avg_score']:.3f}"))
    display(Markdown(f"   Features: {meta['features_used']}"))
        

In [54]:
# Run the demo only when this module is the entry point (in a notebook cell
# __name__ is "__main__" too, so the cell executes main()).
if __name__ == "__main__":
    main()

✅ Streamlined Advanced RAG ready! (95 documents)

‚ùì ‡∫Ñ‡ªç‡∫≤‡∫ñ‡∫≤‡∫°: SMS Banking Package ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫µ Package ‡∫ç‡∫±‡∫á‡ªÅ‡∫ô‡ªà?
üöÄ Advanced search: 'SMS Banking Package ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫µ Package ‡∫ç‡∫±‡∫á‡ªÅ‡∫ô‡ªà?'
üîÑ Query rewritten: 'SMS Banking Package ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫µ Package ‡∫ç‡∫±‡∫á‡ªÅ‡∫ô‡ªà?' ‚Üí '‡∫ß‡∫¥‡∫ó‡∫µ‡∫™‡∫∞‡ªù‡∫±‡∫Å‡∫ô‡∫≥‡ªÉ‡∫ä‡ªâ‡∫ö‡ªç‡∫•‡∫¥‡∫Å‡∫≤‡∫ô SMS Banking ‡∫Ç‡∫≠‡∫á‡∫ó‡∫∞‡∫ô‡∫≤‡∫Ñ‡∫≤‡∫ô ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫µ‡ªÅ‡∫û‡∫±‡∫Å‡ªÄ‡∫Å‡∫±‡∫î‡ªÉ‡∫î‡ªÅ‡∫î‡ªà‡∫ó‡∫µ‡ªà‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÄ‡∫•‡∫∑‡∫≠‡∫Å‡ªÑ‡∫î‡ªâ?'
📝 HyDE generated: 619 chars
🔍 Searching with query 1/3
🔍 Searching with query 2/3
🔍 Searching with query 3/3
📄 Retrieved 20 unique documents
🔄 Re-ranking 20 → 10 documents...
✅ Re-ranking completed



‚úÖ ‡∫Ñ‡ªç‡∫≤‡∫ï‡∫≠‡∫ö: ‡∫≠‡∫µ‡∫á‡∫ï‡∫≤‡∫°‡ªÄ‡∫≠‡∫Å‡∫∞‡∫™‡∫≤‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫´‡ªâ‡∫°‡∫≤, ‡∫Ç‡ªâ‡∫≠‡∫ç‡∫Ç‡ªç‡∫ï‡∫≠‡∫ö‡∫Å‡ªà‡∫Ω‡∫ß‡∫Å‡∫±‡∫ö SMS Banking Package ‡∫î‡∫±‡ªà‡∫á‡∫ô‡∫µ‡ªâ:

## ‡∫ß‡∫¥‡∫ó‡∫µ‡∫Å‡∫≤‡∫ô‡∫™‡∫∞‡ªù‡∫±‡∫Å SMS Banking Package:

‡∫ó‡ªà‡∫≤‡∫ô‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫™‡∫∞‡ªù‡∫±‡∫Å Package ‡ªÄ‡∫û‡∫µ‡ªà‡∫°‡ªÑ‡∫î‡ªâ‡ªÇ‡∫î‡∫ç:
- **‡∫û‡∫¥‡∫° B10 [‡∫ç‡∫∞‡∫´‡∫ß‡ªà‡∫≤‡∫á] <‡ªÄ‡∫•‡∫Å‡∫ö‡∫±‡∫ô‡∫ä‡∫µ> ‡ªÅ‡∫•‡ªâ‡∫ß‡∫™‡∫ª‡ªà‡∫á‡ªÄ‡∫ö‡∫µ 1444**

## Package ‡∫ó‡∫µ‡ªà‡∫°‡∫µ‡ªÉ‡∫´‡ªâ‡ªÄ‡∫•‡∫∑‡∫≠‡∫Å:

| ‡∫•‡∫∞‡∫´‡∫±‡∫î Package | ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡ªÄ‡∫á‡∫¥‡∫ô (‡∫Å‡∫µ‡∫ö) | ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫ß‡∫±‡∫ô‡∫ó‡∫µ‡ªà‡ªÉ‡∫ä‡ªâ‡ªÑ‡∫î‡ªâ | ‡∫à‡∫≥‡∫ô‡∫ß‡∫ô‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |
|--------------|----------------|-----------------|--------------|
| **B05** | 5,000 | 365 ‡∫ß‡∫±‡∫ô | 30 ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |
| **B10** | 10,000 | 365 ‡∫ß‡∫±‡∫ô | 60 ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |
| **B25** | 25,000 | 365 ‡∫ß‡∫±‡∫ô | 150 ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |
| **B50** | 50,000 | 365 ‡∫ß‡∫±‡∫ô | 300 ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |
| **B100** | 100,000 | 365 ‡∫ß‡∫±‡∫ô | 600 ‡∫Ç‡ªç‡ªâ‡∫Ñ‡∫ß‡∫≤‡∫° |

## ‡∫ü‡∫±‡∫á‡∫ä‡∫±‡∫ô‡∫≠‡∫∑‡ªà‡∫ô‡ªÜ‡∫ó‡∫µ‡ªà‡ªÄ‡∫õ‡∫±‡∫ô‡∫õ‡∫∞‡ªÇ‡∫´‡∫ç‡∫î:
- ‡∫Å‡∫ß‡∫î Package ‡∫õ‡∫±‡∫î‡∫à‡∫∏‡∫ö‡∫±‡∫ô: ‡∫û‡∫¥‡∫° **C [‡∫ç‡∫∞‡∫´‡∫ß‡ªà‡∫≤‡∫á] <‡ªÄ‡∫•‡∫Å‡∫ö‡∫±‡∫ô‡∫ä‡∫µ>** ‡ªÅ‡∫•‡ªâ‡∫ß‡∫™‡∫ª‡ªà‡∫á‡ªÄ‡∫ö‡∫µ 1444
- ‡∫ó‡∫∏‡∫Å Package ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÉ‡∫ä‡ªâ‡ªÑ‡∫î‡ªâ 365 ‡∫ß‡∫±‡∫ô (1 ‡∫õ‡∫µ‡ªÄ‡∫ï‡∫±‡∫°)

**‡ªÅ‡∫´‡∫º‡ªà‡∫á‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô:** Document 3 - ‡∫õ‡∫∑‡ªâ‡∫°‡∫™‡∫±‡∫á‡∫•‡∫ß‡∫°‡∫ú‡∫∞‡∫•‡∫¥‡∫î‡∫ï‡∫∞‡∫û‡∫±‡∫ô‡∫ó‡∫±‡∫á‡ªù‡∫ª‡∫î‡∫Ç‡∫≠‡∫á ‡∫ó‡∫Ñ‡∫ï‡∫•_2020_Update.2 ‡ªú‡ªâ‡∫≤‡∫ó‡∫µ 15


üìä ‡∫Ç‡ªç‡ªâ‡∫°‡∫π‡∫ô:

   Sources: 10

   Avg Score: 4.395

   Features: {'query_rewriting': True, 'hyde': True, 'reranking': True}