**1. Built-in Document Loaders**

SimpleDirectoryReader


Supports: PDFs, Word docs, PowerPoint, images, audio, video, markdown, HTML, JSON, CSV

In [None]:
from llama_index.core import SimpleDirectoryReader

# Three independent variations on the same loader. Each one rebinds
# `documents`, so run only the variant you need.

# 1) Load every supported file from the directory.
everything_reader = SimpleDirectoryReader("./data")
documents = everything_reader.load_data()

# 2) Restrict loading to an allow-list of file extensions.
filtered_reader = SimpleDirectoryReader(
    "./data",
    required_exts=[".pdf", ".docx", ".txt"],
)
documents = filtered_reader.load_data()

# 3) Also descend into sub-directories.
recursive_reader = SimpleDirectoryReader("./data", recursive=True)
documents = recursive_reader.load_data()


**2. Specialized Data Connectors from LlamaHub**



Database Integration

In [None]:
from llama_index.readers.database import DatabaseReader

# Connect to a SQL database (PostgreSQL here). The credentials below are
# placeholders: load real ones from configuration or environment variables
# instead of committing them to source.
reader = DatabaseReader(scheme="postgresql",
                        host="localhost",port=5432,
                        user="admin",password="password",
                        dbname="company_db"
                        )

# Run a raw SQL query. Note this is SQL text, not natural language; the
# natural-language-to-SQL interface is a separate query engine.
query = "SELECT * FROM customers WHERE signup_date > '2024-01-01'"
documents = reader.load_data(query=query)


Google Workspace Integration

In [None]:
from llama_index.readers.google import GoogleDocsReader

# Load Google Docs directly by document id.
# NOTE(review): presumably requires Google API credentials to be configured
# for the reader before load_data() can fetch anything — confirm.
loader = GoogleDocsReader()
# Ids of the documents to fetch (the id is the long token in the doc's URL).
gdoc_ids = ['1wf-y2pd9C878Oh-FmLH7Q_BQkljdm6TQal-c1pUfrec']
documents = loader.load_data(document_ids=gdoc_ids)


Web Scraping & APIs

In [None]:
from llama_index.readers.web import SimpleWebPageReader

# Scrape web pages. `html_to_text=True` asks the reader to convert the fetched
# HTML into plain text before wrapping it in documents.
# (The original cell contained a stray, unterminated `"` inside the URL list,
# which made the whole cell a syntax error.)
documents = SimpleWebPageReader(html_to_text=True).load_data(
    [
        "https://example.com/page1",
        "https://example.com/page2",
    ]
)

**3. Advanced Preprocessing with Custom Transformations**

Custom Node Parser

In [None]:
from llama_index.core.node_parser import NodeParser
from llama_index.core.schema import Document, TextNode

class CustomCodeParser(NodeParser):
    """Node parser that emits one ``TextNode`` per Python function definition.

    ``NodeParser`` declares ``_parse_nodes`` as abstract, so it is implemented
    here; without it the class cannot be instantiated. The function-extraction
    helper the original referenced but never defined is provided as
    ``extract_functions``.
    """

    def get_nodes_from_documents(self, documents, show_progress=False, **kwargs):
        """Return one node per function found in ``documents``.

        Args:
            documents: Iterable of documents whose ``text`` is Python source.
            show_progress: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            A list of ``TextNode`` objects, one per function definition.
        """
        return self._parse_nodes(documents, show_progress=show_progress, **kwargs)

    def _parse_nodes(self, nodes, show_progress=False, **kwargs):
        """Split every input node's source text into per-function nodes."""
        parsed = []
        for doc in nodes:
            # Split code by functions
            for func_name, func_code in self.extract_functions(doc.text):
                parsed.append(
                    TextNode(
                        text=func_code,
                        metadata={
                            "function_name": func_name,
                            "file_path": doc.metadata.get("file_path"),
                            "language": "python",
                        },
                    )
                )
        return parsed

    def extract_functions(self, source):
        """Return ``(name, source_code)`` pairs for each function in ``source``.

        Functions are found with the standard-library ``ast`` module, including
        methods and nested functions, and are returned in source order. Text
        that is not valid Python yields an empty list rather than raising.
        """
        import ast  # local import keeps this notebook cell self-contained

        try:
            tree = ast.parse(source)
        except SyntaxError:
            return []

        definitions = [
            node
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]
        definitions.sort(key=lambda node: (node.lineno, node.col_offset))

        functions = []
        for node in definitions:
            segment = ast.get_source_segment(source, node)
            if segment is not None:
                functions.append((node.name, segment))
        return functions


Pipeline with Custom Transformations

In [None]:
from llama_index.core.extractors import QuestionsAnsweredExtractor, TitleExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TransformComponent


# Pipeline steps must be TransformComponent instances. The original cell used
# lambdas that referenced an undefined `self`, so it could not run. The two
# classes below are minimal examples; replace their bodies with your own
# cleaning and enrichment logic.
class TextCleaner(TransformComponent):
    """Example cleaning step: collapse runs of whitespace in each node's text."""

    def __call__(self, nodes, **kwargs):
        for node in nodes:
            node.set_content(" ".join(node.get_content().split()))
        return nodes


class CustomMetadataEnricher(TransformComponent):
    """Example enrichment step: record each node's character count."""

    def __call__(self, nodes, **kwargs):
        for node in nodes:
            node.metadata["char_count"] = len(node.get_content())
        return nodes


# Custom preprocessing pipeline
# NOTE(review): the extractors run before the splitter here, as in the original
# cell, so they see whole documents. Consider moving SentenceSplitter ahead of
# the extractors if per-chunk metadata is what you want — confirm intent.
pipeline = IngestionPipeline(
    transformations=[
        # Custom text cleaning
        TextCleaner(),

        # Extract titles and questions automatically
        TitleExtractor(nodes=5),
        QuestionsAnsweredExtractor(questions=3),

        # Custom metadata enrichment
        CustomMetadataEnricher(),

        # Smart chunking
        SentenceSplitter(chunk_size=1024, chunk_overlap=200),
    ]
)

nodes = pipeline.run(documents=documents)


**4. Smart Chunking Strategies**

Hierarchical Document Parsing

In [None]:
from llama_index.core.node_parser import HierarchicalNodeParser

# Chunk sizes for each level of the hierarchy, coarsest first.
level_chunk_sizes = [2048, 512, 128]

# Build the multi-level parser and run it over the loaded documents.
parser = HierarchicalNodeParser.from_defaults(chunk_sizes=level_chunk_sizes)
nodes = parser.get_nodes_from_documents(documents)


Code-Aware Splitting

In [None]:
from llama_index.core.node_parser import CodeSplitter

# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {
    "language": "python",
    "chunk_lines": 40,  # Max lines per chunk
    "chunk_lines_overlap": 15,
    "max_chars": 1500,
}

# Split code along syntactic boundaries instead of arbitrary character counts.
splitter = CodeSplitter(**splitter_options)
nodes = splitter.get_nodes_from_documents(code_documents)


**5. Metadata Extraction & Enrichment**

Automatic Metadata Extractors

In [None]:
from llama_index.core.extractors import (
    TitleExtractor,
    QuestionsAnsweredExtractor,
    SummaryExtractor,
    KeywordExtractor
)

# Build each extractor separately, then chain them in the order they should run.
title_step = TitleExtractor(nodes=5)
questions_step = QuestionsAnsweredExtractor(questions=3)
summary_step = SummaryExtractor(summaries=["prev", "self"])
keyword_step = KeywordExtractor(keywords=10)

extractors = [title_step, questions_step, summary_step, keyword_step]

# Run the chain as an ingestion pipeline over the loaded documents.
pipeline = IngestionPipeline(transformations=extractors)
enriched_nodes = pipeline.run(documents=documents)


Custom Metadata Enrichment

In [None]:
def add_business_metadata(documents):
    """Attach business-specific metadata to each document in place.

    For every document, four values derived from its text are merged into
    ``doc.metadata``: department, document type, urgency level and
    stakeholders. The same (mutated) collection is returned.
    """
    for document in documents:
        body = document.text
        # Derive all business fields first, then merge them in one update.
        enrichment = {
            "department": extract_department(body),
            "document_type": classify_document(body),
            "urgency_level": assess_urgency(body),
            "stakeholders": extract_people(body),
        }
        document.metadata.update(enrichment)
    return documents


Structured Data Extraction

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core.output_parsers import PydanticOutputParser
from pydantic import BaseModel
from typing import List

class CompanyInfo(BaseModel):
    """Target schema for the structured company-information extraction."""

    name: str
    industry: str
    key_products: List[str]
    revenue: str

# Custom prompt for extraction. `{context_str}` is the only placeholder in
# the template.
extraction_prompt = PromptTemplate(
    "Extract company information from the following text:\n"
    "{context_str}\n"
    "Return the information in the specified JSON format."
)

# Parser that converts the model's reply into a CompanyInfo instance.
output_parser = PydanticOutputParser(CompanyInfo)

# NOTE(review): `index` is not defined in this cell; it must already exist.
# NOTE(review): confirm `as_query_engine` actually honours `output_parser`;
# if it does not, pass `output_cls=CompanyInfo` or attach the parser to the
# LLM instead.
query_engine = index.as_query_engine(
    output_parser=output_parser,
    text_qa_template=extraction_prompt
)

Code analysis & QnA

In [None]:
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.ollama import Ollama

# Use CodeLlama for code analysis
Settings.llm = Ollama(model="codellama:7b")

# Load code files. `file_extractor` must map extensions to reader *instances*;
# the original mapped them to language-name strings, which are not readers.
# Source files are plain text, so the extension filter is expressed with
# `required_exts` and the default loading is used.
code_documents = SimpleDirectoryReader(
    "./src",
    required_exts=[".py", ".js", ".java"],
).load_data()

# Create specialized index for code.
# NOTE(review): `storage_context` is not defined in this cell; it must have
# been created earlier in the session.
code_index = VectorStoreIndex.from_documents(
    code_documents,
    storage_context=storage_context,
)

**6. Real-World Integration Examples**

Enterprise Document Processing

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import pinecone


# Complete enterprise pipeline
def build_enterprise_rag():
    """Load, process and index documents in Pinecone; return a query engine.

    Steps: load documents from PDFs, a database and the web; run them through
    an ingestion pipeline; create or connect to a Pinecone index; embed the
    resulting nodes with Mistral embeddings; and build a query engine.

    NOTE(review): this function relies on names that are not defined in this
    cell and must exist in the enclosing module or session: ``db_config``,
    ``urls``, ``mistral_key``, and the custom transformations
    ``OCRProcessor``, ``TextCleaner`` and ``CompanyMetadataExtractor``.

    Returns:
        A query engine over the newly built index.
    """
    # Local imports for names this cell's import block does not provide.
    from pinecone import Pinecone, ServerlessSpec
    from llama_index.core.node_parser import SentenceSplitter
    from llama_index.embeddings.mistralai import MistralAIEmbedding

    # 1. Load from multiple sources
    pdf_docs = SimpleDirectoryReader("./pdfs").load_data()
    db_docs = DatabaseReader(**db_config).load_data(query="SELECT * FROM documents")
    web_docs = SimpleWebPageReader().load_data(urls)

    # 2. Combine all documents
    all_docs = pdf_docs + db_docs + web_docs

    # 3. Advanced pipeline
    pipeline = IngestionPipeline(
        transformations=[
            # OCR for scanned PDFs
            OCRProcessor(),
            # Clean and normalize
            TextCleaner(),
            # Extract structured data
            TitleExtractor(),
            KeywordExtractor(keywords=15),
            # Intelligent chunking
            SentenceSplitter(chunk_size=1024),
            # Company-specific metadata
            CompanyMetadataExtractor(),
        ]
    )

    # 4. Process documents into nodes
    nodes = pipeline.run(documents=all_docs)

    # 5. Initialize Pinecone. The module-level `pinecone.init(...)` API was
    # removed in pinecone-client 3.x; a client object is used instead.
    pc = Pinecone(api_key="your-pinecone-api-key")

    # 6. Create/connect to Pinecone index
    index_name = "document-index"
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1024,  # MistralAI embedding dimension
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # your region
        )

    pinecone_index = pc.Index(index_name)

    # 7. Create PineconeVectorStore. Metadata filters are a query-time concern
    # and are not a constructor option, so none is passed here.
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace="production",  # optional
    )

    # 8. Create StorageContext with Pinecone
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    # 9. Set embedding model (IMPORTANT: must match Pinecone dimension).
    # The LlamaIndex class is `MistralAIEmbedding` and takes `model_name`,
    # matching the Mistral AI section of this file.
    Settings.embed_model = MistralAIEmbedding(
        model_name="mistral-embed",
        api_key=mistral_key,
    )

    # 10. Create VectorStoreIndex from the processed nodes
    index = VectorStoreIndex(
        nodes=nodes,
        storage_context=storage_context,
        show_progress=True,
    )

    print(f"✅ Index created with {len(nodes)} nodes in Pinecone!")

    # 11. Create query engine
    query_engine = index.as_query_engine(similarity_top_k=5, response_mode="tree_summarize")
    # response = query_engine.query("Your question here")
    # print(response)

    return query_engine


Multi-modal Data Extraction

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.multi_modal_llms.ollama import OllamaMultiModal

# Use multi-modal model
mm_llm = OllamaMultiModal(model="llava:7b")

# Load documents with images. `file_extractor` must map extensions to reader
# *instances*; the original mapped them to plain strings, which are not
# readers. The extension filter is expressed with `required_exts` and the
# reader's default per-extension loaders are used.
documents = SimpleDirectoryReader(
    "./mixed_content",
    required_exts=[".jpg", ".png", ".pdf"],
).load_data()

# NOTE(review): `storage_context` is not defined in this cell; it must have
# been created earlier in the session.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# NOTE(review): confirm that `as_query_engine` accepts a multi-modal LLM for
# its `llm` argument in the installed LlamaIndex version.
query_engine = index.as_query_engine(llm=mm_llm)

Knowledge Graph Construction

In [None]:
from llama_index.core import KnowledgeGraphIndex, StorageContext
# SimpleGraphStore lives under `llama_index.core.graph_stores` in the
# namespaced (>= 0.10) package layout used by the rest of this file.
from llama_index.core.graph_stores import SimpleGraphStore

# Create knowledge graph
graph_store = SimpleGraphStore()
# NOTE(review): `vector_store` is not defined in this cell; it must already
# exist in the session (drop the argument to use the default store).
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    graph_store=graph_store,
)

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
)

# Query the knowledge graph
kg_query_engine = kg_index.as_query_engine(
    include_text=False,
    response_mode="tree_summarize",
)

response = kg_query_engine.query("What are the relationships between different entities?")

SQL Database + Natural Language Interface

In [None]:
from sqlalchemy import create_engine  # was used below without being imported

from llama_index.core import SQLDatabase
from llama_index.core.query_engine import NLSQLTableQueryEngine

# Connect to production database. The URL is a placeholder: load real
# credentials from configuration or the environment, not from source.
engine = create_engine("postgresql://user:pass@host/db")
sql_database = SQLDatabase(engine, include_tables=["customers", "orders", "products"])

# Natural language to SQL
query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["customers", "orders"],
    verbose=True,
)

# Ask business questions
response = query_engine.query("Who are our top 5 customers by revenue this quarter?")


Document Comparison & Summarization

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool

# Create separate indices for different document sets. Each index gets its own
# default storage: passing one shared `storage_context` to both, as the
# original did, puts both document sets in the same vector store, so each
# "separate" index would retrieve the other's content.
# NOTE(review): `doc_set_1` and `doc_set_2` must be defined beforehand.
index1 = VectorStoreIndex.from_documents(doc_set_1)
index2 = VectorStoreIndex.from_documents(doc_set_2)

# Create query engine tools. Each tool needs a distinct name: sub-questions
# are routed to tools by name, and both would otherwise share the default.
tool1 = QueryEngineTool.from_defaults(
    query_engine=index1.as_query_engine(),
    name="product_a",
    description="Contains information about Product A",
)

tool2 = QueryEngineTool.from_defaults(
    query_engine=index2.as_query_engine(),
    name="product_b",
    description="Contains information about Product B",
)

# Sub-question query engine for comparison
comparison_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[tool1, tool2]
)

response = comparison_engine.query("Compare Product A and Product B features")

**7. Production-Ready Features**

Caching & Incremental Updates

In [None]:
import os

from llama_index.core.ingestion import IngestionCache, IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter

# Persistent caching for large datasets. IngestionCache has no `cache_dir`
# option; persistence is done explicitly with `persist()` and
# `from_persist_path()`.
CACHE_PATH = "./ingestion_cache/company_docs.json"

if os.path.exists(CACHE_PATH):
    cache = IngestionCache.from_persist_path(CACHE_PATH, collection="company_docs")
else:
    cache = IngestionCache(collection="company_docs")

pipeline = IngestionPipeline(
    # Replace with your real transformations. A literal `[...]` placeholder
    # is not a valid transformation list.
    transformations=[SentenceSplitter(chunk_size=1024, chunk_overlap=200)],
    cache=cache,
)

# Inputs already seen with the same transformation are served from the cache.
# NOTE(review): `new_documents` must be defined beforehand.
nodes = pipeline.run(documents=new_documents)

# Save the cache so the next run can reuse it.
os.makedirs(os.path.dirname(CACHE_PATH), exist_ok=True)
cache.persist(CACHE_PATH)


Async Processing for Scale

In [None]:
import asyncio
from llama_index.core.ingestion import arun_transformations

async def process_large_dataset(batch_size=100):
    """Run the pipeline's transformations over ``all_documents`` in batches.

    The documents are cut into consecutive slices of ``batch_size``, every
    slice is transformed concurrently, and the per-batch results are merged
    into a single flat list.

    NOTE(review): reads the module-level names ``all_documents`` (a sliceable
    sequence) and ``pipeline``; both must be defined by the caller's session.

    Args:
        batch_size: Number of documents per batch (default 100, as before).

    Returns:
        A flat list with the transformed nodes of every batch, in batch order.
    """
    from itertools import chain

    # Process 10,000+ documents efficiently
    batches = [
        all_documents[start:start + batch_size]
        for start in range(0, len(all_documents), batch_size)
    ]

    # `arun_transformations` takes the nodes as its first argument; there is
    # no `documents=` parameter.
    tasks = [
        arun_transformations(batch, pipeline.transformations)
        for batch in batches
    ]

    # Process all batches concurrently
    results = await asyncio.gather(*tasks)
    return list(chain.from_iterable(results))


**Monitoring & Debugging**

In [None]:
import logging

from llama_index.core import Settings  # was used below without being imported
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

# Enable debug logging
logging.basicConfig(level=logging.DEBUG)

# Add callbacks for monitoring. With `print_trace_on_end=True` the handler
# already prints a trace whenever a trace finishes.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

Settings.callback_manager = callback_manager

# Query with tracing
# NOTE(review): `index` must already exist in the session.
query_engine = index.as_query_engine()
response = query_engine.query("Your question")

# Print the trace of the most recent run again on demand.
llama_debug.print_trace_map()

## Full-fledged Enterprise Document Processing

In [None]:

import pinecone
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.core import Settings
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.extractors import TitleExtractor, KeywordExtractor
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.database import DatabaseReader
from llama_index.readers.web import SimpleWebPageReader

def create_pinecone_index_with_llamaindex():
    """Complete example: process documents and store them in Pinecone.

    Loads documents from a PDF directory, a PostgreSQL query and two web
    pages, runs them through an ingestion pipeline, embeds the resulting
    nodes with Mistral embeddings and writes them to a Pinecone index.

    NOTE(review): ``mistral_key`` is read from the enclosing module; it must
    be defined before this function is called.

    Returns:
        The ``VectorStoreIndex`` backed by the Pinecone vector store.
    """
    # The module-level `pinecone.init(...)` API was removed in pinecone-client
    # 3.x; the client object below replaces it.
    from pinecone import Pinecone, ServerlessSpec

    # 1. Initialize Pinecone
    pc = Pinecone(api_key="your-pinecone-api-key")

    # 2. Create or connect to Pinecone index
    index_name = "document-index"

    # Check if index exists, create if not
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1024,  # MistralAI embedding dimension
            metric="cosine",
            # Serverless index; use your preferred cloud and region.
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

    # Get the Pinecone index
    pinecone_index = pc.Index(index_name)

    # 3. Create PineconeVectorStore. Metadata filters are a query-time concern
    # and are not a constructor option, so none is passed here.
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace="production",  # optional namespace for organization
    )

    # 4. Create StorageContext with Pinecone
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    # 5. Set up embedding model (important: must match Pinecone dimension).
    # The imported class is `MistralAIEmbedding` and takes `model_name`.
    embed_model = MistralAIEmbedding(
        model_name="mistral-embed",
        api_key=mistral_key,
    )

    # Configure LlamaIndex settings globally
    Settings.embed_model = embed_model

    # 6. Load from multiple sources
    pdf_docs = SimpleDirectoryReader("./pdfs").load_data()

    # Database configuration (placeholder credentials)
    db_config = {
        "scheme": "postgresql",
        "host": "localhost",
        "port": 5432,
        "user": "your_user",
        "password": "your_password",
        "dbname": "your_db",
    }
    db_docs = DatabaseReader(**db_config).load_data(query="SELECT * FROM documents")

    urls = ["https://example.com/page1", "https://example.com/page2"]
    web_docs = SimpleWebPageReader().load_data(urls)

    # Combine all documents
    all_docs = pdf_docs + db_docs + web_docs

    # 7. Create advanced processing pipeline
    pipeline = IngestionPipeline(
        transformations=[
            # Custom text cleaning (you'd implement these)
            # OCRProcessor(),
            # TextCleaner(),

            # Built-in LlamaIndex extractors
            TitleExtractor(nodes=5),
            KeywordExtractor(keywords=15),

            # Intelligent chunking
            SentenceSplitter(
                chunk_size=1024,
                chunk_overlap=200,
            ),

            # Custom metadata extractor (you'd implement this)
            # CompanyMetadataExtractor()
        ]
    )

    # 8. Process documents into nodes
    nodes = pipeline.run(documents=all_docs)
    print(f"Processed {len(nodes)} nodes from {len(all_docs)} documents")

    # 9. Create VectorStoreIndex from processed nodes
    index = VectorStoreIndex(
        nodes=nodes,
        storage_context=storage_context,
        # embed_model is already set globally via Settings
        show_progress=True,  # Show embedding progress
    )

    print(f"✅ Successfully created index with {len(nodes)} nodes in Pinecone")
    return index

def create_index_from_existing_nodes(processed_nodes):
    """Alternative: create an index from already processed nodes.

    Connects to the existing "document-index" Pinecone index, sets the global
    embedding model and writes ``processed_nodes`` into the "production"
    namespace.

    NOTE(review): ``mistral_key`` is read from the enclosing module; it must
    be defined before this function is called.

    Args:
        processed_nodes: Nodes produced earlier by an ingestion pipeline.

    Returns:
        The ``VectorStoreIndex`` backed by the Pinecone vector store.
    """
    # The module-level `pinecone.init(...)` API was removed in pinecone-client
    # 3.x; the client object below replaces it.
    from pinecone import Pinecone

    # Initialize Pinecone and connect to the existing index
    pc = Pinecone(api_key="your-pinecone-api-key")
    pinecone_index = pc.Index("document-index")

    # Create vector store and storage context
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace="production",
    )

    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    # Set embedding model. The imported class is `MistralAIEmbedding` and
    # takes `model_name`.
    Settings.embed_model = MistralAIEmbedding(
        model_name="mistral-embed",
        api_key=mistral_key,
    )

    # Create index directly from processed nodes
    index = VectorStoreIndex(
        nodes=processed_nodes,
        storage_context=storage_context,
        show_progress=True,
    )

    return index

def query_the_index(index):
    """Run a sample question against ``index`` and print the answer and sources.

    A query engine is built with top-5 retrieval and tree summarisation, one
    fixed question is asked, and the response is printed followed by the
    score, a 100-character text preview and the metadata of each source node.
    """
    engine_options = {
        "similarity_top_k": 5,  # Return top 5 similar chunks
        "response_mode": "tree_summarize",  # or "compact", "refine"
    }
    engine = index.as_query_engine(**engine_options)

    question = "What are the main topics discussed in the documents?"
    answer = engine.query(question)

    print("Query Response:")
    print(answer)

    # List the retrieved chunks so the answer can be traced to its sources.
    print("\nSource nodes:")
    for source in answer.source_nodes:
        print(f"- Score: {source.score:.3f}")
        print(f"  Text: {source.text[:100]}...")
        print(f"  Metadata: {source.metadata}")

# Advanced configuration options
def advanced_pinecone_configuration():
    """Show advanced Pinecone configuration options.

    Creates (if missing) a pod-based index sized for higher throughput and
    returns a storage context whose vector store has hybrid (dense + sparse)
    vectors enabled.

    Returns:
        A ``StorageContext`` wrapping the configured ``PineconeVectorStore``.
    """
    # The module-level `pinecone.init(...)` API was removed in pinecone-client
    # 3.x; pod settings now go in a `PodSpec`.
    from pinecone import Pinecone, PodSpec

    # Initialize with custom settings
    pc = Pinecone(api_key="your-pinecone-api-key")

    # Create index with advanced settings
    index_name = "advanced-document-index"

    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1024,
            # Sparse-dense (hybrid) vectors require the dot-product metric;
            # "cosine" conflicts with `add_sparse_vector=True` below.
            metric="dotproduct",
            spec=PodSpec(
                environment="us-east-1-aws",
                pods=2,  # Scale up for better performance
                replicas=1,  # Add replicas for high availability
                pod_type="p1.x1",  # Performance optimized pods
                # NOTE(review): confirm pods/replicas/shards are a valid
                # combination for your Pinecone plan.
            ),
        )

    pinecone_index = pc.Index(index_name)

    # Create vector store with advanced options
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace="production",
        text_key="content",  # Custom field name for text
        add_sparse_vector=True,  # Enable hybrid search (if supported)
        batch_size=100,  # Batch size for uploads
    )

    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    return storage_context

def batch_processing_for_large_datasets(all_docs):
    """Ingest a large document collection into Pinecone batch by batch.

    Documents are processed in slices of 1000 so only one batch of nodes is
    held in memory at a time. Each batch is run through the same pipeline and
    its nodes are added to a single index over the "batch_processing"
    namespace.

    NOTE(review): ``mistral_key`` is read from the enclosing module; it must
    be defined before this function is called.

    Args:
        all_docs: Sequence of documents to ingest (may be empty).

    Returns:
        The ``VectorStoreIndex`` over the Pinecone vector store. An index is
        returned even for empty input; the original raised on that case
        because ``index`` was never bound.
    """
    # The module-level `pinecone.init(...)` API was removed in pinecone-client
    # 3.x; the client object below replaces it.
    from pinecone import Pinecone

    # Setup Pinecone as before
    pc = Pinecone(api_key="your-key")
    vector_store = PineconeVectorStore(
        pinecone_index=pc.Index("document-index"),
        namespace="batch_processing",
    )
    # The imported class is `MistralAIEmbedding` and takes `model_name`.
    Settings.embed_model = MistralAIEmbedding(
        model_name="mistral-embed",
        api_key=mistral_key,
    )

    # One index over the vector store; every batch is inserted into it.
    index = VectorStoreIndex.from_vector_store(vector_store, show_progress=True)

    # The pipeline is identical for every batch, so build it once.
    pipeline = IngestionPipeline(
        transformations=[
            TitleExtractor(nodes=5),
            KeywordExtractor(keywords=15),
            SentenceSplitter(chunk_size=1024, chunk_overlap=200),
        ]
    )

    # Process in batches to avoid memory issues
    batch_size = 1000  # Adjust based on your memory constraints

    for i in range(0, len(all_docs), batch_size):
        batch_docs = all_docs[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}: {len(batch_docs)} documents")

        # Process batch
        batch_nodes = pipeline.run(documents=batch_docs)

        # `insert_nodes` is the node-level API; `insert` expects a Document.
        index.insert_nodes(batch_nodes)

        print(f"✅ Batch {i//batch_size + 1} processed and added to Pinecone")

    return index

# Script entry point: build the index, run the sample query, print a banner.
if __name__ == "__main__":
    # End-to-end run: ingest all sources and index them in Pinecone.
    index = create_pinecone_index_with_llamaindex()

    # Ask the sample question against the freshly built index.
    query_the_index(index)

    separator = "="*50
    print("\n" + separator)
    print("Pinecone + LlamaIndex integration complete! 🚀")


### Performance Tips

- **Chunking strategy:** Use semantic splitters for better context preservation.
- **Embedding models:** BGE models often provide the best quality/performance ratio.
- **Quantization:** Use 4-bit quantization for local models to reduce memory usage.
- **Batch processing:** Process documents in batches for large datasets.
- **Caching:** Enable caching for frequently accessed embeddings.

**Open Source Models**

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# The assignments below are alternatives: each one rebinds `embed_model`, so
# keep only the model you actually want.

# BGE models (recommended)
# Other sizes: BAAI/bge-base-en-v1.5, BAAI/bge-large-en-v1.5
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

# E5 models
embed_model = HuggingFaceEmbedding(
    model_name="intfloat/e5-small-v2",
)

# UAE models
embed_model = HuggingFaceEmbedding(
    model_name="WhereIsAI/UAE-Large-V1",
)

# Nomic Embed (fully open source)
from llama_index.embeddings.nomic import NomicEmbedding

nomic_options = {
    "api_key": "your_nomic_api_key",
    "model_name": "nomic-embed-text-v1",
    "task_type": "search_document",
}
embed_model = NomicEmbedding(**nomic_options)

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from transformers import BitsAndBytesConfig
import torch

# Model and tokenizer come from the same Hugging Face repository.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 quantization with double quantization, computing in float16, to
# cut the memory needed to load the model.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Sampling settings used at generation time.
sampling_options = {"temperature": 0.7, "top_k": 50, "top_p": 0.95}

llm = HuggingFaceLLM(
    model_name=model_id,
    tokenizer_name=model_id,
    context_window=3900,
    max_new_tokens=256,
    model_kwargs={"quantization_config": quantization_config},
    generate_kwargs=sampling_options,
    device_map="auto",
)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register both models as the global defaults.
Settings.llm = llm
Settings.embed_model = embed_model

**Ollama Models**

In [None]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Prerequisite: pull the model locally first, e.g. `ollama pull mistral`
# (or llama3.1, mixtral).
ollama_options = {
    "model": "mistral",  # or "llama3.1", "mixtral"
    "request_timeout": 30.0,
}
llm = Ollama(**ollama_options)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register both models as the global defaults.
Settings.llm = llm
Settings.embed_model = embed_model

**Mistral AI**

In [None]:
from llama_index.llms.mistralai import MistralAI
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.core import Settings

# Chat model served by the Mistral API.
llm = MistralAI(
    model="open-mixtral-8x22b",  # or "mistral-7b-instruct"
    temperature=0.1,
    api_key="your_mistral_api_key",
)

# Embedding model served by the Mistral API.
embed_model = MistralAIEmbedding(
    model_name="mistral-embed",
    api_key="your_mistral_api_key",
)

# Register both models as the global defaults.
Settings.llm = llm
Settings.embed_model = embed_model

**Hybrid Search**

In [None]:
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Stage 1: retrieve a wide candidate set from the vector index.
# NOTE(review): `index` must already exist in the session.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=20,
)

# Stage 2: rerank the candidates with a cross-encoder and keep the best 5.
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-12-v2",
    top_n=5,
)

# Build the engine around the custom retriever. `index.as_query_engine()`
# creates its own retriever, so passing `retriever=` to it collides with that
# internal one instead of replacing it.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    node_postprocessors=[reranker],
)

**Advanced Retrieval**

In [None]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Retrieve the ten nearest chunks from the vector index.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# Discard retrieved chunks whose similarity score is below the cutoff.
processor = SimilarityPostprocessor(similarity_cutoff=0.7)

# Assemble the query engine from the explicit retriever and post-processor.
engine_parts = {
    "retriever": retriever,
    "node_postprocessors": [processor],
}
query_engine = RetrieverQueryEngine(**engine_parts)

**Custom Response Synthesis**

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core.response_synthesizers import TreeSummarize

# `summary_template` must be a prompt-template object; a raw string lacks the
# formatting methods the synthesizer calls on it.
summary_template = PromptTemplate(
    """
    Based on the context information:
    {context_str}

    Please provide a comprehensive answer to: {query_str}

    Format your response with clear sections and bullet points where appropriate.
    """
)

response_synthesizer = TreeSummarize(summary_template=summary_template)

# NOTE(review): `index` must already exist in the session.
query_engine = index.as_query_engine(
    response_synthesizer=response_synthesizer
)

## Installations

In [None]:
# Core LlamaIndex
pip install llama-index-core

# Data readers used above (local files, SQL databases, web pages, Google Docs)
pip install llama-index-readers-file
pip install llama-index-readers-database
pip install llama-index-readers-web
pip install llama-index-readers-google

# For Pinecone vector store
pip install llama-index-vector-stores-pinecone
pip install pinecone-client

# For open source models
pip install llama-index-llms-huggingface
pip install llama-index-llms-ollama
pip install llama-index-llms-mistralai
pip install llama-index-multi-modal-llms-ollama
pip install llama-index-embeddings-huggingface
pip install llama-index-embeddings-mistralai
pip install llama-index-embeddings-nomic

# Optional: For quantization
pip install transformers torch bitsandbytes