# LlamaIndex Cheatsheet - Open Source Models + Pinecone

## Installation & Setup

```bash
# Core installations
pip install llama-index
# NOTE(review): recent Pinecone SDK releases are published on PyPI as
# "pinecone"; "pinecone-client" is the former package name. Confirm which one
# your llama-index-vector-stores-pinecone version depends on, and avoid having
# both installed in the same environment.
pip install pinecone-client
pip install sentence-transformers
pip install transformers torch

# For specific model providers
# Each LlamaIndex integration ships as its own package; install only the ones
# whose modules you import in the snippets below.
pip install llama-index-llms-ollama
pip install llama-index-llms-huggingface
pip install llama-index-embeddings-huggingface
pip install llama-index-vector-stores-pinecone
```

## Basic Configuration

```python
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

# Setup Pinecone
# Read the API key from the environment rather than hard-coding it, so the
# secret never ends up in source control. A missing variable raises KeyError
# immediately instead of failing later with an opaque authentication error.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("your-index-name")

# Configure global settings
# Settings is process-wide: every index / query engine created afterwards
# uses this LLM and embedding model unless one is passed explicitly.
Settings.llm = Ollama(model="llama3.1:8b", request_timeout=120.0)
# The Pinecone index dimension must match this embedding model's output size
# (see "Advanced Pinecone Configuration").
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Setup vector store
vector_store = PineconeVectorStore(
    pinecone_index=index,
    namespace="default",
)
```

## Open Source Model Options

### LLMs via Ollama
```python
from llama_index.llms.ollama import Ollama

# One handle per commonly used Ollama model tag; assign whichever you need
# to Settings.llm or pass it to a query engine.
phi3 = Ollama(model="phi3:mini")
mistral = Ollama(model="mistral:7b")
codellama = Ollama(model="codellama:7b")
llama_3_1 = Ollama(model="llama3.1:8b")
```

### LLMs via HuggingFace
```python
from llama_index.llms.huggingface import HuggingFaceLLM

# The same Hub repository id supplies both the model and its tokenizer.
_hf_repo_id = "microsoft/DialoGPT-medium"

# do_sample=False is forwarded to generation through generate_kwargs.
_generation_options = {"do_sample": False}

llm = HuggingFaceLLM(
    model_name=_hf_repo_id,
    tokenizer_name=_hf_repo_id,
    max_new_tokens=256,
    generate_kwargs=_generation_options,
)
```

### Embedding Models
```python
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Commonly used embedding checkpoints. Whichever one you pick, the Pinecone
# index dimension has to match the model's output size.
sentence_bert = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
e5_base = HuggingFaceEmbedding(model_name="intfloat/e5-base-v2")
bge_large = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
bge_small = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
```

## Use Case 1: Basic RAG with Document Loading

```python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore

# Ingest: read the files under ./data into Document objects
reader = SimpleDirectoryReader("./data")
documents = reader.load_data()

# Index: back the index with the Pinecone vector store from Basic Configuration
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Ask: run one question through the default query engine
query_engine = index.as_query_engine()
question = "What is the main topic of these documents?"
response = query_engine.query(question)
print(response)
```

## Use Case 2: Chat Engine for Conversational AI

```python
from llama_index.core import VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer

# Conversation history buffer, capped at 3000 tokens
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# Chat engine built on the existing index, sharing the buffer above
chat_engine = index.as_chat_engine(
    memory=memory,
    verbose=True,
    chat_mode="condense_plus_context",
)

# Multi-turn conversation: the turns are sent in order through the same engine
turns = (
    "Tell me about machine learning",
    "What are its main applications?",
    "How does it relate to the previous topic?",
)
response1, response2, response3 = (chat_engine.chat(turn) for turn in turns)
```

## Use Case 3: Multi-Document Analysis

```python
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Gather documents from both source folders into one list (pdfs first)
documents = [
    doc
    for folder in ("./pdfs", "./text_files")
    for doc in SimpleDirectoryReader(folder).load_data()
]

# Split into nodes with an explicit chunk size and overlap
node_parser = SentenceSplitter(chunk_overlap=50, chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)

# Build the index directly from the pre-chunked nodes
index = VectorStoreIndex(nodes, storage_context=storage_context)

# Retrieve the 10 most similar nodes and combine them with tree_summarize
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    similarity_top_k=10,
)

comparison_question = "Compare the key findings across all documents"
response = query_engine.query(comparison_question)
```

## Use Case 4: Code Analysis and Q&A

```python
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.ollama import Ollama

# Use CodeLlama for code analysis
# NOTE: this replaces the global LLM for every snippet that runs afterwards.
Settings.llm = Ollama(model="codellama:7b")

# Load code files
# `file_extractor` maps extensions to reader *objects*, not to language-name
# strings, so it is the wrong tool for selecting source files. Use
# `required_exts` to restrict which files are picked up instead; pass
# recursive=True as well if ./src contains sub-packages.
code_documents = SimpleDirectoryReader(
    "./src",
    required_exts=[".py", ".js", ".java"],
).load_data()

# Create specialized index for code
code_index = VectorStoreIndex.from_documents(
    code_documents,
    storage_context=storage_context,
)

query_engine = code_index.as_query_engine()
response = query_engine.query("Explain the main functions in this codebase")
```

## Use Case 5: Structured Data Extraction

```python
from llama_index.core import PromptTemplate
from pydantic import BaseModel
from typing import List


class CompanyInfo(BaseModel):
    """Target schema for the structured answer."""

    name: str
    industry: str
    key_products: List[str]
    revenue: str


# Custom prompt for extraction
# A text-QA template receives both the retrieved context and the user's
# question, so both placeholders are included.
extraction_prompt = PromptTemplate(
    "Extract company information from the following text:\n"
    "{context_str}\n"
    "Request: {query_str}\n"
    "Return the information in the specified JSON format."
)

# `output_cls` is the query-engine option for structured (Pydantic) output.
# An `output_parser=` keyword is not a query-engine argument, so a parser
# passed that way would never be applied to the answer.
query_engine = index.as_query_engine(
    output_cls=CompanyInfo,
    text_qa_template=extraction_prompt,
)

response = query_engine.query("Extract company information from the documents")

# With `output_cls` set, the parsed object is expected on `response.response`
# (a CompanyInfo instance) — verify against your installed version.
company_info = response.response
```

## Use Case 6: Multi-Modal RAG (Text + Images)

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.multi_modal_llms.ollama import OllamaMultiModal

# Use multi-modal model
mm_llm = OllamaMultiModal(model="llava:7b")

# Load documents with images
# `file_extractor` values must be reader objects; plain strings such as
# "image" or "pdf" fail when the reader tries to load a file. Restrict the
# extensions with `required_exts` and let the reader choose its own loaders.
documents = SimpleDirectoryReader(
    "./mixed_content",
    required_exts=[".jpg", ".png", ".pdf"],
).load_data()

# NOTE(review): a plain VectorStoreIndex embeds with the text embedding model
# only, so image content may not influence retrieval; LlamaIndex's dedicated
# multi-modal index is presumably the better fit — confirm against the docs
# for your installed version.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

query_engine = index.as_query_engine(llm=mm_llm)
response = query_engine.query("Describe what you see in the images and how it relates to the text")
```

## Use Case 7: Knowledge Graph Construction

```python
from llama_index.core import KnowledgeGraphIndex, StorageContext
# With the `llama_index.core` package layout used throughout this cheatsheet,
# the in-memory graph store is exported from `llama_index.core.graph_stores`.
from llama_index.core.graph_stores import SimpleGraphStore

# Create knowledge graph
# NOTE: this rebinds the shared `storage_context`; later snippets will see the
# version that also carries the graph store.
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    graph_store=graph_store,
)

# Triplet extraction runs through the configured LLM for every chunk, so
# building the graph can be slow with a local model.
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
)

# Query the knowledge graph
# include_text=False is passed so the query engine works from the extracted
# triplets only.
kg_query_engine = kg_index.as_query_engine(
    include_text=False,
    response_mode="tree_summarize",
)

response = kg_query_engine.query("What are the relationships between different entities?")
```

## Use Case 8: Document Comparison and Summarization

```python
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool
from llama_index.vector_stores.pinecone import PineconeVectorStore

# doc_set_1 / doc_set_2 are lists of Document objects, one per product,
# e.g. loaded with SimpleDirectoryReader(...).load_data().

# Create separate indices for different document sets
# Each set gets its own Pinecone namespace. If both indices shared a single
# vector store/namespace, each query engine would retrieve from the combined
# pool of vectors and the two tools would be indistinguishable.
pinecone_handle = pc.Index("your-index-name")

storage_context_a = StorageContext.from_defaults(
    vector_store=PineconeVectorStore(
        pinecone_index=pinecone_handle,
        namespace="product-a",
    )
)
storage_context_b = StorageContext.from_defaults(
    vector_store=PineconeVectorStore(
        pinecone_index=pinecone_handle,
        namespace="product-b",
    )
)

index1 = VectorStoreIndex.from_documents(doc_set_1, storage_context=storage_context_a)
index2 = VectorStoreIndex.from_documents(doc_set_2, storage_context=storage_context_b)

# Create query engine tools
# Give every tool a distinct name so generated sub-questions can be routed
# to the right one unambiguously.
tool1 = QueryEngineTool.from_defaults(
    query_engine=index1.as_query_engine(),
    name="product_a",
    description="Contains information about Product A",
)

tool2 = QueryEngineTool.from_defaults(
    query_engine=index2.as_query_engine(),
    name="product_b",
    description="Contains information about Product B",
)

# Sub-question query engine for comparison
comparison_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[tool1, tool2]
)

response = comparison_engine.query("Compare Product A and Product B features")
```

## Advanced Pinecone Configuration

```python
from llama_index.core import VectorStoreIndex
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import ServerlessSpec

INDEX_NAME = "llamaindex-demo"

# Create Pinecone index with specific configuration
# Guarded so that re-running the snippet does not fail on an existing index.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,  # Match your embedding model dimension
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Advanced vector store setup
# Fetch the Pinecone handle explicitly: earlier snippets rebind `index` to a
# LlamaIndex VectorStoreIndex, which is not a valid `pinecone_index`.
vector_store = PineconeVectorStore(
    pinecone_index=pc.Index(INDEX_NAME),
    namespace="documents",
    text_key="content",
)

# Metadata filtering is applied at query time, not in the vector store
# constructor: build a MetadataFilters object and hand it to the
# retriever / query engine.
technical_only = MetadataFilters(
    filters=[ExactMatchFilter(key="category", value="technical")]
)
filtered_query_engine = VectorStoreIndex.from_vector_store(
    vector_store
).as_query_engine(filters=technical_only)
```

## Performance Optimization Tips

```python
# 1. Optimize chunk sizes
from llama_index.core.node_parser import SentenceSplitter

# Larger chunks with proportionally larger overlap, split on spaces
node_parser = SentenceSplitter(separator=" ", chunk_overlap=100, chunk_size=1024)

# 2. Use async for better performance
from llama_index.core import VectorStoreIndex
import asyncio


async def async_query():
    """Run one query through the engine's async entry point and return the result."""
    engine = index.as_query_engine()
    return await engine.aquery("Your question here")


# 3. Batch processing
from llama_index.core.ingestion import IngestionPipeline

# Chunk first, then embed; the pipeline is given the vector store to write to
ingestion_steps = [node_parser, Settings.embed_model]
pipeline = IngestionPipeline(
    vector_store=vector_store,
    transformations=ingestion_steps,
)

pipeline.run(documents=documents)
```

## Monitoring and Debugging

```python
import logging

# Enable debug logging
logging.basicConfig(level=logging.DEBUG)

# Add callbacks for monitoring
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

# print_trace_on_end=True asks the handler to print the trace itself when a
# trace finishes.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# Install globally so components created afterwards report their events.
Settings.callback_manager = callback_manager

# Query with tracing
query_engine = index.as_query_engine()
response = query_engine.query("Your question")

# Print event traces
# The handler's public method is `print_trace_map()`; it has no
# `print_trace()` method, so calling that would raise AttributeError.
llama_debug.print_trace_map()
```

## Common Patterns

### Pattern 1: Two-Stage Retrieval (Vector Search + Cross-Encoder Rerank)
```python
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Stage 1: over-fetch candidates from the vector index.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=20,
)

# Stage 2: re-score the candidates with a cross-encoder and keep the best 5.
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-12-v2",
    top_n=5,
)

# Build the query engine around the custom retriever directly.
# `index.as_query_engine()` always constructs its own retriever, so passing
# `retriever=` to it clashes with that internal one instead of replacing it.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    node_postprocessors=[reranker],
)
```

### Pattern 2: Custom Response Synthesis
```python
from llama_index.core import PromptTemplate
from llama_index.core.response_synthesizers import TreeSummarize

# `summary_template` must be a prompt-template object: the synthesizer calls
# template methods on it, which a raw string does not provide.
summary_prompt = PromptTemplate(
    """
    Based on the context information:
    {context_str}
    
    Please provide a comprehensive answer to: {query_str}
    
    Format your response with clear sections and bullet points where appropriate.
    """
)

response_synthesizer = TreeSummarize(summary_template=summary_prompt)

query_engine = index.as_query_engine(
    response_synthesizer=response_synthesizer
)
```

## Error Handling Best Practices

```python
from pinecone import ServerlessSpec

try:
    # Initialize components with error handling
    # Create a fallback index only when the project has no indexes at all.
    if not pc.list_indexes().names():
        pc.create_index(
            name="backup-index",
            dimension=384,  # Match your embedding model dimension
            metric="cosine",
            # `spec` is required by the current Pinecone client.
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

    response = query_engine.query("Your question")

except Exception as e:
    # Deliberately broad: this is the outermost boundary of the request, and
    # the caller should get a fallback answer rather than a traceback.
    print(f"Error during querying: {e}")
    # Fallback logic
    response = "I encountered an error processing your request."
```

This cheatsheet covers the most common LlamaIndex patterns using open-source models and Pinecone. Adjust the model names, chunk sizes, and configurations based on your specific requirements and hardware capabilities.