# Baseline RAG Implementation

This notebook implements a generic baseline RAG system that can be used with any source material. It uses:
- Cohere Embed English (cohere.embed-english-v3) for embeddings
- Claude 3.5 Sonnet for LLM responses
- Amazon OpenSearch Service for vector storage

## Features
- Generic document ingestion
- Vector similarity search
- Context-aware response generation

## Usage
1. Set the `OPENSEARCH_HOST` environment variable and initialize the RAG system
2. Ingest documents (text content with optional metadata)
3. Query the system with natural language questions

In [None]:
import os
import json
import boto3
from typing import List, Dict, Any, Optional
from opensearchpy import OpenSearch, RequestsHttpConnection, helpers
from requests_aws4auth import AWS4Auth
from tqdm import tqdm

In [None]:
class AWSConfig:
    """AWS service configuration.

    Builds, from a single default boto3 session:

    - ``bedrock``: a ``bedrock-runtime`` client,
    - ``region``: the session's region name,
    - ``awsauth``: SigV4 signing credentials for the ``es`` service,
    - ``opensearch``: an OpenSearch client for ``OPENSEARCH_HOST`` on
      port 443 with TLS and certificate verification enabled.

    Raises:
        ValueError: If the ``OPENSEARCH_HOST`` environment variable is unset
            or empty, or if the boto3 session cannot resolve credentials.
    """
    def __init__(self):
        # Check the local requirement first so a missing environment
        # variable is reported before any AWS client is constructed.
        self.opensearch_host = os.getenv('OPENSEARCH_HOST')
        if not self.opensearch_host:
            raise ValueError("OPENSEARCH_HOST environment variable is required")

        # One session feeds the region, the Bedrock client and the signing
        # credentials, so all three come from the same configuration.
        session = boto3.Session()
        self.region = session.region_name
        self.bedrock = session.client('bedrock-runtime')

        # get_credentials() returns None when nothing is configured; fail
        # with a clear message instead of an AttributeError on None.
        credentials = session.get_credentials()
        if credentials is None:
            raise ValueError(
                "AWS credentials could not be resolved from the default boto3 session"
            )

        self.awsauth = AWS4Auth(
            credentials.access_key,
            credentials.secret_key,
            self.region,
            'es',
            session_token=credentials.token
        )

        self.opensearch = OpenSearch(
            hosts=[{'host': self.opensearch_host, 'port': 443}],
            http_auth=self.awsauth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection
        )

In [None]:
class BaselineRAG:
    """Generic baseline RAG implementation.

    Embeds text with a Cohere model on Bedrock, stores documents and their
    vectors in an OpenSearch index, retrieves the most similar documents for
    a question by cosine similarity, and asks Claude to answer from them.
    """
    def __init__(self, config: "AWSConfig", index_name: str = "rag-documents"):
        """Store the configuration and make sure the target index exists.

        Args:
            config: Object exposing ``bedrock`` and ``opensearch`` clients.
            index_name: Name of the OpenSearch index to read and write.
        """
        self.config = config
        self.embedding_model_id = "cohere.embed-english-v3"
        self.llm_model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
        self.index_name = index_name

        # Ensure index exists
        self._create_index_if_not_exists()

    def _create_index_if_not_exists(self):
        """Create OpenSearch index with appropriate mapping"""
        # Pass the index by keyword: this works whether or not the installed
        # opensearch-py version accepts it positionally.
        if not self.config.opensearch.indices.exists(index=self.index_name):
            mapping = {
                "mappings": {
                    "properties": {
                        "content": {"type": "text"},
                        "metadata": {"type": "object"},
                        "embedding": {
                            "type": "knn_vector",
                            "dimension": 1024  # Cohere embedding dimension
                        }
                    }
                }
            }

            self.config.opensearch.indices.create(
                index=self.index_name,
                body=mapping
            )

    def get_embeddings(self, text: str, input_type: str = "search_document") -> List[float]:
        """Generate the embedding vector for one text using the Cohere model.

        Args:
            text: Text to embed.
            input_type: Cohere ``input_type`` value; ``"search_document"``
                for content being indexed, ``"search_query"`` for queries.

        Returns:
            The embedding of ``text`` as a flat list of floats.

        Raises:
            ValueError: If the model response contains no embeddings.
        """
        # Cohere Embed takes a batch under "texts" plus an "input_type";
        # "inputText" is the Titan request shape and is rejected here.
        request_body = {
            "texts": [text],
            "input_type": input_type
        }

        response = self.config.bedrock.invoke_model(
            modelId=self.embedding_model_id,
            body=json.dumps(request_body)
        )

        response_body = json.loads(response['body'].read())
        embeddings = response_body['embeddings']
        if not embeddings:
            raise ValueError("Embedding model returned no embeddings")
        # The response holds one vector per input text; one text was sent.
        return embeddings[0]

    def ingest_documents(self, documents: List[Dict[str, Any]], batch_size: int = 100) -> None:
        """Ingest documents into vector store

        Args:
            documents: List of dictionaries with 'content' and optional 'metadata'
            batch_size: Number of documents to process in each batch

        Raises:
            ValueError: If a document has no 'content' field. Batches that
                were already flushed before the bad document stay indexed.
        """
        actions = []

        for doc in tqdm(documents, desc="Processing documents"):
            if 'content' not in doc:
                raise ValueError("Each document must have 'content' field")

            # Generate embedding
            embedding = self.get_embeddings(doc['content'])

            # Prepare document for indexing
            action = {
                "_index": self.index_name,
                "_source": {
                    "content": doc['content'],
                    "metadata": doc.get('metadata', {}),
                    "embedding": embedding
                }
            }
            actions.append(action)

            # Bulk index when batch is full
            if len(actions) >= batch_size:
                helpers.bulk(self.config.opensearch, actions)
                actions = []

        # Index any remaining documents
        if actions:
            helpers.bulk(self.config.opensearch, actions)

    def semantic_search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """Search for relevant documents using embeddings.

        Args:
            query: Natural language query to embed and search with.
            k: Maximum number of hits to return.

        Returns:
            The ``_source`` of each hit (``content`` and ``metadata``), in
            the order returned by OpenSearch.
        """
        query_embedding = self.get_embeddings(query, input_type="search_query")

        # OpenSearch's Painless cosineSimilarity takes the query vector and
        # the document field as doc[<field>]. Adding 1.0 keeps the score
        # non-negative, which script_score requires.
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "cosineSimilarity(params.query_value, doc[params.field]) + 1.0",
                    "params": {
                        "field": "embedding",
                        "query_value": query_embedding
                    }
                }
            }
        }

        response = self.config.opensearch.search(
            index=self.index_name,
            body={
                "size": k,
                "query": script_query,
                "_source": ["content", "metadata"]
            }
        )

        return [hit['_source'] for hit in response['hits']['hits']]

    def generate_response(self, query: str, context: List[Dict[str, Any]]) -> str:
        """Generate response using Claude 3.5 Sonnet.

        Args:
            query: The user's question.
            context: Retrieved documents; each must have a 'content' key.

        Returns:
            The text of the first content block in the model's reply.
        """
        context_str = "\n\n".join([doc['content'] for doc in context])

        prompt = f"""You are a helpful AI assistant. Use the following context to answer the question. 
        If you cannot answer the question based on the context, say so.
        
        Context:
        {context_str}
        
        Question: {query}
        
        Answer:"""

        request_body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }

        response = self.config.bedrock.invoke_model(
            modelId=self.llm_model_id,
            body=json.dumps(request_body)
        )

        response_body = json.loads(response['body'].read())
        return response_body['content'][0]['text']

    def query(self, query: str, k: int = 3) -> Dict[str, Any]:
        """Complete RAG pipeline

        Args:
            query: Natural language question
            k: Number of context documents to retrieve

        Returns:
            Dictionary containing:
            - query: Original question
            - context: Retrieved relevant documents
            - response: Generated answer
        """
        # Get relevant documents
        context = self.semantic_search(query, k)

        # Generate response
        response = self.generate_response(query, context)

        return {
            "query": query,
            "context": context,
            "response": response
        }

In [None]:
# Example usage
def test_rag_system():
    """Run an end-to-end smoke test: ingest two documents, then query.

    Requires working AWS credentials and the ``OPENSEARCH_HOST``
    environment variable. Writes to the ``test-rag-documents`` index and
    prints the generated answer and the context documents used.
    """
    # Initialize AWS configuration
    config = AWSConfig()

    # Create RAG instance with test index
    rag = BaselineRAG(config, index_name="test-rag-documents")

    # Sample documents
    documents = [
        {
            "content": "Machine learning is a subset of artificial intelligence that focuses on developing systems that can learn from data.",
            "metadata": {"source": "test", "topic": "ML"}
        },
        {
            "content": "Deep learning is a type of machine learning that uses neural networks with multiple layers.",
            "metadata": {"source": "test", "topic": "DL"}
        }
    ]

    # Ingest documents
    print("Ingesting documents...")
    rag.ingest_documents(documents)

    # Newly indexed documents are not searchable until the index refreshes;
    # force a refresh so the query below can see what was just ingested.
    config.opensearch.indices.refresh(index=rag.index_name)

    # Test query
    print("\nTesting query...")
    result = rag.query("What is machine learning?")

    print("\nResponse:", result['response'])
    print("\nContext used:")
    for doc in result['context']:
        print(f"- {doc['content']}")
        print(f"  Metadata: {doc['metadata']}")

# Run the smoke test when this code is executed as the main module.
if __name__ == "__main__":
    test_rag_system()