# Cached RAG using Amazon OpenSearch

In [None]:
!pip install --upgrade langchain langchain-core langchain-aws langchain-community boto3 sagemaker pydantic opensearch-py  

In [None]:
# Import all required libraries
import json

import boto3
import numpy as np
from langchain_aws import BedrockEmbeddings, ChatBedrock
from langchain_community.vectorstores import OpenSearchVectorSearch
from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection

In [None]:
# Create the Bedrock runtime client and initialize the Claude chat model.
# The client is shared with the embeddings model created later in the notebook.
BEDROCK_CLIENT = boto3.client("bedrock-runtime", region_name="us-east-1")
llm = ChatBedrock(
    model_id="us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    client=BEDROCK_CLIENT,
    # Inference parameters forwarded to the model on every invocation.
    model_kwargs={
        "max_tokens": 2048,
        "temperature": 0.5,
        "top_k": 50,
        "anthropic_version": "bedrock-2023-05-31",
    },
)

In [None]:
# Embedding model: Cohere English v3, served through the shared Bedrock client.
embeddings = BedrockEmbeddings(
    model_id="cohere.embed-english-v3",
    client=BEDROCK_CLIENT,
)

In [None]:
# In-process caches backed by plain dictionaries; a shared store such as
# Valkey or Redis could be substituted.
# embedding_cache: query text -> embedding vector of that query
# response_cache:  query text -> search results returned for that query
embedding_cache: dict = {}
response_cache: dict = {}

In [None]:
# Build a SigV4-signed OpenSearch client.
# Replace the placeholder with your domain endpoint (host name only; the
# "https://" scheme is added where the URL is built later in the notebook).
oss_host = "search-xxxxxxxxxxxxxxxxxxxxxxx.us-east-.es.amazonaws.com"
region = "us-east-1"

# Sign requests with the credentials of the default boto3 session.
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region, "es")

os_client = OpenSearch(
    hosts=[{"host": oss_host, "port": 443}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    pool_maxsize=60,
)

# Query the cluster once to confirm connectivity; being the last expression
# of the cell, the returned info is displayed.
os_client.info()

In [None]:
# Name of the OpenSearch index that stores the document vectors.
index_name = "cached-rag-index"

In [None]:
# Create the k-NN index (skipped when it already exists, so the cell can be re-run).
index_body = {
    "settings": {
        "index": {
            "knn": True,
            "knn.algo_param.ef_search": 256,
        }
    },
    "mappings": {
        "properties": {
            "vector_field": {
                "type": "knn_vector",
                # Must equal the length of the vectors produced by the
                # embedding model (1024 for the model used in this notebook).
                "dimension": 1024,
                "method": {
                    "name": "hnsw",
                    # NOTE(review): the index ranks by L2 distance while the
                    # query cache compares embeddings by cosine similarity;
                    # confirm this is intended.
                    "space_type": "l2",
                    "engine": "nmslib",
                    "parameters": {
                        "ef_construction": 256,
                        "m": 32,
                    },
                },
            }
        }
    },
}

if os_client.indices.exists(index=index_name):
    # Creating an existing index raises an error; keep the current one.
    print(f"Index '{index_name}' already exists; skipping creation.")
    response = None
else:
    response = os_client.indices.create(index=index_name, body=index_body)
response

In [None]:
def extract_paragraphs(file_path: str) -> list:
    """Return the non-empty paragraphs of a UTF-8 text file.

    Paragraphs are the chunks separated by a blank line (two consecutive
    newlines). Each chunk is stripped of surrounding whitespace, and chunks
    that are empty after stripping are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The paragraphs in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        contents = handle.read()

    chunks = []
    for raw_chunk in contents.split('\n\n'):
        cleaned = raw_chunk.strip()
        if cleaned:
            chunks.append(cleaned)
    return chunks


In [None]:
# Location of the plain-text document to ingest; replace with your own file.
file_path = "your-file.txt"
# Break the document into blank-line-separated paragraphs for indexing.
paragraphs = extract_paragraphs(file_path)

In [None]:
# Embed every paragraph and bulk-load the resulting vectors into the index.
# NOTE(review): presumably the store's default vector field name matches the
# "vector_field" mapping created earlier; confirm against the library docs.
opensearch_vector_search = OpenSearchVectorSearch.from_texts(
    paragraphs,
    embeddings,
    index_name=index_name,
    opensearch_url=f'https://{oss_host}:443',
    http_auth=auth,
    connection_class=RequestsHttpConnection,
    use_ssl=True,
    verify_certs=True,
    timeout=60,
    bulk_size=8000,
)

In [None]:
def search_similar_cached_query(new_embedding, threshold=0.80):
    """Find the cached query whose embedding is most similar to ``new_embedding``.

    Every entry of ``embedding_cache`` is scored with ``cosine_similarity``
    and each comparison is printed. An entry qualifies only when its score
    is at least ``threshold`` and strictly greater than the best score seen
    so far (which starts at 0).

    Args:
        new_embedding: Embedding vector of the incoming query.
        threshold: Minimum similarity for a cached query to count as a match.

    Returns:
        The key of the best-matching cached query, or ``None`` if none qualifies.
    """
    winner = None
    top_score = 0

    for candidate, candidate_vector in embedding_cache.items():
        score = cosine_similarity(new_embedding, candidate_vector)
        print(f"Comparing: {candidate} with similarity: {score}")

        # Ignore entries below the threshold or no better than the current winner.
        if not (score >= threshold and score > top_score):
            continue
        winner, top_score = candidate, score

    return winner

In [None]:
def cosine_similarity(vec1, vec2):
    """Compute cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence or array of numbers).
        vec2: Second vector, of the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    first = np.asarray(vec1, dtype=float)
    second = np.asarray(vec2, dtype=float)
    norm_product = np.linalg.norm(first) * np.linalg.norm(second)

    # A zero-length vector has no direction; report "no similarity" instead
    # of dividing by zero and returning NaN.
    if norm_product == 0:
        return 0.0
    return np.dot(first, second) / norm_product

In [None]:
def search_opensearch(index: str, query: str, *, k: int = 5, size: int = 2):
    """Embed ``query`` and run a k-NN search against ``index``.

    The request is sent through the module-level ``os_client`` as a raw
    ``knn`` query on ``vector_field``.

    Args:
        index: Name of the OpenSearch index to search.
        query: Query text to embed with the module-level ``embeddings`` model.
        k: Number of nearest neighbours requested in the ``knn`` clause.
        size: Maximum number of hits returned in the response.

    Returns:
        A ``(results, query_embedding)`` tuple: the list found under
        ``response["hits"]["hits"]`` and the embedding used for the search.
    """
    query_embedding = embeddings.embed_query(query)

    search_query = {
        "size": size,
        "query": {
            "knn": {
                "vector_field": {
                    "vector": query_embedding,
                    "k": k,
                }
            }
        },
    }

    response = os_client.search(index=index, body=search_query)
    results = response["hits"]["hits"]

    return results, query_embedding

In [None]:
def cache_augmented_search(index: str, query: str):
    """Search OpenSearch, reusing cached results for repeated or similar queries.

    Lookup order:
      1. Exact match on the query text in ``response_cache``.
      2. Most similar cached query according to ``search_similar_cached_query``.
      3. Fresh search via ``search_opensearch``; the results and the query
         embedding are then stored in the caches under the query text.

    NOTE(review): the caches are keyed by query text only, so results cached
    for one index are also returned when a different ``index`` is passed.

    Args:
        index: Name of the OpenSearch index to search on a cache miss.
        query: Query text.

    Returns:
        The cached or freshly retrieved search results.
    """
    # Fast path: identical text was answered before, so no embedding call is needed.
    if query in response_cache:
        print("Cache hit! Returning similar cached results.")
        return response_cache[query]

    query_embedding = embeddings.embed_query(query)

    # Step 1: Check cache for similar query
    cached_key = search_similar_cached_query(query_embedding)

    # Compare against None explicitly so a falsy key (e.g. "") still counts as a match.
    if cached_key is not None and cached_key in response_cache:
        print("Cache hit! Returning similar cached results.")
        return response_cache[cached_key]

    # Step 2: If no match, query OpenSearch
    # (search_opensearch embeds the query again and returns that embedding).
    print("Cache miss. Querying OpenSearch...")
    results, new_embedding = search_opensearch(index, query)

    # Step 3: Store new embedding & results in cache
    response_cache[query] = results
    embedding_cache[query] = new_embedding

    return results

In [None]:
%%time
# Time one query through the cache-aware search. The first run of a query is a
# cache miss and hits OpenSearch; repeating it should be served from the cache.
query_text = "your question"
results = cache_augmented_search(index_name, query_text)