**Setup and Installation**

In [None]:
!pip install -q langchain
!pip install -q langchain_community
!pip install -q langchain-openai
!pip install -q sentence-transformers
!pip install -q numpy
!pip install -q python-dotenv

# Print versions for debugging
print("Installed package versions:")
!pip freeze | grep langchain

**Import required libraries:**

In [22]:
import os
import time
import hashlib
import pickle
import numpy as np
from typing import Dict, Optional, List
from getpass import getpass
from langchain_openai import OpenAIEmbeddings

**Initialize Base Configuration**

In [23]:
# Prompt for the OpenAI API key without echoing it, keep it in a variable and
# export it through the process environment as well.
openai_api_key = os.environ['OPENAI_API_KEY'] = getpass('Enter your OpenAI API key: ')

# Embedding model shared by every cache implementation below.
base_embedder = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-large")

# Sample inputs reused across the examples. Two sentences appear twice on
# purpose so that each cache has something to hit.
test_texts = [
    "Machine learning is fascinating",
    "AI is transforming industries",
    "Machine learning is fascinating",  # duplicate of the first entry
    "Python is great for AI",
    "AI is transforming industries",    # duplicate of the second entry
]

Enter your OpenAI API key: ··········


**Simple In-Memory Cache**

In [None]:
class SimpleEmbeddingCache:
    """Dictionary-backed memoization layer in front of an embedding model.

    The wrapped model only needs an ``embed_query(text)`` method. Entries are
    keyed by the exact text and are kept for the lifetime of the instance.
    """

    def __init__(self, embedding_model):
        self.model = embedding_model
        self.cache = {}
        # Lookup counters reported by get_stats().
        self.hits = 0
        self.misses = 0

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding for ``text``, calling the model only on a miss."""
        if text not in self.cache:
            # Count the miss before calling the model so a failed call is
            # still recorded as a miss (nothing is stored in that case).
            self.misses += 1
            self.cache[text] = self.model.embed_query(text)
        else:
            self.hits += 1
        return self.cache[text]

    def get_stats(self):
        """Summarize hits, misses and the hit rate as a percentage string."""
        lookups = self.hits + self.misses
        if lookups > 0:
            percent = self.hits / lookups * 100
        else:
            percent = 0
        return dict(
            hits=self.hits,
            misses=self.misses,
            hit_rate=f"{percent:.2f}%",
        )

"""Let's test the simple cache:"""

# Initialize simple cache
simple_cache = SimpleEmbeddingCache(base_embedder)

print("Testing Simple Cache Implementation...\n")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()

# Embed every sample text; the repeated sentences should come from the cache.
for text in test_texts:
    embedding = simple_cache.get_embedding(text)
    print(f"Processed: '{text}'")

end_time = time.perf_counter()

# Print statistics
stats = simple_cache.get_stats()
print("\nSimple Cache Statistics:")
print(f"Cache Hits: {stats['hits']}")
print(f"Cache Misses: {stats['misses']}")
print(f"Hit Rate: {stats['hit_rate']}")
print(f"Processing Time: {end_time - start_time:.2f} seconds")

**File-Based Cache Implementation**

In [None]:
class FileCacheEmbeddings:
    """Disk-backed embedding cache storing one pickle file per distinct text.

    The file name is the SHA-256 hex digest of the text, so the cache survives
    kernel restarts as long as ``cache_dir`` is kept.

    SECURITY: entries are read with ``pickle.load``, which can execute
    arbitrary code. Only point ``cache_dir`` at a directory this code wrote.

    NOTE(review): the key is derived from the text alone, so two different
    embedding models sharing one ``cache_dir`` would read each other's
    vectors - confirm callers use one directory per model.
    """

    # Sentinel meaning "no usable cache file"; distinct from any cached value.
    _MISSING = object()

    # Errors that mean a cache file is absent, unreadable or corrupt.
    _LOAD_ERRORS = (
        OSError,
        EOFError,
        pickle.UnpicklingError,
        AttributeError,
        ImportError,
        IndexError,
        ValueError,
    )

    def __init__(self, embedding_model, cache_dir="./embedding_cache"):
        """Wrap ``embedding_model`` and make sure ``cache_dir`` exists.

        Args:
            embedding_model: object exposing ``embed_query(text)``.
            cache_dir: directory that holds the ``<sha256>.pkl`` files.
        """
        self.model = embedding_model
        self.cache_dir = cache_dir
        self.stats = {'hits': 0, 'misses': 0}
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, text: str) -> str:
        """Generate cache file path for the text."""
        text_hash = hashlib.sha256(text.encode()).hexdigest()
        return os.path.join(self.cache_dir, f"{text_hash}.pkl")

    def _load_cached(self, cache_path: str):
        """Return the pickled value at ``cache_path`` or ``_MISSING``.

        A missing, truncated or otherwise unreadable file is reported as
        ``_MISSING`` so the caller recomputes and overwrites it instead of
        failing on every later lookup of the same text.
        """
        try:
            with open(cache_path, 'rb') as f:
                return pickle.load(f)
        except self._LOAD_ERRORS:
            return self._MISSING

    def _store(self, cache_path: str, embedding) -> None:
        """Write ``embedding`` to ``cache_path`` atomically.

        The data goes to a temporary sibling file first and is then moved
        into place with ``os.replace``, so an interrupted write never leaves a
        partial file under the final name.
        """
        tmp_path = f"{cache_path}.{os.getpid()}.tmp"
        try:
            with open(tmp_path, 'wb') as f:
                pickle.dump(embedding, f)
            os.replace(tmp_path, cache_path)
        finally:
            # After a successful replace the temp file is gone; this only
            # cleans up when the dump or the replace failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def get_embedding(self, text: str) -> List[float]:
        """Get embedding from cache or compute it.

        Returns:
            The cached value when a readable cache file exists, otherwise the
            result of ``embed_query(text)``, which is then written to disk.

        Raises:
            Whatever ``embed_query`` raises, and ``OSError`` if the new entry
            cannot be written.
        """
        cache_path = self._get_cache_path(text)

        # A hit is only counted once the file has actually been loaded.
        cached = self._load_cached(cache_path)
        if cached is not self._MISSING:
            self.stats['hits'] += 1
            return cached

        # Compute new embedding
        self.stats['misses'] += 1
        embedding = self.model.embed_query(text)

        # Save to cache
        self._store(cache_path, embedding)

        return embedding

    def get_stats(self):
        """Return hits, misses and the hit rate formatted as a percentage."""
        total = self.stats['hits'] + self.stats['misses']
        hit_rate = (self.stats['hits'] / total * 100) if total > 0 else 0
        return {
            'hits': self.stats['hits'],
            'misses': self.stats['misses'],
            'hit_rate': f"{hit_rate:.2f}%"
        }

"""Test the file-based cache:"""

# Initialize file-based cache
file_cache = FileCacheEmbeddings(base_embedder)

print("Testing File-Based Cache Implementation...\n")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()

# First round of processing
print("First round of processing:")
for text in test_texts:
    embedding = file_cache.get_embedding(text)
    print(f"Processed: '{text}'")

print("\nStats after first round:")
print(file_cache.get_stats())

# Second round to test cache hits: every text was written to disk during the
# first round, so these lookups should be served from the cache files.
print("\nSecond round of processing:")
for text in test_texts:
    embedding = file_cache.get_embedding(text)
    print(f"Processed: '{text}'")

end_time = time.perf_counter()

print("\nFinal Statistics:")
stats = file_cache.get_stats()
for key, value in stats.items():
    print(f"{key.replace('_', ' ').title()}: {value}")
print(f"Total Processing Time: {end_time - start_time:.2f} seconds")

**Production-Ready Cache Implementation**

In [None]:
class ProductionEmbeddingCache:
    """In-memory embedding cache with namespaced keys and per-entry TTL.

    Entries are stored in a dict keyed by a SHA-256 digest of
    ``"<namespace>:<normalized text>"``. Each entry records when it was
    created; an entry older than ``ttl_seconds`` is discarded the next time
    the same key is requested.

    Entry age is measured with ``time.monotonic()``. The cache lives only in
    this process, so a monotonic clock is sufficient and, unlike the wall
    clock, it cannot make entries expire early or live too long when the
    system time is adjusted.
    """

    def __init__(self,
                 embedding_model,
                 ttl_seconds: int = 86400,  # 24 hours
                 namespace: str = "default"):
        """Create an empty cache around ``embedding_model``.

        Args:
            embedding_model: object exposing ``embed_query(text)``.
            ttl_seconds: maximum age of an entry before it is recomputed.
            namespace: prefix mixed into every key.
        """
        self.model = embedding_model
        self.ttl_seconds = ttl_seconds
        self.namespace = namespace
        # key -> {'embedding': ..., 'timestamp': time.monotonic() at insert}
        self.cache: Dict[str, dict] = {}
        self.stats = {'hits': 0, 'misses': 0}

    def _generate_key(self, text: str) -> str:
        """Generate a consistent hash key for the text."""
        # NOTE(review): texts that differ only in letter case or surrounding
        # whitespace map to the same key, so they all receive the embedding of
        # whichever variant was requested first - confirm this is acceptable
        # for the embedding model in use.
        normalized_text = text.lower().strip()
        return hashlib.sha256(
            f"{self.namespace}:{normalized_text}".encode()
        ).hexdigest()

    def _is_cache_valid(self, cache_entry: dict) -> bool:
        """Check if cache entry is still valid based on TTL."""
        return time.monotonic() - cache_entry['timestamp'] < self.ttl_seconds

    def get_embedding(self, text: str) -> Optional[List[float]]:
        """Get embedding from cache or compute and cache it.

        Returns:
            The embedding, or ``None`` when the model call raised. The error
            is printed and nothing is cached in that case.
        """
        cache_key = self._generate_key(text)

        # Check cache
        entry = self.cache.get(cache_key)
        if entry is not None:
            if self._is_cache_valid(entry):
                self.stats['hits'] += 1
                return entry['embedding']
            # Remove expired entry
            del self.cache[cache_key]

        # Compute new embedding; a failed call is still counted as a miss.
        self.stats['misses'] += 1
        try:
            embedding = self.model.embed_query(text)
        except Exception as e:
            # Deliberate best effort: report the failure and let the caller
            # decide what to do with a missing embedding.
            print(f"Error generating embedding: {e}")
            return None

        # Cache the result
        self.cache[cache_key] = {
            'embedding': embedding,
            'timestamp': time.monotonic()
        }
        return embedding

    def get_stats(self):
        """Return hits, misses, hit rate (percentage string) and entry count.

        ``cache_size`` is the number of stored entries, including any that
        have passed their TTL but have not been requested again yet.
        """
        total = self.stats['hits'] + self.stats['misses']
        hit_rate = (self.stats['hits'] / total * 100) if total > 0 else 0
        return {
            'hits': self.stats['hits'],
            'misses': self.stats['misses'],
            'hit_rate': f"{hit_rate:.2f}%",
            'cache_size': len(self.cache)
        }

"""Test the production cache:"""

# Initialize production cache
prod_cache = ProductionEmbeddingCache(
    base_embedder,
    ttl_seconds=3600,  # 1 hour TTL
    namespace="production"
)

print("Testing Production Cache Implementation...\n")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()

# First round of processing
print("First round of processing:")
for text in test_texts:
    embedding = prod_cache.get_embedding(text)
    print(f"Processed: '{text}'")

print("\nStats after first round:")
print(prod_cache.get_stats())

# Short pause between the rounds. With the 1-hour TTL configured above, two
# seconds is far too short for any entry to expire, so the second round is
# expected to be answered from the cache. The pause is included in the total
# time reported below.
time.sleep(2)  # Simulate time passing

print("\nSecond round of processing:")
for text in test_texts:
    embedding = prod_cache.get_embedding(text)
    print(f"Processed: '{text}'")

end_time = time.perf_counter()

print("\nFinal Statistics:")
stats = prod_cache.get_stats()
for key, value in stats.items():
    print(f"{key.replace('_', ' ').title()}: {value}")
print(f"Total Processing Time: {end_time - start_time:.2f} seconds")

**Performance Comparison**

In [None]:
def run_performance_test(cache_name, cache_impl, test_data):
    """Time how long ``cache_impl`` takes to embed every text in ``test_data``.

    Args:
        cache_name: label copied into the result.
        cache_impl: object exposing ``get_embedding(text)`` and, optionally,
            ``get_stats()``.
        test_data: iterable of texts to embed, in order.

    Returns:
        dict with ``'name'``, ``'processing_time'`` (elapsed seconds as a
        float) and ``'stats'`` (the value of ``cache_impl.get_stats()``, or
        ``None`` when the implementation has no such method).
    """
    # perf_counter() is monotonic and high resolution, which makes it the
    # appropriate clock for measuring short intervals.
    start_time = time.perf_counter()

    # Process all texts
    for text in test_data:
        cache_impl.get_embedding(text)

    processing_time = time.perf_counter() - start_time

    # Get stats if available; implementations without get_stats() are allowed.
    get_stats = getattr(cache_impl, 'get_stats', None)
    stats = get_stats() if callable(get_stats) else None

    return {
        'name': cache_name,
        'processing_time': processing_time,
        'stats': stats
    }

# Generate larger test dataset
base_texts = [
    "Machine learning is fascinating",
    "AI is transforming industries",
    "Deep learning revolutionizes AI",
    "Natural language processing advances"
]

# Add some repetition to test caching: each text appears three times.
extended_test_data = base_texts * 3

# Initialize fresh instances for fair comparison.
# The file cache gets its own, never-used directory: the default
# "./embedding_cache" already holds entries written by the earlier file-cache
# demo for two of the texts above, which would give the file implementation
# hits the other two caches cannot have. The directory stays below
# "./embedding_cache" so the cleanup cell removes it as well.
fresh_simple_cache = SimpleEmbeddingCache(base_embedder)
fresh_file_cache = FileCacheEmbeddings(
    base_embedder,
    cache_dir=os.path.join("./embedding_cache", f"perf_{time.time_ns()}")
)
fresh_prod_cache = ProductionEmbeddingCache(base_embedder)

# Run performance tests
print("Running Performance Comparison...\n")

implementations = [
    ('Simple Cache', fresh_simple_cache),
    ('File Cache', fresh_file_cache),
    ('Production Cache', fresh_prod_cache)
]

# One result dict per implementation, in the order listed above.
results = []
for name, impl in implementations:
    print(f"\nTesting {name}...")
    result = run_performance_test(name, impl, extended_test_data)
    results.append(result)
    print(f"Processing Time: {result['processing_time']:.2f} seconds")
    print("Statistics:", result['stats'])

**Cleanup**

In [None]:
# Remove the on-disk cache directory used by the file-based cache (its default
# location), if it exists, then report completion.
import shutil

cache_dir_to_remove = "./embedding_cache"
if os.path.exists(cache_dir_to_remove):
    shutil.rmtree(cache_dir_to_remove)
print("Cleaned up cache directory")