In [None]:
import os
from dotenv import load_dotenv
import asyncio
import hashlib
from typing import Dict, Any, List
from redis.asyncio import Redis
from redisvl.extensions.cache.llm import SemanticCache
from redisvl.utils.vectorize import HFTextVectorizer
from langchain_openai import ChatOpenAI
from sentence_transformers import SentenceTransformer
import msgpack
import numpy as np

In [None]:
# Pull settings from a local .env file into the process environment.
load_dotenv()

# OpenRouter API key read from the environment; None when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

In [None]:
class SemanticLLMCache:
    def __init__(self, redis_url: str, model_name: str = "all-MiniLM-L6-v2"):
        self.redis = Redis.from_url(redis_url)
        self.embedder = HFTextVectorizer(
            model=model_name, 
            device="cpu"  # GPU –¥–ª—è prod
        )
        
        # ‚úÖ RedisVL SemanticCache (state-of-the-art 2025)
        self.semantic_cache = SemanticCache(
            name="llm_semantic_cache",
            redis_url=redis_url,
            distance_threshold=0.85,  # 85% —Å–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∞—è —Å—Ö–æ–∂–µ—Å—Ç—å = HIT
            vectorizer=self.embedder,
            redis_embedding_namespace="semantic_embeddings"
        )
        
        self.llm = ChatOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
            model="grok-beta",
            temperature=0.1
        )
    
    async def get_or_generate(
        self, 
        messages: List[Dict[str, str]], 
        **kwargs
    ) -> Dict[str, Any]:
        """–°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–æ–µ –∫—ç—à–∏—Ä–æ–≤–∞–Ω–∏–µ + fallback –Ω–∞ exact match"""
        
        # 1. –°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–π –ø–æ–∏—Å–∫
        prompt_text = self._normalize_prompt(messages)
        cached = await self.semantic_cache.acheck(prompt_text)
        cached = cached[0]
        
        if cached:
            print(cached)
            print(f"‚úÖ SEMANTIC HIT: {cached['response'][:50]}...")
            return cached
        
        # 2. Exact match fallback (SHA256)
        exact_key = self._make_exact_key(messages, **kwargs)
        exact_cached = await self._exact_cache_get(exact_key)
        if exact_cached:
            print(f"‚úÖ EXACT HIT: {exact_key[:16]}...")
            return exact_cached
        
        # 3. LLM –≤—ã–∑–æ–≤
        print("üîÑ MISS ‚Üí LLM...")
        response = await self.llm.ainvoke(messages, **kwargs)
        
        # 4. –°–æ—Ö—Ä–∞–Ω—è–µ–º –≤ –æ–±–∞ –∫—ç—à–∞
        await self.semantic_cache.astore(prompt_text, response.content)
        await self._exact_cache_set(exact_key, response)
        
        return {"content": response.content, "usage": response.response_metadata}
    
    def _normalize_prompt(self, messages: List[Dict]) -> str:
        """–ö–∞–Ω–æ–Ω–∏—á–µ—Å–∫–∏–π —Ç–µ–∫—Å—Ç –¥–ª—è —ç–º–±–µ–¥–¥–∏–Ω–≥–∞"""
        return " | ".join([f"{m['role']}: {m['content']}" for m in messages])
    
    def _make_exact_key(self, messages: List[Dict], **kwargs) -> str:
        payload = msgpack.packb({"messages": messages, **kwargs})
        return f"exact:{hashlib.sha256(payload).hexdigest()}"
    
    async def _exact_cache_get(self, key: str) -> Dict[str, Any] | None:
        data = await self.redis.get(key)
        return msgpack.unpackb(data, raw=False) if data else None
    
    async def _exact_cache_set(self, key: str, response: Dict, ttl: int = 7200):
        await self.redis.set(key, msgpack.packb(response), ex=ttl)

# Smoke test for the semantic cache
async def test_semantic_cache(
    redis_url: str = "redis://localhost:6380",
    delay: float = 0.1,
):
    """Send three paraphrased prompts through SemanticLLMCache and print each answer.

    Args:
        redis_url: Redis connection URL handed to SemanticLLMCache.
        delay: Seconds to pause between requests.
    """
    cache = SemanticLLMCache(redis_url)

    # Similar questions phrased differently
    queries = [
        "–ö–∞–∫ —Ä–∞–±–æ—Ç–∞–µ—Ç Python async/await?",
        "–û–±—ä—è—Å–Ω–∏ –∞—Å–∏–Ω—Ö—Ä–æ–Ω–Ω–æ—Å—Ç—å –≤ Python",
        "–ß—Ç–æ —Ç–∞–∫–æ–µ asyncio –≤ Python?"
    ]

    for i, q in enumerate(queries):
        messages = [{"role": "user", "content": q}]
        result = await cache.get_or_generate(messages)
        print(result.keys())
        # The answer text may be under "response" or under "content" depending
        # on which path produced the result, so accept either key.
        answer = result.get("response") or result.get("content") or ""
        print(f"Q{i+1}: {answer[:100]}...")
        await asyncio.sleep(delay)  # crude rate limiting between requests

# Top-level await is accepted in an IPython/Jupyter cell; in a plain script
# this call would need to be wrapped in asyncio.run(...).
await test_semantic_cache()


20:45:13 sentence_transformers.SentenceTransformer INFO   Load pretrained SentenceTransformer: all-MiniLM-L6-v2
20:45:16 redisvl.index.index INFO   Index already exists, not overwriting.
{'entry_id': 'c60b06d91e39e3f8df14a5e99049585be889279752ce0ed3a50186079e2b2737', 'prompt': 'user: –ö–∞–∫ —Ä–∞–±–æ—Ç–∞–µ—Ç Python async/await?', 'response': '`async` –∏ `await` –≤ Python –∏—Å–ø–æ–ª—å–∑—É—é—Ç—Å—è –¥–ª—è —Ä–∞–±–æ—Ç—ã —Å –∞—Å–∏–Ω—Ö—Ä–æ–Ω–Ω—ã–º –ø—Ä–æ–≥—Ä–∞–º–º–∏—Ä–æ–≤–∞–Ω–∏–µ–º, —á—Ç–æ –ø–æ–∑–≤–æ–ª—è–µ—Ç –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–ø–µ—Ä–∞—Ü–∏–∏ –≤–≤–æ–¥–∞-–≤—ã–≤–æ–¥–∞ (I/O) –±–µ–∑ –±–ª–æ–∫–∏—Ä–æ–≤–∫–∏ –æ—Å–Ω–æ–≤–Ω–æ–≥–æ –ø–æ—Ç–æ–∫–∞ –≤—ã–ø–æ–ª–Ω–µ–Ω–∏—è. –≠—Ç–æ –æ—Å–æ–±–µ–Ω–Ω–æ –ø–æ–ª–µ–∑–Ω–æ –¥–ª—è –∑–∞–¥–∞—á, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –∑–∞–Ω—è—Ç—å –º–Ω–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏, —Ç–∞–∫–∏—Ö –∫–∞–∫ —Å–µ—Ç–µ–≤—ã–µ –∑–∞–ø—Ä–æ—Å—ã –∏–ª–∏ –æ–ø–µ—Ä–∞—Ü–∏–∏ —Å —Ñ–∞–π–ª–∞–º–∏.\n\n### –û—Å–Ω–æ–≤–Ω—ã–µ –∫–æ–Ω—Ü–µ–ø—Ü–∏–∏\n\n1. **–ê—Å–∏–Ω—Ö—Ä–æ–Ω–Ω—ã–µ —Ñ—É–Ω–∫—Ü–∏–∏**:\n   –§—É–Ω–∫—Ü–∏–∏, –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–

IndexError: list index out of range

In [26]:
# Redis endpoint used by the caches below. Set the REDIS_URL environment
# variable to target another instance; otherwise the local default is used.
REDIS_URL = os.getenv("REDIS_URL") or "redis://localhost:6380"

In [None]:
from redis.asyncio import Redis, ConnectionPool


class LLMCache:
    """Exact-match Redis cache for LLM responses, keyed by a hash of the messages.

    Values are handed to Redis unchanged and returned as Redis delivers them,
    so callers are responsible for serializing structured responses.
    """

    def __init__(self, redis_url=REDIS_URL):
        """Open a pooled async Redis client for ``redis_url``."""
        self.redis_url = redis_url
        # ConnectionPool's first positional argument is the connection class,
        # not a URL, so the pool has to be built through from_url().
        self.pool = ConnectionPool.from_url(self.redis_url)
        self.redis = Redis(connection_pool=self.pool)
        self.default_ttl = 3600  # used as the `ex` expiry when set() gets no ttl

    def make_cache_key(self, messages):
        """Return the ``llm:<sha256>`` cache key for a list of chat messages."""
        # hashlib only accepts bytes, so the payload is serialized first.
        payload = msgpack.packb({"messages": messages})
        key_hash = hashlib.sha256(payload).hexdigest()
        return f"llm:{key_hash}"

    async def get(self, key):
        """Return the stored value for ``key``, or None when absent or empty."""
        data = await self.redis.get(key)
        return data if data else None

    async def set(self, key, response, ttl=None):
        """Store ``response`` under ``key``, expiring after ``ttl`` seconds.

        A falsy ``ttl`` (None or 0) falls back to ``self.default_ttl``.
        """
        ttl = ttl or self.default_ttl
        await self.redis.set(key, response, ex=ttl)

    async def get_or_call(self, key, ttl=None):
        """Placeholder: not implemented yet, currently always returns None.

        TODO: look up ``key`` and fall back to the LLM call on a miss.
        """
        return None