# Redis Semantic Caching within a Chain

This notebook demonstrates LangChain semantic caching using Redis, and measures cache effects on an LCEL chain.

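Prereqs: a running Redis with vector search support (e.g. Redis Stack) at `redis://localhost:6379` (or set `REDIS_URL`), and `OPENAI_API_KEY` available in the environment or a `.env` file.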

In [1]:
# %pip install -U langchain-core langchain-openai langchain-redis redis python-dotenv

/Users/hasanain/Library/CloudStorage/OneDrive-Personal/DS/KN Ac/DocumentPortal/.venv/bin/python: No module named pip
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [12]:
import os, time
from dotenv import load_dotenv

from langchain.globals import set_llm_cache
# RedisSemanticCache comes from the dedicated langchain-redis package (no community-cache fallback is attempted here)
from langchain_redis.cache import RedisSemanticCache
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Redis endpoint: taken from REDIS_URL when set, otherwise the local default.
REDIS_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379')
print(f'Connecting to Redis at: {REDIS_URL}')

# Fail fast if the OpenAI credentials are absent; the LLM and embeddings need them.
assert os.environ.get('OPENAI_API_KEY'), 'Missing OPENAI_API_KEY in environment'

Connecting to Redis at: redis://localhost:6379


In [13]:
# Initialize LLM, Embeddings, and Redis semantic cache
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
embedding = OpenAIEmbeddings()

# Semantic-cache tuning, named so the values can be adjusted in one place.
CACHE_NAME = 'document-portal'      # name of the cache index in Redis
CACHE_DISTANCE_THRESHOLD = 0.2      # max embedding distance still treated as a hit
CACHE_TTL_SECONDS = 3600            # cached entries expire after one hour

# NOTE: distance_threshold/ttl/name are passed explicitly below. If your installed
# RedisSemanticCache rejects any of them, drop the extras and keep only
# redis_url and embeddings.
semantic_cache = RedisSemanticCache(
    redis_url=REDIS_URL,
    embeddings=embedding,
    distance_threshold=CACHE_DISTANCE_THRESHOLD,
    ttl=CACHE_TTL_SECONDS,
    name=CACHE_NAME,
)
# Register the cache globally so every LangChain LLM call consults it.
set_llm_cache(semantic_cache)

# Build a simple LCEL chain
prompt = ChatPromptTemplate.from_messages([('system', 'Be concise.'), ('human', '{question}')])
chain = prompt | llm | StrOutputParser()
print('Chain ready with Redis semantic cache.')

22:25:40 httpx INFO   HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
22:25:40 redisvl.index.index INFO   Index already exists, not overwriting.
Chain ready with Redis semantic cache.


In [14]:
# Helper to measure cache impact
def timed_invoke(q: str, runnable=None) -> tuple:
    """Invoke a chain with one question and measure how long the call takes.

    Args:
        q: Question text; passed to the runnable as ``{'question': q}``.
        runnable: Optional object exposing ``invoke(dict)``. When omitted,
            the notebook-level ``chain`` is used.

    Returns:
        A ``(output, seconds)`` tuple: whatever ``invoke`` returned and the
        elapsed time in seconds as a float.
    """
    target = chain if runnable is None else runnable
    # perf_counter() is monotonic and high-resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    out = target.invoke({'question': q})
    return out, time.perf_counter() - start

# Two phrasings of the same question: the first populates the cache, the
# second is close enough in meaning that it should be served from it.
q1, q2 = 'What is the capital of France?', 'Which city is the capital of France?'

# Baseline: this call goes through to the LLM.
out1, t1 = timed_invoke(q1)
print(f'First (uncached) call: {t1:.2f}s\n{out1[:200]}')

# Reworded question: expected to be a semantic cache hit.
out2, t2 = timed_invoke(q2)
print(f'Second (semantic cached) call: {t2:.2f}s\n{out2[:200]}')

# Ratio of the two latencies; the denominator is floored to avoid dividing by zero.
print(f'Improvement: {(t1 / max(t2, 1e-6)):.2f}x')

22:25:44 httpx INFO   HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
22:25:44 httpx INFO   HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
22:25:45 httpx INFO   HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
First (uncached) call: 1.34s
The capital of France is Paris.
22:25:45 httpx INFO   HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Second (semantic cached) call: 0.19s
The capital of France is Paris.
Improvement: 7.06x


In [None]:
# Optional: re-register the semantic cache, e.g. after changing its tuning.
# distance_threshold, ttl and name are passed explicitly here; edit them to
# experiment. If your installed RedisSemanticCache rejects them, drop the extras.
set_llm_cache(RedisSemanticCache(redis_url=REDIS_URL, embeddings=embedding, distance_threshold=0.2, ttl=3600, name='document-portal'))

# Re-run the reworded question against the freshly registered cache and time it.
out3, t3 = timed_invoke(q2)
print(f'Cache reinit done; timing: {t3:.2f}s')

In [None]:
# Clean up: open a handle on the demo cache (same settings as above) and clear it.
cache = RedisSemanticCache(
    redis_url=REDIS_URL,
    embeddings=embedding,
    distance_threshold=0.2,
    ttl=3600,
    name='document-portal',
)
cache.clear()
print('Cleared document-portal cache')