In [5]:
from dotenv import load_dotenv
import os

env_path = "/Users/ford/Documents/coding/confidential/.env"
load_dotenv(env_path)
api_key = os.getenv("OPENAI_API_KEY")
assert api_key, "API key is missing"

In [6]:
import os
from langchain_core.globals import set_llm_cache
from langchain_openai import ChatOpenAI
from langchain_core.outputs import Generation
from langchain_core.messages import (
    AIMessage,
)  # Although not directly used for updating, good to have
from langchain_community.cache import SQLiteCache
from typing import Sequence  # Import Sequence for type hinting

# Define the path for the SQLite database file
DB_FILE = ".langchain_test_update_cache.db"

# Optional: Remove the database file at the beginning to start with a clean cache
# This is useful for consistent testing to ensure you see cache misses initially
# if os.path.exists(DB_FILE):
#     os.remove(DB_FILE)
#     print(f"Removed existing database file: {DB_FILE}")

# Assume you have your LLM initialized
# Using gpt-3.5-turbo for faster response times during testing
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.7)

# Set up your SQLite cache
cache = SQLiteCache(database_path=DB_FILE)
set_llm_cache(cache)

# Define your prompt
prompt = "give me random number from 1-1000"

# Get the llm_string *before* any calls, it should be consistent
# Note: _get_llm_string might be considered internal, but useful for debugging cache keys
llm_string = llm._get_llm_string(prompt)
print(f"LLM String used for caching: {llm_string}")

# --- First run (caches the result) ---
print("\n--- First request (should trigger LLM call and cache) ---")
print(
    "Cache lookup before first call:", cache.lookup(prompt, llm_string)
)  # Should be None
response1 = llm.invoke(prompt)
print("Response 1 (LLM Output):", response1.content)
print(
    "Cache lookup after first call:", cache.lookup(prompt, llm_string)
)  # Should show the cached result

# --- Second run (should be served from cache) ---
print("\n--- Second request (should be served from cache) ---")
response_cached = llm.invoke(prompt)
print("Response from Cache:", response_cached.content)
print(
    "Cache lookup after second call:", cache.lookup(prompt, llm_string)
)  # Should show the same cached result

# Verify they are the same
if response_cached.content == response1.content:
    print("Verification: Second response matched the first (served from cache).")
else:
    print(
        "Verification: Second response did NOT match the first (unexpected cache miss/different response)."
    )


# --- Force re-request and attempt to update cache ---
print("\n--- Forcing re-request and attempting to update cache ---")

# 1. Temporarily disable caching globally
# Store the old cache to restore it later if needed, though re-creating is safer for state sync
old_cache = set_llm_cache(None)
print("Caching temporarily disabled.")

# 2. Perform the request to get the new result (cache is off, so LLM is called)
print("Performing forced re-request to get a NEW result...")
new_response_message = llm.invoke(prompt)
new_response_text = new_response_message.content
print("Response 2 (New LLM Output):", new_response_text)

# 3. Re-enable the cache. Crucially, we might need a new instance or re-set
# the same instance to ensure Langchain's internal state is updated to use it.
# Re-creating and setting seems the most reliable way to force potential state sync.
cache = SQLiteCache(database_path=DB_FILE)
set_llm_cache(cache)
print("Caching re-enabled.")


# 4. Manually update the cache with the new result
# The value stored must be a Sequence[Generation]. AIMessage can be converted.
# Check current cache value BEFORE update
print(
    "Cache lookup BEFORE manual update:", cache.lookup(prompt, llm_string)
)  # Should still show the OLD cached value

# Prepare the new value in the expected format
# Ensure new_response_message is an AIMessage or similar object that has content
# If new_response_text was just a string, wrap it like: [Generation(text=new_response_text)]
new_return_val: Sequence[Generation] = [Generation(text=new_response_text)]

try:
    # Use the original llm_string for the update key
    cache.update(prompt, llm_string, new_return_val)
    print("Cache successfully updated using cache.update().")
except Exception as e:
    print(f"Error updating cache: {e}")

# Check cache value AFTER update using the cache object directly
print(
    "Cache lookup AFTER manual update:", cache.lookup(prompt, llm_string)
)  # Should now show the NEW cached value


# --- Subsequent request (intended to be served from the updated cache) ---
print("\n--- Subsequent request (Testing if llm.invoke() picks up the update) ---")

# While cache.lookup() confirms the update, llm.invoke() might not immediately
# reflect this manual change in the same script execution. This is the observed
# behavior from your output.


# Use the same LLM instance and prompt
response_subsequent = llm.invoke(prompt)
print("Subsequent Response (from llm.invoke()):", response_subsequent.content)

# Verification of the *subsequent invoke* result
if response_subsequent.content == new_response_text:
    print(
        "Subsequent request verification: Returned the NEW result (update reflected immediately)."
    )
else:
    # This is the outcome you observed and is common due to internal state
    print(
        "Subsequent request verification: Still returned the OLD result (manual update NOT reflected immediately by llm.invoke())."
    )
    print(
        "Note: While cache.lookup() shows the update, llm.invoke()'s behavior right after manual update can be inconsistent."
    )
    print(
        "The update IS persistent in the DB. The *next* time this script runs, the initial cache lookup should yield the new value."
    )


print(f"\nCache data is stored in: {DB_FILE}")

# To verify the update persisted, re-run this script. The first "Cache lookup before first call"
# should now show the NEW number obtained in the "Response 2 (New LLM Output)".

LLM String used for caching: {"id": ["langchain", "chat_models", "openai", "ChatOpenAI"], "kwargs": {"model_name": "gpt-4o-mini", "openai_api_key": {"id": ["OPENAI_API_KEY"], "lc": 1, "type": "secret"}, "temperature": 0.7}, "lc": 1, "name": "ChatOpenAI", "type": "constructor"}---[('stop', 'give me random number from 1-1000')]

--- First request (should trigger LLM call and cache) ---
Cache lookup before first call: [Generation(text='Sure! Here’s a random number between 1 and 1000: **472**.')]
Response 1 (LLM Output): Here’s a random number for you: **473**.
Cache lookup after first call: [Generation(text='Sure! Here’s a random number between 1 and 1000: **472**.')]

--- Second request (should be served from cache) ---
Response from Cache: Here’s a random number for you: **473**.
Cache lookup after second call: [Generation(text='Sure! Here’s a random number between 1 and 1000: **472**.')]
Verification: Second response matched the first (served from cache).

--- Forcing re-request and at

In [7]:
cache.lookup(prompt, llm_string=llm_string)

[Generation(text='Sure! Here’s a random number between 1 and 1000: **472**.')]

In [8]:
llm.invoke(prompt)

AIMessage(content='Here’s a random number for you: **473**.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 17, 'total_tokens': 30, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0392822090', 'id': 'chatcmpl-BPNZE8VVJgicIlgDPdKkbsVILOYNO', 'finish_reason': 'stop', 'logprobs': None}, id='run-9cff66a0-8a8c-40aa-9de7-8e83cd9fab38-0', usage_metadata={'input_tokens': 17, 'output_tokens': 13, 'total_tokens': 30, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})