##### **Benefits of Caching LLM Response:**

     1. Cost effective - By reducing number of API call to LLM provider.
     2. Speed-up application.

**Caching Types:**

     1. In-memory caching
     2. Persistent Caching

##### **In-memory Caching**

In [1]:
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache

**Set-up In-memory Cache**

In [2]:
cache = InMemoryCache()
set_llm_cache(cache)

In [3]:
cache._cache

{}

##### **LLM**

In [4]:
import os
from langchain_openai.chat_models import ChatOpenAI

In [5]:
llm = ChatOpenAI(model = "gpt-4o-mini", 
                 api_key=os.environ.get("OPENAI_API_KEY"),
                 temperature = 0,
                 #cache = cache
                 )

In [6]:
llm.invoke("Hi")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-b18840b1-ebd3-47d0-beb9-bd1b661c9244-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})

##### **Caching LLM Response**

In [7]:
llm.invoke("Who is president of India?")

AIMessage(content='As of my last knowledge update in October 2023, the President of India is Droupadi Murmu. She took office on July 25, 2022, and is the first tribal woman to hold the position. Please verify with up-to-date sources, as political positions can change.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 13, 'total_tokens': 73, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-731619fa-2bad-4caf-a928-ac63cd9d1cc9-0', usage_metadata={'input_tokens': 13, 'output_tokens': 60, 'total_tokens': 73})

In [8]:
len(cache._cache)

2

In [9]:
cache._cache

{('[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "messages", "HumanMessage"], "kwargs": {"content": "Hi", "type": "human"}}]',
  '{"id": ["langchain", "chat_models", "openai", "ChatOpenAI"], "kwargs": {"max_retries": 2, "model_name": "gpt-4o-mini", "n": 1, "openai_api_key": {"id": ["OPENAI_API_KEY"], "lc": 1, "type": "secret"}, "temperature": 0.0}, "lc": 1, "name": "ChatOpenAI", "type": "constructor"}---[(\'stop\', None)]'): [ChatGeneration(text='Hello! How can I assist you today?', generation_info={'finish_reason': 'stop', 'logprobs': None}, message=AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-b18840b1-ebd3-47d0-beb9-bd1b661c9244-0', usage_

In [10]:
llm.invoke("Who is president of India?")

AIMessage(content='As of my last knowledge update in October 2023, the President of India is Droupadi Murmu. She took office on July 25, 2022, and is the first tribal woman to hold the position. Please verify with up-to-date sources, as political positions can change.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 13, 'total_tokens': 73, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-731619fa-2bad-4caf-a928-ac63cd9d1cc9-0', usage_metadata={'input_tokens': 13, 'output_tokens': 60, 'total_tokens': 73})

##### **Caching LLM Response in SQL Chain**

In [11]:
from langchain_community.utilities import SQLDatabase
db = SQLDatabase.from_uri("sqlite:///chinook.db", sample_rows_in_table_info = 3)

In [13]:
print(db.table_info)


CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE "Artist" (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("ArtistId")
)

/*
3 rows from Artist table:
ArtistId	Name
1	AC/DC
2	Accept
3	Aerosmith
*/


CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Empl

In [14]:
from langchain.chains import create_sql_query_chain

In [15]:

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many employees are there"})
response

'SQLQuery: SQL Query to run\n```sql\nSELECT COUNT("EmployeeId") AS "EmployeeCount" FROM "Employee";\n```'

In [16]:
cache._cache

{('[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "messages", "HumanMessage"], "kwargs": {"content": "Hi", "type": "human"}}]',
  '{"id": ["langchain", "chat_models", "openai", "ChatOpenAI"], "kwargs": {"max_retries": 2, "model_name": "gpt-4o-mini", "n": 1, "openai_api_key": {"id": ["OPENAI_API_KEY"], "lc": 1, "type": "secret"}, "temperature": 0.0}, "lc": 1, "name": "ChatOpenAI", "type": "constructor"}---[(\'stop\', None)]'): [ChatGeneration(text='Hello! How can I assist you today?', generation_info={'finish_reason': 'stop', 'logprobs': None}, message=AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-b18840b1-ebd3-47d0-beb9-bd1b661c9244-0', usage_

In [17]:
chain.invoke({"question": "How many employees are there"})

'SQLQuery: SQL Query to run\n```sql\nSELECT COUNT("EmployeeId") AS "EmployeeCount" FROM "Employee";\n```'

In [None]:
cache._cache

##### **Caching LLM Response in Langchain Agents**

In [18]:
from langchain_community.agent_toolkits import create_sql_agent
from langchain.agents import AgentType
 
prefix = """
You are an agent designed to interact with a sqlite database. 
Given an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} 
results.
"""



agent = create_sql_agent(llm = llm, 
                         db = db, 
                         prefix=prefix, 
                         agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
                         stream_runnable = False)


out = agent.invoke("How many total artists are there?")
print(out)

{'input': 'How many total artists are there?', 'output': 'There are a total of 275 artists.'}


In [19]:
len(cache._cache)

9

In [20]:
out = agent.invoke("How many total artists are there?")
out

{'input': 'How many total artists are there?',
 'output': 'There are a total of 275 artists.'}

In [21]:
cache.clear()

In [22]:
cache._cache

{}

##### **Persistent Caching**

In [None]:
import os
os.remove('.langchain_cache.db')

In [24]:
!del ".langchain_cache.db"

Could Not Find c:\Users\atulk\.langchain_cache.db


In [25]:
from langchain_community.cache import SQLiteCache

cache = SQLiteCache(database_path=".langchain_cache.db")

In [26]:
from langchain_openai.chat_models import ChatOpenAI
llm = ChatOpenAI(model = "gpt-4o-mini", 
                 api_key=os.environ.get("OPENAI_API_KEY"),
                 temperature = 0,
                 cache = cache
                 )

In [27]:
llm.invoke("hi")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-2226a5d9-3e4c-40a7-92f8-d1d5d63037f6-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})

In [28]:
import sqlite3

def get_db_data():
    conn = sqlite3.connect('.langchain_cache.db')
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM full_llm_cache")
    cache_entries = cursor.fetchall()
    print(len(cache_entries))
    print(cache_entries)
    cursor.close()
    conn.close()
get_db_data()

1
[('[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "messages", "HumanMessage"], "kwargs": {"content": "hi", "type": "human"}}]', '{"id": ["langchain", "chat_models", "openai", "ChatOpenAI"], "kwargs": {"cache": {"id": ["langchain_community", "cache", "SQLiteCache"], "lc": 1, "type": "not_implemented"}, "max_retries": 2, "model_name": "gpt-4o-mini", "n": 1, "openai_api_key": {"id": ["OPENAI_API_KEY"], "lc": 1, "type": "secret"}, "temperature": 0.0}, "lc": 1, "name": "ChatOpenAI", "type": "constructor"}---[(\'stop\', None)]', 0, '{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "output", "ChatGeneration"], "kwargs": {"text": "Hello! How can I assist you today?", "generation_info": {"finish_reason": "stop", "logprobs": null}, "type": "ChatGeneration", "message": {"lc": 1, "type": "constructor", "id": ["langchain", "schema", "messages", "AIMessage"], "kwargs": {"content": "Hello! How can I assist you today?", "additional_kwargs": {"refusal": null}, "response

In [29]:
llm.invoke("Who won 2007 t20 world cup?")

AIMessage(content='India won the inaugural ICC T20 World Cup in 2007. They defeated Pakistan in the final, which was held on September 24, 2007, at the Wanderers Stadium in Johannesburg, South Africa.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 44, 'prompt_tokens': 17, 'total_tokens': 61, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-9a693af3-a761-4b66-bd18-bae12361ebea-0', usage_metadata={'input_tokens': 17, 'output_tokens': 44, 'total_tokens': 61})

In [30]:
llm.invoke("Who won 2007 t20 world cup?")

AIMessage(content='India won the inaugural ICC T20 World Cup in 2007. They defeated Pakistan in the final, which was held on September 24, 2007, at the Wanderers Stadium in Johannesburg, South Africa.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 44, 'prompt_tokens': 17, 'total_tokens': 61, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-9a693af3-a761-4b66-bd18-bae12361ebea-0', usage_metadata={'input_tokens': 17, 'output_tokens': 44, 'total_tokens': 61})