## Cost Reduction

In [1]:
import langchain
from langchain.llms import OpenAI
from timeit import default_timer as timer

In [2]:
from dotenv import load_dotenv
import os

%load_ext dotenv
%dotenv

In [3]:
openai_api_key  = os.environ['OPANAI_API_KEY']

In [4]:
# To make the caching really obvious, lets use a slower model.
llm = OpenAI(openai_api_key=openai_api_key, model_name="text-davinci-002", n=2, best_of=2)

Let's setup some decorator to help use calculate the time to see the impact of using a decorator and not using one.

In [5]:
def time_to_complete(func):
    def inner(*args, **kwargs):
        start = timer()
        func(*args, **kwargs)
        end = timer()

        print(f"Time taken to complete: {end - start}")
    return inner

In [6]:
@time_to_complete
def run(llm, args):
    llm(args)

### In Memory Cache

Run without the caching implemeted

In [7]:
msg = "How far is the Earth from the Sun?"

In [8]:
llm(msg)

'\n\nThe Earth is about 93 million miles from the Sun.'

In [9]:
run(llm, msg)

Time taken to complete: 0.7098099169961642


Implement caching in place.

In [10]:
from langchain.cache import InMemoryCache

In [11]:
langchain.llm_cache = InMemoryCache()

In [12]:
llm(msg)

'\n\nThe Earth is about 93 million miles from the Sun.'

In [13]:
run(llm, msg)

Time taken to complete: 0.00023501500254496932


In [14]:
langchain.llm_cache = None

### SQLite Caching

In [15]:
from langchain.cache import SQLiteCache

In [16]:
run(llm, msg)

Time taken to complete: 0.6895445980044315


After implementation of caching

In [17]:
langchain.llm_cache = SQLiteCache(database_path=".testDB.db")

In [18]:
run(llm, msg)

Time taken to complete: 0.00644729399937205


In [19]:
langchain.llm_cache = None

### GTPCaching

There are two main ways of using the **GTPCaching**, these are:

1. **Exact matching**

2. **Semantic Similarity**

In [20]:
# !pip install gptcache

In [21]:
from gptcache import Cache
from gptcache.manager.factory import manager_factory
from gptcache.processor.pre import get_prompt
from langchain.cache import GPTCache
import hashlib

#### Exact Matching

In [22]:
def get_hashed_name(name):
    return hashlib.sha256(name.encode()).hexdigest()


def init_gptcache(cache_obj: Cache, llm: str):
    hashed_llm = get_hashed_name(llm)
    cache_obj.init(
        pre_embedding_func=get_prompt,
        data_manager=manager_factory(manager="map", data_dir=f"map_cache_{hashed_llm}"),
    )

Before introducing caching

In [23]:
run(llm, msg)

Time taken to complete: 0.7874066430085804


Now, let's introduce caching and measure the performance

In [24]:
langchain.llm_cache = GPTCache(init_gptcache)

In [25]:
run(llm, msg)

Time taken to complete: 0.002098513999953866


#### Semantic Similarity Matching

In [26]:
from gptcache.adapter.api import init_similar_cache

In [27]:
def get_hashed_name(name):
    return hashlib.sha256(name.encode()).hexdigest()


def init_gptcache(cache_obj: Cache, llm: str):
    hashed_llm = get_hashed_name(llm)
    init_similar_cache(cache_obj=cache_obj, data_dir=f"similar_cache_{hashed_llm}")

Before caching

In [28]:
run(llm, msg)

Time taken to complete: 0.00039819600351620466


Now, let's introduce caching and measure the performance

In [29]:
langchain.llm_cache = GPTCache(init_gptcache)

In [30]:
run(llm, msg)

Time taken to complete: 2.1773721140052658


In [31]:
langchain.llm_cache = None