In [2]:
from gptcache.processor.pre import last_content
content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]})
# content = "foo2"

In [3]:
from langchain import PromptTemplate

from gptcache import Config
from gptcache.processor.pre import last_content_without_template

template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
prompt = template_obj.format(subject="animal")

value = last_content_without_template(
    data={"messages": [{"content": prompt}]},
    cache_config=Config(template=template_obj.template),
)
print(value)
# ['animal']

['animal']


In [4]:
from gptcache.embedding import LangChain
from langchain_openai import AzureOpenAIEmbeddings
#from langchain.embeddings.openai import OpenAIEmbeddings

test_sentence = 'Hello, world.'
#embeddings = OpenAIEmbeddings(model="your-embeddings-deployment-name")
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    openai_api_version="2023-12-01-preview",  # e.g., "2023-12-01-preview"
)
encoder = LangChain(embeddings=embeddings)
embed = encoder.to_embeddings(test_sentence)

In [5]:
import os
#from langchain_openai import AzureChatOpenAI
from openai import AzureOpenAI
# Load config values
openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT") 
openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION") 
azure_deployment="gpt-4-32k"
openai_api_key = os.getenv("AZURE_OPENAI_KEY") 
openai_api_type="azure"

# Create an instance of chat llm
# client = AzureOpenAI(
#         azure_endpoint=openai_api_base,
#         openai_api_key=openai_api_key,
#         openai_api_version=openai_api_version,
#         azure_deployment=azure_deployment,   
#     )
client = AzureOpenAI(
    azure_endpoint=openai_api_base,
    api_version=openai_api_version,
    azure_deployment=azure_deployment,
    api_key=openai_api_key,
)

In [6]:
import time
from gptcache import Cache, Config
#from gptcache.adapter import openai
from gptcache.adapter.api import init_similar_cache
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.post import random_one
from gptcache.processor.pre import last_content
from gptcache.similarity_evaluation import OnnxModelEvaluation

openai_complete_cache = Cache()

encoder = Onnx()
sqlite_faiss_data_manager = manager_factory(
    "sqlite,faiss",
    data_dir="openai_complete_cache",
    scalar_params={
        "sql_url": "sqlite:///./openai_complete_cache.db",
        "table_name": "openai_chat",
    },
    vector_params={
        "dimension": encoder.dimension,
        "index_file_path": "./openai_chat_faiss.index",
    },
)
onnx_evaluation = OnnxModelEvaluation()
cache_config = Config(similarity_threshold=0.75)

init_similar_cache(
    cache_obj=openai_complete_cache,
    pre_func=last_content,
    embedding=encoder,
    data_manager=sqlite_faiss_data_manager,
    evaluation=onnx_evaluation,
    post_func=random_one,
    config=cache_config,
)

questions = [
    "what's github",
    "can you explain what GitHub is",
    "can you tell me more about GitHub",
    "what is the purpose of GitHub",
]

for question in questions:
    start_time = time.time()
    # response = openai.ChatCompletion.create(
    #     model="gpt-3.5-turbo",
    #     messages=[{"role": "user", "content": question}],
    #     cache_obj=openai_complete_cache,
    # )
    # response = client.chat.completions.create(
    #     model="gpt-4-32k",
    #     messages=[{"role": "user", "content": question}],
    #     cache_obj=openai_complete_cache,
    # )
    response = client.chat.completions.create(        
        model="gpt-4-32k",
        messages=[{"role": "user", "content": question}],
        )
    # cache_obj[question] = response
    
    print(f"Question: {question}")
    print("Time consuming: {:.2f}s".format(time.time() - start_time))
    # print(f'Answer: {response["choices"][0]["message"]["content"]}\n')
    print(f'Answer: {response.choices[0].message.content}\n')

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


Question: what's github
Time consuming: 9.13s
Answer: GitHub is a web-based platform used for version control and collaboration. It allows multiple people to work on projects at the same time. It's mainly used by programmers for developing software, but can also be used to manage and store documents. GitHub allows users to make copies of repositories, make changes to these copies and then propose these changes to the original repository (a process called "pull request"). In addition, GitHub provides a variety of other features, such as bug tracking, feature request, task management, and more.

Question: can you explain what GitHub is
Time consuming: 7.67s
Answer: GitHub is a web-based platform used for version control and collaboration. It allows multiple people to work on projects concurrently. It was designed around Git, which is an open-source version control software that allows users to download the current version of a project, work on it, and then upload it with a set of updates

#### Caching scenarios


**_Text To Image generation:_**


In [None]:
from gptcache import cache
from gptcache.adapter import openai
from gptcache.processor.pre import get_prompt

from gptcache.embedding import Onnx
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
from gptcache.manager import get_data_manager, CacheBase, VectorBase, ObjectBase

onnx = Onnx()
cache_base = CacheBase('sqlite')
vector_base = VectorBase('milvus', host='localhost', port='19530', dimension=onnx.dimension)
object_base = ObjectBase('local', path='./images')
data_manager = get_data_manager(cache_base, vector_base, object_base)

cache.init(
    pre_embedding_func=get_prompt,
    embedding_func=onnx.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
    )
cache.set_openai_key()

response = openai.Image.create(
  prompt="a white siamese cat",
  n=1,
  size="256x256"
)
image_url = response['data'][0]['url']

response = openai.Image.create(
  prompt="a white siamese cat",
  n=1,
  size="256x256"
)
image_url = response['data'][0]['url']

**_NL2SQL / Codex scenarios:_**


In [None]:
import time

def response_text(openai_resp):
    return openai_resp["choices"][0]["text"]

from gptcache import cache
from gptcache.adapter import openai
from gptcache.embedding import Onnx
from gptcache.processor.pre import get_prompt
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

print("Cache loading.....")

onnx = Onnx()
data_manager = get_data_manager(CacheBase("sqlite"), VectorBase("faiss", dimension=onnx.dimension))
cache.init(pre_embedding_func=get_prompt,
    embedding_func=onnx.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
    )
cache.set_openai_key()

questions = [
    "A query to list the names of the departments which employed more than 10 employees in the last 3 months\nSELECT",
    "Query the names of the departments which employed more than 10 employees in the last 3 months\nSELECT",
    "List the names of the departments which employed more than 10 employees in the last 3 months\nSELECT",
]

for question in questions:
    start_time = time.time()
    response = openai.Completion.create(
      engine="gpt-35-turbo-instruct",
      prompt="### Postgres SQL tables, with their properties:\n#\n# Employee(id, name, department_id)\n# Department(id, name, address)\n# Salary_Payments(id, employee_id, amount, date)\n#\n### " + question,
      temperature=0,
      max_tokens=150,
      top_p=1.0,
      frequency_penalty=0.0,
      presence_penalty=0.0,
      stop=["#", ";"]
    )
    print(question, response_text(response))
    print("Time consuming: {:.2f}s".format(time.time() - start_time))

In [1]:
import langchain
print(langchain.__version__)

0.2.0rc2


In [18]:
pip install langchain==0.2.0rc2

Collecting langchain==0.2.0rc2Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.



  Downloading langchain-0.2.0rc2-py3-none-any.whl.metadata (13 kB)
Collecting SQLAlchemy<2.0.29,>=1.4 (from langchain==0.2.0rc2)
  Downloading SQLAlchemy-2.0.28-cp312-cp312-win_amd64.whl.metadata (9.8 kB)
Downloading langchain-0.2.0rc2-py3-none-any.whl (973 kB)
   ---------------------------------------- 0.0/973.8 kB ? eta -:--:--
   ---------- ----------------------------- 245.8/973.8 kB 5.1 MB/s eta 0:00:01
   ------------------------------ --------- 737.3/973.8 kB 9.3 MB/s eta 0:00:01
   ---------------------------------------- 973.8/973.8 kB 7.7 MB/s eta 0:00:00
Downloading SQLAlchemy-2.0.28-cp312-cp312-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ------- -------------------------------- 0.4/2.1 MB 11.6 MB/s eta 0:00:01
   -------------- ------------------------- 0.7/2.1 MB 9.5 MB/s eta 0:00:01
   --------------------- ------------------ 1.1/2.1 MB 8.9 MB/s eta 0:00:01
   --------------------------- ------------ 1.4/2.1 MB 8.3 MB/s