In [2]:
from langchain.embeddings import CacheBackedEmbeddings,OpenAIEmbeddings
import os

In [3]:
# Build the embeddings client; the API key is read from the environment
# rather than being written into the notebook.
embeddings_model = OpenAIEmbeddings(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [4]:
# Short sample sentences to embed in a single embed_documents call.
sample_texts = [
    "Hi there!",
    "Oh, hello!",
    "What's your name?",
    "My friends call me World",
    "Hello World!",
]
embeddings = embeddings_model.embed_documents(sample_texts)

In [5]:
(
    len(embeddings),     # number of embedded texts
    len(embeddings[0]),  # length of the first embedding vector
)

(5, 1536)

In [6]:
# Embed a single query string with embed_query (the sample sentences above
# went through embed_documents).
query_text = "What was the name mentioned in the conversation?"
embedded_query = embeddings_model.embed_query(query_text)

In [10]:
# Show only the first five components of the query embedding.
print(embedded_query[0:5])

[0.0053772740534241935, -0.0006527779663918577, 0.038980290283414216, -0.002967397499514861, -0.008834564037682272]


In [16]:
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter

# Embedder that the cache-backed wrapper below delegates to.
underlying_embeddings = OpenAIEmbeddings()

# Byte store pointed at the local ./cache/ directory.
store = LocalFileStore("./cache/")

# Wrap the embedder with the store. The namespace is set to the embedding
# model name (presumably so entries from different models do not collide;
# see the CacheBackedEmbeddings documentation).
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,
    store,
    namespace=underlying_embeddings.model,
)

In [17]:
# Materialize the keys currently held by the cache store for display.
[*store.yield_keys()]

[]

In [14]:
# Load the source text. The encoding is given explicitly so decoding does not
# depend on the platform's default locale encoding (e.g. cp1252 on Windows),
# which can fail or garble non-ASCII characters in a UTF-8 file.
raw_documents = TextLoader("state_of_union.txt", encoding="utf-8").load()

# Split into chunks with chunk_size=1000 and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

In [19]:
%%time
db = FAISS.from_documents(documents, cached_embedder)

CPU times: total: 46.9 ms
Wall time: 98.1 ms


In [20]:
%%time
db2 = FAISS.from_documents(documents, cached_embedder)

CPU times: total: 0 ns
Wall time: 12.7 ms
