# Vector Store

In [None]:
from dotenv import load_dotenv
from rich import print as print

# Load variables from a .env file into the process environment (python-dotenv).
# Presumably this makes API keys/settings available to the factories used below — confirm.
load_dotenv(verbose=True)

# IPython magics: enable the autoreload extension in mode 2, which re-imports
# modules before each cell executes so local edits are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

### Split the text into chunks

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from src.ai_core.embeddings import EmbeddingsFactory
from src.ai_core.vector_store import VECTOR_STORE_ENGINE, VectorStoreFactory

# Read the sample speech from disk as LangChain documents.
SOURCE_PATH = "use_case_data/other/state_of_the_union.txt"
loader = TextLoader(SOURCE_PATH)
documents = loader.load()

# Cut the documents into chunks (chunk_size=2000, no overlap between
# consecutive chunks) so each chunk can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
print(texts)

### Calculate embeddings and put them in a vector database

This is an adaptation of https://python.langchain.com/v0.1/docs/modules/data_connection/vectorstores/ <br><br>
We have a factory to facilitate Vector Store configuration and selection. <br>
One benefit is that it's easy to change the vector store type and embedding model, just by configuration.

In [None]:
# Choose the vector store backend; the value is annotated with the
# VECTOR_STORE_ENGINE type imported from the project's vector_store module.
vs_engine: VECTOR_STORE_ENGINE | None = "InMemory"

# Other choices (Examples)
# vs_engine = "Chroma_in_memory"
# vs_engine = "Sklearn"

# Build the factory from the selected engine and an embeddings factory
# created with its default arguments.
embeddings_factory = EmbeddingsFactory()
vs_factory = VectorStoreFactory(
    id=vs_engine,
    collection_name="name",
    embeddings_factory=embeddings_factory,
)
print(vs_factory)

# Get the vector store from the factory and index the chunks produced earlier.
# Kept as the last expression so the notebook still displays its return value.
db = vs_factory.vector_store
db.add_documents(texts)

### Test a query

In [None]:
# Ask the store for the 3 chunks most similar to a natural-language question.
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query=query, k=3)
print(docs)

### Vector Store as Runnable

In [None]:
# Wrap the vector store as a retriever so it can be invoked like any other Runnable.
retriever = db.as_retriever()

# Run the same question through the retriever interface.
retrieved_docs = retriever.invoke(query)
print(retrieved_docs)

In [None]:
# Display the concrete class of the retriever returned by as_retriever().
type(retriever)

### Configurable Retriever Runnable 

It's a little bit tricky, so there is a small wrapper in the Factory: 

In [None]:
# Build a retriever through the factory's wrapper, requesting top_k=3.
retriever = vs_factory.as_retriever_configurable(top_k=3)

# Invoke it with no per-call configuration.
# NOTE(review): an earlier comment here said "return 1 result", which does not
# match top_k=3 with no config override — confirm the expected result count.
result = retriever.invoke(query)
print(result)