# Vector Stores

## Introduction

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Ollama-served embedding model reused by the vector stores and queries below.
embedding_model = OllamaEmbeddings(
    model="snowflake-arctic-embed:22m",
)


## Basics of Vector Stores

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document

# Initialize an in-memory store backed by the embedding model
vector_store = InMemoryVectorStore(embedding=embedding_model)

# Create two sample documents, each tagged with its source
document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

documents = [document_1, document_2]

# Add documents without explicit IDs; the store returns the IDs it assigned
generated_ids = vector_store.add_documents(documents=documents)

# Remove those copies again. Otherwise the custom-ID insert below would leave
# every document in the store twice, so deleting "doc1" would not actually
# remove document_1 and the search would print duplicate hits.
vector_store.delete(ids=generated_ids)

# Add documents with custom IDs
vector_store.add_documents(documents=documents, ids=["doc1", "doc2"])

# Delete documents by ID
vector_store.delete(ids=["doc1"])

# Search for similar documents (asks for up to 5 matches)
query = "What is the weather forecast for tomorrow?"
results = vector_store.similarity_search(query, k=5)
print(results)

## Embedding Models and Text Loaders

In [None]:
# Splitter configuration: chunk size 500 with an overlap of 100 between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

# Load the PDF into Document objects.
fp = "data/constitution.pdf"
loader = PyPDFLoader(fp)
docs = loader.load()

# Split the loaded documents into chunks and report how many were produced.
documents = text_splitter.split_documents(docs)
print(len(documents))

## Vector Store Creation Using Chroma and FAISS

In [None]:
################### CHROMA ###################
from langchain_chroma import Chroma

# Build the Chroma store (persisted under ./chroma). It is bound to `db` as
# before, and additionally to `chroma_db` so it stays reachable once `db` is
# rebound to the FAISS store below.
db = chroma_db = Chroma.from_documents(documents, embedding_model, persist_directory="./chroma")

################### FAISS ###################
from langchain_community.vectorstores import FAISS

# Build the FAISS store from the same chunks. From here on `db` refers to the
# FAISS store; `faiss_db` names it explicitly.
db = faiss_db = FAISS.from_documents(documents, embedding_model)

## Similarity Search

In [None]:
# Two related questions sent as a single query string.
query = (
    "What is the role of the President of the United States? "
    "What are the powers of the President of the United States?"
)

# Fetch up to five of the closest chunks from the active store.
results = db.similarity_search(query, k=5)

# Bare expression so the notebook renders the hits as the cell output.
results

### Similarity Search by Vector

In [None]:
# Turn the query text into its embedding first ...
embedding_vector = embedding_model.embed_query(query)

# ... then search with that vector directly instead of with the raw text.
docs = db.similarity_search_by_vector(
    embedding_vector,
    k=5,
)

# Bare expression so the notebook shows the matches as the cell output.
docs

## Asynchronous Vector Store

In [None]:
# Async counterpart of the similarity search. No `k` is passed here, so the
# store's default result count applies.
# NOTE: top-level `await` works in notebook/IPython cells that support it; in a
# plain .py module this line is a SyntaxError and would need to live inside an
# `async def` instead.
docs = await db.asimilarity_search(query)
# Bare expression so the notebook shows the matches as the cell output.
docs


# Chat with Vector Store

In [None]:
# Import PromptTemplate from langchain_core (already used by this notebook for
# Document and InMemoryVectorStore) rather than through the legacy
# `langchain.prompts` path.
from langchain_core.prompts import PromptTemplate

# Ollama-served chat model used to generate answers
llm = ChatOllama(model='llama3.2:1b')

In [None]:
# Template with two slots: the retrieved context and the user's question.
prompt_template = PromptTemplate(
    template="Based on the following context:\n{context}\n\n{query}",
    input_variables=["context", "query"],
)

# Retrieve up to four chunks closest to the query from the vector store.
context_results = db.similarity_search(query, k=4)

# Join the chunk texts with newlines and substitute them into the template.
context_text = "\n".join(doc.page_content for doc in context_results)
full_prompt = prompt_template.format(context=context_text, query=query)

# Send the assembled prompt to the chat model and print the reply text.
response = llm.invoke(full_prompt)
print(response.content)