# Vector Stores
Vector stores are specialized databases that:
- Store and manage large amounts of numerical data.
- Enable semantic search capabilities

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

embedding_model = OllamaEmbeddings(model='snowflake-arctic-embed:22m')

## Basics of Vector Store

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document

# Initialize with an embedding model
vector_store = InMemoryVectorStore(embedding=embedding_model)

# Create a document
document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "news"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

documents = [document_1, document_2]
# Add documents
vector_store.add_documents(documents=documents)

# Add documents with custom IDs
vector_store.add_documents(documents=documents, ids=["doc1", "doc2"])

# Delete documents
vector_store.delete(ids=["doc1"])

# Search for similar documents
query = "What is the weather forecast for tomorrow?"

results = vector_store.similarity_search(query, k=5)
results

## Embedding Models and Text Loaders

In [None]:
fp = "data/constitution.pdf"

loader = PyPDFLoader(fp)

docs = loader.load()
print(docs[4])

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

documents = text_splitter.split_documents(docs)
print(len(documents))
print(documents[4])

## Vector Store Creation Using Chroma and FAISS

In [None]:
from uuid import uuid4
from langchain_chroma import Chroma
# init vector store
vector_store = Chroma(
    collection_name="constitution",
    embedding_function=embedding_model,
    persist_directory="chroma",
)

uuids = [str(uuid4()) for _ in range(len(documents))]

# Add documents
vector_store.add_documents(documents=documents, ids=uuids)


In [None]:
# Update documents
vector_store.update_documents(
    ids=uuids[:2], documents=[document_1, document_2]
)

### FAISS

In [21]:
from langchain_community.vectorstores import FAISS

################### FAISS ###################
vector_store_faiss = FAISS.from_documents(documents, embedding_model, ids=uuids)

vector_store_faiss.save_local('faiss')

## Similarity search

In [22]:
query = "What is the role of the President of the United States? What are the powers of the President of the United States?"

results = vector_store.similarity_search(query, k=4,  filter={"source": "news"})

results

[Document(metadata={'page': 0, 'source': 'news'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'page': 0, 'source': 'news'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'page': 0, 'source': 'news'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'page': 0, 'source': 'news'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.')]

### Similarity search by vector

In [23]:
embedding_vector = embedding_model.embed_query(query)

docs = vector_store_faiss.similarity_search_by_vector(embedding_vector, k=5)

docs

[Document(metadata={'source': 'data/constitution.pdf', 'page': 12}, page_content='to the President of the Senate;-the President of the Senate \nshall, in the presence of the Senate and House of Represen- \ntatives, open all the certificates and the votes shall then be \ncounted;-The person having the greatest number of votes \nfor President, shall be the President, if such number be a \nmajority of the whole number of Electors appointed; and if \nno person have such majority, then from the persons having \nthe highest numbers not exceeding three on the list of those'),
 Document(metadata={'source': 'data/constitution.pdf', 'page': 16}, page_content='shall be elected to the office of President more than once. \nBut this Article shall not apply to any person holding the \noffice of President when this Article was proposed by Con- \ngress, and shall not prevent any person who may be holding \nthe office of President, or acting as President, during the \nterm within which this Article beco

## Asynchronous Vector Store

In [None]:

docs = await vector_store_faiss.asimilarity_search(query)

docs

[Document(metadata={'source': 'data/constitution.pdf', 'page': 12}, page_content='to the President of the Senate;-the President of the Senate \nshall, in the presence of the Senate and House of Represen- \ntatives, open all the certificates and the votes shall then be \ncounted;-The person having the greatest number of votes \nfor President, shall be the President, if such number be a \nmajority of the whole number of Electors appointed; and if \nno person have such majority, then from the persons having \nthe highest numbers not exceeding three on the list of those'),
 Document(metadata={'source': 'data/constitution.pdf', 'page': 16}, page_content='shall be elected to the office of President more than once. \nBut this Article shall not apply to any person holding the \noffice of President when this Article was proposed by Con- \ngress, and shall not prevent any person who may be holding \nthe office of President, or acting as President, during the \nterm within which this Article beco

# Chat with Vector Store

In [25]:
from langchain.prompts import PromptTemplate

llm = ChatOllama(model='llama3.2:1b')

In [26]:
# Perform a vector store similarity search
context_results = vector_store_faiss.similarity_search(query, k=4)

# Create a prompt template and fill it with context
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="Based on the following context:\n{context}\n\n{query}"
)
print(prompt_template)

input_variables=['context', 'query'] input_types={} partial_variables={} template='Based on the following context:\n{context}\n\n{query}'


In [27]:
# Fill the prompt template with the search results and the query
full_prompt = prompt_template.format(
    context="\n".join([doc.page_content for doc in context_results]),
    query=query
)
print(full_prompt)

Based on the following context:
to the President of the Senate;-the President of the Senate 
shall, in the presence of the Senate and House of Represen- 
tatives, open all the certificates and the votes shall then be 
counted;-The person having the greatest number of votes 
for President, shall be the President, if such number be a 
majority of the whole number of Electors appointed; and if 
no person have such majority, then from the persons having 
the highest numbers not exceeding three on the list of those
shall be elected to the office of President more than once. 
But this Article shall not apply to any person holding the 
office of President when this Article was proposed by Con- 
gress, and shall not prevent any person who may be holding 
the office of President, or acting as President, during the 
term within which this Article becomes operative from 
holding the office of President or acting as President during 
the remainder of such term. 
SECTION 2
and Duties of the said Of

In [30]:
# Use the language model to generate a response
response = llm.invoke(full_prompt)
print(response.content)

Based on the provided text, the role of the President of the United States is as follows:

1. **President of the Senate**: The President shall open all certificates and votes in the presence of both the Senate and House of Representatives.
2. **Duties:** The duties devolve on the Vice President, which includes assuming the powers and duties of the office if the President is unable to perform them.

As for the powers of the President, they include:

1. **Dischargeing the Powers and Duties**: If the President is unable to discharge their powers and duties, the Vice President will assume those responsibilities.
2. **Transmitting to the Senate and House a Written Declaration**: The President must transmit a written declaration to Congress stating that there are no unable presidents or vice-presidents, which would trigger the assumption of presidential powers by the Vice President.
3. **Returning Powers Unless Ability Existed**: If the President transmits this declaration to Congress, they 