In [1]:
# Vector Store DB

# FAISS : Facebook AI Similarity Search

In [2]:
from langchain_community.document_loaders import TextLoader

from langchain_community.embeddings import OllamaEmbeddings

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import FAISS

In [7]:
# Read the speech transcript from disk into LangChain documents.
# NOTE(review): no encoding is passed here, so the loader's default is used;
# consider pinning one if the file's encoding is known.
loader = TextLoader(file_path='speech.txt')
documents = loader.load()

In [9]:
# Break the loaded transcript into small, overlapping chunks so that each one
# can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=30,
)
docs = text_splitter.split_documents(documents)

# Inspect the element type the splitter produces.
type(docs[0])

langchain_core.documents.base.Document

In [14]:
# Embedding model shared by indexing (chunks) and searching (queries).
# NOTE(review): presumably needs a reachable Ollama server with this model
# available — confirm before a fresh Run All.
embeds = OllamaEmbeddings(
    model='mxbai-embed-large',
)

In [15]:
# Natural-language question reused by every search cell below.
query = "what is the main reason for united states to enter the war?"

In [None]:
# Vector DB: embed every chunk in `docs` and build a FAISS index over them.
db = FAISS.from_documents(documents=docs, embedding=embeds)

# Similarity search: fetch the chunks closest to the query text and display
# the best match.
result = db.similarity_search(query)
result[0]

Document(id='b177fea4-e20e-42b1-a254-8414a2b78b4f', metadata={'source': 'speech.txt'}, page_content='ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is')

In [17]:
# Show only the raw text of the top hit, without id/metadata.
result[0].page_content

'ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is'

In [18]:
# Similarity search that also returns a score: each hit is a (Document, score) tuple.
# NOTE(review): the score looks like a raw distance (lower = closer) rather than a
# normalized similarity — confirm against the FAISS vector store docs.

result_score = db.similarity_search_with_score(query)

# Display the best hit together with its score.
result_score[0]

(Document(id='b177fea4-e20e-42b1-a254-8414a2b78b4f', metadata={'source': 'speech.txt'}, page_content='ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is'),
 267.1466)

In [20]:
# Same lookup through the retriever interface: wrap the vector store as a
# retriever and invoke it with the query text.

retriever = db.as_retriever()

res = retriever.invoke(query)

# Display the first document the retriever returned.
res[0]

Document(id='b177fea4-e20e-42b1-a254-8414a2b78b4f', metadata={'source': 'speech.txt'}, page_content='ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is')

In [22]:
# Vector similarity search: embed the query ourselves, then search the index
# with the resulting embedding instead of the raw text.

vecs = embeds.embed_query(query)

# NOTE(review): this reassigns `result` from the earlier text-based search cell.
result = db.similarity_search_by_vector(vecs)

# Display the top hit of the by-vector search.
result[0]

Document(id='b177fea4-e20e-42b1-a254-8414a2b78b4f', metadata={'source': 'speech.txt'}, page_content='ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is')

In [23]:
# Persist the index to the 'faiss_index' folder so it can be reloaded later
# without re-embedding the chunks.
db.save_local(folder_path='faiss_index')

In [28]:
# Reload the index persisted under 'faiss_index', passing the same embedding
# object so queries are embedded the same way as the stored chunks.
# SECURITY: allow_dangerous_deserialization=True permits unpickling the saved
# store; pickle can execute arbitrary code, so only load index folders written
# by this notebook or another trusted source.
new_db = FAISS.load_local('faiss_index', embeds, allow_dangerous_deserialization=True)

# Keep the hits under their own name: `docs` already holds the split chunks the
# index is built from, and overwriting it here would make a re-run of the
# index-building cell embed only these few search hits.
reloaded_result = new_db.similarity_search(query)

# Display the top hit from the reloaded index (should match the original one).
reloaded_result[0]

Document(id='b177fea4-e20e-42b1-a254-8414a2b78b4f', metadata={'source': 'speech.txt'}, page_content='ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is')