### FAISS Vector Store DB

In [5]:

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Configure the splitter first (chunk size 1000, overlap 30); it does not depend on the loader.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)

# Load speech.txt into Document objects, then split those documents into chunks.
loader = TextLoader(file_path="speech.txt")
documents = loader.load()
docs = text_splitter.split_documents(documents)

In [6]:
# Display the chunks produced by the text splitter.
docs

[Document(metadata={'source': 'speech.txt'}, page_content='This speech txt file I am just added for test Text Loader')]

In [8]:
# Create the Ollama embedding client for the "nomic-embed-text" model,
# then build a FAISS vector store from the split chunks.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x26abaa487f0>

In [10]:
# Ask the vector store for the chunks most similar to a natural-language question.
# NOTE: this rebinds `docs`, which previously held the splitter output that was
# passed to FAISS.from_documents; re-run the load/split cell before rebuilding `db`.
query = "What does the speaker believe is the main reason the united States should enter the war?"
docs = db.similarity_search(query=query)
docs[0].page_content


'This speech txt file I am just added for test Text Loader'

In [12]:
# Wrap the vector store in a retriever and query through it.
# The retriever's result is bound to its own name so the text displayed below
# comes from the retriever itself, not from `docs` left over by the earlier
# similarity-search cell.
retriever = db.as_retriever()
retrieved_docs = retriever.invoke(query)
retrieved_docs[0].page_content

'This speech txt file I am just added for test Text Loader'

In [13]:
# Similarity search that returns (Document, score) pairs instead of bare Documents.
# NOTE(review): the score is presumably a distance (lower = closer) for the
# default FAISS setup — confirm against the LangChain FAISS docs.
docs_and_score = db.similarity_search_with_score(query=query)
docs_and_score

[(Document(id='e534883a-c517-4da8-8c6e-8c51d8a29b43', metadata={'source': 'speech.txt'}, page_content='This speech txt file I am just added for test Text Loader'),
  np.float32(1.0611517))]

In [14]:
# Embed the query text directly; the result is a flat list of floats (see output).
embeddings_vector = embeddings.embed_query(text=query)
embeddings_vector

[0.0043114047,
 0.10160327,
 -0.15810421,
 -0.012295218,
 0.09473876,
 0.10567994,
 -0.039573167,
 -0.021435743,
 0.05098562,
 0.014055439,
 -0.018526731,
 0.028215587,
 0.016526649,
 0.05291578,
 0.060431696,
 -0.0348125,
 -0.0028744154,
 -0.07284176,
 -0.03155356,
 0.01908758,
 -0.058523554,
 -0.0474179,
 0.005752729,
 -0.02992029,
 0.08641977,
 0.03683409,
 0.012285733,
 0.049394622,
 -0.02168984,
 0.0042131557,
 0.05917311,
 -0.05904499,
 0.0013366098,
 0.0099699665,
 -0.054942105,
 -0.0694386,
 -0.016191265,
 0.045206208,
 -0.007775142,
 -0.056944635,
 -0.0352472,
 0.0062960195,
 0.020349342,
 -0.071583845,
 0.04725183,
 -0.031015558,
 -0.014217876,
 -0.008953043,
 0.028396962,
 -0.008386847,
 0.006542047,
 -0.0712151,
 0.016372249,
 -0.037037287,
 -0.0029085216,
 0.026993494,
 -0.004563828,
 -0.012540401,
 -0.011191115,
 -0.0029580412,
 0.0518671,
 0.038632777,
 -0.037911568,
 0.012649537,
 -0.009968393,
 -0.013989938,
 -0.033614952,
 0.022578122,
 0.057066288,
 -0.030092536,
 0.

In [15]:
# Search with the precomputed query embedding rather than the raw query text.
# Despite its name, `docs_score` holds only Documents (no scores), as the output shows.
docs_score = db.similarity_search_by_vector(embedding=embeddings_vector)
docs_score

[Document(id='e534883a-c517-4da8-8c6e-8c51d8a29b43', metadata={'source': 'speech.txt'}, page_content='This speech txt file I am just added for test Text Loader')]

In [16]:
# Persist the vector store to the "faiss_index" folder (relative to the working directory).
db.save_local(folder_path="faiss_index")

In [19]:
# Reload the vector store from the "faiss_index" folder, passing the same embeddings
# object for query embedding. Note that `new_df` is a FAISS store, not a DataFrame.
# allow_dangerous_deserialization=True opts in to pickle-based loading (per the
# LangChain FAISS docs); keep it only for index folders you created yourself.
new_df = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)
# Run the same query against the reloaded store (rebinds `docs` again).
docs = new_df.similarity_search(query=query)

In [20]:
# Display the matches returned by the reloaded index.
docs

[Document(id='e534883a-c517-4da8-8c6e-8c51d8a29b43', metadata={'source': 'speech.txt'}, page_content='This speech txt file I am just added for test Text Loader')]