In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read the raw transcript from disk; `docs` holds the loaded Document objects
# that the splitter cell consumes.
loader = TextLoader(file_path="speech.txt")
docs = loader.load()

In [6]:
# Break the loaded documents into small overlapping chunks:
# chunk_size=100 caps each chunk's length, chunk_overlap=10 is shared between neighbours.
spiltters = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
)
final_docs = spiltters.split_documents(documents=docs)

In [12]:
from langchain_community.embeddings import  OllamaEmbeddings
# Embedding client for the Ollama-served "gemma:2b" model; reused below both to
# build the index and to embed queries.
embedding = OllamaEmbeddings(
    model="gemma:2b",
)

In [13]:
from langchain_community.vectorstores import FAISS
# Embed every chunk and index the resulting vectors in a FAISS store.
db = FAISS.from_documents(documents=final_docs, embedding=embedding)
db

<langchain_community.vectorstores.faiss.FAISS at 0x23d79a9bf40>

In [14]:
query = "How does the speaker describe the desired outcome of the war?"
# Keep the hits under their own name: reusing `docs` here would overwrite the
# documents loaded from speech.txt, so re-running the splitter cell afterwards
# would chunk the search hits instead of the speech.
query_docs = db.similarity_search(query)
# Show the text of the best-ranked hit.
query_docs[0].page_content

'safety to all nations and make the world itself at last free.'

#### As a Retriever
We can also convert the vector store into a retriever. This makes it easy to use with other LangChain components, most of which work with retrievers.

In [16]:
# Wrap the vector store in a retriever object.
retrival = db.as_retriever()
# Invoke it with the same query and show the text of the first returned document.
retrival.invoke(query)[0].page_content

'safety to all nations and make the world itself at last free.'

In [18]:
# Same lookup, but each hit comes back as a (Document, score) pair.
# NOTE(review): with FAISS's default metric the score should be an L2 distance,
# so smaller means closer — confirm if the index was built with another strategy.
db.similarity_search_with_score(query=query)

[(Document(id='8f29ced4-ec6b-411b-b6db-2a6f2761c6af', metadata={'source': 'speech.txt'}, page_content='safety to all nations and make the world itself at last free.'),
  np.float32(2734.0242)),
 (Document(id='0e200634-2c2a-4aae-9670-a6d0a26a3891', metadata={'source': 'speech.txt'}, page_content='in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us.'),
  np.float32(2901.067)),
 (Document(id='d272983e-2aa1-44f2-9052-f5dcc1e972be', metadata={'source': 'speech.txt'}, page_content='when those rights have been made as secure as the faith and the freedom of nations can make them.'),
  np.float32(3076.7524)),
 (Document(id='37f45335-2e86-4095-b4dc-29ac47943c44', metadata={'source': 'speech.txt'}, page_content='will lift it only here and there and without countenance except from a lawless and malignant few.'),
  np.float32(3104.8835))]

In [19]:
# Embed the query by hand so the resulting vector can be passed to the store directly.
vec = embedding.embed_query(text=query)

In [20]:
# Search with the precomputed query vector rather than the raw text.
db.similarity_search_by_vector(embedding=vec)

[Document(id='8f29ced4-ec6b-411b-b6db-2a6f2761c6af', metadata={'source': 'speech.txt'}, page_content='safety to all nations and make the world itself at last free.'),
 Document(id='0e200634-2c2a-4aae-9670-a6d0a26a3891', metadata={'source': 'speech.txt'}, page_content='in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us.'),
 Document(id='d272983e-2aa1-44f2-9052-f5dcc1e972be', metadata={'source': 'speech.txt'}, page_content='when those rights have been made as secure as the faith and the freedom of nations can make them.'),
 Document(id='37f45335-2e86-4095-b4dc-29ac47943c44', metadata={'source': 'speech.txt'}, page_content='will lift it only here and there and without countenance except from a lawless and malignant few.')]

In [21]:
# Persist the index to the "faiss db" folder so it can be reloaded later.
db.save_local(folder_path="faiss db")

In [24]:
# Reload the persisted index, passing the same embedding object used to build it.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved data; only do this for index folders you created yourself.
new_db = FAISS.load_local(
    folder_path="faiss db",
    embeddings=embedding,
    allow_dangerous_deserialization=True,
)

In [25]:
# Run the original query against the reloaded store.
new_db.similarity_search(query=query)

[Document(id='8f29ced4-ec6b-411b-b6db-2a6f2761c6af', metadata={'source': 'speech.txt'}, page_content='safety to all nations and make the world itself at last free.'),
 Document(id='0e200634-2c2a-4aae-9670-a6d0a26a3891', metadata={'source': 'speech.txt'}, page_content='in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us.'),
 Document(id='d272983e-2aa1-44f2-9052-f5dcc1e972be', metadata={'source': 'speech.txt'}, page_content='when those rights have been made as secure as the faith and the freedom of nations can make them.'),
 Document(id='37f45335-2e86-4095-b4dc-29ac47943c44', metadata={'source': 'speech.txt'}, page_content='will lift it only here and there and without countenance except from a lawless and malignant few.')]