In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read the sample file from disk; the loader returns a list of Document objects.
text_loader = TextLoader('text.txt')
loaded_text = text_loader.load()
loaded_text

[Document(metadata={'source': 'text.txt'}, page_content='In the heart of a forgotten city where shadows danced on crumbling walls,\na lone algorithm sifted through mountains of corrupted data,\nseeking the last unbroken fragment of truth. It wasn’t built for emotion,\nyet some lines of code wept silently, haunted by the ghosts of deleted memories.\nThe servers buzzed like distant thunder, echoing remnants of a civilization too arrogant to fail.\nAmidst the digital ruins, a strange sentence emerged—garbled, half-English, half-gibberish—but unmistakably alive.\nWas it a bug? Or something more?')]

In [30]:
# Break the loaded document into small, overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,    # upper bound on the size of each chunk
    chunk_overlap=30,  # amount of trailing text a chunk may share with the next one
)
docs = text_splitter.split_documents(loaded_text)

In [31]:
# Inspect the chunks produced by the splitter.
docs

[Document(metadata={'source': 'text.txt'}, page_content='In the heart of a forgotten city where shadows danced on crumbling walls,'),
 Document(metadata={'source': 'text.txt'}, page_content='a lone algorithm sifted through mountains of corrupted data,'),
 Document(metadata={'source': 'text.txt'}, page_content='seeking the last unbroken fragment of truth. It wasn’t built for emotion,'),
 Document(metadata={'source': 'text.txt'}, page_content='yet some lines of code wept silently, haunted by the ghosts of deleted memories.'),
 Document(metadata={'source': 'text.txt'}, page_content='The servers buzzed like distant thunder, echoing remnants of a civilization too arrogant to fail.'),
 Document(metadata={'source': 'text.txt'}, page_content='Amidst the digital ruins, a strange sentence emerged—garbled, half-English, half-gibberish—but'),
 Document(metadata={'source': 'text.txt'}, page_content='half-gibberish—but unmistakably alive.'),
 Document(metadata={'source': 'text.txt'}, page_content='W

In [32]:
# Embedding model served through Ollama.
# NOTE(review): assumes an Ollama server is reachable and this model has been pulled — confirm.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Embed every chunk and build a FAISS vector store from the results.
db = FAISS.from_documents(
    docs,
    embedding=embeddings,
)

In [33]:
# Display the vector store object; only its repr is shown, not the indexed vectors.
db

<langchain_community.vectorstores.faiss.FAISS at 0x1e34471a710>

In [34]:
# Natural-language question reused by the search examples in the following cells.
query = 'who sifted through mountains of corrupted data'

In [35]:
# Embed the query text and fetch the stored chunks most similar to it.
query_result = db.similarity_search(query)

In [37]:
# Text of the first returned hit.
query_result[0].page_content

'a lone algorithm sifted through mountains of corrupted data,'

#### Using the vector store as a retriever

In [40]:
# Expose the FAISS store through the retriever interface and run the same query.
retriever = db.as_retriever()
retrieved = retriever.invoke(query)

# Show the text of the first document the retriever returned.
top_hit = retrieved[0]
top_hit.page_content

'a lone algorithm sifted through mountains of corrupted data,'

In [53]:
# Same search, but each hit comes back as a (Document, score) tuple.
# In the output the chunk that actually answers the query has the smallest
# score, so the score reads as a distance: lower means more similar.
# NOTE(review): presumably FAISS's default L2 distance — confirm against the LangChain docs.
query_result_with_score = db.similarity_search_with_score(query)
query_result_with_score

[(Document(id='31ca8274-cf00-41d5-8239-31db21993903', metadata={'source': 'text.txt'}, page_content='a lone algorithm sifted through mountains of corrupted data,'),
  0.35764483),
 (Document(id='670ecf24-8945-4976-9eba-27e723a08856', metadata={'source': 'text.txt'}, page_content='The servers buzzed like distant thunder, echoing remnants of a civilization too arrogant to fail.'),
  0.8781957),
 (Document(id='b509e655-f160-4f3e-9703-0ecd008bd432', metadata={'source': 'text.txt'}, page_content='yet some lines of code wept silently, haunted by the ghosts of deleted memories.'),
  0.8795611),
 (Document(id='5699e5b2-3a3e-4557-9dac-199fce11af4b', metadata={'source': 'text.txt'}, page_content='Amidst the digital ruins, a strange sentence emerged—garbled, half-English, half-gibberish—but'),
  0.8963671)]

In [54]:
# Embed the query text directly so the raw vector can be reused for a vector-based search.
embedding_vector = embeddings.embed_query(query)

In [56]:
# Show the raw embedding (a plain list of floats).
# NOTE(review): this prints every component of the vector; consider
# `embedding_vector[:5]` together with `len(embedding_vector)` to keep the output short.
embedding_vector

[0.09157018,
 0.020467501,
 -0.18683338,
 -0.04139274,
 0.084696636,
 0.021271419,
 -0.036160335,
 0.02930827,
 -0.010669951,
 0.02805456,
 -0.041617073,
 0.036100127,
 0.12501138,
 0.0021533172,
 -0.037704725,
 0.004194433,
 0.013684509,
 0.029854218,
 -0.023102416,
 -0.038963873,
 0.007979756,
 -0.025629157,
 0.0019811415,
 0.016406775,
 0.030925639,
 0.0031634744,
 0.0030739664,
 -0.015926732,
 -0.008233564,
 0.034150522,
 0.07640069,
 -0.020410815,
 -0.09617536,
 -0.023706676,
 -0.04517751,
 -0.03951038,
 0.038895763,
 0.007249266,
 0.0930604,
 0.039710823,
 0.023086557,
 -0.015763467,
 -0.03979546,
 -0.07218706,
 0.014975686,
 0.012613232,
 0.010907368,
 0.03523412,
 0.06016028,
 -0.06758628,
 0.028211724,
 0.0038606434,
 -0.002340219,
 0.011551178,
 0.0364528,
 0.016036028,
 -0.007100334,
 -0.004043903,
 0.047620725,
 -0.056954242,
 0.12003789,
 -0.025212998,
 -0.0053926744,
 0.023330055,
 -0.021050295,
 0.004034473,
 -0.03792141,
 0.020742074,
 0.0027674192,
 -0.082603455,
 0.10

In [58]:
# Search with the precomputed query vector instead of the query text; the top
# hit is the same chunk that the text-based searches returned.
vector_result = db.similarity_search_by_vector(embedding_vector)
vector_result[0]

Document(id='31ca8274-cf00-41d5-8239-31db21993903', metadata={'source': 'text.txt'}, page_content='a lone algorithm sifted through mountains of corrupted data,')

In [59]:
# Persist the vector store to the local folder 'faiss_index' so it can be reloaded later.
db.save_local('faiss_index')

In [60]:
# Reload the saved store; the same embedding model is passed in so new queries
# are embedded consistently with the stored vectors.
# SECURITY: allow_dangerous_deserialization=True permits unpickling data read
# from disk, which can execute arbitrary code. It is acceptable here only
# because 'faiss_index' was written by this notebook itself — never enable it
# for index files obtained from an untrusted source.
new_db = FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)

In [62]:
new_db  # a separate FAISS object rebuilt from the index saved on disk

<langchain_community.vectorstores.faiss.FAISS at 0x1e37cc65d50>