## Faiss --> Facebook AI Similarity Search

In [10]:
from langchain_community.document_loaders import TextLoader

# Point a loader at the source text file, then read it in.
# `text` is the loader's output and is what the splitter consumes next.
intro_loader = TextLoader("data/intro.txt")
text = intro_loader.load()

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure the splitter once (chunk_size=200, chunk_overlap=30), then cut the
# loaded documents into the chunks that will be embedded and indexed.
chunk_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)
docs = chunk_splitter.split_documents(text)

In [24]:
from langchain_community.embeddings import OllamaEmbeddings

# Name of the Ollama model used to turn text into vectors.
# NOTE(review): in the recorded outputs further down, the chunk that literally
# contains the query text comes back last with the largest score. That may mean
# this model is a poor fit for embeddings — confirm, and consider a dedicated
# embedding model.
embedding_model_name = "gemma2:2b"
embeddings = OllamaEmbeddings(model=embedding_model_name)

In [None]:
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the chunks, using `embeddings` to vectorise them.
db = FAISS.from_documents(documents=docs, embedding=embeddings)
# Bare last expression so the notebook shows the store's repr.
db

<langchain_community.vectorstores.faiss.FAISS at 0x21d73094370>

In [25]:
# Question used by every search cell below.
query = "I recently completed my Post Graduation Diploma in Artificial Intelligence from?"

# Keep the hits under their own name. Rebinding `docs` here would replace the
# chunk list that FAISS.from_documents(docs, embeddings) was built from, so
# re-running the index cell afterwards would index search results instead.
matches = db.similarity_search(query)

# In the recorded run the chunk containing the query text is at index 3, the last
# of the returned hits — this index is hand-picked, not the top-ranked match.
matches[3].page_content

'I recently completed my Post Graduation Diploma in Artificial Intelligence from CDAC pune. and I have completed my graduation from Dy Patil RAIT, Navi Mumbai in year 2023.'

## Retriever

In [26]:
# Wrap the vector store in a retriever and run the same query through it.
retriever = db.as_retriever()
retrieved_docs = retriever.invoke(query)
# Bare last expression so the retrieved documents are displayed.
retrieved_docs

[Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content="First of all, Thank you for giving me this opportunity.\nI'm Rishikesh Krishna Patil, from New Mumbai."),
 Document(metadata={'source': 'data/intro.txt'}, page_content='I recently completed my Post Graduation Diploma in Artificial Intelligence from CDAC pune. and I have completed my graduation from Dy Patil RAIT, Navi Mumbai in year 2023.')]

## Similarity Search With Score

In [27]:
# Same search as before, but each hit comes back as a (Document, score) pair.
# NOTE(review): the recorded scores increase down the list in the same order the
# plain search returns, so presumably a lower score means a closer match —
# confirm against the FAISS vector store docs.
docs_and_score = db.similarity_search_with_score(query=query)
docs_and_score

[(Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
  8498.629),
 (Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
  9705.582),
 (Document(metadata={'source': 'data/intro.txt'}, page_content="First of all, Thank you for giving me this opportunity.\nI'm Rishikesh Krishna Patil, from New Mumbai."),
  11257.398),
 (Document(metadata={'source': 'data/intro.txt'}, page_content='I recently completed my Post Graduation Diploma in Artificial Intelligence from CDAC pune. and I have completed my graduation from Dy Patil RAIT, Navi Mumbai in year 2023.'),
  11566.864)]

## Saving and Loading
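The vector store is saved to a local folder. Part of it is stored as a pickle file, which is why loading it back requires an explicit opt-in.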

In [28]:
# Persist the vector store to the "faiss_index" folder so it can be reloaded later.
db.save_local(folder_path="faiss_index")

In [None]:
# Loading without opting in is expected to be refused: the saved index includes a
# pickle file, and a tampered pickle can deliver a malicious payload when loaded.
# The error is caught and printed so the refusal is still demonstrated while the
# notebook keeps running from top to bottom.
try:
    new_db = FAISS.load_local("faiss_index", embeddings)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

In [33]:
# Opt in to pickle deserialization. Only do this for an index you trust; here the
# "faiss_index" folder was written by db.save_local(...) earlier in this notebook.
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

# Search the reloaded store (not the in-memory `db`) so the save/load round trip
# is what actually gets exercised.
docs = new_db.similarity_search(query)
docs

[Document(metadata={'source': 'data/intro.txt'}, page_content='and Algorithms and have solved DSA problems on websites like LeetCode, HackerRank, GeeksforGeeks and AlgoExpert.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content='Based on all this knowledge, I recently created a project named code mixed text translation using python and NLP libraries like PyTorch, NumPy and regular expression.'),
 Document(metadata={'source': 'data/intro.txt'}, page_content="First of all, Thank you for giving me this opportunity.\nI'm Rishikesh Krishna Patil, from New Mumbai."),
 Document(metadata={'source': 'data/intro.txt'}, page_content='I recently completed my Post Graduation Diploma in Artificial Intelligence from CDAC pune. and I have completed my graduation from Dy Patil RAIT, Navi Mumbai in year 2023.')]