In [None]:
# FAISS -> FACEBOOK AI SIMILARITY SEARCH
# LIBRARY FOR EFFICIENT SIMILARITY SEARCH AND CLUSTERING OF DENSE VECTORS
# IT CONTAINS ALGORITHMS THAT SEARCH IN SETS OF VECTORS OF ANY SIZE, UP TO ONES THAT POSSIBLY DO NOT FIT IN RAM
# IT ALSO CONTAINS SUPPORTING CODE FOR EVALUATION AND PARAMETER TUNING

In [None]:
from langchain_community.document_loaders import TextLoader    # can load text into documents
from langchain_community.vectorstores import FAISS             # can store embedded data
from langchain_community.embeddings import OllamaEmbeddings    # text can be embed using ollamaembeddings 
from langchain.text_splitter import CharacterTextSplitter      # split the text into chunks

# Read the speech from disk, then cut it into chunks for embedding.
loader = TextLoader(file_path="speech.txt")
documents = loader.load()

# Chunks target 1000 characters, with 30 characters shared between neighbours.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=30,
)
docs = text_splitter.split_documents(documents)

In [None]:
# Convert the chunks to embeddings and index them in a FAISS vector store.
# OllamaEmbeddings is created with its default settings (no model is named here).
embeddings = OllamaEmbeddings()

# from_documents takes the chunks plus the embedding object used to vectorise them.
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db

In [None]:
# Querying
# Two sample questions about the speech; only `query2` is searched in this cell.
query = "What does the speaker believe is the main reason the United State should enter the war?"
query2 = "How does the speaker describe the desired outcome of the war?"

# Keep the hits in their own variable. Reusing `docs` here would overwrite the
# chunk list that the index-building cell reads, so re-running that cell later
# would index search results instead of the speech.
search_results = db.similarity_search(query2)
search_results[0].page_content

In [None]:
# As a Retriever
# The vector store can be wrapped in a retriever object. Many other LangChain
# methods are written against retrievers rather than against a specific vector
# store, so converting makes the index easy to plug into them (for example when
# combining it with an LLM).

retriever = db.as_retriever()

# Store the hits under a dedicated name so the chunk list in `docs` (the input
# to the index-building cell) is not overwritten by search results.
retrieved_docs = retriever.invoke(query2)
retrieved_docs[0].page_content

In [None]:
# Similarity search with score
# Besides the matching documents, this variant also returns the distance score
# between the query and each of them.

docs_and_score = db.similarity_search_with_score(query=query2)
docs_and_score

In [None]:
# Embed the query text by hand so the index can be searched by raw vector.
embedding_vector = embeddings.embed_query(text=query2)

In [None]:
# Search with the precomputed query vector instead of the query text.
# NOTE(review): this calls the plain by-vector search, not a *_with_score
# variant, so despite the variable name the result presumably holds documents only.
docs_score = db.similarity_search_by_vector(embedding=embedding_vector)
docs_score

In [None]:
# Saving and loading
# Persist the index under the local "faiss_index" path so it can be reloaded later.

db.save_local(folder_path="faiss_index")

In [None]:
# Reload the saved index, passing the same embeddings object used to build it.
# allow_dangerous_deserialization=True is an explicit opt-in to a load path the
# library labels dangerous; only use it on index folders you created yourself.
new_db = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Run the same query against the reloaded store.
docs = new_db.similarity_search(query=query2)