# FAISS(facebook AI similarity search)

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [2]:
loader = TextLoader(r"speeh.txt")
documents = loader.load()
documents

[Document(metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.\n\nThis is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.')]

In [3]:
text_splitter = CharacterTextSplitter(chunk_size = 200 , chunk_overlap = 10)
docs = text_splitter.split_documents(documents)
docs

Created a chunk of size 266, which is longer than the specified 200


[Document(metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.'),
 Document(metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.')]

In [4]:
embeddings = OllamaEmbeddings(model ="gemma:2b")  #bydefault is llama2 but we have installed gemm2:b
db = FAISS.from_documents(docs, embeddings)
db   
# params splitted docs and embeddings model

  embeddings = OllamaEmbeddings(model ="gemma:2b")  #bydefault is llama2 but we have installed gemm2:b


<langchain_community.vectorstores.faiss.FAISS at 0x2b9c27300d0>

In [5]:
# query
query = "What is the main topic of the speech?"
docs = db.similarity_search(query)
docs

[Document(id='fc2ef269-040a-45c1-b42e-4192a162c031', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
 Document(id='80c55ead-12ae-496c-87c6-222d46276258', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.')]

In [6]:
docs[0].page_content

'This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'

# Retriver, we can also convert VectorDB into retriver class

In [7]:
retriever = db.as_retriever()  # Retriver, we can also convert VectorDB into retriver class
doc2 = retriever.invoke(query)
doc2


[Document(id='fc2ef269-040a-45c1-b42e-4192a162c031', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
 Document(id='80c55ead-12ae-496c-87c6-222d46276258', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.')]

In [8]:
doc2[0].page_content

'This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'

# we can also get Score and Manhattun distance

In [9]:
docs_and_score = db.similarity_search_with_score(query)
docs_and_score

[(Document(id='fc2ef269-040a-45c1-b42e-4192a162c031', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
  np.float32(2805.1855)),
 (Document(id='80c55ead-12ae-496c-87c6-222d46276258', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.'),
  np.float32(3036.3738))]

# similarity search by vector

can we directly pass vector intead of sentnces

In [10]:
embedding_vector = embeddings.embed_query(query)
embedding_vector

[-0.2216968834400177,
 -1.4594179391860962,
 0.04175268113613129,
 1.3306292295455933,
 2.2590956687927246,
 2.094722270965576,
 1.5485420227050781,
 0.013651345856487751,
 0.14809277653694153,
 -0.5808722376823425,
 2.0421340465545654,
 0.9328432679176331,
 0.8309697508811951,
 1.6767103672027588,
 -0.5568175315856934,
 -0.48392319679260254,
 2.4952609539031982,
 1.6788465976715088,
 -0.4355024993419647,
 -0.10034627467393875,
 2.640976905822754,
 -0.387329638004303,
 0.25670549273490906,
 -1.1861199140548706,
 0.03440987691283226,
 -1.5759005546569824,
 -0.25240468978881836,
 0.8928446173667908,
 1.0484150648117065,
 -0.5874263048171997,
 -0.15426136553287506,
 0.25974780321121216,
 -0.44210153818130493,
 -0.4141502380371094,
 0.29593807458877563,
 0.7072330713272095,
 0.32057029008865356,
 0.5346298217773438,
 0.6970640420913696,
 -0.5594256520271301,
 -2.077662467956543,
 -0.9791064262390137,
 0.6519126892089844,
 -0.9759610295295715,
 -0.29215386509895325,
 -1.0008976459503174,
 1

In [11]:
doc_score = db.similarity_search_by_vector(embedding_vector)
doc_score

[Document(id='fc2ef269-040a-45c1-b42e-4192a162c031', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
 Document(id='80c55ead-12ae-496c-87c6-222d46276258', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.')]

# save and loading

In [12]:
db.save_local("faiss_index")  # save the index
# saved as .pkl file

In [13]:
# we also can load the file
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
new_db 

<langchain_community.vectorstores.faiss.FAISS at 0x2b9fb818580>

In [14]:
docs = new_db.similarity_search(query)
docs

[Document(id='fc2ef269-040a-45c1-b42e-4192a162c031', metadata={'source': 'speeh.txt'}, page_content='This is our hope. This is the faith that I go back to the South with.\n\nWith this faith, we will be able to hew out of the mountain of despair a stone of hope.'),
 Document(id='80c55ead-12ae-496c-87c6-222d46276258', metadata={'source': 'speeh.txt'}, page_content='I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; and the glory of the Lord shall be revealed, and all flesh shall see it together.')]