# FAISS

Facebook AI Similarity Search

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

loader = TextLoader("speech.txt")
document = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=30)
docs = text_splitter.split_documents(document)


In [3]:
docs

[Document(metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!')]

In [4]:
embeddings = OllamaEmbeddings(model="gemma:2b")
db = FAISS.from_documents(docs, embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x721ee41e0e00>

In [5]:
### Querying the Vector DataBase
query = "What is the main topic of the speech?"
similarity = db.similarity_search(query)
similarity[0].page_content

'Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!'

#### As a Retriver

We can also convert the VectorStore into a Retrive class. This allows us to easily use it in other LangChain methods, which largely work with retirevers

In [6]:
retriever = db.as_retriever()
retriever.invoke(query)

[Document(id='0692d090-1106-4f85-9a84-e45eadc0e63c', metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!')]

### Similarity Search with score


In [None]:
docs_and_score = db.similarity_search_with_score(query)
docs_and_score

[(Document(id='0692d090-1106-4f85-9a84-e45eadc0e63c', metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!'),
  0.49191225)]

In [8]:
embedding_vector = embeddings.embed_query(query)
embedding_vector

[0.00028978632,
 -0.011724328,
 -0.0028519742,
 0.022267543,
 0.02938321,
 0.026097065,
 0.010458554,
 0.003190556,
 0.0019705582,
 -0.0025764012,
 0.025553439,
 0.0002656393,
 0.00024887963,
 0.02174082,
 -0.011695804,
 -0.007944859,
 0.036869075,
 0.013346077,
 -0.009612358,
 0.0004970742,
 0.034251012,
 -0.0024571917,
 -0.0048738867,
 -0.009729032,
 -0.008867427,
 -0.014459562,
 -0.0071055326,
 0.018284777,
 0.016856601,
 -0.00762889,
 0.008399721,
 -0.0069725495,
 8.7342574e-05,
 -0.007638046,
 0.0060817213,
 0.017767664,
 -0.0015139445,
 0.0027486046,
 0.014588318,
 -0.0069169835,
 -0.028687442,
 -0.01269479,
 0.005997105,
 -0.014562394,
 -0.0075492486,
 -0.015217048,
 0.020078482,
 0.016345903,
 0.01992014,
 -0.0045894776,
 -0.32476693,
 -0.2819034,
 0.010137328,
 -0.003469059,
 0.020949854,
 -0.008901532,
 -0.007962832,
 0.013005006,
 -0.01009704,
 -0.010033245,
 -0.0049236347,
 -0.0032070312,
 -0.007852924,
 -0.009939347,
 0.005986498,
 -0.0017338592,
 -0.01350925,
 -0.00516032

In [10]:
db.similarity_search_by_vector(embedding_vector)

[Document(id='0692d090-1106-4f85-9a84-e45eadc0e63c', metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!')]

In [11]:
### Saving and loading the Vector DataBase
db.save_local("faiss_index")

In [13]:
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
new_db.similarity_search(query)

[Document(id='0692d090-1106-4f85-9a84-e45eadc0e63c', metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!')]

# Chroma

In [15]:
## Building sample vectordb
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [17]:
loader = TextLoader("speech.txt")
document = loader.load()
document

[Document(metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!\n')]

In [18]:
splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=30)
doc = splitter.split_documents(document)
doc

[Document(metadata={'source': 'speech.txt'}, page_content='Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!')]

In [23]:
embeddings = OllamaEmbeddings(model="gemma:2b")
vectordb = Chroma.from_documents(doc, embeddings)
vectordb

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


<langchain_chroma.vectorstores.Chroma at 0x721ee4029340>

In [24]:
## Query it
query = "What is the main topic of the speech?"
docs = vectordb.similarity_search(query)
docs[0].page_content

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


'Good morning everyone, Thank you all for being here today. It’s an honor to stand before you and share a few thoughts about the future of technology and innovation. Over the past decade, we’ve witnessed remarkable changes. Artificial Intelligence is no longer a distant dream—it’s transforming our lives in real time. From personalized recommendations to autonomous vehicles, AI is everywhere. But with great power comes great responsibility. (Sí, como dijo el Tío Ben — "With great power comes great responsibility.") We must ensure that our innovations are ethical, inclusive, and sustainable. Let’s use technology not just to build smarter machines, but to create a better world for everyone. Together, with passion and purpose, we can shape a brighter tomorrow. Thank you, and let’s get to work!'