In [4]:
import os

import faiss
from dotenv import load_dotenv
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

In [5]:
# Location of the .env file to load. Export DOTENV_PATH to point the notebook
# at a different file on another machine; when it is unset, the original
# author-local path is used so existing behavior is unchanged.
DOTENV_PATH = os.environ.get(
    "DOTENV_PATH",
    "/Users/rhy/Library/CloudStorage/OneDrive-ArizonaStateUniversity/SelfStudy/Agentic & Gen AI with Cloud/.env",
)

# Kept as the last expression so the cell still displays the call's result.
load_dotenv(dotenv_path=DOTENV_PATH)

True

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model shared by the FAISS vector stores built in the cells below.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Take the vector size from the embedding model itself rather than hard-coding
# 384, so the index stays consistent if `embeddings` is switched to a model
# with a different output dimension.
embedding_dim = len(embeddings.embed_query("hello world"))

# Index that ranks stored vectors by L2 distance to the query vector.
index = faiss.IndexFlatL2(embedding_dim)

In [8]:
# Display the repr of the FAISS index object created in the previous cell.
index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x13e73f7b0> >

In [None]:
# Wrap the raw FAISS index in a LangChain vector store. The docstore and the
# position -> document-id mapping both start out empty.
vector_score = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [10]:
# Add three sample sentences to the store; the displayed value is the list of
# ids assigned to them.
vector_score.add_texts(
    texts=["Ai is the future", "AI is the present", "I like Dogs"],
)

['f174b730-3f94-4478-87b4-45c1b85c84e1',
 '0743f914-fd7b-4e2b-b768-595439b246d9',
 'b22f8485-542f-4594-939b-0ab7b9b7da6e']

In [11]:
# Show the store's mapping from index position to docstore id.
vector_score.index_to_docstore_id

{0: 'f174b730-3f94-4478-87b4-45c1b85c84e1',
 1: '0743f914-fd7b-4e2b-b768-595439b246d9',
 2: 'b22f8485-542f-4594-939b-0ab7b9b7da6e'}

In [13]:
# Retrieve the two stored texts closest to the query, then display them.
results = vector_score.similarity_search(
    query="Tell me about AI",
    k=2,
)
results

[Document(id='f174b730-3f94-4478-87b4-45c1b85c84e1', metadata={}, page_content='Ai is the future'),
 Document(id='0743f914-fd7b-4e2b-b768-595439b246d9', metadata={}, page_content='AI is the present')]

In [14]:
from langchain_core.documents import Document

# Ten sample documents; each carries a "source" metadata tag
# ("tweet", "news" or "website") that later cells use for filtering.
document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)
document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)
document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)
document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1M in cash",
    metadata={"source": "news"},
)
document_5 = Document(
    page_content="wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)
document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)
document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)
document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)
document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)
document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

# Collected in definition order for bulk insertion into the vector store.
documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [15]:
# Take the vector size from the embedding model rather than hard-coding 384.
embedding_dim = len(embeddings.embed_query("hello world"))

# Inner-product index: a larger score means a closer match, the opposite of
# the L2-distance index used earlier in the notebook.
index = faiss.IndexFlatIP(embedding_dim)

# Rebuild the store on the new index with an empty docstore and id mapping.
vector_score = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # Declare the metric the index actually uses. Without this the store
    # assumes Euclidean distance, so score thresholds and relevance scores
    # would interpret inner-product scores the wrong way round.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [16]:
# Insert the sample documents; the displayed value is the list of assigned ids.
vector_score.add_documents(
    documents=documents,
)

['a963ef06-87d7-4ad0-9f9c-5e20cd50f338',
 '87b5ed7e-0889-4928-8b1b-94891d06223d',
 '291d83de-1787-4860-b3bc-a07685eb3d21',
 '3b9d1be3-7b4f-4cf2-9498-e4946859b9fb',
 'bb8d2e45-c391-49de-8c26-e519e1fe58f3',
 '885b8a68-251b-4344-929b-420453197feb',
 'ae13e59f-a0a6-4be0-9c39-e43dd0a6c75c',
 'c4685234-e565-425a-a8ae-9fb83bf24b74',
 '32437050-e2bb-4448-8a65-83a2f2d30776',
 '25432dae-88eb-4461-967a-3f9167a707b4']

In [None]:
# Unfiltered search with no explicit k, so the library default applies
# (the recorded run returned four documents).
vector_score.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
)

[Document(id='291d83de-1787-4860-b3bc-a07685eb3d21', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c4685234-e565-425a-a8ae-9fb83bf24b74', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='3b9d1be3-7b4f-4cf2-9498-e4946859b9fb', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1M in cash'),
 Document(id='25432dae-88eb-4461-967a-3f9167a707b4', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [19]:
# Same query, limited to the two closest documents.
vector_score.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
    k=2,
)

[Document(id='291d83de-1787-4860-b3bc-a07685eb3d21', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c4685234-e565-425a-a8ae-9fb83bf24b74', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [20]:
# Same query, restricted to documents whose metadata has source == "tweet".
vector_score.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
    filter={"source": "tweet"},
)

[Document(id='291d83de-1787-4860-b3bc-a07685eb3d21', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c4685234-e565-425a-a8ae-9fb83bf24b74', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='25432dae-88eb-4461-967a-3f9167a707b4', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='a963ef06-87d7-4ad0-9f9c-5e20cd50f338', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

In [26]:
# Combine both constraints: the two closest tweet-sourced documents.
# The hits are kept in `result` so later cells can inspect individual fields.
result = vector_score.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
result

[Document(id='291d83de-1787-4860-b3bc-a07685eb3d21', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='c4685234-e565-425a-a8ae-9fb83bf24b74', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [24]:
# Metadata dict of the first hit in `result`.
result[0].metadata

{'source': 'tweet'}

In [25]:
# Text content of the first hit in `result`.
result[0].page_content

'Building an exciting new project with LangChain - come check it out!'