In [1]:
# !uv pip install langchain faiss-cpu openai langchain_huggingface tiktoken pypdf langchain-community

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document

# Hugging Face embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Create Langchain documents for IPL Players

doc1 = Document(
    page_content="Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.",
    metadata={"team": "Royal Challengers Bangalore", "role": "Batsman"}
)

doc2 = Document (
    page_content="Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.",
    metadata={"team": "Mumbai Indians", "role": "Bowler"}
)

doc3 = Document (
    page_content="MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.",
    metadata={"team": "Chennai Super Kings", "role": "Wicket-Keeper"}
)

doc4 = Document (
    page_content="Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.",
    metadata={"team": "Mumbai Indians", "role": "Batsman"}
)

doc5 = Document (
    page_content="Hardik Pandya is an all-rounder who can contribute with both bat and ball.",
    metadata={"team": "Mumbai Indians", "role": "All-Rounder"}
)

In [4]:
# List of documents

docs = [doc1, doc2, doc3, doc4, doc5]

In [5]:
# Create FAISS DB using HuggingFace embeddings
vector_store = FAISS.from_documents(
    documents=docs,
    embedding=embeddings
)


  return forward_call(*args, **kwargs)


In [6]:
# Save FAISS index
vector_store.save_local("02_FAISS_db")

In [7]:
# Load FAISS index back
loaded_vector_store = FAISS.load_local("02_FAISS_db", embeddings, allow_dangerous_deserialization=True)
print(f"Number of documents in loaded FAISS DB: {loaded_vector_store.index.ntotal}")

Number of documents in loaded FAISS DB: 5


In [8]:
# Inspect all stored docs in FAISS
for doc_id, doc in vector_store.docstore._dict.items():
    print(f"ID: {doc_id}")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)

ID: a4457e49-4d31-46b5-8fdf-aaedcc2353c5
Content: Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Royal Challengers Bangalore', 'role': 'Batsman'}
--------------------------------------------------
ID: f7172fcc-6e3f-41f2-9806-fc2074b29e8f
Content: Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.
Metadata: {'team': 'Mumbai Indians', 'role': 'Bowler'}
--------------------------------------------------
ID: ccfc415f-5288-4d71-9f9d-b080879eca92
Content: MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}
--------------------------------------------------
ID: b741b752-021f-4420-b35b-e496b3b5e682
Content: Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.
Metadata: {'team': 'Mumbai Indians', 'role': 'Batsman'}
--------------------------------------------------

In [9]:
# Search for similar documents
query = "Who among the players is a wicket-keeper?"

results = vector_store.similarity_search(query, k=2)  # k is the number of similar documents to retrieve
print(results)

[Document(id='86d9ce1d-4cd0-459d-8e4b-3fe3c354e2b4', metadata={'team': 'Mumbai Indians', 'role': 'All-Rounder'}, page_content='Hardik Pandya is an all-rounder who can contribute with both bat and ball.'), Document(id='ccfc415f-5288-4d71-9f9d-b080879eca92', metadata={'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}, page_content='MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.')]


In [10]:
# search with similarity score

results_with_score = vector_store.similarity_search_with_score(query, k=2)
print(results_with_score)

[(Document(id='86d9ce1d-4cd0-459d-8e4b-3fe3c354e2b4', metadata={'team': 'Mumbai Indians', 'role': 'All-Rounder'}, page_content='Hardik Pandya is an all-rounder who can contribute with both bat and ball.'), np.float32(0.9861963)), (Document(id='ccfc415f-5288-4d71-9f9d-b080879eca92', metadata={'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}, page_content='MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.'), np.float32(1.0380211))]


In [11]:
# meta data filter
filter_criteria = {"role": "Batsman"}

filtered_results = vector_store.similarity_search(
    # query, k=2, filter=filter_criteria
    query="", k=2, filter=filter_criteria
)
print(filtered_results)

[Document(id='b741b752-021f-4420-b35b-e496b3b5e682', metadata={'team': 'Mumbai Indians', 'role': 'Batsman'}, page_content='Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.'), Document(id='a4457e49-4d31-46b5-8fdf-aaedcc2353c5', metadata={'team': 'Royal Challengers Bangalore', 'role': 'Batsman'}, page_content='Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.')]


In [12]:
# update document 
updated_doc3 = Document (
    page_content="Mahendra Singh Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team. He is known for his calm demeanor and finishing abilities." ,
    metadata={"team": "Chennai Super Kings", "role": "Wicket-Keeper"}
) 

vector_store.add_documents([updated_doc3], ids=["5b179085-f3e5-4148-be16-f622e6fc25ef"])  # Assuming doc3 has an ID of "3"

# 5b179085-f3e5-4148-be16-f622e6fc25ef

['5b179085-f3e5-4148-be16-f622e6fc25ef']

In [13]:
# View documents in the collection
# Inspect all stored docs in FAISS
for doc_id, doc in vector_store.docstore._dict.items():
    print(f"ID: {doc_id}")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)


ID: a4457e49-4d31-46b5-8fdf-aaedcc2353c5
Content: Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Royal Challengers Bangalore', 'role': 'Batsman'}
--------------------------------------------------
ID: f7172fcc-6e3f-41f2-9806-fc2074b29e8f
Content: Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.
Metadata: {'team': 'Mumbai Indians', 'role': 'Bowler'}
--------------------------------------------------
ID: ccfc415f-5288-4d71-9f9d-b080879eca92
Content: MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}
--------------------------------------------------
ID: b741b752-021f-4420-b35b-e496b3b5e682
Content: Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.
Metadata: {'team': 'Mumbai Indians', 'role': 'Batsman'}
--------------------------------------------------

In [14]:
# Delete document
deleted_doc2 = vector_store.delete(ids=["5b179085-f3e5-4148-be16-f622e6fc25ef"])   # Replace "3" with your actual stored ID

In [15]:
# View documents in the collection after update and delete
# Inspect all stored docs in FAISS
for doc_id, doc in vector_store.docstore._dict.items():
    print(f"ID: {doc_id}")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)

ID: a4457e49-4d31-46b5-8fdf-aaedcc2353c5
Content: Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Royal Challengers Bangalore', 'role': 'Batsman'}
--------------------------------------------------
ID: f7172fcc-6e3f-41f2-9806-fc2074b29e8f
Content: Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.
Metadata: {'team': 'Mumbai Indians', 'role': 'Bowler'}
--------------------------------------------------
ID: ccfc415f-5288-4d71-9f9d-b080879eca92
Content: MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.
Metadata: {'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}
--------------------------------------------------
ID: b741b752-021f-4420-b35b-e496b3b5e682
Content: Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.
Metadata: {'team': 'Mumbai Indians', 'role': 'Batsman'}
--------------------------------------------------