In [3]:
# !uv pip install langchain chromadb openai langchain_huggingface tiktoken pypdf langchain-community

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

# Sentence-transformers model wrapped as a LangChain embedding object; it is
# handed to the Chroma store further down (the stored vectors have 384 values).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [5]:
from langchain.schema import Document

# One LangChain Document per IPL player: page_content is a one-sentence
# profile, and metadata records the player's team and role.

doc1 = Document(
    metadata={"team": "Royal Challengers Bangalore", "role": "Batsman"},
    page_content="Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.",
)

doc2 = Document(
    metadata={"team": "Mumbai Indians", "role": "Bowler"},
    page_content="Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.",
)

doc3 = Document(
    metadata={"team": "Chennai Super Kings", "role": "Wicket-Keeper"},
    page_content="MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.",
)

doc4 = Document(
    metadata={"team": "Mumbai Indians", "role": "Batsman"},
    page_content="Rohit Sharma is a right-handed batsman and the current captain of the Indian cricket team.",
)

doc5 = Document(
    metadata={"team": "Mumbai Indians", "role": "All-Rounder"},
    page_content="Hardik Pandya is an all-rounder who can contribute with both bat and ball.",
)

In [6]:
# Gather the five player documents into one list; this list is what gets
# passed to Chroma.from_documents in the next cell.

docs = [doc1, doc2, doc3, doc4, doc5]

In [7]:
# Build a Chroma collection named "ipl_players" from the player documents,
# embedding each one with the Hugging Face model, and point the store at the
# "01_chroma_db" directory.
# NOTE(review): no explicit ids are supplied, so re-running this cell against
# the same directory presumably adds the same documents again under new IDs —
# confirm before relying on the collection size.
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="ipl_players",
    persist_directory="01_chroma_db",
)

# Persist the DB
# vector_store.persist()

  return forward_call(*args, **kwargs)


In [8]:
# Add documents to the vector store
# vector_store.add_documents(docs)

In [9]:
# Fetch every record in the collection. The result always carries "ids"; the
# include list additionally requests the embedding vectors, the metadata and
# the document text.
vector_store.get(include=["embeddings", "metadatas", "documents"])

{'ids': ['b686756f-c3ff-46db-b07b-1e6415c81dce',
  '2e7c76a6-1d4c-4627-8277-853760e4e98c',
  'f701db84-ec28-4913-8f72-8af039f329c3',
  '6696ae04-63d7-4d9f-816d-3d142a5e47b0',
  '00186463-3a7b-4fa3-94ce-c42f937dbccb'],
 'embeddings': array([[ 0.01214669,  0.08446035, -0.03096429, ...,  0.06186824,
          0.03981312,  0.00301823],
        [ 0.01034031, -0.00808001, -0.04141308, ..., -0.07776487,
         -0.00688125,  0.11869267],
        [-0.03746631,  0.03963504,  0.04159963, ...,  0.01288937,
          0.02804302, -0.04915711],
        [ 0.00272104,  0.02640508, -0.01630465, ...,  0.05252214,
          0.00943196,  0.00743705],
        [ 0.04585668,  0.02439576, -0.08657685, ..., -0.00734251,
          0.00753077,  0.02940826]], shape=(5, 384)),
 'documents': ['Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.',
  'Jasprit Bumrah is a right-arm fast bowler known for his unique bowling action and yorkers.',
  'MS Dhoni is a wicket-keeper batsman an

In [10]:
# Semantic search: retrieve the stored documents closest to a natural-language
# question. In the recorded run the all-rounder profile is listed before the
# wicket-keeper profile.
query = "Who among the players is a wicket-keeper?"

# k caps how many documents are returned.
results = vector_store.similarity_search(query=query, k=2)
print(results)

[Document(metadata={'team': 'Mumbai Indians', 'role': 'All-Rounder'}, page_content='Hardik Pandya is an all-rounder who can contribute with both bat and ball.'), Document(metadata={'team': 'Chennai Super Kings', 'role': 'Wicket-Keeper'}, page_content='MS Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team.')]


In [11]:
# Repeat the search for the same question through the scored variant of the
# API. The result is only stored here, not displayed.
results_with_score = vector_store.similarity_search_with_score(query=query, k=2)

In [12]:
# Metadata filter: restrict the search to documents whose "role" metadata is
# "Batsman". The query text is left empty in this variant; pass `query`
# instead of "" to search with the earlier question under the same filter.
filter_criteria = {"role": "Batsman"}

filtered_results = vector_store.similarity_search(query="", k=2, filter=filter_criteria)

In [13]:
# Add a revised, longer profile for MS Dhoni.
# NOTE: this does not replace the original doc3 record. add_documents() is
# called with an ID that is not yet in the collection, so a new record is
# inserted and the collection grows from five records to six; the original
# doc3 entry keeps its own ID and text.
# NOTE(review): replacing doc3 in place would need doc3's existing stored ID
# (and presumably the store's update API) — confirm against the Chroma docs.
updated_doc3 = Document(
    metadata={"team": "Chennai Super Kings", "role": "Wicket-Keeper"},
    page_content="Mahendra Singh Dhoni is a wicket-keeper batsman and former captain of the Indian cricket team. He is known for his calm demeanor and finishing abilities.",
)

# The call is the cell's last expression, so the list of IDs it returns is
# displayed as the cell output.
vector_store.add_documents(
    [updated_doc3],
    ids=["5b179085-f3e5-4148-be16-f622e6fc25ef"],
)

# 5b179085-f3e5-4148-be16-f622e6fc25ef

['5b179085-f3e5-4148-be16-f622e6fc25ef']

In [14]:
# List the collection again after the add_documents call above. The recorded
# output shows six IDs: the five original records plus the newly added one.
vector_store.get(include=["embeddings", "metadatas", "documents"])

{'ids': ['b686756f-c3ff-46db-b07b-1e6415c81dce',
  '2e7c76a6-1d4c-4627-8277-853760e4e98c',
  'f701db84-ec28-4913-8f72-8af039f329c3',
  '6696ae04-63d7-4d9f-816d-3d142a5e47b0',
  '00186463-3a7b-4fa3-94ce-c42f937dbccb',
  '5b179085-f3e5-4148-be16-f622e6fc25ef'],
 'embeddings': array([[ 1.21466946e-02,  8.44603479e-02, -3.09642926e-02, ...,
          6.18682392e-02,  3.98131236e-02,  3.01823393e-03],
        [ 1.03403069e-02, -8.08001403e-03, -4.14130762e-02, ...,
         -7.77648687e-02, -6.88124821e-03,  1.18692674e-01],
        [-3.74663100e-02,  3.96350436e-02,  4.15996313e-02, ...,
          1.28893703e-02,  2.80430224e-02, -4.91571091e-02],
        [ 2.72103539e-03,  2.64050793e-02, -1.63046494e-02, ...,
          5.25221378e-02,  9.43196192e-03,  7.43704569e-03],
        [ 4.58566770e-02,  2.43957564e-02, -8.65768492e-02, ...,
         -7.34250713e-03,  7.53076887e-03,  2.94082556e-02],
        [-2.09149960e-02,  5.46629541e-02,  9.73453789e-05, ...,
         -8.30865279e-03, -3.35

In [15]:
# Print only the record IDs currently stored in the collection.
print(vector_store.get()["ids"])

['b686756f-c3ff-46db-b07b-1e6415c81dce', '2e7c76a6-1d4c-4627-8277-853760e4e98c', 'f701db84-ec28-4913-8f72-8af039f329c3', '6696ae04-63d7-4d9f-816d-3d142a5e47b0', '00186463-3a7b-4fa3-94ce-c42f937dbccb', '5b179085-f3e5-4148-be16-f622e6fc25ef']


In [19]:
# Delete one record by its stored ID. The ID below is the second entry in the
# ID listing printed above, which lines up with the Jasprit Bumrah document
# (doc2).
# NOTE(review): the IDs look auto-generated (UUIDs), so this literal presumably
# only matches the recorded run — substitute an ID from your own listing.
deleted_doc2 = vector_store.delete(ids=["2e7c76a6-1d4c-4627-8277-853760e4e98c"])   # hard-coded ID taken from the recorded run

In [20]:
# View the collection after the add and the delete. The recorded output lists
# five IDs: the deleted record's ID is gone, while the original doc3 ID and the
# ID of the added Dhoni profile are both still present.
vector_store.get(include=["embeddings", "metadatas", "documents"])

{'ids': ['b686756f-c3ff-46db-b07b-1e6415c81dce',
  'f701db84-ec28-4913-8f72-8af039f329c3',
  '6696ae04-63d7-4d9f-816d-3d142a5e47b0',
  '00186463-3a7b-4fa3-94ce-c42f937dbccb',
  '5b179085-f3e5-4148-be16-f622e6fc25ef'],
 'embeddings': array([[ 1.21466946e-02,  8.44603479e-02, -3.09642926e-02, ...,
          6.18682392e-02,  3.98131236e-02,  3.01823393e-03],
        [-3.74663100e-02,  3.96350436e-02,  4.15996313e-02, ...,
          1.28893703e-02,  2.80430224e-02, -4.91571091e-02],
        [ 2.72103539e-03,  2.64050793e-02, -1.63046494e-02, ...,
          5.25221378e-02,  9.43196192e-03,  7.43704569e-03],
        [ 4.58566770e-02,  2.43957564e-02, -8.65768492e-02, ...,
         -7.34250713e-03,  7.53076887e-03,  2.94082556e-02],
        [-2.09149960e-02,  5.46629541e-02,  9.73453789e-05, ...,
         -8.30865279e-03, -3.35441157e-03,  8.37296306e-04]],
       shape=(5, 384)),
 'documents': ['Virat Kohli is a right-handed batsman and former captain of the Indian cricket team.',
  'MS Dhon