In [None]:
from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv). As the last
# expression in the cell, the call's return value is shown as the cell output.
load_dotenv()

True

In [3]:
doc1 = Document(
    page_content="Virat kohli is one of the most successful and consistent batsmen in the world. Known for his aggressive batting style and ability to chase down targets, he has numerous records to his name.",
    metadata={"team": "Royal Challengers Bangalore"} 
)

doc2 = Document(
    page_content="Rohit Sharma is the captain of the Indian cricket team and is known for his elegant batting style. He has a unique ability to play long innings and is one of the few players to score multiple double centuries in ODIs.",
    metadata={"team": "Mumbai Indians"}
)

doc3 = Document(
    page_content="MS Dhoni is a legendary cricketer and former captain of the Indian cricket team. He is known for his calm demeanor and exceptional finishing skills in limited-overs cricket.",
    metadata={"team": "Chennai Super Kings"}
)

doc4 = Document(
    page_content="Jasprit Bumrah is one of the best fast bowlers in the world. Known for his unique bowling action and ability to bowl yorkers at will, he has been a key player for India in all formats of the game.",
    metadata={"team": "Mumbai Indians"}
)

doc4 = Document(
    page_content="Hardik Pandya is an all-rounder known for his explosive batting and effective bowling. He has been a game-changer for India in limited-overs cricket.",
    metadata={"team": "Gujarat Titans"}
)

doc5 = Document(
    page_content="Ravindra Jadeja is one of the best all-rounders in the world. He is known for his exceptional fielding, left-arm spin bowling, and explosive batting.",
    metadata={"team": "Chennai Super Kings"}
)


In [4]:
docs = [doc1, doc2, doc3, doc4, doc5]

In [None]:
# Build the Chroma vector store for the "cricket_players" collection, persisted
# under the "chroma_db" directory. HuggingFaceEmbeddings is constructed without
# arguments, so the library's default embedding model is used.
vector_store = Chroma(
    collection_name="cricket_players",
    persist_directory="chroma_db",
    embedding_function=HuggingFaceEmbeddings(),
)

  from .autonotebook import tqdm as notebook_tqdm
  vector_store = Chroma(


In [None]:
# Store every document from `docs` in the collection. The call returns the list
# of IDs assigned to the stored documents, which is displayed as the cell output.
vector_store.add_documents(documents=docs)

['3ffdac26-451a-4a45-8b94-4e720c3ec048',
 '29e52398-7acb-43c2-8cc5-a98f9208e97f',
 'ae1d57f7-f4aa-4f77-8040-1531560ef3f6',
 '1c2c67cd-effb-46af-855b-f9258fc3c18c',
 '03ac6920-38db-4a53-9ea2-2174536b5f14']

In [8]:
# Inspect the whole collection, explicitly requesting the embedding vectors in
# addition to the document text and metadata.
vector_store.get(
    include=[
        "documents",
        "metadatas",
        "embeddings",
    ],
)

{'ids': ['3ffdac26-451a-4a45-8b94-4e720c3ec048',
  '29e52398-7acb-43c2-8cc5-a98f9208e97f',
  'ae1d57f7-f4aa-4f77-8040-1531560ef3f6',
  '1c2c67cd-effb-46af-855b-f9258fc3c18c',
  '03ac6920-38db-4a53-9ea2-2174536b5f14'],
 'embeddings': array([[-0.03409765, -0.00386583, -0.00877156, ...,  0.0657433 ,
         -0.00708175, -0.03216153],
        [-0.03392847, -0.01585073, -0.01058057, ...,  0.01322433,
         -0.01715081, -0.03074991],
        [-0.04560737,  0.01741896, -0.00883403, ...,  0.04687298,
         -0.00216252,  0.00471343],
        [ 0.00753436, -0.05783515, -0.01420634, ...,  0.03162378,
         -0.03665972, -0.02414312],
        [ 0.01240405, -0.02023672,  0.00796079, ...,  0.02058575,
          0.01793223, -0.03247325]], shape=(5, 768)),
 'documents': ['Virat kohli is one of the most successful and consistent batsmen in the world. Known for his aggressive batting style and ability to chase down targets, he has numerous records to his name.',
  'Rohit Sharma is the captain o

In [None]:
# Look up a single record by its ID (one of the IDs returned when the documents
# were added) and display what the store holds for it.
result = vector_store.get(ids="03ac6920-38db-4a53-9ea2-2174536b5f14")
result

{'ids': ['03ac6920-38db-4a53-9ea2-2174536b5f14'],
 'embeddings': None,
 'documents': ['Ravindra Jadeja is one of the best all-rounders in the world. He is known for his exceptional fielding, left-arm spin bowling, and explosive batting.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Chennai Super Kings'}]}

In [25]:
# Similarity search: ask a natural-language question and return only the single
# best-matching document (k=1).
query = "Who is the best batsman?"
vector_store.similarity_search(query=query, k=1)

[Document(metadata={'team': 'Royal Challengers Bangalore'}, page_content='Virat kohli is one of the most successful and consistent batsmen in the world. Known for his aggressive batting style and ability to chase down targets, he has numerous records to his name.')]

In [None]:
# Same question as above, but return up to five matches together with their
# scores. A lower score means a closer match to the query.
vector_store.similarity_search_with_score(query=query, k=5)

[(Document(metadata={'team': 'Royal Challengers Bangalore'}, page_content='Virat kohli is one of the most successful and consistent batsmen in the world. Known for his aggressive batting style and ability to chase down targets, he has numerous records to his name.'),
  0.8112376928329468),
 (Document(metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is one of the best all-rounders in the world. He is known for his exceptional fielding, left-arm spin bowling, and explosive batting.'),
  0.8738961815834045),
 (Document(metadata={'team': 'Mumbai Indians'}, page_content='Rohit Sharma is the captain of the Indian cricket team and is known for his elegant batting style. He has a unique ability to play long innings and is one of the few players to score multiple double centuries in ODIs.'),
  0.9843034744262695),
 (Document(metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni is a legendary cricketer and former captain of the Indian cricket team. He is known 

In [31]:
# Metadata filtering: only documents whose "team" metadata equals
# "Mumbai Indians" are considered. The query text is deliberately empty, so the
# filter alone decides which documents come back.
vector_store.similarity_search_with_score(
    filter={"team": "Mumbai Indians"},
    query="",
)

[(Document(metadata={'team': 'Mumbai Indians'}, page_content='Rohit Sharma is the captain of the Indian cricket team and is known for his elegant batting style. He has a unique ability to play long innings and is one of the few players to score multiple double centuries in ODIs.'),
  1.953268051147461)]

In [32]:
# Replace the content of an existing record: build the new document first, then
# overwrite the record stored under the given ID with it.
updated_doc = Document(
    metadata={"team": "Royal Challengers Bangalore"},
    page_content="Virat kohli is from RCB team and yet being so much best in batting he is not able to win the IPL trophy for his team. He is one of the best batsman in the world.",
)

vector_store.update_document(
    document=updated_doc,
    document_id="3ffdac26-451a-4a45-8b94-4e720c3ec048",
)

In [34]:
# Verify the update by fetching the record through the same ID that was passed
# to update_document, rather than taking whichever record the store happens to
# return first (limit=1 carries no guarantee that it is the updated one).
vector_store.get(ids=["3ffdac26-451a-4a45-8b94-4e720c3ec048"])

{'ids': ['3ffdac26-451a-4a45-8b94-4e720c3ec048'],
 'embeddings': None,
 'documents': ['Virat kohli is from RCB team and yet being so much best in batting he is not able to win the IPL trophy for his team. He is one of the best batsman in the world.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Royal Challengers Bangalore'}]}

In [38]:
# Delete the document
# Removes the record stored under this ID, i.e. the same ID that was used for
# the update earlier in the notebook.
vector_store.delete(ids=["3ffdac26-451a-4a45-8b94-4e720c3ec048"])

In [39]:
# List what is left in the collection to confirm that the deleted ID is gone.
vector_store.get()

{'ids': ['29e52398-7acb-43c2-8cc5-a98f9208e97f',
  'ae1d57f7-f4aa-4f77-8040-1531560ef3f6',
  '1c2c67cd-effb-46af-855b-f9258fc3c18c',
  '03ac6920-38db-4a53-9ea2-2174536b5f14'],
 'embeddings': None,
 'documents': ['Rohit Sharma is the captain of the Indian cricket team and is known for his elegant batting style. He has a unique ability to play long innings and is one of the few players to score multiple double centuries in ODIs.',
  'MS Dhoni is a legendary cricketer and former captain of the Indian cricket team. He is known for his calm demeanor and exceptional finishing skills in limited-overs cricket.',
  'Hardik Pandya is an all-rounder known for his explosive batting and effective bowling. He has been a game-changer for India in limited-overs cricket.',
  'Ravindra Jadeja is one of the best all-rounders in the world. He is known for his exceptional fielding, left-arm spin bowling, and explosive batting.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'meta