In [21]:
from langchain_cohere import CohereEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
from dotenv import load_dotenv

In [7]:
# Sample corpus: one short IPL player profile per entry, as (profile text, team).
# The team name is stored as document metadata so it can be used for filtering.
player_profiles = [
    (
        "Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and leadership.",
        "Royal Challengers Bangalore",
    ),
    (
        "Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm captaincy and explosive batting.",
        "Mumbai Indians",
    ),
    (
        "MS Dhoni, also known as Captain Cool, has led Chennai Super Kings to multiple IPL victories. Famous for his finishing ability and strategic mind.",
        "Chennai Super Kings",
    ),
    (
        "Jasprit Bumrah is one of the best T20 fast bowlers. Playing for Mumbai Indians, he is known for yorkers, pace variations, and match-winning spells.",
        "Mumbai Indians",
    ),
    (
        "Rashid Khan is an exceptional leg-spinner who plays for Gujarat Titans. His quick arm action, googlies, and consistency make him a top T20 bowler.",
        "Gujarat Titans",
    ),
]

# Build one Document per profile; the individual names are kept alongside the list.
doc1, doc2, doc3, doc4, doc5 = (
    Document(page_content=profile_text, metadata={"team": team_name})
    for profile_text, team_name in player_profiles
)

docs = [doc1, doc2, doc3, doc4, doc5]



In [8]:
# Load environment variables from a local .env file
# (presumably where the Cohere API key lives -- confirm against your setup).
load_dotenv()

# Embedding model used for both documents and queries.
embeddings = CohereEmbeddings(model="embed-english-v3.0")

# Chroma collection "sample", stored on disk under ./chroma_db.
vector_store = Chroma(
    collection_name="sample",
    persist_directory="chroma_db",
    embedding_function=embeddings,
)

In [None]:
# Add the documents to the vector store under fixed ids.
# Without explicit ids a fresh random UUID is generated on every run, which
# (a) invalidates the literal id used by the update/delete cells further down and
# (b) stacks duplicate copies into the persisted "chroma_db" collection on each re-run.
# NOTE(review): langchain_chroma is expected to upsert on matching ids, so re-running
# this cell should overwrite rather than duplicate -- confirm for the installed version.
doc_ids = [
    "d8cb3f2b-530c-4cf9-9e88-3da07d33963a",
    "059bc423-2dac-4fb4-b8bb-a84089d9da62",
    "3bafdbfb-4d7c-4634-95ea-fff404cf8e33",
    "35b560ed-aab3-4b7e-8ea8-51bf79785101",
    "6a12d6bc-fdc4-42cb-992e-936cc223c083",
]
vector_store.add_documents(docs, ids=doc_ids)  # returns the ids that were written

['d8cb3f2b-530c-4cf9-9e88-3da07d33963a',
 '059bc423-2dac-4fb4-b8bb-a84089d9da62',
 '3bafdbfb-4d7c-4634-95ea-fff404cf8e33',
 '35b560ed-aab3-4b7e-8ea8-51bf79785101',
 '6a12d6bc-fdc4-42cb-992e-936cc223c083']

In [None]:
# Inspect the contents of the vector store: metadata, embedding vectors and raw text.
vector_store.get(
    include=["metadatas", "embeddings", "documents"],
)

{'ids': ['d8cb3f2b-530c-4cf9-9e88-3da07d33963a',
  '059bc423-2dac-4fb4-b8bb-a84089d9da62',
  '3bafdbfb-4d7c-4634-95ea-fff404cf8e33',
  '35b560ed-aab3-4b7e-8ea8-51bf79785101',
  '6a12d6bc-fdc4-42cb-992e-936cc223c083'],
 'embeddings': array([[ 0.01948547,  0.00149822, -0.01165771, ...,  0.06109619,
          0.04293823,  0.04391479],
        [ 0.01190186,  0.00403976,  0.01571655, ...,  0.11126709,
          0.05737305,  0.04299927],
        [-0.00980377,  0.01152802, -0.01185608, ...,  0.10369873,
         -0.00835419,  0.01618958],
        [ 0.00119591,  0.00785828, -0.00401306, ...,  0.04772949,
          0.00847626,  0.01438141],
        [ 0.02464294,  0.01230621, -0.01789856, ...,  0.06970215,
          0.04608154,  0.0296936 ]], shape=(5, 1024)),
 'documents': ['Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and leadership.',
  "Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians

In [None]:
# Similarity search: k is the number of most-similar documents to return (top 2 here).
vector_store.similarity_search("Who among these are bowler?", k=2)

[Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is one of the best T20 fast bowlers. Playing for Mumbai Indians, he is known for yorkers, pace variations, and match-winning spells.'),
 Document(metadata={'team': 'Gujarat Titans'}, page_content='Rashid Khan is an exceptional leg-spinner who plays for Gujarat Titans. His quick arm action, googlies, and consistency make him a top T20 bowler.')]

In [None]:
# Same search, but each hit is returned together with its similarity score.
# The score is a distance metric, so a lower score means a closer (more similar) document.
vector_store.similarity_search_with_score("Who among these are bowler?", k=2)

[(Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is one of the best T20 fast bowlers. Playing for Mumbai Indians, he is known for yorkers, pace variations, and match-winning spells.'),
  1.0483856201171875),
 (Document(metadata={'team': 'Gujarat Titans'}, page_content='Rashid Khan is an exceptional leg-spinner who plays for Gujarat Titans. His quick arm action, googlies, and consistency make him a top T20 bowler.'),
  1.141374111175537)]

In [None]:
# Metadata filtering: only documents whose metadata matches the filter are returned.
# Any field stored in a document's metadata can be used this way.
vector_store.similarity_search_with_score(
    filter={"team": "Mumbai Indians"},
    query="",
)

[(Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is one of the best T20 fast bowlers. Playing for Mumbai Indians, he is known for yorkers, pace variations, and match-winning spells.'),
  1.5712981224060059),
 (Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm captaincy and explosive batting."),
  1.6565053462982178)]

In [17]:
#update the document in vector store
updated_doc1 = Document(
    page_content=(
        "Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), "
        "is renowned for his aggressive leadership, fitness, and consistency as one of the greatest IPL batsmen."
    ),
    metadata={"team": "Royal Challengers Bangalore"}
)

# Resolve the id from the store instead of pasting a UUID from an earlier run's output:
# ids are generated at insert time, so a literal id breaks on Restart & Run All.
# The Kohli profile is the only sample document tagged with this team.
rcb_ids = vector_store.get(where={"team": "Royal Challengers Bangalore"})["ids"]
if not rcb_ids:
    raise LookupError(
        "No 'Royal Challengers Bangalore' document found in the store; "
        "run the add_documents cell first."
    )
vector_store.update_document(document_id=rcb_ids[0], document=updated_doc1)

In [None]:
# Re-inspect the store: the first entry should now carry the updated text and a new embedding.
vector_store.get(
    include=["metadatas", "embeddings", "documents"],
)

{'ids': ['d8cb3f2b-530c-4cf9-9e88-3da07d33963a',
  '059bc423-2dac-4fb4-b8bb-a84089d9da62',
  '3bafdbfb-4d7c-4634-95ea-fff404cf8e33',
  '35b560ed-aab3-4b7e-8ea8-51bf79785101',
  '6a12d6bc-fdc4-42cb-992e-936cc223c083'],
 'embeddings': array([[-0.00345802, -0.02952576,  0.01864624, ...,  0.09442139,
          0.04013062,  0.05505371],
        [ 0.01190186,  0.00403976,  0.01571655, ...,  0.11126709,
          0.05737305,  0.04299927],
        [-0.00980377,  0.01152802, -0.01185608, ...,  0.10369873,
         -0.00835419,  0.01618958],
        [ 0.00119591,  0.00785828, -0.00401306, ...,  0.04772949,
          0.00847626,  0.01438141],
        [ 0.02464294,  0.01230621, -0.01789856, ...,  0.06970215,
          0.04608154,  0.0296936 ]], shape=(5, 1024)),
 'documents': ['Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership, fitness, and consistency as one of the greatest IPL batsmen.',
  "Rohit Sharma is the most successful captain 

In [19]:
#delete document
# Resolve the id from the store instead of pasting a UUID from an earlier run's output:
# ids are generated at insert time, so a literal id breaks on Restart & Run All.
rcb_ids = vector_store.get(where={"team": "Royal Challengers Bangalore"})["ids"]
if rcb_ids:  # already deleted when this cell is re-run; skip rather than pass an empty id list
    vector_store.delete(ids=rcb_ids[:1])

In [20]:
# Re-inspect the store after the delete: the removed document should no longer be listed.
vector_store.get(
    include=["metadatas", "embeddings", "documents"],
)

{'ids': ['059bc423-2dac-4fb4-b8bb-a84089d9da62',
  '3bafdbfb-4d7c-4634-95ea-fff404cf8e33',
  '35b560ed-aab3-4b7e-8ea8-51bf79785101',
  '6a12d6bc-fdc4-42cb-992e-936cc223c083'],
 'embeddings': array([[ 0.01190186,  0.00403976,  0.01571655, ...,  0.11126709,
          0.05737305,  0.04299927],
        [-0.00980377,  0.01152802, -0.01185608, ...,  0.10369873,
         -0.00835419,  0.01618958],
        [ 0.00119591,  0.00785828, -0.00401306, ...,  0.04772949,
          0.00847626,  0.01438141],
        [ 0.02464294,  0.01230621, -0.01789856, ...,  0.06970215,
          0.04608154,  0.0296936 ]], shape=(4, 1024)),
 'documents': ["Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm captaincy and explosive batting.",
  'MS Dhoni, also known as Captain Cool, has led Chennai Super Kings to multiple IPL victories. Famous for his finishing ability and strategic mind.',
  'Jasprit Bumrah is one of the best T20 fast bowlers. Pla