In [22]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Chroma

# Use a stable embedding model
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Documents
docs = [
    Document(
        page_content="Babar Azam is one of the most stylish and consistent batsmen in modern cricket.",
        metadata={"team": "Karachi Kings"}
    ),
    Document(
        page_content="Shan Masood is a solid top-order batsman.",
        metadata={"team": "Multan Sultans"}
    ),
    Document(
        page_content="Shoaib Malik, a veteran all-rounder.",
        metadata={"team": "Peshawar Zalmi"}
    ),
    Document(
        page_content="Shaheen Afridi is one of the premier fast bowlers.",
        metadata={"team": "Lahore Qalandars"}
    ),
    Document(
        page_content="Fakhar Zaman is a destructive opening batsman.",
        metadata={"team": "Islamabad United"}
    )
]


In [23]:
# Create vector store
vector_store = Chroma(
    embedding_function=embedding,
    collection_name="sample",
    persist_directory="chroma_db"
)

In [None]:
# Add documents
vector_store.add_documents(docs)

# Persist
vector_store.persist()



{'ids': ['0e6b2c9e-1198-4cf4-9914-8615da749749', 'a0bc017f-6fe2-4be2-9f16-e511a1e540d1', '303fcb43-0493-45a3-8e97-12af0f7c45f9', 'c61a0210-87c7-490b-8c8f-0db40c971113', 'bd44af84-546e-49b2-94c0-751ab30a0d62', '15a038e9-b30e-4aca-8b11-e4d7bc52cb54', '35bc92a5-70ad-4b50-9e01-c2340073ef8d', '480ec437-5506-4d08-83ec-1c5f24bacbfb', '6c1e3bba-9546-4ed8-8cf4-06e298acbd33', '71750c59-1b53-45cd-9f08-10c108edbbb1'], 'embeddings': None, 'documents': ['Babar Azam is one of the most stylish and consistent batsmen in modern cricket.', 'Shan Masood is a solid top-order batsman.', 'Shoaib Malik, a veteran all-rounder.', 'Shaheen Afridi is one of the premier fast bowlers.', 'Fakhar Zaman is a destructive opening batsman.', 'Babar Azam is one of the most stylish and consistent batsmen in modern cricket.', 'Shan Masood is a solid top-order batsman.', 'Shoaib Malik, a veteran all-rounder.', 'Shaheen Afridi is one of the premier fast bowlers.', 'Fakhar Zaman is a destructive opening batsman.'], 'uris': Non

In [None]:
# Check metadata and documents

print(vector_store.get(include=["metadatas", "documents"]))


{'ids': ['0e6b2c9e-1198-4cf4-9914-8615da749749', 'a0bc017f-6fe2-4be2-9f16-e511a1e540d1', '303fcb43-0493-45a3-8e97-12af0f7c45f9', 'c61a0210-87c7-490b-8c8f-0db40c971113', 'bd44af84-546e-49b2-94c0-751ab30a0d62', '15a038e9-b30e-4aca-8b11-e4d7bc52cb54', '35bc92a5-70ad-4b50-9e01-c2340073ef8d', '480ec437-5506-4d08-83ec-1c5f24bacbfb', '6c1e3bba-9546-4ed8-8cf4-06e298acbd33', '71750c59-1b53-45cd-9f08-10c108edbbb1'], 'embeddings': None, 'documents': ['Babar Azam is one of the most stylish and consistent batsmen in modern cricket.', 'Shan Masood is a solid top-order batsman.', 'Shoaib Malik, a veteran all-rounder.', 'Shaheen Afridi is one of the premier fast bowlers.', 'Fakhar Zaman is a destructive opening batsman.', 'Babar Azam is one of the most stylish and consistent batsmen in modern cricket.', 'Shan Masood is a solid top-order batsman.', 'Shoaib Malik, a veteran all-rounder.', 'Shaheen Afridi is one of the premier fast bowlers.', 'Fakhar Zaman is a destructive opening batsman.'], 'uris': Non

In [26]:
print(vector_store.get(include=["embeddings"]))


{'ids': ['0e6b2c9e-1198-4cf4-9914-8615da749749', 'a0bc017f-6fe2-4be2-9f16-e511a1e540d1', '303fcb43-0493-45a3-8e97-12af0f7c45f9', 'c61a0210-87c7-490b-8c8f-0db40c971113', 'bd44af84-546e-49b2-94c0-751ab30a0d62', '15a038e9-b30e-4aca-8b11-e4d7bc52cb54', '35bc92a5-70ad-4b50-9e01-c2340073ef8d', '480ec437-5506-4d08-83ec-1c5f24bacbfb', '6c1e3bba-9546-4ed8-8cf4-06e298acbd33', '71750c59-1b53-45cd-9f08-10c108edbbb1'], 'embeddings': array([[ 0.01091086,  0.03126683, -0.14648952, ..., -0.07961893,
        -0.00374227, -0.01499702],
       [ 0.04164712, -0.00928451, -0.00303885, ..., -0.06843381,
         0.05586556,  0.00038842],
       [-0.03041375,  0.03073549, -0.07586268, ..., -0.04603247,
         0.02403524,  0.00690805],
       ...,
       [-0.03041375,  0.03073549, -0.07586268, ..., -0.04603247,
         0.02403524,  0.00690805],
       [-0.00469202,  0.01975277, -0.06494538, ..., -0.05647345,
         0.07361937, -0.02279896],
       [ 0.02229059,  0.0785325 , -0.06662194, ..., -0.07336239,

In [28]:
## similarity search 
vector_store.similarity_search(
    query="who is shahid afridi",
    k=2
)

[Document(metadata={'team': 'Lahore Qalandars'}, page_content='Shaheen Afridi is one of the premier fast bowlers.'),
 Document(metadata={'team': 'Lahore Qalandars'}, page_content='Shaheen Afridi is one of the premier fast bowlers.')]

In [None]:
vector_store.similarity_search_with_score(   ## less score means small distance snd better response
    query="who is shahid afridi",
    k=2)

[(Document(metadata={'team': 'Lahore Qalandars'}, page_content='Shaheen Afridi is one of the premier fast bowlers.'),
  0.521966814994812),
 (Document(metadata={'team': 'Lahore Qalandars'}, page_content='Shaheen Afridi is one of the premier fast bowlers.'),
  0.521966814994812)]

In [34]:

## searching based on metadata
vector_store.similarity_search_with_score(   ## less score means small distance snd better response
    query="",
    filter={"team":"Peshawar Zalmi"},
    k=2
    )

[(Document(metadata={'team': 'Peshawar Zalmi'}, page_content='Shoaib Malik, a veteran all-rounder.'),
  1.6533385515213013),
 (Document(metadata={'team': 'Peshawar Zalmi'}, page_content='Shoaib Malik, a veteran all-rounder.'),
  1.6533385515213013)]

In [None]:
## update document
update_shan_masood=Document(
    page_content="""A solid and technically sound left-hand opener, 
    Shan Masood made it to the international side after impressing consistently at junior-level cricket. With Pakistan struggling to put together a solid opening combination, 
    Masood was the option they turned to in the Test series against South Africa in the UAE.""",
        metadata={"team": "Multan Sultans"}
)
vector_store.update_document(
    document_id="a0bc017f-6fe2-4be2-9f16-e511a1e540d1",
    document=update_shan_masood
)

AttributeError: 'str' object has no attribute 'page_content'