In [1]:
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_core.documents import Document

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

True

In [3]:
embeddings = HuggingFaceEndpointEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

In [4]:
doc1 = Document(page_content="Shahid Afridi is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.", metadata={"team":"Peshawar Zalmi"})

doc2 = Document(page_content="Shoaib Akhtar is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a fast bowler.", metadata={"team":"Karachi Kings"})

doc3 = Document(page_content="Shoaib Malik is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.", metadata={"team":"Multan Sultans"})

doc4 = Document(page_content="Sarfraz Ahmed is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a wicket-keeper batsman.", metadata={"team":"Quetta Gladiators"})

doc5 = Document(page_content="Babar Azam is a Pakistani cricketer and captain of the Pakistan national cricket team. He is a batsman.", metadata={"team":"Karachi Kings"})

docs = [doc1, doc2, doc3, doc4, doc5]

In [5]:
vector_store = Chroma(
    collection_name="cricket_players",
    embedding_function=embeddings
)

In [6]:
vector_store.add_documents(docs)

['067ab63f-42b4-4742-ab12-712ca232ed04',
 'd1940e12-74a3-4cc3-8706-0c8e31b40427',
 'af5a12f1-f6b1-4a87-bde2-41b2b25a5a3c',
 '741ce240-5717-4cc4-a579-bf136a007c19',
 'b961624e-4504-4aff-af48-4523b3157938']

In [7]:
vector_store.get()

{'ids': ['067ab63f-42b4-4742-ab12-712ca232ed04',
  'd1940e12-74a3-4cc3-8706-0c8e31b40427',
  'af5a12f1-f6b1-4a87-bde2-41b2b25a5a3c',
  '741ce240-5717-4cc4-a579-bf136a007c19',
  'b961624e-4504-4aff-af48-4523b3157938'],
 'embeddings': None,
 'documents': ['Shahid Afridi is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.',
  'Shoaib Akhtar is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a fast bowler.',
  'Shoaib Malik is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.',
  'Sarfraz Ahmed is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a wicket-keeper batsman.',
  'Babar Azam is a Pakistani cricketer and captain of the Pakistan national cricket team. He is a batsman.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Peshawar Zalmi'},
  {'team': 'Karachi Kin

In [8]:
vector_store.similarity_search("Who is the all-rounder from Peshawer Zalmi?", k=1)

[Document(id='067ab63f-42b4-4742-ab12-712ca232ed04', metadata={'team': 'Peshawar Zalmi'}, page_content='Shahid Afridi is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.')]

In [9]:
vector_store.similarity_search_with_score(
     query="",
     filter={"team": "Peshawar Zalmi"},
)

[(Document(id='067ab63f-42b4-4742-ab12-712ca232ed04', metadata={'team': 'Peshawar Zalmi'}, page_content='Shahid Afridi is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.'),
  1.832358479499817)]

In [25]:
vector_store.get(where={"team": "Peshawar Zalmi"})['ids']

['067ab63f-42b4-4742-ab12-712ca232ed04']

In [27]:
vector_store.update_document(
    document_id='067ab63f-42b4-4742-ab12-712ca232ed04',
    document=Document(
        page_content="Shahid Afridi is a Bowler",
        metadata={"team": "Peshawar Zalmi"}
    )
)

In [28]:
vector_store.get()

{'ids': ['067ab63f-42b4-4742-ab12-712ca232ed04',
  'd1940e12-74a3-4cc3-8706-0c8e31b40427',
  'af5a12f1-f6b1-4a87-bde2-41b2b25a5a3c',
  '741ce240-5717-4cc4-a579-bf136a007c19',
  'b961624e-4504-4aff-af48-4523b3157938'],
 'embeddings': None,
 'documents': ['Shahid Afridi is a Bowler',
  'Shoaib Akhtar is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a fast bowler.',
  'Shoaib Malik is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.',
  'Sarfraz Ahmed is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a wicket-keeper batsman.',
  'Babar Azam is a Pakistani cricketer and captain of the Pakistan national cricket team. He is a batsman.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Peshawar Zalmi'},
  {'team': 'Karachi Kings'},
  {'team': 'Multan Sultans'},
  {'team': 'Quetta Gladiators'},
  {'team': 'Karachi Kin

In [29]:
vector_store.get(where={"team": "Peshawar Zalmi"})

{'ids': ['067ab63f-42b4-4742-ab12-712ca232ed04'],
 'embeddings': None,
 'documents': ['Shahid Afridi is a Bowler'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Peshawar Zalmi'}]}

In [30]:
vector_store.delete(ids="067ab63f-42b4-4742-ab12-712ca232ed04")

In [31]:
vector_store.get()

{'ids': ['d1940e12-74a3-4cc3-8706-0c8e31b40427',
  'af5a12f1-f6b1-4a87-bde2-41b2b25a5a3c',
  '741ce240-5717-4cc4-a579-bf136a007c19',
  'b961624e-4504-4aff-af48-4523b3157938'],
 'embeddings': None,
 'documents': ['Shoaib Akhtar is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a fast bowler.',
  'Shoaib Malik is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a all-rounder.',
  'Sarfraz Ahmed is a former Pakistani cricketer and captain of the Pakistan national cricket team. He is a wicket-keeper batsman.',
  'Babar Azam is a Pakistani cricketer and captain of the Pakistan national cricket team. He is a batsman.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'team': 'Karachi Kings'},
  {'team': 'Multan Sultans'},
  {'team': 'Quetta Gladiators'},
  {'team': 'Karachi Kings'}]}