In [1]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
from dotenv import load_dotenv

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


True

In [2]:
documents = [
    Document(
        page_content="Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.",
        metadata={"name": "Virat Kohli", "role": "Batsman", "era": "Modern", "format": "All"}
    ),
    Document(
        page_content="Sachin Tendulkar, also known as the Little Master, is a legendary Indian cricketer with 100 international centuries. He played international cricket for over 24 years.",
        metadata={"name": "Sachin Tendulkar", "role": "Batsman", "era": "Classic", "format": "All"}
    ),
    Document(
        page_content="MS Dhoni is a former Indian captain famous for his calm demeanor and finishing ability. He led India to victories in the 2007 T20 World Cup, 2011 ODI World Cup, and 2013 Champions Trophy.",
        metadata={"name": "MS Dhoni", "role": "Wicketkeeper-Batsman", "era": "Modern", "format": "Limited Overs"}
    ),
    Document(
        page_content="Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.",
        metadata={"name": "Rohit Sharma", "role": "Batsman", "era": "Modern", "format": "ODI/T20"}
    ),
    Document(
        page_content="Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and deadly yorkers. He is a key player across all formats, especially in death overs.",
        metadata={"name": "Jasprit Bumrah", "role": "Bowler", "era": "Modern", "format": "All"}
    )
]

In [3]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
vector_store = Chroma.from_documents(documents=documents, embedding=embeddings, collection_name="cricket_players", persist_directory="./chroma_db")

In [4]:
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
query = "Who is the highest run scorer?"
results = retriever.invoke(query)

In [8]:
for i, result in enumerate(results):
    # doc = result[0]
    # score = result[1]
    print(f"Result {i+1}:")
    print(f"Content: {result.page_content}")
    print(f"Metadata: {result.metadata}")
    # print(f"Score: {result.score}")
    print("-" * 50)

Result 1:
Content: Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.
Metadata: {'era': 'Modern', 'name': 'Rohit Sharma', 'role': 'Batsman', 'format': 'ODI/T20'}
--------------------------------------------------
Result 2:
Content: Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.
Metadata: {'role': 'Batsman', 'era': 'Modern', 'name': 'Rohit Sharma', 'format': 'ODI/T20'}
--------------------------------------------------


In [22]:
vector_store.similarity_search_with_score("Who is the highest run scorer?")

[(Document(metadata={'role': 'Batsman', 'era': 'Modern', 'format': 'ODI/T20', 'name': 'Rohit Sharma'}, page_content='Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.'),
  0.6388497352600098),
 (Document(metadata={'format': 'All', 'name': 'Virat Kohli', 'era': 'Modern', 'role': 'Batsman'}, page_content='Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.'),
  0.6724350452423096),
 (Document(metadata={'format': 'All', 'era': 'Classic', 'role': 'Batsman', 'name': 'Sachin Tendulkar'}, page_content='Sachin Tendulkar, also known as the Little Master, is a legendary Indian cricketer with 100 international centuries. He played international cricket for over 24 years.'),
  0.6947336196899414),
 (Document(metadata={'format': 'Limited Overs', 'name': 'MS Dhoni', 'era': 'Modern', 'rol

In [27]:
vector_store.similarity_search(query="Best batsman", filter={"role": "Batsman"})

[Document(metadata={'role': 'Batsman', 'era': 'Modern', 'name': 'Virat Kohli', 'format': 'All'}, page_content='Virat Kohli is a former captain of the Indian cricket team and one of the greatest modern batsmen. He is known for his aggressive leadership and consistency across formats.'),
 Document(metadata={'format': 'All', 'era': 'Classic', 'name': 'Sachin Tendulkar', 'role': 'Batsman'}, page_content='Sachin Tendulkar, also known as the Little Master, is a legendary Indian cricketer with 100 international centuries. He played international cricket for over 24 years.'),
 Document(metadata={'era': 'Modern', 'format': 'ODI/T20', 'name': 'Rohit Sharma', 'role': 'Batsman'}, page_content='Rohit Sharma is the current Indian captain in limited-overs formats. He holds the record for the highest individual score in ODIs with 264 runs.')]