In [None]:
# Import necessary libraries and modules
from langchain_openai import OpenAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()  

In [4]:
from langchain.schema import Document

# Create LangChain documents for IPL players
doc1 = Document(
    page_content="Virat Kohli is one of the most successful and consistent batsmen in IPL history.Known for his aggressive batting style and exceptional run-scoring ability, he has been the backbone of Royal Challengers Bangalore for many seasons.",
    metadata={"team": "Royal Challengers Bangalore"}
)

doc2 = Document(
    page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor, explosive batting, and brilliant leadership on the field.",
    metadata={"team": "Mumbai Indians"}
)

doc3 = Document(
    page_content="MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles.His finishing skills, wicket-keeping excellence, and leadership qualities make him one of the greatest players in IPL history.",
    metadata={"team": "Chennai Super Kings"}
)

doc4 = Document(
    page_content="Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his deadly yorkers, consistency, and ability to perform under pressure.",
    metadata={"team": "Mumbai Indians"}
)

doc5 = Document(
    page_content="Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his aggressive batting, accurate left-arm spin, and sharp fielding have made him a match-winner on many occasions.",
    metadata={"team": "Chennai Super Kings"}
)


In [5]:
docs = [doc1, doc2, doc3, doc4, doc5]

In [10]:
# Create a vector store using Chroma and OpenAI embeddings
vector_store = Chroma(
    embedding_function=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    persist_directory="my_chroma_db",
    collection_name="sample"
)

In [11]:
# Add documents to the vector store
vector_store.add_documents(docs)

['de8123b0-c166-43e0-ad17-3e90316684ae',
 'fe876535-1b3e-422d-978f-c0293be11db8',
 '00ee6392-b7f9-4e05-9d77-f0962cfd4b7f',
 '802354cb-d6c1-4782-b39b-ad23913ac35c',
 'd3e3d362-b744-4e05-bf6a-62933a6d7800']

In [14]:
# Check the embedding from the vector store
vector_store.get(include=["embeddings","documents", "metadatas"])

{'ids': ['de8123b0-c166-43e0-ad17-3e90316684ae',
  'fe876535-1b3e-422d-978f-c0293be11db8',
  '00ee6392-b7f9-4e05-9d77-f0962cfd4b7f',
  '802354cb-d6c1-4782-b39b-ad23913ac35c',
  'd3e3d362-b744-4e05-bf6a-62933a6d7800'],
 'embeddings': array([[-0.00168742,  0.06090459, -0.05350135, ..., -0.02980726,
          0.02090832,  0.00157771],
        [-0.00630059,  0.02149991, -0.02465472, ..., -0.00917051,
         -0.02858398,  0.02728118],
        [-0.09095022,  0.03579462,  0.03354594, ..., -0.02800354,
         -0.07701195, -0.01171459],
        [ 0.01451916, -0.03316108, -0.03911339, ..., -0.10427225,
          0.00339566,  0.09204794],
        [ 0.00241315,  0.0432018 , -0.04259784, ..., -0.07994307,
         -0.06304957,  0.01494549]], shape=(5, 384)),
 'documents': ['Virat Kohli is one of the most successful and consistent batsmen in IPL history.Known for his aggressive batting style and exceptional run-scoring ability, he has been the backbone of Royal Challengers Bangalore for many sea

In [17]:
# Get similar documents based on a query
query = "Who is the best captain in IPL?"
vector_store.similarity_search(
    query=query,
    k=1
)

[Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor, explosive batting, and brilliant leadership on the field.")]

In [20]:
# Get similarity search with scores
query = "Who is the best captain in IPL?"
vector_store.similarity_search_with_score(
    query=query,
    k=1
)

[(Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor, explosive batting, and brilliant leadership on the field."),
  0.48918014764785767)]

In [21]:
# Meta data filtering
vector_store.similarity_search(
    filter={"team": "Mumbai Indians"},
    query="Who is the best captain in IPL?",
    k=1
)

[Document(metadata={'team': 'Mumbai Indians'}, page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor, explosive batting, and brilliant leadership on the field.")]

In [24]:
updated_doc1 = Document(
    page_content="Virat Kohli, often called the 'Run Machine', is one of the greatest batsmen in modern cricket.He has consistently dominated all formats with his exceptional batting technique, fitness, and determination. As the former captain of the Indian cricket team, he has numerous records to his name, including being one of the fastest players to reach 8000, 9000, and 10000 ODI runs.",
    metadata={"team": "Royal Challengers Bangalore"}
)

In [27]:
# Update a document in the vector store
vector_store.update_document(document_id="de8123b0-c166-43e0-ad17-3e90316684ae", document=updated_doc1)

In [28]:
# Check the embedding from the vector store
vector_store.get(include=["embeddings","documents", "metadatas"])

{'ids': ['de8123b0-c166-43e0-ad17-3e90316684ae',
  'fe876535-1b3e-422d-978f-c0293be11db8',
  '00ee6392-b7f9-4e05-9d77-f0962cfd4b7f',
  '802354cb-d6c1-4782-b39b-ad23913ac35c',
  'd3e3d362-b744-4e05-bf6a-62933a6d7800'],
 'embeddings': array([[ 0.04452872,  0.08289852, -0.08427917, ..., -0.06355004,
          0.05513578, -0.01004694],
        [-0.00630059,  0.02149991, -0.02465472, ..., -0.00917051,
         -0.02858398,  0.02728118],
        [-0.09095022,  0.03579462,  0.03354594, ..., -0.02800354,
         -0.07701195, -0.01171459],
        [ 0.01451916, -0.03316108, -0.03911339, ..., -0.10427225,
          0.00339566,  0.09204794],
        [ 0.00241315,  0.0432018 , -0.04259784, ..., -0.07994307,
         -0.06304957,  0.01494549]], shape=(5, 384)),
 'documents': ["Virat Kohli, often called the 'Run Machine', is one of the greatest batsmen in modern cricket.He has consistently dominated all formats with his exceptional batting technique, fitness, and determination. As the former captai

In [31]:
# Delete a document from the vector store
vector_store.delete(ids="de8123b0-c166-43e0-ad17-3e90316684ae")

In [32]:
# Check the embedding from the vector store
vector_store.get(include=["embeddings","documents", "metadatas"])

{'ids': ['fe876535-1b3e-422d-978f-c0293be11db8',
  '00ee6392-b7f9-4e05-9d77-f0962cfd4b7f',
  '802354cb-d6c1-4782-b39b-ad23913ac35c',
  'd3e3d362-b744-4e05-bf6a-62933a6d7800'],
 'embeddings': array([[-0.00630059,  0.02149991, -0.02465472, ..., -0.00917051,
         -0.02858398,  0.02728118],
        [-0.09095022,  0.03579462,  0.03354594, ..., -0.02800354,
         -0.07701195, -0.01171459],
        [ 0.01451916, -0.03316108, -0.03911339, ..., -0.10427225,
          0.00339566,  0.09204794],
        [ 0.00241315,  0.0432018 , -0.04259784, ..., -0.07994307,
         -0.06304957,  0.01494549]], shape=(4, 384)),
 'documents': ["Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor, explosive batting, and brilliant leadership on the field.",
  'MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles.His finishing skills, wicket-keeping excellence, and leadership qualities make 