In [1]:
from langchain_chroma import Chroma

In [2]:
from langchain.schema import Document

In [3]:

# Demo corpus: four short player bios, each tagged with the player's team.
# The text is spelled out line by line so the leading newline, the four-space
# indent and the trailing spaces that are part of each stored string stay
# visible (and survive editors that strip trailing whitespace).
doc1 = Document(
    page_content=(
        "\n    Virat Kohli – Known as the “Run Machine,” \n"
        "    Virat is one of the most consistent batsmen in the world, \n"
        "    admired for his aggressive batting style and match-winning performances across formats."
    ),
    metadata={"team": "RCB"},
)

doc2 = Document(
    page_content=(
        "\n    Rohit Sharma – The “Hitman” of Indian cricket, \n"
        "    Rohit is famous for his elegant batting, ability to score big hundreds, \n"
        "    and holding the record for the highest individual ODI score (264)."
    ),
    metadata={"team": "MI"},
)

doc3 = Document(
    page_content=(
        "\n    Jasprit Bumrah – A world-class fast bowler with a unique action, \n"
        "    Bumrah is known for his deadly yorkers, pinpoint accuracy, \n"
        "    and match-winning spells in all formats."
    ),
    metadata={"team": "MI"},
)

doc4 = Document(
    page_content=(
        "\n    Ravindra Jadeja – A true all-rounder, \n"
        "    Jadeja is valued for his explosive batting, accurate left-arm spin bowling, \n"
        "    and exceptional fielding skills."
    ),
    metadata={"team": "CSK"},
)


In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings

In [5]:
# Vector store handle for the 'sample' collection; Chroma keeps its data in
# the 'chroma-db-created' directory and embeds text with all-MiniLM-L6-v2.
# NOTE(review): the recorded output for this cell echoes the
# HuggingFaceEmbeddings line, which looks like the tail of a warning about
# that class — confirm whether the import should move to a newer package.
vs = Chroma(
    collection_name="sample",
    persist_directory="chroma-db-created",  # on-disk location of the store
    embedding_function=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
)

  embedding_function= HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),


In [6]:
# Batch handed to the vector store, in insertion order.
docs = [doc1, doc2, doc3, doc4]

In [7]:
# Insert the four documents under fixed ids (one per entry of `docs`, in
# order) instead of letting the store generate random UUIDs.
# Why: the update_document and delete cells further down address documents by
# these exact ids, so with random ids they would miss on any fresh store.
# Fixed ids also keep re-runs against the persisted directory from piling up
# duplicate copies of the same four documents.
# The call still returns the list of ids, which the cell displays.
vs.add_documents(
    docs,
    ids=[
        'ab12596c-cb28-4365-8370-1d13a6bbd856',  # doc1
        '70981c68-5665-4808-a54e-d9c97a8531e6',  # doc2
        'a8951195-69b6-45c0-9122-43006e52ea60',  # doc3
        'aee21396-4734-43a3-912d-ed5b8d9bb5ab',  # doc4
    ],
)

['ab12596c-cb28-4365-8370-1d13a6bbd856',
 '70981c68-5665-4808-a54e-d9c97a8531e6',
 'a8951195-69b6-45c0-9122-43006e52ea60',
 'aee21396-4734-43a3-912d-ed5b8d9bb5ab']

<!-- ['ab12596c-cb28-4365-8370-1d13a6bbd856',
 '70981c68-5665-4808-a54e-d9c97a8531e6',
 'a8951195-69b6-45c0-9122-43006e52ea60',
 'aee21396-4734-43a3-912d-ed5b8d9bb5ab'] -->

In [10]:
# Dump the collection: ids plus the stored vectors, raw texts and metadata.
vs.get(
    include=[
        'embeddings',
        'documents',
        'metadatas',
    ]
)

{'ids': ['ab12596c-cb28-4365-8370-1d13a6bbd856',
  '70981c68-5665-4808-a54e-d9c97a8531e6',
  'a8951195-69b6-45c0-9122-43006e52ea60',
  'aee21396-4734-43a3-912d-ed5b8d9bb5ab'],
 'embeddings': array([[ 0.03101856,  0.09696905, -0.09267526, ..., -0.0657092 ,
          0.07973398,  0.01059875],
        [ 0.06148205,  0.03313651, -0.10538162, ..., -0.03063969,
         -0.0005053 ,  0.00797402],
        [ 0.01353442, -0.02332489, -0.05451009, ..., -0.09496049,
          0.00836866,  0.12527466],
        [ 0.00501741,  0.0428577 , -0.04647219, ..., -0.11968888,
         -0.05092554,  0.01488515]], shape=(4, 384)),
 'documents': ['\n    Virat Kohli – Known as the “Run Machine,” \n    Virat is one of the most consistent batsmen in the world, \n    admired for his aggressive batting style and match-winning performances across formats.',
  '\n    Rohit Sharma – The “Hitman” of Indian cricket, \n    Rohit is famous for his elegant batting, ability to score big hundreds, \n    and holding the reco

In [15]:
# Semantic lookup: return the two stored documents closest to the query.
vs.similarity_search(
    k=2,  # number of nearest documents to return
    query='who is best batsman?',
)

[Document(id='70981c68-5665-4808-a54e-d9c97a8531e6', metadata={'team': 'MI'}, page_content='\n    Rohit Sharma – The “Hitman” of Indian cricket, \n    Rohit is famous for his elegant batting, ability to score big hundreds, \n    and holding the record for the highest individual ODI score (264).'),
 Document(id='ab12596c-cb28-4365-8370-1d13a6bbd856', metadata={'team': 'RCB'}, page_content='\n    Virat Kohli – Known as the “Run Machine,” \n    Virat is one of the most consistent batsmen in the world, \n    admired for his aggressive batting style and match-winning performances across formats.')]

In [19]:
# Same lookup, but each hit comes back as a (Document, score) pair.
# In the recorded output the first-ranked hit carries the smaller score,
# i.e. the score behaves like a distance (lower = closer match).
vs.similarity_search_with_score(
    k=2,  # number of nearest documents to return
    query='who is best batsman?',
)

[(Document(id='70981c68-5665-4808-a54e-d9c97a8531e6', metadata={'team': 'MI'}, page_content='\n    Rohit Sharma – The “Hitman” of Indian cricket, \n    Rohit is famous for his elegant batting, ability to score big hundreds, \n    and holding the record for the highest individual ODI score (264).'),
  0.820034384727478),
 (Document(id='ab12596c-cb28-4365-8370-1d13a6bbd856', metadata={'team': 'RCB'}, page_content='\n    Virat Kohli – Known as the “Run Machine,” \n    Virat is one of the most consistent batsmen in the world, \n    admired for his aggressive batting style and match-winning performances across formats.'),
  0.8626238107681274)]

In [20]:
# Update an existing document: replace the stored content for a given id

In [23]:
# Replacement content for the Virat Kohli entry (team tag unchanged).
new_doc = Document(
    metadata={"team": "RCB"},
    page_content="virat kohli is the captain of RCB ",
)

In [24]:
# Overwrite the document stored under this id with `new_doc`.
vs.update_document(
    document_id='ab12596c-cb28-4365-8370-1d13a6bbd856',
    document=new_doc,
)

In [26]:
# Check the update: the single best match should now be the rewritten entry.
vs.similarity_search(
    k=1,
    query='who is captain of RCB',
)

[Document(id='ab12596c-cb28-4365-8370-1d13a6bbd856', metadata={'team': 'RCB'}, page_content='virat kohli is the captain of RCB ')]

In [27]:
# Remove one document from the collection by its id.
vs.delete(
    ids=[
        'aee21396-4734-43a3-912d-ed5b8d9bb5ab',
    ]
)

In [30]:
# Inspect the collection again to confirm the update and the deletion.
vs.get(
    include=[
        'embeddings',
        'documents',
        'metadatas',
    ]
)

{'ids': ['ab12596c-cb28-4365-8370-1d13a6bbd856',
  '70981c68-5665-4808-a54e-d9c97a8531e6',
  'a8951195-69b6-45c0-9122-43006e52ea60'],
 'embeddings': array([[-0.02296758,  0.09286902, -0.08985404, ..., -0.00612818,
          0.07119832, -0.06337535],
        [ 0.06148205,  0.03313651, -0.10538162, ..., -0.03063969,
         -0.0005053 ,  0.00797402],
        [ 0.01353442, -0.02332489, -0.05451009, ..., -0.09496049,
          0.00836866,  0.12527466]], shape=(3, 384)),
 'documents': ['virat kohli is the captain of RCB ',
  '\n    Rohit Sharma – The “Hitman” of Indian cricket, \n    Rohit is famous for his elegant batting, ability to score big hundreds, \n    and holding the record for the highest individual ODI score (264).',
  '\n    Jasprit Bumrah – A world-class fast bowler with a unique action, \n    Bumrah is known for his deadly yorkers, pinpoint accuracy, \n    and match-winning spells in all formats.'],
 'uris': None,
 'included': ['embeddings', 'documents', 'metadatas'],
 'data'