In [63]:
from sentence_transformers import  SentenceTransformer
from langchain.embeddings import  HuggingFaceBgeEmbeddings
from langchain.schema import Document
from langchain.vectorstores import  Chroma

1. Initialize model

In [57]:
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

2. Create LangChain Docs

In [58]:
# Create LangChain documents for famous scientists

doc1 = Document(
    page_content="Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.",
    metadata={"field": "Theoretical Physics"}
)

doc2 = Document(
    page_content="Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.",
    metadata={"field": "Chemistry"}
)

doc3 = Document(
    page_content="Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centuries.",
    metadata={"field": "Classical Physics"}
)

doc4 = Document(
    page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.",
    metadata={"field": "Molecular Biology"}
)

doc5 = Document(
    page_content="Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.",
    metadata={"field": "Computer Science"}
)

docs = [doc1, doc2,doc3, doc4,doc5]

3. Create Vector Store

In [67]:
chroma_store = Chroma(embedding_function=embedding_model,persist_directory="my_chroma_db",collection_name="sample")

4. Add documents

In [68]:
chroma_store.add_documents(docs)

['84d65304-6849-4c72-bb67-517dbcfffcea',
 '869ae007-1164-4c8e-a281-385a9d6024f1',
 '7d196e50-9644-4605-9fdd-84b53f12b44d',
 '3dc9c8ab-8f60-47bc-a618-dfacf78c187a',
 'cb0d6f59-79f0-4af7-8d2e-8e125e101c9a']

5. View documents

In [69]:
chroma_store.get()

{'ids': ['84d65304-6849-4c72-bb67-517dbcfffcea',
  '869ae007-1164-4c8e-a281-385a9d6024f1',
  '7d196e50-9644-4605-9fdd-84b53f12b44d',
  '3dc9c8ab-8f60-47bc-a618-dfacf78c187a',
  'cb0d6f59-79f0-4af7-8d2e-8e125e101c9a'],
 'embeddings': None,
 'documents': ['Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.',
  'Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.',
  'Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centuries.',
  "Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.",
  'Alan Turing is considered the father of computer

In [73]:
chroma_store.get(include=["embeddings","metadatas", "documents"])

{'ids': ['84d65304-6849-4c72-bb67-517dbcfffcea',
  '869ae007-1164-4c8e-a281-385a9d6024f1',
  '7d196e50-9644-4605-9fdd-84b53f12b44d',
  '3dc9c8ab-8f60-47bc-a618-dfacf78c187a',
  'cb0d6f59-79f0-4af7-8d2e-8e125e101c9a'],
 'embeddings': array([[-0.02001567,  0.01622077,  0.00762864, ...,  0.01686271,
         -0.010849  ,  0.01430714],
        [ 0.02898663,  0.07278389, -0.04395222, ...,  0.00076877,
          0.0473913 ,  0.01202501],
        [-0.03170357, -0.00321356,  0.00682998, ...,  0.01349299,
          0.03732628,  0.01961987],
        [-0.03042426,  0.03763274,  0.02568983, ..., -0.04152735,
          0.08620185,  0.04503268],
        [-0.02015273,  0.02899581, -0.0149154 , ...,  0.05353099,
          0.06611942, -0.00489779]], shape=(5, 384)),
 'documents': ['Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.',
  'Marie Curie was a pioneering physicist and chemist who conducted groundbreaki

6. Search documents

In [74]:
chroma_store.similarity_search(query="Computer Science", k=2)

[Document(metadata={'field': 'Computer Science'}, page_content='Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.'),
 Document(metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology.")]

7. Search with similarity score

In [75]:
chroma_store.similarity_search_with_score(query="computer science", k=2)

[(Document(metadata={'field': 'Computer Science'}, page_content='Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.'),
  0.5494447350502014),
 (Document(metadata={'field': 'Molecular Biology'}, page_content="Rosalind Franklin's work with X-ray diffraction was crucial in the discovery of DNA’s double-helix structure. Her research played a pivotal role in molecular biology."),
  0.9420791268348694)]

8. Metadata filtering

In [77]:
chroma_store.similarity_search_with_score(
    query="",
    filter={"field":"Computer Science"}
)

[(Document(metadata={'field': 'Computer Science'}, page_content='Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.'),
  1.1870946884155273)]

9. Delete + Update

In [78]:

# Step 1: Delete the old one
vector_store_chroma._collection.delete(where={"scientist": "Turing"})

# Step 2: Add the updated one
updated_doc = Document(
    page_content="Alan Turing laid the foundation of modern AI and computer science with his work during WWII.",
    metadata={"field": "Computer Science", "scientist": "Turing"}
)
vector_store_chroma.add_documents([updated_doc])

# Step 3: View all docs again
print("After update:")
for doc in vector_store_chroma.similarity_search("computer", k=5):
    print(doc.page_content)


After update:
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing is considered the father of computer science. His conceptualization of the Turing Machine laid the groundwork for modern computing.
Alan Turing laid the foundation of modern AI and computer science with his work during WWII.
Albert Einstein revolutionized modern physics with his theory of relativity. His work reshaped our understanding of space, time, and gravity.


10. Delete doc

In [80]:
chroma_store.delete('84d65304-6849-4c72-bb67-517dbcfffcea')
# chroma_store.delete(ids=['84d65304-6849-4c72-bb67-517dbcfffcea'])

In [82]:
# view documents
chroma_store.get(include=['embeddings','documents', 'metadatas'])

{'ids': ['869ae007-1164-4c8e-a281-385a9d6024f1',
  '7d196e50-9644-4605-9fdd-84b53f12b44d',
  '3dc9c8ab-8f60-47bc-a618-dfacf78c187a',
  'cb0d6f59-79f0-4af7-8d2e-8e125e101c9a'],
 'embeddings': array([[ 0.02898663,  0.07278389, -0.04395222, ...,  0.00076877,
          0.0473913 ,  0.01202501],
        [-0.03170357, -0.00321356,  0.00682998, ...,  0.01349299,
          0.03732628,  0.01961987],
        [-0.03042426,  0.03763274,  0.02568983, ..., -0.04152735,
          0.08620185,  0.04503268],
        [-0.02015273,  0.02899581, -0.0149154 , ...,  0.05353099,
          0.06611942, -0.00489779]], shape=(4, 384)),
 'documents': ['Marie Curie was a pioneering physicist and chemist who conducted groundbreaking research on radioactivity. She was the first woman to win a Nobel Prize and the only person to win in two scientific fields.',
  'Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics. His work remained unchallenged for centur