In [1]:
import chromadb

# Keep the Chroma database on disk under ./chroma_db_storage so that the
# collection's contents survive kernel restarts.
client = chromadb.PersistentClient(path="./chroma_db_storage")

# Load the collection if it already exists on disk, otherwise create it.
collection = client.get_or_create_collection(name="genai_collection")

# Seed the two demo documents. Because the store is persistent, "id1"/"id2"
# may already exist from a previous run; `upsert` inserts missing records and
# overwrites existing ones, so this cell can be re-run safely.
try:
    collection.upsert(
        documents=[
            "This document is about New York",
            "This document is about Delhi",
        ],
        ids=["id1", "id2"],
    )
    print("Documents added successfully!")
except Exception as e:
    # Report the failure, then re-raise so later cells do not silently run
    # against a collection that is missing its documents.
    print(f"Error adding documents: {e}")
    raise


Documents added successfully!


In [2]:
# Snapshot every record currently in the collection. Called with no
# arguments, `get` applies no ID filter; the recorded output below shows
# ids, documents and metadatas populated while embeddings are None.
# NOTE: this is a point-in-time snapshot and is not refreshed by later cells.
all_docs = collection.get()
all_docs

{'ids': ['id1', 'id2'],
 'embeddings': None,
 'documents': ['This document is about New York',
  'This document is about Delhi'],
 'uris': None,
 'data': None,
 'metadatas': [None, None],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [3]:
# Look up a single record by its ID. The result uses the same dict layout as
# an unfiltered `get`, restricted to the requested ID.
documents = collection.get(
    ids=["id1"],
)
documents

{'ids': ['id1'],
 'embeddings': None,
 'documents': ['This document is about New York'],
 'uris': None,
 'data': None,
 'metadatas': [None],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [4]:
# Similarity search: ask for the 2 stored documents closest to the query text.
# In the recorded output the matches are listed in ascending distance, with
# the New York document first.
results = collection.query(n_results=2, query_texts=["Query is about big apple"])
results

{'ids': [['id1', 'id2']],
 'embeddings': None,
 'documents': [['This document is about New York',
   'This document is about Delhi']],
 'uris': None,
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[1.631218396756471, 1.8675585319946242]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [5]:
# Empty the collection. The IDs are read from the collection right now rather
# than taken from a snapshot made in an earlier cell, so this cell gives the
# same result even when cells are re-run or executed out of order.
existing_ids = collection.get()["ids"]

# Skip the call when the collection is already empty: an empty ID list is not
# a meaningful delete request.
# NOTE(review): some Chroma versions appear to reject an empty `ids` list - confirm.
if existing_ids:
    collection.delete(ids=existing_ids)

# Display what is left; an empty result confirms the collection was cleared.
collection.get()

{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'data': None,
 'metadatas': [],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [6]:
# Store the same two documents again, this time with a source URL attached as
# metadata (one metadata dict per document, matched by position with `ids`).
# `upsert` is used instead of `add` so that re-running this cell, for example
# after editing a URL, updates the existing "id3"/"id4" records instead of
# leaving stale data in the persistent store.
collection.upsert(
    documents=[
        "This document is about New York",
        "This document is about Delhi",
    ],
    ids=["id3", "id4"],
    metadatas=[
        {"url": "https://en.wikipedia.org/wiki/New_York_City"},
        {"url": "https://en.wikipedia.org/wiki/New_Delhi"},
    ],
)

In [7]:
# Second similarity search, now against the records that carry metadata.
# In the recorded output the Delhi document is the closest match and each
# hit comes back with its stored `url` metadata.
results = collection.query(n_results=2, query_texts=["Query is about Chhole Bhature"])
results

{'ids': [['id4', 'id3']],
 'embeddings': None,
 'documents': [['This document is about Delhi',
   'This document is about New York']],
 'uris': None,
 'data': None,
 'metadatas': [[{'url': 'https://en.wikipedia.org/wiki/New_Delhi'},
   {'url': 'https://en.wikipedia.org/wiki/New_York_City'}]],
 'distances': [[1.558848018950743, 1.8114912512493335]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}