In [1]:
!pip install chromadb==1.0.12
!pip install sentence-transformers==4.1.0

Collecting chromadb==1.0.12
  Downloading chromadb-1.0.12-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting build>=1.0.3 (from chromadb==1.0.12)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting fastapi==0.115.9 (from chromadb==1.0.12)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb==1.0.12)
  Downloading uvicorn-0.38.0-py3-none-any.whl.metadata (6.8 kB)
Collecting numpy>=1.22.5 (from chromadb==1.0.12)
  Downloading numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting posthog>=2.4.0 (from chromadb==1.0.12)
  Downloading posthog-6.7.14-py3-none-any.whl.metadata (6.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb==1.0.12)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb==1.0.12)
  Downlo

In [2]:
import chromadb
from chromadb.utils import embedding_functions

# Sentence-Transformers model handed to the collection as its embedding function.
ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")

# Chroma client built with default settings (no arguments are passed).
client = chromadb.Client()

# Name under which main() registers the demo collection.
collection_name = "my_grocery_collection"

def _print_top_matches(collection, query_term, n_results):
    """Query ``collection`` for one term and print its nearest documents.

    Any exception raised while querying or printing is reported with an
    "Error in similarity search" message instead of propagating.

    Args:
        collection: Chroma collection to search.
        query_term: Single query string.
        n_results: Maximum number of neighbours to request and list.
    """
    try:
        results = collection.query(query_texts=[query_term], n_results=n_results)
        print(f"Query results for '{query_term}':")
        print(results)

        # query() returns one hit list per query text; only one text was sent,
        # so everything of interest lives at index 0.
        if not results or not results['ids'] or len(results['ids'][0]) == 0:
            print(f'No documents found similar to "{query_term}"')
            return

        print(f'Top {n_results} similar documents to "{query_term}":')
        hits = zip(results['ids'][0], results['distances'][0], results['documents'][0])
        for doc_id, score, text in hits:
            # "Score" is the distance Chroma reports (the collection is
            # configured for cosine space), so smaller means closer.
            shown_text = text if text else "Text not available"
            print(f' - ID: {doc_id}, Text: "{shown_text}", Score: {score:.4f}')
    except Exception as error:
        print(f"Error in similarity search: {error}")


def main(query_term="apple", n_results=3):
    """Build the grocery demo collection and print the documents nearest to a query.

    The collection is obtained with ``get_or_create_collection`` and filled
    with ``upsert`` so that re-running the cell in the same kernel reuses the
    existing collection and ids instead of failing on the second run.

    Args:
        query_term: Text to search for. Defaults to "apple".
        n_results: Number of nearest documents to list. Defaults to 3.

    Returns:
        None. Progress and results are printed; any exception is printed with
        an "Error:" prefix rather than raised.
    """
    try:
        collection = client.get_or_create_collection(
            name=collection_name,
            metadata={"description": "A collection for storing grocery data"},
            configuration={
                "hnsw": {"space": "cosine"},
                "embedding_function": ef
            }
        )
        # Message text kept as-is; on a re-run the collection may have been reused.
        print(f"Collection created: {collection.name}")

        texts = [
            'fresh red apples',
            'organic bananas',
            'ripe mangoes',
            'whole wheat bread',
            'farm-fresh eggs',
            'natural yogurt',
            'frozen vegetables',
            'grass-fed beef',
            'free-range chicken',
            'fresh salmon fillet',
            'aromatic coffee beans',
            'pure honey',
            'golden apple',
            'red fruit'
        ]

        # Ids are 1-based: food_1 ... food_N, in the order of `texts`.
        ids = [f"food_{position}" for position in range(1, len(texts) + 1)]

        # upsert overwrites entries whose id already exists, keeping re-runs idempotent.
        collection.upsert(
            documents=texts,
            metadatas=[{"source": "grocery_store", "category": "food"} for _ in texts],
            ids=ids
        )

        print("Collection contents:")
        # count() avoids fetching every document just to measure the collection.
        print(f"Number of documents: {collection.count()}")

        _print_top_matches(collection, query_term, n_results)
    except Exception as error:
        print(f"Error: {error}")

# Run the demo when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionAddEvent: capture() takes 1 positional argument but 3 were given


Collection created: my_grocery_collection


Failed to send telemetry event CollectionGetEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


Collection contents:
Number of documents: 14
Query results for 'apple':
{'ids': [['food_13', 'food_1', 'food_14']], 'embeddings': None, 'documents': [['golden apple', 'fresh red apples', 'red fruit']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'category': 'food', 'source': 'grocery_store'}, {'source': 'grocery_store', 'category': 'food'}, {'category': 'food', 'source': 'grocery_store'}]], 'distances': [[0.3824649453163147, 0.480892539024353, 0.5965152978897095]]}
Top 3 similar documents to "apple":
 - ID: food_13, Text: "golden apple", Score: 0.3825
 - ID: food_1, Text: "fresh red apples", Score: 0.4809
 - ID: food_14, Text: "red fruit", Score: 0.5965


In [4]:
import chromadb
from chromadb.utils import embedding_functions

# Sentence-Transformers model handed to the collection as its embedding function.
ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")

# Chroma client built with default settings (no arguments are passed).
client = chromadb.Client()

# Name under which main() registers the collection used by the multi-query demo.
collection_name = "my_grocery_collection_1"

def _print_matches_per_query(collection, query_terms, n_results):
    """Run all ``query_terms`` in one query and print the hits for each term.

    Any exception raised while querying or printing is reported with an
    "Error in similarity search" message instead of propagating.

    Args:
        collection: Chroma collection to search.
        query_terms: List of query strings, sent together in one query call.
        n_results: Maximum number of neighbours to request and list per term.
    """
    try:
        results = collection.query(query_texts=query_terms, n_results=n_results)
        print(f"Query results for '{query_terms}':")
        print(results)

        # Bail out only when no query produced any hit at all.
        if not results or not results['ids'] or not any(results['ids']):
            print(f'No documents found similar to "{query_terms}"')
            return

        # The result lists are parallel to query_terms: entry k belongs to term k.
        per_query = zip(
            query_terms, results['ids'], results['distances'], results['documents']
        )
        for term, hit_ids, hit_distances, hit_texts in per_query:
            if not hit_ids:
                print(f'No documents found similar to "{term}"')
                continue
            print(f'Top {n_results} similar documents to "{term}":')
            for doc_id, score, text in zip(hit_ids, hit_distances, hit_texts):
                # "Score" is the distance Chroma reports (the collection is
                # configured for cosine space), so smaller means closer.
                shown_text = text if text else "Text not available"
                print(f' - ID: {doc_id}, Text: "{shown_text}", Score: {score:.4f}')
    except Exception as error:
        print(f"Error in similarity search: {error}")


def main(query_terms=("red", "fresh"), n_results=3):
    """Build the grocery demo collection and print nearest documents per query term.

    The collection is obtained with ``get_or_create_collection`` and filled
    with ``upsert`` so that re-running the cell in the same kernel reuses the
    existing collection and ids instead of failing on the second run.

    Args:
        query_terms: A single query string or an iterable of query strings.
            Defaults to ("red", "fresh").
        n_results: Number of nearest documents to list per term. Defaults to 3.

    Returns:
        None. Progress and results are printed; any exception is printed with
        an "Error:" prefix rather than raised.
    """
    try:
        collection = client.get_or_create_collection(
            name=collection_name,
            metadata={"description": "A collection for storing grocery data"},
            configuration={
                "hnsw": {"space": "cosine"},
                "embedding_function": ef
            }
        )
        # Message text kept as-is; on a re-run the collection may have been reused.
        print(f"Collection created: {collection.name}")

        texts = [
            'fresh red apples',
            'organic bananas',
            'ripe mangoes',
            'whole wheat bread',
            'farm-fresh eggs',
            'natural yogurt',
            'frozen vegetables',
            'grass-fed beef',
            'free-range chicken',
            'fresh salmon fillet',
            'aromatic coffee beans',
            'pure honey',
            'golden apple',
            'red fruit'
        ]

        # Ids are 1-based: food_1 ... food_N, in the order of `texts`.
        ids = [f"food_{position}" for position in range(1, len(texts) + 1)]

        # upsert overwrites entries whose id already exists, keeping re-runs idempotent.
        collection.upsert(
            documents=texts,
            metadatas=[{"source": "grocery_store", "category": "food"} for _ in texts],
            ids=ids
        )

        print("Collection contents:")
        # count() avoids fetching every document just to measure the collection.
        print(f"Number of documents: {collection.count()}")

        # Accept a bare string as well as an iterable; a list keeps the printed
        # representation of the terms in list form.
        terms = [query_terms] if isinstance(query_terms, str) else list(query_terms)
        _print_matches_per_query(collection, terms, n_results)
    except Exception as error:
        print(f"Error: {error}")

# Run the multi-query demo when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionAddEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionGetEvent: capture() takes 1 positional argument but 3 were given


Collection created: my_grocery_collection_1
Collection contents:
Number of documents: 14


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


Query results for '['red', 'fresh']':
{'ids': [['food_14', 'food_1', 'food_13'], ['food_1', 'food_5', 'food_12']], 'embeddings': None, 'documents': [['red fruit', 'fresh red apples', 'golden apple'], ['fresh red apples', 'farm-fresh eggs', 'pure honey']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'category': 'food', 'source': 'grocery_store'}, {'source': 'grocery_store', 'category': 'food'}, {'category': 'food', 'source': 'grocery_store'}], [{'category': 'food', 'source': 'grocery_store'}, {'category': 'food', 'source': 'grocery_store'}, {'category': 'food', 'source': 'grocery_store'}]], 'distances': [[0.3132774233818054, 0.45399630069732666, 0.7393020391464233], [0.4773758053779602, 0.48541051149368286, 0.6252564191818237]]}
Top 3 similar documents to "red":
 - ID: food_14, Text: "red fruit", Score: 0.3133
 - ID: food_1, Text: "fresh red apples", Score: 0.4540
 - ID: food_13, Text: "golden apple", Score: 0.7393
Top 3 similar docum