# Chroma DB Cloud integration

Reference: Chroma getting-started guide — https://docs.trychroma.com/docs/overview/getting-started

In [3]:
%pip install -qU chromadb python-dotenv langchain-chroma

Note: you may need to restart the kernel to use updated packages.


In [4]:
from dotenv import load_dotenv
# Load environment variables before any client is built; the CHROMA_* values are
# read later through os.getenv. Presumably they come from a local .env file —
# confirm for your setup.
load_dotenv()

True

### Functions

In [5]:
# Instantiate the Chroma Cloud client

import os
import chromadb
from chromadb.api import ClientAPI

def get_chroma_client() -> ClientAPI:
  """Create a Chroma Cloud client configured from environment variables.

  The API key, tenant and database are read from CHROMA_API_KEY,
  CHROMA_TENANT and CHROMA_DATABASE. A variable that is not set is
  forwarded as None.

  Returns:
    The client object produced by chromadb.CloudClient.
  """
  # Gather the connection settings first, then hand them over in one call.
  connection_settings = {
    "api_key": os.getenv("CHROMA_API_KEY"),
    "tenant": os.getenv("CHROMA_TENANT"),
    "database": os.getenv("CHROMA_DATABASE"),
  }
  return chromadb.CloudClient(**connection_settings)

In [6]:
# Instantiate the LangChain vector store

from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

def get_chroma_collection(collection_name: str):
  """Build a LangChain Chroma vector store bound to the given collection.

  A new client (via get_chroma_client) and a new OpenAIEmbeddings instance
  using the "text-embedding-3-small" model are created on every call.

  Args:
    collection_name: Name of the collection the vector store works on.

  Returns:
    The langchain_chroma.Chroma vector store instance.
  """
  # Client first, embeddings second: same evaluation order as a single call
  # with both expressions inlined as keyword arguments.
  client = get_chroma_client()
  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
  return Chroma(
    client=client,
    embedding_function=embeddings,
    collection_name=collection_name,
  )

In [7]:
# Smoke check: build the vector store for "test_collection"; the cell output is its repr.
get_chroma_collection("test_collection")

<langchain_chroma.vectorstores.Chroma at 0x111a72960>

In [9]:
# No installation needed: `uuid` is part of the Python standard library, and
# `from uuid import uuid4` works without any extra package.

Note: you may need to restart the kernel to use updated packages.


In [10]:
# Add documents

from uuid import uuid4
from langchain_core.documents import Document

# Three small French sample documents, each tagged through its metadata.
document_1 = Document(
    page_content="Ceci est un document de test. Je suis en train de tester l'embedding.",
    metadata={"tag": "test"},
)
document_2 = Document(
    page_content="Ceci est un document de production. Je suis en train de tester l'embedding pour la production.",
    metadata={"tag": "prod"},
)
document_3 = Document(
    page_content="Ma nourriture préférée est la pizza.",
    metadata={"tag": "nourriture"},
)
documents = [document_1, document_2, document_3]

# One fresh random id per document; uuids[i] is paired with documents[i].
uuids = [str(uuid4()) for _ in documents]

collection = get_chroma_collection("test_collection")
# Last expression of the cell: the returned list of ids is displayed as output.
collection.add_documents(documents=documents, ids=uuids)

['8b609594-c5c4-455e-bb66-dc4c3022e36a',
 '5079f18a-56b0-41da-bd14-c2f7cf333751',
 '68631453-bd3f-4062-9950-c1ccba0d8bba']

In [15]:
document_4 = Document(page_content="J'adore également les sushis.", metadata={"tag": "mourritures"})

collection.add_documents(documents=[document_4], ids=[str(uuid4())])

['1de3cfd6-e178-4b7a-8a4d-d7b9e704305c']

In [16]:
# Update document (one or many)

updated_document_4 = Document(
    page_content="J'adore également les sushis.",
    metadata={"tag": "nourriture"},
    id=str(uuid4()),
)

collection.update_documents(ids=["1de3cfd6-e178-4b7a-8a4d-d7b9e704305c"], documents=[updated_document_4])

In [12]:
# Update document (one or many)

# Replace the content and metadata of the first inserted document.
# update_documents targets the entries listed in `ids`; the Document's own `id`
# is set to that same value so it does not suggest a new id is being assigned.
updated_document_1 = Document(
    page_content="J'adore le chocolat !",
    metadata={"tag": "nourriture"},
    id=uuids[0],
)

collection.update_documents(ids=[updated_document_1.id], documents=[updated_document_1])

In [None]:
# Delete document

# Remove the entry stored under the last generated id, i.e. the id that was
# paired with document_3 when the documents were added.
# NOTE(review): this cell has no execution count, and the retriever output at the
# end of the notebook still returns that document — running it will change those results.
collection.delete(ids=[uuids[-1]])

In [19]:
# Query by retriever

# MMR retriever configured with k=1 and fetch_k=5.
retriever = collection.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 5},
)

# French and English questions, run in this order.
queries = (
    "Quelle est ma nourriture préférée ?",
    "Est-ce que j'aime le chocolat ?",
    "What is my favorite dinner?",
    "What is my favorite dessert?",
)

# NOTE(review): score_threshold is presumably only honored by the
# "similarity_score_threshold" search type — confirm whether it has any effect with MMR.
for query in queries:
    print(retriever.invoke(query, filter=None, score_threshold=0))



[Document(id='68631453-bd3f-4062-9950-c1ccba0d8bba', metadata={'tag': 'nourriture'}, page_content='Ma nourriture préférée est la pizza.')]
[Document(id='8b609594-c5c4-455e-bb66-dc4c3022e36a', metadata={'tag': 'nourriture'}, page_content="J'adore le chocolat !")]
[Document(id='68631453-bd3f-4062-9950-c1ccba0d8bba', metadata={'tag': 'nourriture'}, page_content='Ma nourriture préférée est la pizza.')]
[Document(id='8b609594-c5c4-455e-bb66-dc4c3022e36a', metadata={'tag': 'nourriture'}, page_content="J'adore le chocolat !")]
