# Starting ChromaDB 

#### In-Memory / Ephemeral Client

In [None]:
# import chromadb
# chroma_client = chromadb.Client()
# all_collections = chroma_client.list_collections() # get the list of collections held by this client
# NOTE(review): get_collection() fetches ONE collection and needs its name,
# e.g. chroma_client.get_collection(name="my_collection") - confirm against the Chroma docs.

#### Persistent Client

In [None]:
# import chromadb

# chroma_client = chromadb.PersistentClient(path="Storage/persistent_storage")
# all_collections = chroma_client.list_collections() # get the list of collections in this directory
# NOTE(review): get_collection() fetches ONE collection and needs its name,
# e.g. chroma_client.get_collection(name="my_collection") - confirm against the Chroma docs.

#### Client Server Mode

Client vs. Persistent
- The client just sets up a connection to a persistent server.
- So for a remote project use Persistent, but if you plan to scale the project, use Client.
- Personally, I will use Client, since I am learning to work the way a company would.
- Only the syntax changes between the two, so I recommend checking the docs; I will mention the Persistent syntax as much as I can.

In [None]:
# Client/server mode: Chroma runs as a separate server process (much like MySQL)
# and this notebook only connects to it.
# Start the server from a terminal with a command like the one below.
# NOTE(review): the available flags differ between Chroma versions -
# confirm them with `chroma run --help` before copying this command.
"""
chroma run \
  --path ./path/to/ \
  --host 0.0.0.0 \
  --port 8000 \
  --log-level DEBUG \
  --settings ./settings.json \
  --impl rest

my command
  chroma run \
  --path ./Storage/server \
  --host 0.0.0.0 \
  --port 8000 \
  --log-level DEBUG \
  --settings ./settings.json \
  --impl rest
"""

import chromadb

# Connect to the server started above (same host/port as in the command).
chroma_client = chromadb.HttpClient(host='localhost', port=8000)

# List the collections that exist on this server.
# get_collection() cannot be used for this: it returns a single collection and
# raises TypeError when called without its required `name` argument.
all_collections = chroma_client.list_collections()


# Adding asynchronous behaviour
# (commented-out sample: the same create/add calls as above, but awaited
# inside a coroutine that is started with asyncio.run)

# import asyncio
# import chromadb

# async def main():
#     client = await chromadb.AsyncHttpClient()

#     collection = await client.create_collection(name="my_collection")
#     await collection.add(
#         documents=["hello world"],
#         ids=["id1"]
#     )

# asyncio.run(main())


# Downloading Embedding models
> **Note:**
> - Only to be used with the In-Memory or Persistent client, not with the Server.
> - For the Server, we need to pass the embeddings (computed with the embedding model) ourselves when adding documents.


In [3]:
# I am using a Hugging Face (sentence-transformers) model here instead of
# following the docs; the reason is explained in the comment under the first
# `collection.add(...)` cell further down.

from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# Embedding function backed by the "all-MiniLM-L6-v2" sentence-transformers model.
embedding_fn = SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")

# What the docs do (default embedding function):
# collection = chroma_client.create_collection(name="my_collection")

# get_or_create_collection keeps this cell re-runnable: create_collection raises
# an error once "my_collection" already exists (always the case on a second run
# against a persistent store or a server).
collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=embedding_fn)

  from .autonotebook import tqdm as notebook_tqdm


# Adding in Collection

### For Persistent Storage and In Memory

In [None]:
# Add two documents; no embeddings are passed here, only the raw text,
# the IDs and one metadata dict per document.
collection.add(
    ids=["id1", "id2"],
    metadatas=[{"source": "I love Colors"}, {"source": "I Love Hawaii"}],
    documents=[
        "This is a document about pineapple",
        "This is a document about oranges",
    ],
)
# NOTE about the call above:
# It throws an error on macOS with an Intel chip (at least in my case).
# With the default embedding function we get an ONNX model, and because
# ONNX Core ML is not well supported on this system, I had to switch to an
# embedding model from Hugging Face - which is why I deviated from the docs earlier.

### For Server Client Mode

In [None]:
from sentence_transformers import SentenceTransformer

# Server mode: compute the embedding on the client side first ...
model = SentenceTransformer("all-MiniLM-L6-v2")
embedding = model.encode(["Chroma is an embedding database"]).tolist()

# ... then send the vector together with the document, its ID and metadata.
collection.add(
    documents=["Chroma is an embedding database"],
    embeddings=embedding,  # embeddings of the documents above
    metadatas=[{"source": "ChromaDB Docs"}],
    ids=["id1"],
)

# So whenever I use LangChain, I will run the embedding model on the docs
# first and then add them to the ChromaDB server.

# Upsert in Documents

In [None]:
# Upsert a document: the entry with this ID is added, or replaced if it already exists.
collection.upsert(
    ids=["doc1"],  # Document ID
    documents=["Updated content"],  # New or updated document content
    metadatas=[{"category": "science"}],  # Updated metadata
    # Updated embedding vector. It must be a real vector with the same dimension
    # as the rest of the collection, so it is computed with the same embedding
    # function the collection was created with (a literal `...` placeholder or a
    # 3-number vector is rejected by Chroma).
    embeddings=embedding_fn(["Updated content"]),
)

# It replaces the whole document; it does not append to it.

# Removing Docs


In [None]:
# Delete documents by their IDs (here the entries stored under "doc1" and "doc2")
collection.delete(ids=["doc1", "doc2"])

# Delete Collections

In [None]:
# Delete a collection by name
chroma_client.delete_collection(name="my_collection")
# NOTE(review): reset() is not a harmless "restart". Per the Chroma docs it
# empties and completely resets the database (destructive, not reversible),
# and it is refused unless resets are enabled in the client/server settings
# (allow_reset) - confirm before running this line.
chroma_client.reset() # Restart

# Querying Database

In [None]:
# Run a similarity query against the collection and keep the raw response.
results = collection.query(
    # Query text; Chroma will embed this for you with persistent and in-memory storage
    query_texts=["This is a query document about hawaii"],

    # How many results to return
    n_results=2,

    # Filter on metadata tags
    where={"source": "wiki"},

    # Filter on content: returns documents that contain the phrase "quantum mechanics"
    where_document={"$contains": "quantum mechanics"},

    # Include the specified field(s) in the response; distances can be used
    # for cosine similarity search
    include=["distances"],
)

print(results)

# For a server you must pass query embeddings.
# Here you can also pass image embeddings for a multi-modal query,
# and it retrieves the most relevant documents.



{'ids': [['id1', 'id2']], 'embeddings': None, 'documents': [['This is a document about pineapple', 'This is a document about oranges']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None]], 'distances': [[1.0404011011123657, 1.2430799007415771]]}
