In [1]:
!pip install chromadb



In [2]:
pip show chromadb

Name: chromadb
Version: 1.0.7
Summary: Chroma.
Home-page: https://github.com/chroma-core/chroma
Author: 
Author-email: Jeff Huber <jeff@trychroma.com>, Anton Troynikov <anton@trychroma.com>
License: 
Location: c:\users\scl\anaconda3\envs\assignment_grader\lib\site-packages
Requires: bcrypt, build, chroma-hnswlib, fastapi, grpcio, httpx, importlib-resources, jsonschema, kubernetes, mmh3, numpy, onnxruntime, opentelemetry-api, opentelemetry-exporter-otlp-proto-grpc, opentelemetry-instrumentation-fastapi, opentelemetry-sdk, orjson, overrides, posthog, pydantic, pypika, pyyaml, rich, tenacity, tokenizers, tqdm, typer, typing-extensions, uvicorn
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
import chromadb
from chromadb.utils import embedding_functions

In [None]:
# Generate embeddings with vector operations
import os

import requests
import numpy as np

# The key is read from the environment so the secret is never saved inside the notebook.
# Set EURI_API_KEY before starting Jupyter (e.g. `export EURI_API_KEY=euri-...`).
EURI_API_KEY = os.environ.get("EURI_API_KEY", "")

def generate_embeddings(text):
    """Request an embedding for ``text`` from the Euri embeddings endpoint.

    Args:
        text: Value sent as the ``input`` field of the request. Callers in this
            notebook pass a single string.

    Returns:
        numpy.ndarray: The ``embedding`` of the first item in the response's
        ``data`` list. Only that first item is returned, whatever was sent.

    Raises:
        RuntimeError: If ``EURI_API_KEY`` is empty.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    if not EURI_API_KEY:
        raise RuntimeError(
            "EURI_API_KEY is not set; export it in the environment before running this notebook."
        )

    url = "https://api.euron.one/api/v1/euri/alpha/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    # A timeout keeps a stalled connection from hanging the kernel indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    # Fail here with the HTTP status instead of a confusing KeyError on the error payload.
    response.raise_for_status()
    data = response.json()

    # Convert to numpy array for vector operations
    embedding = np.array(data['data'][0]['embedding'])

    return embedding


In [4]:
# Create the default Chroma client used by the cells that follow.
# NOTE(review): the output recorded for this cell is a ValueError ("An instance of Chroma
# already exists for ephemeral with different settings"). It presumably came from running
# the cell in a kernel that already held a Chroma instance - restart the kernel and confirm.
client = chromadb.Client()

ValueError: An instance of Chroma already exists for ephemeral with different settings

In [5]:
# get_or_create_collection keeps this cell re-runnable: it returns the existing collection
# instead of raising when 'sudhanshu_journey' has already been created in this client.
collection = client.get_or_create_collection(name='sudhanshu_journey')

In [6]:
collection

Collection(name=sudhanshu_journey)

In [7]:
# Demo corpus: ten short biography sentences. Each sentence is embedded and stored
# as one record in the collection by the cells below.
texts = [
    "Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.",
    "His early life was marked by financial hardship and limited resources.",
    "He believed education could transform his life and pursued it relentlessly.",
    "Sudhanshu earned a Computer Science and Engineering degree.",
    "He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.",
    "He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.",
    "Sudhanshu remained committed to making education accessible for everyone.",
    "His teaching empowered countless students to build their careers.",
    "Many students credit him with changing their lives through affordable learning.",
    "His journey is a testament to triumph over adversity and the power of knowledge."
]


In [8]:
# One embeddings request per sentence; each vector is converted to a plain Python list.
embeddings = [generate_embeddings(sentence).tolist() for sentence in texts]

In [9]:
len(embeddings[0])

1536

In [10]:
# Store every sentence together with its vector under a sequential id (rec_0, rec_1, ...).
collection.add(
    ids=[f"rec_{idx}" for idx in range(len(texts))],
    embeddings=embeddings,
    documents=texts,
)

In [11]:
collection.count()

10

In [12]:
collection.get()

{'ids': ['rec_0',
  'rec_1',
  'rec_2',
  'rec_3',
  'rec_4',
  'rec_5',
  'rec_6',
  'rec_7',
  'rec_8',
  'rec_9'],
 'embeddings': None,
 'documents': ['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
  'His early life was marked by financial hardship and limited resources.',
  'He believed education could transform his life and pursued it relentlessly.',
  'Sudhanshu earned a Computer Science and Engineering degree.',
  'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
  'He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.',
  'Sudhanshu remained committed to making education accessible for everyone.',
  'His teaching empowered countless students to build their careers.',
  'Many students credit him with changing their lives through affordable learning.',
  'His journey is a testament to triumph over adversity and the power of knowledge.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 '

In [13]:
query= "sudhanshu worked at wipro"


In [14]:
query_emb = generate_embeddings(query).tolist()

In [15]:
query_emb

[-0.057407707,
 -0.015191598,
 0.04691827,
 -0.012575698,
 -0.011367862,
 -0.030331524,
 -0.0034297358,
 0.07394278,
 -0.0044857846,
 -0.048029218,
 0.0061263735,
 -0.020048775,
 -0.06071472,
 -0.023446217,
 0.052162986,
 0.008144815,
 0.009152421,
 -0.049786072,
 -0.058596164,
 0.00987583,
 0.030641556,
 0.07006737,
 0.0026578838,
 0.004227424,
 0.021560185,
 -0.038909093,
 -0.04821007,
 -0.0010156798,
 0.011471206,
 -0.02348497,
 -0.03854739,
 -0.018576121,
 0.03201087,
 0.021573102,
 0.010773634,
 0.008002717,
 0.01661258,
 0.042241942,
 -0.0022057528,
 0.0019102531,
 -0.027799591,
 -0.020914283,
 0.02256779,
 0.0077056023,
 0.008202946,
 -0.022322347,
 -0.041725222,
 0.035498735,
 -0.0067755044,
 -0.025332248,
 0.011606846,
 -0.028729688,
 0.03565375,
 0.023381626,
 -0.045445614,
 -0.010579863,
 0.035757095,
 0.05503079,
 -0.02270989,
 -0.035059523,
 0.020100448,
 -0.0036880963,
 0.0655719,
 -0.0020701136,
 -0.012149403,
 -0.014455271,
 -0.009475372,
 -0.01216878,
 -0.012885731,
 -

In [16]:
result = collection.query(query_embeddings=[query_emb],n_results=2)

In [17]:
print(result)

{'ids': [['rec_4', 'rec_3']], 'embeddings': None, 'documents': [['He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.', 'Sudhanshu earned a Computer Science and Engineering degree.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None]], 'distances': [[0.8196437954902649, 0.8583085536956787]]}


In [18]:
next_sent = "sudhanshu kumar has competed his degree in computer science"

In [19]:
next_sent_embd = generate_embeddings(next_sent).tolist()

In [20]:
# Overwrite the document and vector stored under rec_3 with the new sentence.
collection.update(
    ids=['rec_3'],
    embeddings=[next_sent_embd],
    documents=[next_sent],
)

In [21]:
collection.get()


{'ids': ['rec_0',
  'rec_1',
  'rec_2',
  'rec_3',
  'rec_4',
  'rec_5',
  'rec_6',
  'rec_7',
  'rec_8',
  'rec_9'],
 'embeddings': None,
 'documents': ['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
  'His early life was marked by financial hardship and limited resources.',
  'He believed education could transform his life and pursued it relentlessly.',
  'sudhanshu kumar has competed his degree in computer science',
  'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
  'He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.',
  'Sudhanshu remained committed to making education accessible for everyone.',
  'His teaching empowered countless students to build their careers.',
  'Many students credit him with changing their lives through affordable learning.',
  'His journey is a testament to triumph over adversity and the power of knowledge.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 '

In [22]:
# Add one more record, this time with metadata attached. The metadata key is spelled
# 'catagories' here, so any `where` filter has to use exactly the same spelling.
collection.add(
    ids=["rec_10"],
    metadatas=[{'catagories':"sudh_info","year":2024}],
    embeddings=[generate_embeddings("sudhanshu kumar is working with euron for now").tolist()],
    documents=["sudhanshu kumar is working with euron for now"],
)

In [23]:
collection.get()

{'ids': ['rec_0',
  'rec_1',
  'rec_2',
  'rec_3',
  'rec_4',
  'rec_5',
  'rec_6',
  'rec_7',
  'rec_8',
  'rec_9',
  'rec_10'],
 'embeddings': None,
 'documents': ['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
  'His early life was marked by financial hardship and limited resources.',
  'He believed education could transform his life and pursued it relentlessly.',
  'sudhanshu kumar has competed his degree in computer science',
  'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
  'He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.',
  'Sudhanshu remained committed to making education accessible for everyone.',
  'His teaching empowered countless students to build their careers.',
  'Many students credit him with changing their lives through affordable learning.',
  'His journey is a testament to triumph over adversity and the power of knowledge.',
  'sudhanshu kumar is working with euron for no

In [24]:
# Embed the query as a plain string and pass it as a list holding one vector
# (one entry per query). `where` limits the search to records whose metadata
# has catagories == "sudh_info".
collection.query(
    query_embeddings=[generate_embeddings("sudhanshu is working with euron").tolist()],
    n_results=2,
    where={'catagories':"sudh_info"}
)

{'ids': [['rec_10']],
 'embeddings': None,
 'documents': [['sudhanshu kumar is working with euron for now']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[{'year': 2024, 'catagories': 'sudh_info'}]],
 'distances': [[0.14023710787296295]]}

In [30]:
# PersistentClient is given a filesystem path; presumably it stores data on disk there - confirm.
from chromadb import PersistentClient

# NOTE: this rebinds `client`, so later cells no longer talk to the client created earlier.
client = PersistentClient(path="./chroma_db")

# NOTE: this rebinds `collection` as well; the 'sudhanshu_journey' handle is no longer reachable
# through this name. get_or_create_collection makes the cell safe to re-run.
collection = client.get_or_create_collection("oldstyle")

# No embeddings are passed here - presumably Chroma computes them with the collection's
# default embedding function; confirm before relying on it.
collection.add(
    documents=["Hello old style"],
    ids=["id2"]
)

