## Chroma Vector Database

In [22]:
# Load environment variables
from dotenv import load_dotenv

load_dotenv()

True

In [7]:
# Define a function to generate embeddings
import os
import requests
import numpy as np

EURIAI_API_AKY = os.getenv("EURIAI_API_KEY")

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURIAI_API_AKY}"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    return embedding

In [8]:
text = "The weather is sunny today."
embedding = generate_embeddings(text)
embedding

array([ 0.01067466, -0.03126568,  0.00883722, ..., -0.03035571,
        0.01221461,  0.02594584], shape=(1536,))

In [11]:
texts = [
  "John was born in Jamshedpur, Jharkhand, India, to a modest family.",
  "His early life was marked by financial hardship and limited resources.",
  "He believed education could transform his life and pursued it relentlessly.",
  "John earned a Computer Science and Engineering degree.",
  "He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.",
  "He gained expertise in SAP WebDynpro, FiorI-UI5 HANA, Java, Big Data, and Data Analytics.",
  "John remained committed to making education accessible for everyone.",
  "His teaching empowered countless students to build their careers.",
  "Many students credit him with changing their lives through affordable learning.",
  "His journey is a testament to triumph over adversity and the power of knowledge."
]

texts


['John was born in Jamshedpur, Jharkhand, India, to a modest family.',
 'His early life was marked by financial hardship and limited resources.',
 'He believed education could transform his life and pursued it relentlessly.',
 'John earned a Computer Science and Engineering degree.',
 'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
 'He gained expertise in SAP WebDynpro, FiorI-UI5 HANA, Java, Big Data, and Data Analytics.',
 'John remained committed to making education accessible for everyone.',
 'His teaching empowered countless students to build their careers.',
 'Many students credit him with changing their lives through affordable learning.',
 'His journey is a testament to triumph over adversity and the power of knowledge.']

In [12]:
embeddings = [generate_embeddings(i).tolist() for i in texts]
embeddings

[[0.02132355,
  -0.005195533,
  -0.009459201,
  0.03009036,
  0.020376068,
  -0.026113018,
  0.053683702,
  0.025196772,
  0.0037430744,
  -0.06513678,
  0.029403176,
  -0.008470072,
  -0.030506836,
  0.021115312,
  0.019647235,
  0.02761233,
  0.020438539,
  -0.029444823,
  -0.029111642,
  0.01862687,
  0.051684618,
  -0.008084832,
  0.043854877,
  -0.022802038,
  0.022343915,
  -0.018637283,
  -0.008709545,
  -0.032047797,
  0.012535915,
  -0.04418806,
  0.0027539448,
  0.0013834798,
  0.027487388,
  -0.034983948,
  0.020646777,
  -0.03627502,
  0.044812772,
  -0.022156501,
  0.008975049,
  0.027487388,
  -0.026196314,
  -0.043396756,
  -0.016825613,
  0.085169256,
  -0.01923076,
  0.01719003,
  0.0068301996,
  0.011348959,
  0.021292314,
  0.029986242,
  0.01741909,
  -0.033755347,
  0.016409138,
  -0.015857307,
  -0.009183287,
  -0.02380158,
  0.020667601,
  0.003204259,
  0.00010590844,
  -0.024238879,
  -0.0068093757,
  -0.020855015,
  0.0060857492,
  0.008241011,
  0.051059905,


In [13]:
print(embeddings[0])

[0.02132355, -0.005195533, -0.009459201, 0.03009036, 0.020376068, -0.026113018, 0.053683702, 0.025196772, 0.0037430744, -0.06513678, 0.029403176, -0.008470072, -0.030506836, 0.021115312, 0.019647235, 0.02761233, 0.020438539, -0.029444823, -0.029111642, 0.01862687, 0.051684618, -0.008084832, 0.043854877, -0.022802038, 0.022343915, -0.018637283, -0.008709545, -0.032047797, 0.012535915, -0.04418806, 0.0027539448, 0.0013834798, 0.027487388, -0.034983948, 0.020646777, -0.03627502, 0.044812772, -0.022156501, 0.008975049, 0.027487388, -0.026196314, -0.043396756, -0.016825613, 0.085169256, -0.01923076, 0.01719003, 0.0068301996, 0.011348959, 0.021292314, 0.029986242, 0.01741909, -0.033755347, 0.016409138, -0.015857307, -0.009183287, -0.02380158, 0.020667601, 0.003204259, 0.00010590844, -0.024238879, -0.0068093757, -0.020855015, 0.0060857492, 0.008241011, 0.051059905, -0.030215302, 0.030777546, -0.019314054, -0.007309146, -0.008589809, 0.021844145, 0.033963583, -0.012994038, 0.021594258, -0.0542

In [None]:
# Create chroma db client
import chromadb

client = chromadb.Client()
client

<chromadb.api.client.Client at 0x21db70c7e60>

In [16]:
# Create chroma db collection
collection = client.create_collection(name="genai_sandbox")
collection

Collection(name=genai_sandbox)

In [17]:
# Store data into collection
collection.add(
    documents=texts,
    embeddings=embeddings,
    ids=[str(i) for i in range(len(texts))]
)

In [19]:
query = "John worked at deloitte"
query_embed = generate_embeddings(query).tolist()
print(query_embed)

[0.0027935111, 0.019962465, 0.03027275, -0.01471135, 0.0036572716, 0.015629953, 0.013909287, 0.09147635, -0.017439736, -0.08527921, 0.008418238, 0.01926323, -0.00019237524, 0.00904892, 0.012024095, 0.032137375, -0.013511683, -0.025241002, -0.045930125, 0.054704834, 0.038745828, 0.050838478, 0.026118472, -0.003547588, 0.025912816, -0.025432948, -0.049440008, -0.03290516, 0.036634415, 0.008575908, 0.0032562402, -0.020154413, 0.06005192, -0.016370319, 0.0025912817, 0.046286594, 0.06087455, 0.041433085, 0.015204927, -0.013024961, -0.007211715, 0.019372914, 0.002317072, 0.024459504, 0.03827967, -0.012791882, -0.032658376, 0.008274278, 0.05064653, 0.043379974, -0.015191217, 0.015725926, 0.049714215, 0.026817707, -0.027763732, -0.012785027, 0.025241002, -0.010317139, 0.029477542, -0.010762731, 0.007904095, -0.0013144927, -0.0012853579, 0.030711485, -0.007561332, -0.032055113, -0.011043795, 0.043050922, -0.012572514, -0.04159761, 0.0052305497, 0.13447243, -0.031040536, -4.2202584e-05, 0.009350

In [20]:
# Query results
collection.query(query_embeddings=[query_embed], n_results=2)

{'ids': [['4', '3']],
 'embeddings': None,
 'documents': [['He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
   'John earned a Computer Science and Engineering degree.']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.9585179686546326, 1.1966933012008667]]}

### Chroma Database - Cloud 

In [23]:
import chromadb

chromadb_api_key = os.getenv("CHROMADB_API_KEY")
chromadb_tenant = os.getenv("CHROMADB_TENANT")

cloud_client = chromadb.CloudClient(
    api_key=chromadb_api_key,
    tenant=chromadb_tenant,
    database='Sandbox'
)

cloud_client

<chromadb.api.client.Client at 0x21db9702de0>

In [25]:
collection = cloud_client.create_collection(name="genai_sandbox")
collection

Collection(name=genai_sandbox)

In [26]:
collection.add(
    documents=texts,
    embeddings=embeddings,
    ids=[str(i) for i in range(len(texts))]
)