# Vector Embeddings
1. ChromaDB
2. Pinecone
3. Weaviate
4. FAISS (Facebook AI Similarity Search)

### Embedding Generation - Euri

In [7]:

import requests
import numpy as np

def generate_embeddings(text, model="text-embedding-3-small", timeout=30):
    """Request an embedding for ``text`` from the Euri embeddings endpoint.

    The API key is read from the notebook-level variable ``key``, which must
    be defined before this function is called.

    Parameters
    ----------
    text
        Value sent as the ``input`` field of the request payload.
    model : str, optional
        Embedding model name sent as the ``model`` field.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    numpy.ndarray
        The ``embedding`` of the first entry in the response's ``data`` list.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": key
    }
    payload = {
        "input": text,
        "model": model
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on an error status instead of hitting a KeyError further down.
    response.raise_for_status()
    data = response.json()

    embedding = np.array(data['data'][0]['embedding'])

    return embedding

# Sample sentence used as the first embedding input.
text = "The food was delicious and the service was excellent."

# Embed the sample sentence and keep the resulting vector for later comparison.
data1 = generate_embeddings(text)

In [8]:
# Inspect the embedding returned for the sample sentence.
data1

array([-0.03244253, -0.00839565, -0.06356495, ..., -0.00225143,
        0.05896459, -0.03152246], shape=(1536,))

In [9]:
# Embed a second sentence on an unrelated topic.
data2 = generate_embeddings("my name is Abhishek Maity")

In [10]:
# Show both embedding vectors side by side (displayed as a tuple).
data1, data2

(array([-0.03244253, -0.00839565, -0.06356495, ..., -0.00225143,
         0.05896459, -0.03152246], shape=(1536,)),
 array([ 3.0995198e-02, -3.2230254e-02,  2.2112282e-02, ...,
         7.5242770e-05,  4.4208606e-05,  1.2130879e-02], shape=(1536,)))

In [11]:
# Length of the embedding vector, i.e. its number of dimensions.
len(data2)

1536

### Cosine Similarity

In [12]:
# Cosine similarity = dot product divided by the product of the two vector norms.
norm_product = np.linalg.norm(data1) * np.linalg.norm(data2)
cosine_similarity = np.dot(data1, data2) / norm_product
cosine_similarity

np.float64(0.0696385883855918)

In [13]:
# Cosine distance is computed here as 1 minus the cosine similarity.
cosine_distance = 1 - cosine_similarity
cosine_distance

np.float64(0.9303614116144082)

In [14]:
# Embed a sentence that says nearly the same thing as the one used for data2.
data3 = generate_embeddings("people call by my name Abhishek Maity")
data3

array([ 0.04484749, -0.0365818 ,  0.02437423, ..., -0.00608674,
        0.01644954,  0.01113003], shape=(1536,))

In [16]:
# Cosine similarity between data3 and data2: dot product over the product of norms.
norm_product = np.linalg.norm(data3) * np.linalg.norm(data2)
cosine_similarity = np.dot(data3, data2) / norm_product

# Distance is the complement of the similarity.
cosine_distance = 1 - cosine_similarity

print(cosine_similarity, cosine_distance)

0.8418239127856914 0.15817608721430865


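ChromaDB can rank vectors by cosine similarity: the smaller the angle between two vectors, the more similar they are. Note that cosine is not the default — a collection uses squared L2 (Euclidean) distance unless cosine is chosen when the collection is created.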

- Euclidean distance is $\sqrt{(x_2 - x_1)^2 + (y_2 - y_1)^2 + \dots}$, with one squared difference per dimension, up to the N-th.

### Install Chroma library for ChromaDB

In [18]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.4.0-cp39-abi3-win_amd64.whl.metadata (7.3 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pydantic>=1.9 (from chromadb)
  Downloading pydantic-2.12.5-py3-none-any.whl.metadata (90 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.3-cp312-cp312-win_amd64.whl.metadata (9.1 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.40.0-py3-none-any.whl.metadata (6.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-win_amd64.whl.metadata (5.3 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.39.1-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading open

## Run this command to start the chroma db server

In [None]:
import subprocess

# `chroma run` is a shell command, not Python, and it stays in the foreground
# while the server is up. Launch it as a background process so the kernel is
# not blocked; stop it later with `chroma_server.terminate()`.
chroma_server = subprocess.Popen(["chroma", "run", "--path", "./chromadb_data"])