## **Setup Timer**

In [29]:
import functools
import time

def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time in seconds (10 decimal places),
        and returns ``func``'s result unchanged. If ``func`` raises, the
        exception propagates and no timing line is printed.
    """
    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter() is the clock intended for measuring short intervals;
        # time.time() follows the system clock and can jump if it is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"Function '{func.__name__}' executed in {elapsed:.10f} seconds")
        return result
    return wrapper

## **Setup Embedding Model**

In [30]:
from sentence_transformers import SentenceTransformer

# Name of the pretrained checkpoint used to embed every document and query below.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

## **Create Embeddings**

In [31]:
# Toy corpus: one short sentence per animal.
documents = [
    "Cats are independent pets that love to nap.",
    "Dogs are loyal companions who enjoy walks.",
    "Birds can fly and often sing beautiful songs.",
    "Fish swim in aquariums and come in many colors.",
    "Elephants are the largest land animals on Earth.",
    "Hamsters are small rodents often kept as pets.",
    "Rabbits have long ears and love to hop.",
    "Turtles have hard shells and can live both in water and on land.",
    "Lizards are reptiles that can regrow their tails.",
    "Frogs are amphibians known for their jumping abilities."
]

# Encode the whole corpus in a single call, then sanity-check the result:
# the array shape, followed by the first five components of the first vector.
embeddings = model.encode(documents)
print(embeddings.shape)
print(embeddings[0, :5])

(10, 384)
[ 0.05315838  0.01514426 -0.01166811  0.10676312 -0.02198896]


## **Working with Vector DB (ChromaDB)**

### **Create Client**

In [32]:
import chromadb

# Two clients side by side: the default client (labelled "in-memory" in the
# results below) and one that is handed an on-disk path for its storage.
CHROMA_DB_PATH = "./chroma_db"
chroma_client = chromadb.Client()
persistent_chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)

### **Create Collections**

In [33]:
# get_or_create=True lets this cell be re-run without failing when the
# collection already exists.
collection = chroma_client.create_collection(
    name="animal_collection",
    get_or_create=True,
)
persistent_collection = persistent_chroma_client.create_collection(
    name="persistent_animal_collection",
    get_or_create=True,
)

### **Add Embeddings to the Collection**

In [34]:
# Position-based IDs, so re-running this cell targets the same records.
ids = [f"doc_{i}" for i in range(len(documents))]

# Convert the embedding array to plain lists once and reuse it for both writes.
embedding_rows = embeddings.tolist()

# upsert (instead of add) keeps this cell idempotent: the persistent
# collection already holds these IDs after the first run, and re-adding
# existing IDs is not a clean no-op across Chroma versions.
for target_collection in (collection, persistent_collection):
    target_collection.upsert(ids=ids, documents=documents, embeddings=embedding_rows)

### **Create Query and Process**

In [38]:
query = "Aquatic pets"
# Encode the query as a one-item batch and unpack the single resulting vector.
(query_embedding,) = model.encode([query])

### **Outputs**

In [39]:
@timer
def perform_query(collection, query_embedding, n_results=5):
    """Query ``collection`` with a single embedding and return the raw result.

    Wrapped by ``timer``, so every call also prints its execution time.

    Args:
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``.
        query_embedding: One embedding vector; it is wrapped in a one-item
            list before being passed on.
        n_results: Number of matches to request. Defaults to 5.

    Returns:
        Whatever ``collection.query`` returns, unchanged.
    """
    return collection.query(query_embeddings=[query_embedding], n_results=n_results)

# Only the first query goes through the timed helper; the second collection
# is queried directly.
result1 = perform_query(collection, query_embedding)
result2 = persistent_collection.query(query_embeddings=[query_embedding], n_results=5)

docs1, distances1 = result1.get('documents'), result1.get('distances')
docs2, distances2 = result2.get('documents'), result2.get('distances')

# Both result sets are rendered identically; index 0 selects the matches for
# the single query that was submitted.
for heading, docs, distances in (
    ("\nResults from in-memory collection:", docs1, distances1),
    ("\nResults from persistent collection:", docs2, distances2),
):
    print(heading)
    if docs and distances:
        for doc, distance in zip(docs[0], distances[0]):
            print(f"Document: {doc}, Score: {distance:.3f}")

Function 'perform_query' executed in 0.0015027523 seconds

Results from in-memory collection:
Document: Fish swim in aquariums and come in many colors., Score: 0.883
Document: Turtles have hard shells and can live both in water and on land., Score: 1.117
Document: Frogs are amphibians known for their jumping abilities., Score: 1.300
Document: Dogs are loyal companions who enjoy walks., Score: 1.357
Document: Hamsters are small rodents often kept as pets., Score: 1.376

Results from persistent collection:
Document: Fish swim in aquariums and come in many colors., Score: 0.883
Document: Turtles have hard shells and can live both in water and on land., Score: 1.117
Document: Frogs are amphibians known for their jumping abilities., Score: 1.300
Document: Dogs are loyal companions who enjoy walks., Score: 1.357
Document: Hamsters are small rodents often kept as pets., Score: 1.376
