# Adding a Database

The previous example kept the embeddings only in our computer's memory, so they were lost when the session ended. To build something useful, we need a database that persists our data between sessions.

In [None]:
! pip install chromadb

# Chroma is a friend https://www.trychroma.com/

In [62]:
import requests
import chromadb
import numpy as np
import uuid
import time

In [63]:
# Sample corpus to index: six short sentences covering three topics
# (cats, dogs, Paris), two sentences per topic, worded differently so that
# matching them requires semantic rather than keyword similarity.
texts = [
    "The cat sat on the mat",
    "A feline was resting on a rug",
    "Dogs are great pets",
    "I love having a canine companion",
    "Paris is the capital of France",
    "The Eiffel Tower is in Paris"
]

In [64]:
# Chroma client configured with the local path ./chroma_db (relative to the
# notebook's working directory) as its storage location.
client = chromadb.PersistentClient(path="./chroma_db")

In [65]:
# Create the collection that will hold our sentences.
# NOTE: the name embeds the current Unix time (whole seconds), so every run of
# this cell normally creates a brand-new collection inside ./chroma_db instead
# of reusing an earlier one.
unique_collection_name = f"document_sentences_{int(time.time())}"

# "hnsw:space" selects the metric of the collection's HNSW index; here: cosine
collection_settings = {"hnsw:space": "cosine"}

collection = client.get_or_create_collection(
    name=unique_collection_name,
    metadata=collection_settings,
)

In [66]:
# Using Nomic model served locally via Ollama for embedding
# Ollama is a friend --> https://ollama.com/
def get_embeddings_from_ollama(
    text,
    model="nomic-embed-text",
    url="http://localhost:11434/api/embeddings",
    timeout=60.0,
):
    """Return the embedding of ``text`` from an Ollama server as a float32 array.

    Parameters
    ----------
    text : str
        Text to embed; sent as the ``prompt`` field of the request body.
    model : str
        Name of the embedding model, sent as the ``model`` field.
    url : str
        Embeddings endpoint to POST to. Defaults to a local Ollama server.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` would block indefinitely if the server never answers.

    Returns
    -------
    numpy.ndarray
        ``float32`` array built from the ``embedding`` field of the JSON reply.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        If the connection fails or the timeout expires.
    KeyError
        If a successful reply contains no ``embedding`` field.
    """
    payload = {
        "model": model,
        "prompt": text
    }

    response = requests.post(url, json=payload, timeout=timeout)
    # Surface HTTP errors directly instead of failing later with a confusing
    # KeyError on the missing "embedding" field.
    response.raise_for_status()

    return np.array(response.json()["embedding"], dtype=np.float32)

In [67]:
# Embed every sentence (one Ollama request per sentence), then store the
# vectors together with their source text in the Chroma collection.
embeddings = [get_embeddings_from_ollama(sentence) for sentence in texts]

# Each stored sentence gets a random UUID string as its record ID
ids = [str(uuid.uuid4()) for _ in range(len(embeddings))]

collection.add(documents=texts, embeddings=embeddings, ids=ids)


In [68]:
# Embed the query sentence with the same function used for the stored texts.
# This vector is what gets compared against the sentences in the db.
query_text = "A cat is sitting on a carpet"
query_vector = get_embeddings_from_ollama(query_text)

In [None]:
# Ask Chroma for the two stored sentences closest to the query vector
results = collection.query(
    query_embeddings=[query_vector],
    n_results=2
)

# Results are grouped per query embedding; we sent exactly one, hence index 0
top_documents = results["documents"][0]
top_distances = results["distances"][0]

print("Query results for:", query_text)
for rank, (doc, distance) in enumerate(zip(top_documents, top_distances), start=1):
    # Show the distance as well: a smaller value means a closer match
    print(f"Result {rank}: {doc} (distance: {distance:.4f})")