In [1]:
%load_ext dotenv
%dotenv ../../05_src/.secrets

In [2]:
import os
from openai import OpenAI
import pandas as pd
import numpy as np

In [3]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in environment variables")
else:
    client = OpenAI(api_key = OPENAI_API_KEY)

In [4]:
def get_embedding(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return client.embeddings.create(input=[text], model=model).data[0].embedding

In [5]:
documents = [
    # Freedom
    "Freedom consists not in doing what we like, but in having the right to do what we ought.",
    "Those who deny freedom to others deserve it not for themselves.",
    "Liberty, when it begins to take root, is a plant of rapid growth.",
    "Freedom lies in being bold.",
    "Is freedom anything else than the right to live as we wish?",
    "I am no bird and no net ensnares me: I am a free human being with an independent will.",
    "The secret to happiness is freedom... And the secret to freedom is courage."
    "Freedom is the oxygen of the soul.", 
    "Life without liberty is like a body without spirit."
    # Friendship
    "There is nothing on this earth more to be prized than true friendship.",
    "There are no strangers here; Only friends you haven’t yet met.",
    "Friendship is the only cement that will ever hold the world together.",
    "A true friend is someone who is there for you when he'd rather be anywhere else.",
    "Friendship is the golden thread that ties the heart of all the world.", 
    "Your friend is the man who knows all about you and still likes you.",
    "A single rose can be my garden... a single friend, my world."
]

In [None]:
embeddings = [get_embedding(doc) for doc in documents]


In [7]:
ids = [f"id{i}" for i in range(len(documents))]

Reference: [chromadb](https://cookbook.chromadb.dev/running/running-chroma/#chroma-cli)

In [11]:
import chromadb
from chromadb.config import Settings

chroma_client = chromadb.HttpClient(host="http://127.0.0.1:8000")

In [12]:
collection = chroma_client.create_collection(name = "good_vibe_documents")
collection.add(embeddings = embeddings, 
               documents = documents, 
               ids = ids)

In [13]:
def query_chromadb(query, top_n = 2):
    query_embedding = get_embedding(query)
    results = collection.query(query_embeddings = [query_embedding], n_results = top_n)
    return [(id, score, text) for id, score, text in zip(results['ids'][0], results['distances'][0], results['documents'][0])]

In [15]:
query = "What is a happiness?"

query_chromadb(query, top_n=3)

[('id6',
  0.31745452,
  'The secret to happiness is freedom... And the secret to freedom is courage.Freedom is the oxygen of the soul.'),
 ('id4',
  0.39661333,
  'Is freedom anything else than the right to live as we wish?'),
 ('id11',
  0.4249202,
  'Friendship is the golden thread that ties the heart of all the world.')]