In [42]:
from supabase import create_client, Client
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# os.environ.get() returns None for an unset variable, so check before use.
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")

_missing_vars = [
    name
    for name, value in (("SUPABASE_URL", url), ("SUPABASE_KEY", key))
    if not value
]
if _missing_vars:
    # Fail here, naming the variable, instead of deep inside the client library.
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Set them in the environment or in a .env file."
    )

# Shared Supabase client used by the cells below.
supabase: Client = create_client(url, key)


In [43]:
from supabase import create_client, Client
from dotenv import load_dotenv
import os

# NOTE(review): this cell repeats the client setup from the preceding cell;
# re-running it only rebuilds the same client, so one of the two can be deleted.

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# os.environ.get() returns None for an unset variable, so check before use.
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")

_missing_vars = [
    name
    for name, value in (("SUPABASE_URL", url), ("SUPABASE_KEY", key))
    if not value
]
if _missing_vars:
    # Fail here, naming the variable, instead of deep inside the client library.
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Set them in the environment or in a .env file."
    )

# Shared Supabase client used by the cells below.
supabase: Client = create_client(url, key)


In [44]:
# Table that is queried for the campaign records.
table_name = "term_deposit_campaigns"

# Request all columns ("*"), run the query, and keep only the returned payload.
_response = supabase.table(table_name).select("*").execute()
rows = _response.data

In [45]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model; query_rag() calls model.encode() on the user query.
# NOTE(review): the vectors stored in the table were presumably produced by this
# same model — confirm, since similarities across different models are meaningless.
model = SentenceTransformer("all-MiniLM-L6-v2")


In [46]:
import ast
import numpy as np

def _parse_embedding(raw):
    """Convert one stored embedding into a 1-D float32 array.

    Accepts either the textual form (e.g. "[0.1, 0.2, ...]"), which is parsed
    with ast.literal_eval, or an already-decoded sequence of numbers.
    """
    values = ast.literal_eval(raw) if isinstance(raw, str) else raw
    return np.asarray(values, dtype=np.float32)


_vectors = []   # one parsed vector per kept row
row_data = []   # rows kept, index-aligned with the final matrix

for row in rows:
    raw_embedding = row.get("embedding_vector")  # your column name
    if not raw_embedding:
        # No embedding stored for this row (None / empty) -> leave it out.
        continue
    _vectors.append(_parse_embedding(raw_embedding))
    row_data.append(row)

if not _vectors:
    # np.vstack([]) would fail with an opaque message; say what is wrong.
    raise ValueError(
        "No rows with a non-empty 'embedding_vector' were found; "
        "cannot build the embedding matrix."
    )

# Now stack into a matrix: one row per kept record.
embeddings = np.vstack(_vectors)
print(embeddings.shape)


(52, 384)


In [47]:
import numpy as np
def cosine_similarity(a, b):
    """Cosine similarity between each row of ``a`` and the vector ``b``.

    Args:
        a: Array of shape (n, d) with one vector per row. A single 1-D vector
            of length d is also accepted and yields a scalar.
        b: 1-D query vector of length d.

    Returns:
        Array of n similarities (a scalar when ``a`` is 1-D). A zero-length
        vector on either side gives similarity 0 instead of NaN.
    """
    a = np.asarray(a)
    b = np.asarray(b)

    # axis=-1 normalises along the feature axis for both 1-D and 2-D ``a``.
    a_norms = np.linalg.norm(a, axis=-1, keepdims=True)
    # Dividing by a zero norm would produce NaN, and NaN sorts last in
    # np.argsort, i.e. it would be picked as a "best" match. Dividing the
    # all-zero vector by 1 keeps it zero, so its similarity becomes 0.
    a_norms[a_norms == 0] = 1

    b_norm = np.linalg.norm(b)
    if b_norm == 0:
        b_norm = 1

    return np.dot(a / a_norms, b / b_norm)


In [48]:
def query_rag(query, top_k=10):
    """Return the stored rows whose embeddings are most similar to ``query``.

    Args:
        query: Text to encode with the notebook-level ``model``.
        top_k: Maximum number of rows to return. Values <= 0 return an empty
            list; values larger than the number of rows return all rows.

    Returns:
        List of rows from ``row_data``, ordered from most to least similar.
    """
    # A slice like [-0:] selects the whole array, so top_k=0 must be handled
    # explicitly (and a negative top_k would slice arbitrarily).
    if top_k <= 0:
        return []

    query_emb = model.encode(query)
    sims = cosine_similarity(embeddings, query_emb)  # shape: (num_rows,)

    # argsort is ascending: reverse for best-first, then keep the first top_k.
    ranked_idx = np.argsort(sims)[::-1][:top_k]
    return [row_data[i] for i in ranked_idx]


In [49]:
import openai 
# OpenAI-compatible client pointed at the llm7.io endpoint.
client = openai.OpenAI(
    base_url="https://api.llm7.io/v1",
    api_key="unused"  # Or get it for free at https://token.llm7.io/ for higher rate limits.
)

query = "Find customers with high balance. I want top 10"
# NOTE(review): query_rag ranks rows by embedding similarity to the query text,
# not by the numeric value of any column, so these are the 10 most similar
# rows — not necessarily the 10 highest balances in the table.
top_rows = query_rag(query, top_k=10)

# Leave the raw embedding column out of the prompt: it is only needed for
# retrieval and would otherwise fill the context with vector components.
context = "\n".join(
    str({col: val for col, val in r.items() if col != "embedding_vector"})
    for r in top_rows
)

prompt = f"""
You are an AI assistant. You must answer ONLY using the information provided in the "Retrieved Data" section.
Do NOT use outside knowledge. Do NOT guess. Do NOT fill in missing details.

### Retrieved Data:
{context}

### User Question:
{query}
"""

# Single-turn chat completion; the whole prompt is sent as one user message.
response = client.chat.completions.create(
    model='default',
    messages=[{"role": "user", "content": prompt}]
)
print(response.choices[0].message.content)


Based on the retrieved data, the customers with the highest balances (top 10) are:

1. **ID: 342** – Balance: **693**
2. **ID: 557** – Balance: **471**
3. **ID: 279** – Balance: **570**
4. **ID: 179** – Balance: **214**
5. **ID: 455** – Balance: **270**
6. **ID: 796** – Balance: **144**
7. **ID: 998** – Balance: **61**
8. **ID: 832** – Balance: **19**
9. **ID: 28** – Balance: **113**
10. **ID: 895** – Balance: **56**

*(Sorted in descending order of balance.)*
