In [None]:
pip install -U openai supabase python-dotenv

In [8]:
import os
from datetime import date
from typing import List, Dict, Any
from openai import OpenAI
from supabase import create_client

# ==== CONFIG ====
# Values read with os.getenv fall back to the default shown when the variable is unset.
OPENAI_CHAT_MODEL = os.getenv("OPENAI_CHAT_MODEL", "gpt-4o-mini")
OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")  # 1536-dim
# Required settings: os.environ[...] raises KeyError when the variable is missing.
SUPABASE_URL = os.environ["SUPABASE_URL"]                    # set in your environment
SUPABASE_KEY = os.environ["SUPABASE_KEY"]       # or ANON if your RLS allows it
MATCH_RPC = os.getenv("RAG_MATCH_RPC", "match_documents")    # your RPC name
# This same dict object is the default `flt` argument of retrieve() and answer();
# do not mutate it in place.
DEFAULT_FILTER = {"client_id": "ccs"}                        # mirrors your N8N filter
# Default value for the `k` argument of retrieve() and answer().
TOP_K = int(os.getenv("RAG_TOP_K", "6"))
# Exact reply used when nothing relevant is found; also embedded verbatim in the prompts.
FALLBACK = "I’m sorry, I couldn’t find relevant information based on your documents."

# The date is interpolated once, when this cell runs; re-run the cell in a
# long-lived kernel to refresh it.
SYSTEM_PROMPT = f"""Today is: {date.today().isoformat()}
You are a Retrieval-Augmented Generation (RAG) assistant for CCS. Answer using ONLY the retrieved documents.
If the answer is not in the documents, reply exactly with: "{FALLBACK}".
Be concise, neutral, and factual.
"""

# ==== CLIENTS ====
# Module-level clients shared by the functions below.
oai = OpenAI()  # uses OPENAI_API_KEY
sb = create_client(SUPABASE_URL, SUPABASE_KEY)

# ==== RAG CORE ====
def _embed(text: str) -> List[float]:
    """Embed `text` with the configured OpenAI embedding model.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only requested) item in the response.
    """
    response = oai.embeddings.create(model=OPENAI_EMBEDDING_MODEL, input=text)
    first_item = response.data[0]
    return first_item.embedding

def retrieve(query: str, k: int = TOP_K, flt: Dict[str, Any] = DEFAULT_FILTER) -> List[Dict[str, Any]]:
    """Run the Supabase match RPC for `query` and return its rows.

    Args:
        query: Text that is embedded and sent as `query_embedding`.
        k: Sent to the RPC as `match_count`.
        flt: Sent to the RPC as `filter`.

    Returns:
        The RPC's `data` payload, or an empty list when that payload is empty/None.
    """
    rpc_params = {
        "query_embedding": _embed(query),
        "match_count": k,
        "filter": flt,
    }
    response = sb.rpc(MATCH_RPC, rpc_params).execute()
    matches = response.data
    if not matches:
        return []
    return matches

def _format_context(rows: List[Dict[str, Any]]) -> str:
    parts = []
    for i, r in enumerate(rows, 1):
        meta = r.get("metadata") or {}
        src = meta.get("source") or meta.get("path") or meta.get("url") or f"doc_{i}"
        txt = r.get("content") or r.get("page_content") or r.get("text") or ""
        parts.append(f"[{i}] {src}\n{txt}")
    return "\n\n".join(parts)

def answer(question: str, k: int = TOP_K, flt: Dict[str, Any] = DEFAULT_FILTER) -> Dict[str, str]:
    """Answer `question` from retrieved documents and list the sources used.

    Args:
        question: The user's question. A blank question (None, empty, or only
            whitespace) is answered with the fallback without calling any API.
        k: Passed to retrieve() as the match count.
        flt: Passed to retrieve() as the metadata filter.

    Returns:
        A dict with two string entries:
        - "answer": the model's reply, or FALLBACK when the question is blank,
          nothing was retrieved, or the model returned no text.
        - "sources": one "[i] <source>" line per retrieved row, numbered to
          match the context sections sent to the model ("" when no retrieval
          happened or nothing was retrieved).
    """
    # The embeddings endpoint does not accept an empty input string, so a blank
    # question is short-circuited instead of surfacing an API error.
    if not question or not question.strip():
        return {"answer": FALLBACK, "sources": ""}

    rows = retrieve(question, k=k, flt=flt)
    if not rows:
        return {"answer": FALLBACK, "sources": ""}

    context = _format_context(rows)
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
        {"role": "system",
         "content": f'Use ONLY the context below. If it’s insufficient, reply exactly with "{FALLBACK}".\n\n{context}'}
    ]
    resp = oai.chat.completions.create(model=OPENAI_CHAT_MODEL, temperature=0, messages=msgs)
    # message.content is nullable in the chat completions response; fall back
    # so the declared Dict[str, str] return type always holds.
    out = resp.choices[0].message.content or FALLBACK

    # Same source-resolution order as _format_context so the [i] labels line up.
    src_list = []
    for i, r in enumerate(rows, 1):
        meta = r.get("metadata") or {}
        src = meta.get("source") or meta.get("path") or meta.get("url") or f"doc_{i}"
        src_list.append(f"[{i}] {src}")

    return {"answer": out, "sources": "\n".join(src_list)}

In [10]:
# Smoke test: ask one question, then show the reply followed by its numbered sources.
res = answer("What are the ccs upcoming events?")
print(res["answer"])
print("\nSOURCES\n-------")
print(res["sources"] or "—")  # em dash placeholder when no sources were returned

The upcoming CCS events are as follows:

1. **CHRS Annual Meeting 2025**
   - Date: September 12-13, 2025
   - Location: Halifax, NS

2. **Pediatric Cardiology Trainee Review Program – Session 1**
   - Date: October 3, 2025
   - Location: Virtual via Zoom

3. **Pediatric Cardiology Trainee Review Program – Session 2**
   - Date: October 22, 2025
   - Location: In-person at the Canadian Cardiovascular Congress, Quebec City

4. **Canadian Cardiovascular Congress 2025**
   - Date: October 23-26, 2025
   - Location: Quebec City, QC

5. **Adult Cardiology Trainee Review Program**
   - Date: November 28-30, 2025
   - Location: TBD

SOURCES
-------
[1] https://ccs.ca/news/2023-call-for-ccs-guideline-and-clinical-practice-update-topics-now-live/
[2] https://ccs.ca/news/2023-call-for-ccs-guideline-and-clinical-practice-update-topics-now-live/
[3] https://ccs.ca/latest-events/
[4] https://ccs.ca/latest-events/
[5] https://ccs.ca/latest-events/
[6] https://ccs.ca/latest-events/
