<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/JustCallAPICall_today.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import os
import time
from datetime import datetime
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

import pandas as pd
import requests

# ────────────────────────────────────────────────────────────
# 1) CONFIG
# ────────────────────────────────────────────────────────────
# Credentials are taken from the environment when present so they can be
# supplied without editing this file.
# NOTE(review): the literal fallbacks below are secrets committed to source
# control — rotate them and delete the fallbacks once JUSTCALL_API_KEY /
# JUSTCALL_API_SECRET are configured wherever this notebook runs.
API_KEY    = os.environ.get("JUSTCALL_API_KEY") or "cc7718b616f3be5e663be9f132548cbf083fc5e9"
API_SECRET = os.environ.get("JUSTCALL_API_SECRET") or "1f26c3c1e9bbf56324f5f9ddb70bab81b42cff38"

# Read the clock once so both ends of the window always name the same day,
# even when this cell happens to run across midnight.
_TODAY    = datetime.now().strftime("%Y-%m-%d")
DATE_FROM = _TODAY
DATE_TO   = _TODAY

BASE_URL             = "https://api.justcall.io/v2.1/calls"
MAX_CALLS_PER_MIN    = 28         # pacing budget
MAX_RETRIES          = 8          # attempts per URL before safe_get gives up
BACKOFF_FACTOR       = 2          # base of the exponential 429 backoff (seconds)
REQUEST_TIMEOUT      = 20         # passed as `timeout=` to every GET
PAGE_GUARD_LIMIT     = 10_000     # absolute safety cap on pages

# Query parameters sent with the first request and re-attached to every
# next_page_link by list_calls.
DEFAULT_FLAGS = {
    "fetch_queue_data": "false",
    "fetch_ai_data":    "false",
    "sort":             "id",
    "order":            "desc",
    "per_page":         100
}

# ────────────────────────────────────────────────────────────
# 2) SESSION
# ────────────────────────────────────────────────────────────
# One shared session so every request reuses the same credentials.
session = requests.Session()
session.auth = (API_KEY, API_SECRET)
# NOTE(review): `session.auth` is applied by requests while preparing each
# request and presumably replaces the Authorization header set here with a
# Basic one — confirm which scheme the API expects and drop the other.
session.headers.update({"Authorization": f"{API_KEY}:{API_SECRET}"})


# ────────────────────────────────────────────────────────────
# helpers
# ────────────────────────────────────────────────────────────
def _pace(last_ts: list[float]):
    """Simple token bucket: ensure <= MAX_CALLS_PER_MIN per 60s."""
    if not last_ts:
        last_ts.append(time.monotonic())
        return
    now = time.monotonic()
    elapsed = now - last_ts[0]
    # Remove timestamps older than 60s
    if elapsed >= 60:
        last_ts.clear()
        last_ts.append(now)
        return
    if len(last_ts) >= MAX_CALLS_PER_MIN:
        sleep_for = 60 - elapsed
        if sleep_for > 0:
            time.sleep(sleep_for)
        last_ts.clear()
        last_ts.append(time.monotonic())
    else:
        last_ts.append(now)

def _merge_query(url: str, extra: dict) -> str:
    """Return `url` with the entries of `extra` folded into its query string.

    Parameters already present in the URL are kept (blank values included);
    any key that also appears in `extra` takes the value from `extra`, so a
    key never ends up in the result twice.
    """
    parts = urlparse(url)

    merged: dict = {}
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        merged[key] = value
    merged.update(extra)

    rebuilt = parts._replace(query=urlencode(merged, doseq=True))
    return urlunparse(rebuilt)


# ────────────────────────────────────────────────────────────
# 3) Smart GET with retry/429 handling
# ────────────────────────────────────────────────────────────
def _retry_after_seconds(header_value: str | None, fallback: float) -> int:
    """Turn a Retry-After header value into whole seconds to wait (at least 1).

    Accepts a number of seconds (integer or fractional) or an HTTP-date.
    A missing or unparseable value yields `fallback` instead.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    seconds = float(fallback)
    if header_value:
        raw = header_value.strip()
        try:
            seconds = float(raw)
        except ValueError:
            # Not a number: try the HTTP-date form.
            try:
                when = parsedate_to_datetime(raw)
            except (TypeError, ValueError):
                when = None
            if when is not None:
                if when.tzinfo is None:
                    when = when.replace(tzinfo=timezone.utc)
                seconds = (when - datetime.now(timezone.utc)).total_seconds()
    if not math.isfinite(seconds):
        seconds = float(fallback)
    return max(math.ceil(seconds), 1)


def safe_get(url: str, params: dict | None = None) -> dict:
    """GET `url` through the shared session, retrying while the server answers 429.

    Args:
        url: Absolute URL to request.
        params: Optional query parameters handed to requests.

    Returns:
        The decoded JSON body of the first non-429 response.

    Raises:
        requests.HTTPError: For a non-429 error status (via raise_for_status).
        RuntimeError: When all MAX_RETRIES attempts were answered with 429.
    """
    for attempt in range(MAX_RETRIES):
        r = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
        if r.status_code != 429:
            r.raise_for_status()
            return r.json()
        # Honour the server's hint when it is usable; otherwise back off
        # exponentially. A malformed header must not abort the whole run.
        wait = _retry_after_seconds(r.headers.get("Retry-After"), BACKOFF_FACTOR ** attempt)
        print(f"429 → wait {wait}s (retry {attempt+1}/{MAX_RETRIES})")
        time.sleep(wait)
    raise RuntimeError(f"gave up after {MAX_RETRIES} retries → {url}")


# ────────────────────────────────────────────────────────────
# 4) Paginated call listing (robust)
# ────────────────────────────────────────────────────────────
def list_calls(date_from: str, date_to: str) -> list[dict]:
    """
    Fetch every call in the date window, following next_page_link pagination.

    Robust pagination:
      • re-attaches from_datetime/to_datetime + per_page/sort/order on every hop
      • detects URL cycles
      • dedupes by call id (across pages and within a page) and stops when a
        page adds 0 new rows
      • respects the module-level PAGE_GUARD_LIMIT and basic pacing

    Args:
        date_from: Value sent as the `from_datetime` query parameter.
        date_to: Value sent as the `to_datetime` query parameter.

    Returns:
        The call dicts found under each response's "data" key, unique by
        "id". Rows without an "id" cannot be deduplicated and are kept as-is.

    Raises:
        RuntimeError: If more than PAGE_GUARD_LIMIT pages would be fetched.
        Anything raised by safe_get is propagated.
    """
    # Parameters that must survive every hop; built once, reused per page.
    sticky_params = DEFAULT_FLAGS | {"from_datetime": date_from, "to_datetime": date_to}

    all_rows: list[dict] = []
    seen_urls: set[str] = set()
    seen_ids: set[str | int] = set()
    last_ts: list[float] = []
    page_no = 0

    # first URL = base + flags + window
    url = _merge_query(BASE_URL, sticky_params)

    while url:
        page_no += 1
        if page_no > PAGE_GUARD_LIMIT:
            raise RuntimeError(f"page guard tripped after {PAGE_GUARD_LIMIT} pages; aborting")

        if url in seen_urls:
            print(f"⚠️  next_page_link repeated (cycle) at page {page_no}; stopping.")
            break
        seen_urls.add(url)

        _pace(last_ts)

        data = safe_get(url, params=None)  # url already has query params
        rows = data.get("data", []) or []

        # Dedupe row by row so a repeated id is caught even when both copies
        # arrive on the same page.
        ids = []
        new_rows = []
        for row in rows:
            call_id = row.get("id")
            if call_id is None:
                new_rows.append(row)
                continue
            ids.append(call_id)
            if call_id not in seen_ids:
                seen_ids.add(call_id)
                new_rows.append(row)

        min_id = min(ids) if ids else None
        max_id = max(ids) if ids else None
        print(f"• page {page_no}: got {len(rows)} calls, {len(new_rows)} new "
              f"(min_id={min_id}, max_id={max_id})")

        # Extend with only-new rows (prevents runaway duplicates)
        all_rows.extend(new_rows)

        # Stop conditions
        if not rows:
            print("• empty page; done.")
            break
        if not new_rows:
            print("• page added 0 new IDs; likely loop/duplicate page; stopping.")
            break

        next_url = data.get("next_page_link")
        if not next_url:
            print("• no next_page_link; done.")
            break

        # Re-attach both paging flags and the date window (some APIs drop them)
        next_url = _merge_query(next_url, sticky_params)

        if next_url == url:
            print("⚠️  next_page_link equals current URL; stopping to avoid loop.")
            break

        url = next_url

    print(f"✅ fetched total {len(all_rows)} unique calls across {page_no} page(s)")
    return all_rows



# ────────────────────────────────────────────────────────────
# 5) Flatten into DataFrame
# ────────────────────────────────────────────────────────────
def flatten(details: list[dict]) -> pd.DataFrame:
    """Project raw call dicts onto a flat, fixed set of columns.

    Top-level fields are copied under their own names ("id" becomes
    "call_id"), the nested "call_info" fields are lifted to the top level,
    "call_traits" is stored as a JSON string, and every row is stamped with
    today's local date in "date_ingested". Missing fields become None.
    An empty input yields an empty DataFrame.
    """
    ingest_day = datetime.now().strftime("%Y-%m-%d")

    # Copied verbatim from the call dict, in output column order.
    top_keys = (
        "call_sid",
        "contact_number",
        "contact_name",
        "contact_email",
        "justcall_number",
        "justcall_line_name",
        "agent_id",
        "agent_name",
        "agent_email",
        "agent_active",
        "call_date",
        "call_time",
        "call_user_date",
        "call_user_time",
        "cost_incurred",
    )
    # Lifted out of the nested call_info dict, in output column order.
    info_keys = (
        "direction",
        "type",
        "missed_call_reason",
        "status",
        "disposition",
        "notes",
        "rating",
        "recording",
        "recording_child",
        "voicemail_transcription",
    )

    def _record(call: dict) -> dict:
        info = call.get("call_info", {}) or {}
        record = {"call_id": call.get("id")}
        record.update((key, call.get(key)) for key in top_keys)
        record.update((key, info.get(key)) for key in info_keys)
        record["call_traits"] = json.dumps(info.get("call_traits") or [])
        record["date_ingested"] = ingest_day
        return record

    return pd.DataFrame([_record(call) for call in details])


# ────────────────────────────────────────────────────────────
# 6) Main workflow
# ────────────────────────────────────────────────────────────
def run_ingestion():
    """Fetch the calls for the configured DATE_FROM → DATE_TO window.

    Prints progress along the way and returns the flattened DataFrame
    produced by `flatten` from the rows `list_calls` returned.
    """
    print(f"\n⏳ Fetching calls {DATE_FROM} → {DATE_TO} …")
    fetched = list_calls(DATE_FROM, DATE_TO)
    fetched_count = len(fetched)
    print(f"✓ {fetched_count} calls fetched")

    frame = flatten(fetched)
    frame_rows = len(frame)
    print(f"\n🏁 finished – {frame_rows} rows in final dataframe")
    return frame


# ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Keep the result in a module-level name: the DB-load cell further down
    # starts from `df_calls`.
    df_calls = run_ingestion()
    # df_calls.to_csv("justcall_calls.csv", index=False)


In [None]:
import os
from datetime import datetime, timezone

import pandas as pd
import sqlalchemy                       # <- new (needed only if you add dtype=)
from sqlalchemy import create_engine, inspect, text

# ───────────── DB config ─────────────
# The connection URL is taken from the environment when present.
# NOTE(review): the literal fallback embeds database credentials in source
# control — rotate them and delete the fallback once GIST_DB_URL is
# configured wherever this notebook runs.
DB_URL = os.environ.get("GIST_DB_URL") or (
    "postgresql://airbyte_user:airbyte_user_password@"
    "gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com:5432/gw_prod"
)
engine = create_engine(DB_URL)
TABLE_SCHEMA = "gist"
TABLE_NAME   = "gist_justcallcalldetails"
VIEW_NAME    = "vw_justcallcalldetails"

# ───────────── DataFrame from ingestion ─────────────
df = df_calls.copy()                    # work on a copy of the ingestion result
if df.empty:
    print("🛑 No new data to insert."); raise SystemExit

df["date_ingested"] = datetime.now(timezone.utc).date()   # keep stamp in UTC

try:
    # Decide "append vs. create" from an explicit existence check. Any other
    # failure (network, permissions, bad insert) must propagate instead of
    # being mistaken for a missing table and answered by replacing it.
    if inspect(engine).has_table(TABLE_NAME, schema=TABLE_SCHEMA):
        # 1️⃣  pull existing call_ids (small result set, OK for now)
        with engine.connect() as conn:
            existing = {row[0] for row in conn.execute(
                text(f"SELECT call_id FROM {TABLE_SCHEMA}.{TABLE_NAME}")
            )}
        print(f"📦 existing rows in DB: {len(existing)}")

        # 2️⃣  filter out duplicates
        # NOTE(review): isin() only matches when the stored call_id values
        # and the DataFrame's share a type — confirm the column type.
        df_new = df[~df["call_id"].isin(existing)]
        print(f"🆕 rows to insert: {len(df_new)}")

        # 3️⃣  append
        if not df_new.empty:
            df_new.to_sql(
                name=TABLE_NAME,
                con=engine,
                schema=TABLE_SCHEMA,
                if_exists="append",
                index=False,
                method="multi"
                # dtype={"campaign": sqlalchemy.dialects.postgresql.JSONB,
                #        "call_info": sqlalchemy.dialects.postgresql.JSONB}
            )
            print("✅ new rows appended.")
        else:
            print("🛑 nothing new to append.")
    else:
        # table missing → create from scratch
        print("📭 table absent → creating afresh.")
        df.to_sql(
            name=TABLE_NAME,
            con=engine,
            schema=TABLE_SCHEMA,
            if_exists="fail",   # never drop a table that appeared meanwhile
            index=False,
            method="multi"
        )
        print(f"✅ table {TABLE_SCHEMA}.{TABLE_NAME} created.")

    # 4️⃣  make / refresh view
    with engine.begin() as conn:
        conn.execute(text(f"""
            CREATE OR REPLACE VIEW {TABLE_SCHEMA}.{VIEW_NAME} AS
            SELECT *
            FROM   {TABLE_SCHEMA}.{TABLE_NAME};
        """))
    print(f"🪟 view {TABLE_SCHEMA}.{VIEW_NAME} refreshed.")
finally:
    # Release pooled connections whether or not the load succeeded.
    engine.dispose()
