# AstraDB Data API Query Verification

This notebook demonstrates how to execute business queries against Astra DB using the Data API (via the `astrapy` client library).

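Prerequisites (provided as Colab secrets or environment variables):
- AstraDB Token (`ASTRA_TOKEN`)
- Database ID (`ASTRA_DB_ID`)
- Region (`ASTRA_DB_REGION`)
- Keyspace name (`ASTRA_KEYSPACE_NAME`)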


In [None]:
!pip install astrapy pandas

In [None]:
import os
import pandas as pd
from astrapy.client import DataAPIClient

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Connection settings come from Colab's secret store when the notebook runs in
# Colab; if that import fails they are read from environment variables instead.
# NOTE(review): userdata.get may raise (rather than return None) when a secret
# is missing or notebook access was not granted, which would bypass the
# friendly warning below. Confirm and handle if needed.
try:
    from google.colab import userdata
    ASTRA_TOKEN = userdata.get('ASTRA_TOKEN')
    ASTRA_DB_ID = userdata.get('ASTRA_DB_ID')
    ASTRA_DB_REGION = userdata.get('ASTRA_DB_REGION')
    ASTRA_KEYSPACE_NAME = userdata.get('ASTRA_KEYSPACE_NAME')
except ImportError:
    ASTRA_TOKEN = os.getenv("ASTRA_TOKEN")
    ASTRA_DB_ID = os.getenv("ASTRA_DB_ID")
    ASTRA_DB_REGION = os.getenv("ASTRA_DB_REGION")
    ASTRA_KEYSPACE_NAME = os.getenv("ASTRA_KEYSPACE_NAME")

# Query Parameters (used by the query cells further down)
ORG_ID = "org_123_test"
START_DATE = "2023-01-01"
END_DATE = "2023-01-31"

# ---------------------------------------------------------
# INITIALIZATION
# ---------------------------------------------------------
# `db` is only created when every setting is present; the query cells check for
# its existence and skip themselves otherwise.
if not ASTRA_TOKEN or not ASTRA_DB_ID or not ASTRA_DB_REGION or not ASTRA_KEYSPACE_NAME:
    print(
        "WARNING: Missing AstraDB Configuration. "
        "Please ensure ASTRA_TOKEN, ASTRA_DB_ID, ASTRA_DB_REGION, and ASTRA_KEYSPACE_NAME are set."
    )
else:
    # Generic Endpoint Construction
    API_ENDPOINT = f"https://{ASTRA_DB_ID}-{ASTRA_DB_REGION}.apps.astra.datastax.com"

    print("Connecting to AstraDB...")
    client = DataAPIClient(token=ASTRA_TOKEN)

    db = client.get_database(
        API_ENDPOINT,
        keyspace=ASTRA_KEYSPACE_NAME
    )

    # Database.name is a method in astrapy (it issues an API call), not a
    # property, so it must be called; interpolating the bare attribute would
    # print a bound-method repr instead of the database name.
    print(
        f"Connected to Database: {db.name()} "
    )


### 1. Daily Cost & Requests by Org and Service (Range Query)
**Requirement**: Daily costs and requests by organization and service within a date range.
**Collection**: `org_daily_usage_by_service`

In [None]:
# Runs only when the initialization cell managed to create `db`.
if 'db' in locals():
    col_usage = db.get_collection("org_daily_usage_by_service")

    # One organization, with the date bounded on both ends (inclusive).
    query_filter = dict(
        org_id=ORG_ID,
        date={"$gte": START_DATE, "$lte": END_DATE},
    )
    # Ask only for the fields this report needs.
    projection = dict.fromkeys(
        ["date", "service", "total_cost_usd", "total_requests"], 1
    )

    print(f"Executing Query 1 on {col_usage.name}...")
    results = col_usage.find(query_filter, projection=projection)

    # Preview: print at most five matching documents.
    for doc in results.limit(5):
        print(doc)


### 2. Top-N Services by Cost
**Requirement**: Top-N services by accumulated cost using a client-side aggregation strategy.
**Collection**: `org_daily_usage_by_service`

In [None]:
# Runs only when the initialization cell managed to create `db`.
if 'db' in locals():
    # Obtain the collection handle here instead of relying on the Query 1 cell
    # having been executed first; this keeps the cell runnable on its own.
    col_usage = db.get_collection("org_daily_usage_by_service")

    # Number of services to report (the "N" in Top-N).
    TOP_N = 10

    # Open-ended range: everything for the organization from START_DATE onward.
    query_filter = {
        "org_id": ORG_ID,
        "date": {"$gte": START_DATE}
    }
    projection = {"service": 1, "total_cost_usd": 1}

    print(f"Executing Query 2 (Fetch) on {col_usage.name}...")
    cursor = col_usage.find(query_filter, projection=projection)

    # Client-side aggregation with pandas: materialize every matching document,
    # then sum the cost per service and keep the largest totals.
    df = pd.DataFrame(list(cursor))

    if not df.empty:
        top_n = df.groupby('service')['total_cost_usd'].sum().nlargest(TOP_N)
        print("\nTop Services by Cost:")
        print(top_n)
    else:
        print("No data found for the given criteria.")


### 3. Tickets Evolution & SLA Breach Rate
**Requirement**: Evolution of critical tickets and SLA breach rate per day.
**Collection**: `tickets_by_org_date`

In [None]:
# Runs only when the initialization cell managed to create `db`.
if 'db' in locals():
    col_tickets = db.get_collection("tickets_by_org_date")

    # Tickets for the organization from START_DATE onward (no upper bound).
    query_filter = dict(org_id=ORG_ID, date={"$gte": START_DATE})

    print(f"Executing Query 3 on {col_tickets.name}...")
    results = col_tickets.find(query_filter)

    # Preview at most five documents; absent fields are shown as None.
    for doc in results.limit(5):
        print(
            "Date:", doc.get("date"),
            "| Critical Tickets:", doc.get("critical_tickets_count"),
        )


### 4. Monthly Revenue
**Requirement**: Monthly revenue, ordered by latest date.
**Collection**: `revenue_by_org_month`

In [None]:
# Runs only when the initialization cell managed to create `db`.
if 'db' in locals():
    col_revenue = db.get_collection("revenue_by_org_month")

    query_filter = dict(org_id=ORG_ID)
    # Newest period first: descending by year, then by month within the year.
    sort = dict(year=-1, month=-1)

    print(f"Executing Query 4 on {col_revenue.name}...")
    # The limit of 12 caps the listing at twelve monthly documents.
    results = col_revenue.find(
        query_filter,
        sort=sort,
        limit=12,
    )

    for doc in results:
        print(f"Period: {doc.get('year')}-{doc.get('month')} | Revenue: ${doc.get('total_revenue_usd')}")


### 5. GenAI Tokens & Cost
**Requirement**: Daily GenAI usage stats.
**Collection**: `genai_tokens_by_org_date`

In [None]:
# Runs only when the initialization cell managed to create `db`.
if 'db' in locals():
    col_genai = db.get_collection("genai_tokens_by_org_date")

    # GenAI usage for the organization from START_DATE onward (no upper bound).
    query_filter = dict(org_id=ORG_ID, date={"$gte": START_DATE})

    print(f"Executing Query 5 on {col_genai.name}...")
    results = col_genai.find(query_filter)

    # Preview at most five documents; absent fields are shown as None.
    for doc in results.limit(5):
        print(
            "Date:", doc.get("date"),
            "| Tokens:", doc.get("total_tokens"),
            "| Cost:", doc.get("genai_cost_usd"),
        )
