In [1]:
"""
Topic: AWS Athena Basics (Querying S3 Data via SQL)
==================================================
Demonstrates how to run SQL queries on S3 data using Athena
via Python (boto3) and read results into pandas.
"""

import boto3
import time
import pandas as pd
import logging

# -------------------------------------------------------------------
# Step 1 ‚Äì Initialize Athena and S3 Clients
# -------------------------------------------------------------------
athena = boto3.client("athena")
s3 = boto3.client("s3")

DATABASE = "supplychain_catalog"
OUTPUT_S3 = "s3://aws-athena-query-results-demo/"
QUERY = """
SELECT region, COUNT(*) AS delivered_shipments
FROM trusted_shipments
WHERE status = 'Delivered'
GROUP BY region
ORDER BY delivered_shipments DESC;
"""

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
logger = logging.getLogger()

# -------------------------------------------------------------------
# Step 2 ‚Äì Execute Athena Query
# -------------------------------------------------------------------
def run_athena_query(query, database, output_s3):
    logger.info("üöÄ Starting Athena query...")
    response = athena.start_query_execution(
        QueryString=query,
        QueryExecutionContext={"Database": database},
        ResultConfiguration={"OutputLocation": output_s3}
    )
    execution_id = response["QueryExecutionId"]
    logger.info(f"üÜî Query Execution ID: {execution_id}")
    return execution_id

# -------------------------------------------------------------------
# Step 3 ‚Äì Wait for Query Completion
# -------------------------------------------------------------------
def wait_for_query(execution_id):
    logger.info("‚è≥ Waiting for query to complete...")
    while True:
        status = athena.get_query_execution(QueryExecutionId=execution_id)
        state = status["QueryExecution"]["Status"]["State"]
        if state in ["SUCCEEDED", "FAILED", "CANCELLED"]:
            break
        time.sleep(2)
    logger.info(f"‚úÖ Query State: {state}")
    if state != "SUCCEEDED":
        raise Exception(f"Query failed: {state}")

# -------------------------------------------------------------------
# Step 4 ‚Äì Fetch Results from S3
# -------------------------------------------------------------------
def fetch_results(execution_id):
    logger.info("üì• Fetching query results...")
    result_response = athena.get_query_results(QueryExecutionId=execution_id)
    rows = result_response["ResultSet"]["Rows"]
    headers = [col["VarCharValue"] for col in rows[0]["Data"]]
    data = []
    for row in rows[1:]:
        values = [col.get("VarCharValue", None) for col in row["Data"]]
        data.append(values)
    df = pd.DataFrame(data, columns=headers)
    logger.info("‚úÖ Query results loaded into DataFrame.")
    return df

# -------------------------------------------------------------------
# Step 5 ‚Äì Main Execution Flow
# -------------------------------------------------------------------
def main():
    logger.info("üèÅ Running Athena Demo Script")
    execution_id = run_athena_query(QUERY, DATABASE, OUTPUT_S3)
    wait_for_query(execution_id)
    df = fetch_results(execution_id)
    print("\nüéØ Query Output:")
    print(df)

if __name__ == "__main__":
    main()


NoRegionError: You must specify a region.