# Analyzing Data with Amazon Athena using Python

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code to help readers work through one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
import boto3

In [None]:
# Athena client shared by the helper functions below, pinned to us-east-1.
athena = boto3.client(
    service_name='athena',
    region_name='us-east-1',
)

In [None]:
# Name of the S3 bucket that is later passed to Athena as the query output location.
athena_results_bucket = "cookbook-athena-results"

# Create the bucket with the AWS CLI; IPython substitutes {athena_results_bucket}
# with the value of the Python variable before running the shell command.
!aws s3 mb s3://{athena_results_bucket}

In [None]:
# Inputs for execute_athena_query: the database to run in, the S3 URI where
# Athena writes its results, and the SQL text of the first query.
database = "cookbook_athena_db"
results_bucket = "s3://" + athena_results_bucket
query = "SELECT label, value FROM cookbook_athena_db.athena_table;"

In [None]:
def execute_athena_query(query, database, results_bucket):
    """Submit a SQL query to Athena and return its execution ID.

    Args:
        query: SQL text to run.
        database: Name of the database used as the query execution context.
        results_bucket: S3 URI passed to Athena as the output location.

    Returns:
        The ``QueryExecutionId`` from the ``start_query_execution`` response.
    """
    # Assemble the request first so the call itself stays a one-liner.
    request = {
        'QueryString': query,
        'QueryExecutionContext': {'Database': database},
        'ResultConfiguration': {'OutputLocation': results_bucket},
    }
    started = athena.start_query_execution(**request)
    return started['QueryExecutionId']

In [None]:
def get_output_path(execution_id, *, wait=True, poll_interval=1.0):
    """Return the S3 location of the result file for an Athena query.

    Athena runs queries asynchronously: ``start_query_execution`` returns as
    soon as the query is accepted, and the result file is only written once
    the query reaches the ``SUCCEEDED`` state. By default this function
    therefore polls the execution until it finishes, so the returned path can
    be copied from S3 straight away.

    Args:
        execution_id: The ``QueryExecutionId`` of a previously started query.
        wait: When true (the default), block until the query reaches a
            terminal state. When false, return the configured output location
            immediately without checking the query status.
        poll_interval: Seconds to sleep between status checks while waiting.

    Returns:
        The ``OutputLocation`` value from the execution's
        ``ResultConfiguration``.

    Raises:
        RuntimeError: If ``wait`` is true and the query ends in the
            ``FAILED`` or ``CANCELLED`` state.
    """
    # Local import keeps this notebook cell runnable on its own.
    import time

    while True:
        query_details = athena.get_query_execution(
            QueryExecutionId=execution_id
        )
        execution = query_details['QueryExecution']
        output_location = execution['ResultConfiguration']['OutputLocation']

        if not wait:
            return output_location

        status = execution['Status']
        state = status['State']

        if state == 'SUCCEEDED':
            return output_location

        if state in ('FAILED', 'CANCELLED'):
            # Surface the failure here instead of letting a later S3 copy
            # fail with a less helpful "object does not exist" error.
            reason = status.get('StateChangeReason', 'no reason reported')
            raise RuntimeError(
                f"Athena query {execution_id} ended in state {state}: {reason}"
            )

        # Still QUEUED or RUNNING: pause before asking again.
        time.sleep(poll_interval)

In [None]:
# Start the query, then look up the S3 location configured for its results.
execution_id = execute_athena_query(query, database, results_bucket)
output_path = get_output_path(execution_id)
# Bare expression: the notebook displays the path as the cell output.
output_path

In [None]:
# Create the local tmp/ directory for downloaded results; -p makes this a no-op if it already exists.
!mkdir -p tmp

In [None]:
# Copy the query result object from S3 to a local CSV file; IPython expands {output_path}.
!aws s3 cp {output_path} tmp/output.csv

In [None]:
import pandas as pd

# Load the downloaded results; the bare expression renders the DataFrame as the cell output.
pd.read_csv("tmp/output.csv")

In [None]:
# Second query: selects the same label and value columns and adds an
# anomaly_score column produced by the external function detect_anomaly,
# which the query binds to the SageMaker resource 'sagemaker-cookbook-rcf'
# (presumably a deployed inference endpoint; confirm it exists before running).
query = """
USING EXTERNAL FUNCTION detect_anomaly(value INT)
    RETURNS DOUBLE
    SAGEMAKER 'sagemaker-cookbook-rcf'
SELECT label, value, detect_anomaly(value) AS anomaly_score
    FROM cookbook_athena_db.athena_table
"""

In [None]:
# Run the redefined query with the same database and results location as before.
execution_id = execute_athena_query(query, database, results_bucket)
output_path = get_output_path(execution_id)
# Bare expression: the notebook displays the new result path as the cell output.
output_path

In [None]:
# Copy the second query's result object from S3 to the same local path used for the first download.
!aws s3 cp {output_path} tmp/output.csv

In [None]:
# Load the downloaded results into a DataFrame and keep it for the next cell.
df = pd.read_csv("tmp/output.csv")
# Bare expression: the notebook renders the DataFrame as the cell output.
df

In [None]:
# Count the rows whose anomaly_score is strictly greater than 2.
len(df.loc[df["anomaly_score"] > 2])