# Analyzing Data with Amazon Athena using R

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code to help readers work through one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
# Load reticulate so the Python boto3 SDK can be called from R.
library("reticulate")

# Build an Athena client through boto3, bound to the us-east-1 region.
boto3 <- import("boto3")
athena <- boto3$client("athena", region_name = "us-east-1")

In [None]:
# Athena database name and the S3 location used for query results.
database <- "cookbook_athena_db"
results_bucket <- "s3://cookbook-athena-results"

# Query selecting the label and value columns of the table.
query <- "SELECT label, value FROM cookbook_athena_db.athena_table;"

In [None]:
# Start an Athena query and return its execution id.
#
# Args:
#   query: SQL text passed as QueryString.
#   database: database name placed in the QueryExecutionContext.
#   results_bucket: location passed as the ResultConfiguration OutputLocation.
#
# Returns:
#   The QueryExecutionId field of the start_query_execution response.
execute_athena_query <- function(query, database, results_bucket) {
    execution_context <- dict(Database = database)
    result_configuration <- dict(OutputLocation = results_bucket)

    started <- athena$start_query_execution(
        QueryString = query,
        QueryExecutionContext = execution_context,
        ResultConfiguration = result_configuration
    )

    started$QueryExecutionId
}

In [None]:
# Return the S3 location of an Athena query's result file, waiting for the
# query to finish first.
#
# Athena executes queries asynchronously, so the result file is only usable
# once the query has reached the SUCCEEDED state. By default this function
# polls get_query_execution until the query reaches a terminal state, so the
# returned path can be downloaded straight away.
#
# Args:
#   execution_id: the QueryExecutionId of the query to look up.
#   wait: if TRUE (default), poll until the query finishes; if FALSE, return
#     the output location after a single lookup without checking the state.
#   poll_interval: seconds to sleep between two lookups.
#   timeout: maximum number of seconds to wait for the query to finish.
#
# Returns:
#   The OutputLocation field of the query's ResultConfiguration.
#
# Stops with an error when the query ends in the FAILED or CANCELLED state,
# or when it has not finished within `timeout` seconds.
get_output_path <- function(execution_id, wait = TRUE, poll_interval = 1,
                            timeout = 300) {
    deadline <- Sys.time() + timeout

    repeat {
        query_details <- athena$get_query_execution(
            QueryExecutionId = execution_id
        )
        execution <- query_details$QueryExecution
        state <- execution$Status$State

        # Nothing to wait for: waiting disabled, no state reported by the
        # response, or the query has completed successfully.
        if (!isTRUE(wait) || is.null(state) || identical(state, "SUCCEEDED")) {
            break
        }

        if (state %in% c("FAILED", "CANCELLED")) {
            reason <- execution$Status$StateChangeReason
            if (is.null(reason)) {
                reason <- "no reason reported"
            }
            stop(
                "Athena query ", execution_id, " ended in state ", state,
                ": ", reason,
                call. = FALSE
            )
        }

        if (Sys.time() >= deadline) {
            stop(
                "Athena query ", execution_id, " did not finish within ",
                timeout, " seconds (last state: ", state, ")",
                call. = FALSE
            )
        }

        Sys.sleep(poll_interval)
    }

    execution$ResultConfiguration$OutputLocation
}

In [None]:
# Run a shell command, echoing the command itself and every output line.
#
# Args:
#   bash_command: command line handed to system().
#
# Returns:
#   The last line of the command's output, or "" when there was no output.
cmd <- function(bash_command) {
    print(bash_command)
    captured <- system(bash_command, intern = TRUE)

    # Echo each captured line, one per line.
    writeLines(captured)

    line_count <- length(captured)
    if (line_count == 0) {
        return("")
    }
    captured[[line_count]]
}

In [None]:
# Create the local tmp directory; -p makes this a no-op if it already exists.
cmd(bash_command = "mkdir -p tmp")

In [None]:
# Start the query, then look up where its result file is written.
execution_id <- execute_athena_query(
    query = query,
    database = database,
    results_bucket = results_bucket
)
output_path <- get_output_path(execution_id = execution_id)
output_path

In [None]:
# Copy the query result from S3 into the local tmp directory.
command <- paste("aws s3 cp", output_path, "tmp/output.csv", sep = " ")
cmd(bash_command = command)

In [None]:
# Load the downloaded CSV (first row holds the column names) and display it.
df <- read.csv(file = "tmp/output.csv", header = TRUE)
df

In [None]:
# Athena query that declares detect_anomaly as an external function backed by
# the 'sagemaker-cookbook-rcf' SageMaker endpoint, and returns its result as
# anomaly_score next to the label and value columns.
query <- "
USING EXTERNAL FUNCTION detect_anomaly(value INT)
    RETURNS DOUBLE
    SAGEMAKER 'sagemaker-cookbook-rcf'
SELECT label, value, detect_anomaly(value) AS anomaly_score
    FROM cookbook_athena_db.athena_table"

In [None]:
# Submit the current query and look up the location of its result file.
execution_id <- execute_athena_query(
    query = query,
    database = database,
    results_bucket = results_bucket
)
output_path <- get_output_path(execution_id = execution_id)
output_path

In [None]:
# Download the result file from S3, overwriting tmp/output.csv.
command <- paste("aws s3 cp", output_path, "tmp/output.csv", sep = " ")
cmd(bash_command = command)

In [None]:
# Read the downloaded CSV into a data frame (header row included) and show it.
df <- read.csv(file = "tmp/output.csv", header = TRUE)
df

In [None]:
# Keep only the rows whose anomaly_score exceeds 2; which() drops NA scores.
df[with(df, which(anomaly_score > 2)), , drop = FALSE]