In [None]:
import boto3, botocore
from botocore.exceptions import ClientError
import os, time, json, io, zipfile, requests
from datetime import date
from dotenv import load_dotenv


from misc import load_from_yaml, save_to_yaml
import iam, s3, eventbridge, lambdafn as lfn

from ec2 import ALL_IN_ONE_INBOUND_RULES, ALL_IN_ONE_OUTBOUND_RULES

# Load variables from the local .env file before the os.environ lookups below.
# NOTE(review): the recorded output of this cell is False — presumably nothing
# was loaded from .env; confirm the file exists next to the notebook.
load_dotenv(dotenv_path=".env")
# boto3.setup_default_session(profile_name="AMominNJ")

False

In [17]:
# Account and network settings, all read from the environment.
# A missing variable raises KeyError here.
ACCOUNT_ID = os.environ["AWS_ACCOUNT_ID_ROOT"]
REGION = os.environ["AWS_DEFAULT_REGION"]
VPC_ID = os.environ["AWS_DEFAULT_VPC"]
SECURITY_GROUP_ID = os.environ["AWS_DEFAULT_SG_ID"]

# AWS_DEFAULT_SUBNET_IDS is a colon-separated list; the first entry is the default subnet.
SUBNET_IDS = os.environ["AWS_DEFAULT_SUBNET_IDS"].split(":")
SUBNET_ID = SUBNET_IDS[0]
print(SUBNET_IDS)

['subnet-0a972b05a5b162feb', 'subnet-0ca765b361e4cb186', 'subnet-0de97821ddb8236f7', 'subnet-0a160fbe0fcafe373', 'subnet-0980ad10eb313405b']


In [18]:
# Service clients built from the default boto3 session (no explicit region is passed).
sts_client = boto3.client("sts")
rds_client = boto3.client("rds")
iam_client = boto3.client("iam")
s3_client = boto3.client("s3")
glue_client = boto3.client("glue")
lakeformation_client = boto3.client("lakeformation")
stepfunctions_client = boto3.client("stepfunctions")
lfn_client = boto3.client("lambda")  # Lambda
events_client = boto3.client("events")  # EventBridge

In [19]:
# EC2 handles are pinned to REGION explicitly; the MSK ("kafka") client is
# created without an explicit region.
ec2_client = boto3.client("ec2", region_name=REGION)
ec2_resource = boto3.resource("ec2", region_name=REGION)
msk_client = boto3.client("kafka")

- [Boto3 Docs: AWS Athena](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/athena.html)

### [Automate Daily Data Analysis with Amazon Athena, AWS Lambda, & S3](https://www.youtube.com/watch?v=72Qalunaru4)

<!-- <div><img src="./images/automate_daily_data_analysis.png" width="400px" length="400px"></div> -->
![](./images/automate_daily_data_analysis1.png)
![](./images/automate_daily_data_analysis2.png)

#### S3

In [None]:
# TODO: fill in the bucket name before running the cells below; it is an empty
# placeholder here.
S3_BUCKET_NAME = ""

# Top-level prefixes ("folders") used inside the bucket.
raw, athena, processed = "raw", "athena", "processed"


In [None]:
# Create the bucket; the three prefix names are handed to the project helper as
# `folders` (presumably created as top-level prefixes — confirm in s3.create_s3_bucket).
s3.create_s3_bucket(
    S3_BUCKET_NAME,
    folders=[raw, athena, processed],
)

In [None]:
# Upload the local CSV under the raw/ prefix, keeping the source file name as the
# object name (the previous key had a stray leading "r": "rkombucha_casks.csv").
s3.upload_file_to_s3(S3_BUCKET_NAME, "./kombucha_casks.csv", f"{raw}/kombucha_casks.csv")

#### Athena

In [None]:
def create_athena_database(database_name, output_location, poll_interval=2):
    """
    Creates an AWS Athena database and waits for the DDL query to finish.

    Parameters:
    database_name (str): The name of the database to create. It is interpolated
        into the DDL statement as-is.
    output_location (str): S3 bucket location for Athena query results, e.g., "s3://your-output-bucket/"
    poll_interval (float): Seconds to sleep between status checks. Defaults to 2.

    Returns:
    dict | None: The `start_query_execution` response. It is returned whether the
    query ended as SUCCEEDED, FAILED or CANCELLED (the outcome is printed).
    None is returned when any exception is raised while talking to Athena.
    """
    athena_client = boto3.client('athena')
    terminal_states = ('SUCCEEDED', 'FAILED', 'CANCELLED')

    # SQL query to create the database
    query = f"CREATE DATABASE IF NOT EXISTS {database_name};"

    try:
        # Start query execution
        response = athena_client.start_query_execution(
            QueryString=query,
            ResultConfiguration={
                'OutputLocation': output_location
            }
        )

        # Get the query execution ID
        query_execution_id = response['QueryExecutionId']
        print(f"Query Execution ID: {query_execution_id}")

        # Poll until the query reaches a terminal state
        while True:
            execution = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
            status_info = execution['QueryExecution']['Status']
            status = status_info['State']

            if status in terminal_states:
                break

            print(f"Query is still running... (Status: {status})")
            time.sleep(poll_interval)

        # Check final status
        if status == 'SUCCEEDED':
            print(f"Database '{database_name}' created successfully.")
        else:
            # Surface Athena's explanation so a failed/cancelled query can be diagnosed.
            reason = status_info.get('StateChangeReason', 'no reason reported')
            print(f"Failed to create database. Status: {status}. Reason: {reason}")

        return response

    except Exception as e:
        # Best-effort helper for notebook use: report the error instead of raising.
        print(f"Error creating Athena database: {str(e)}")
        return None

In [None]:
athena_database_name = "example_database"
# Athena query results are written under the bucket's athena/ prefix.
output_location = "s3://{}/{}/".format(S3_BUCKET_NAME, athena)
create_athena_database(athena_database_name, output_location)


#### IAM

In [None]:
# Name of the IAM role the Lambda function will run as.
LFN_ROLE_NAME = "athena-lambda-role"

In [None]:
# Trust policy: lets the Lambda service assume the role.
assume_role_policy_doc = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": ["sts:AssumeRole"],
            "Principal": {"Service": ["lambda.amazonaws.com"]},
        }
    ],
}

In [None]:
# AWS managed policies attached to the Lambda role.
# NOTE(review): the third entry is the API Gateway logging policy; for a Lambda
# role, service-role/AWSLambdaBasicExecutionRole is presumably what was intended
# — confirm before changing. The S3/Athena "FullAccess" policies are also broad.
policy_arns = [
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    "arn:aws:iam::aws:policy/AmazonAthenaFullAccess",
    "arn:aws:iam::aws:policy/service-role/AmazonAPIGatewayPushToCloudWatchLogs",
]

In [None]:
# Create the Lambda execution role and keep its ARN.
# If the role already exists (e.g. the cell is re-run), reuse it instead of
# failing, so the notebook survives Restart & Run All.
try:
    LFN_ROLE_ARN = iam_client.create_role(
        RoleName=LFN_ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
        Description="Lambda execution role for the Athena automation function"
    )['Role']['Arn']
except iam_client.exceptions.EntityAlreadyExistsException:
    LFN_ROLE_ARN = iam_client.get_role(RoleName=LFN_ROLE_NAME)['Role']['Arn']

In [None]:
# Attach each AWS managed policy to the role.
# A plain loop is used because the calls are made only for their side effect;
# a list comprehension would also echo every API response as cell output.
for policy_arn in policy_arns:
    iam_client.attach_role_policy(RoleName=LFN_ROLE_NAME, PolicyArn=policy_arn)

#### Lambda

In [30]:
# Bundle the handler script into a deployment package in the current directory
# (the recorded output reports "package.zip").
lfn.create_lambda_package(
    lfn_scripts=["./lambdafunction.py"],
    package_dir=".",
)

Package created successfully at package.zip


In [None]:
# Name of the Lambda function to create.
LFN_NAME = "athena-lfn"

In [None]:
# Create the function via the project helper and keep the returned value as its ARN.
# NOTE(review): "." and "lambda_handler" are presumably the package directory and
# handler name — confirm against lambdafn.create_lambda_function.
LFN_ARN = lfn.create_lambda_function(
    LFN_NAME,
    LFN_ROLE_ARN,
    ".",
    "lambda_handler",
)
print(LFN_ARN)

#### EventBridge

In [None]:
# EventBridge schedule settings.
event_rule_name = "lfn-athena"

# Trigger every 5 minutes. Alternative, every day at 8PM:
# schedule_expression = "cron(0 20 * * ? *)"
schedule_expression = "rate(5 minutes)"

# Keep this text in sync with schedule_expression if the schedule changes.
description = "A rule to trigger an event every 5 minutes."

In [None]:
# Create the schedule rule and keep its ARN.
event_rule_arn = eventbridge.create_eventbridge_schedule_rule(
    event_rule_name, schedule_expression, description
)["RuleArn"]

# Target definition: the Lambda function, identified by "<rule name>-target".
target_input = {
    "Arn": LFN_ARN,
    "Id": "{}-target".format(event_rule_name),
}

# Add the target to the rule on the default event bus.
events_client.put_targets(
    Rule=event_rule_name,
    EventBusName="default",
    Targets=[target_input],
)


In [None]:
# Grant EventBridge permission to invoke the Lambda function.
# A StatementId that already exists on the function is rejected with
# ResourceConflictException, so that case is tolerated to keep the cell re-runnable.
try:
    lfn_client.add_permission(
        FunctionName=LFN_NAME,
        StatementId=f"{event_rule_name}-invoke-permission",
        Action="lambda:InvokeFunction",
        Principal="events.amazonaws.com",
        SourceArn=event_rule_arn
    )
except lfn_client.exceptions.ResourceConflictException:
    print(f"Permission '{event_rule_name}-invoke-permission' already exists on '{LFN_NAME}'; skipping.")