In [1]:
import json
import logging
import uuid
import boto3
from botocore.exceptions import ClientError

In [2]:
# Notebook-level logger used by the S3 helper functions defined below.
logger = logging.getLogger(__name__)

In [3]:
# Shared S3 service resource created without an explicit region.
# get_s3() hands this object back whenever no other region is requested.
s3_resource = boto3.resource('s3')

In [4]:
def get_s3(region=None):
    """Return an S3 service resource suitable for ``region``.

    The notebook-level ``s3_resource`` is reused when ``region`` is empty or
    when that shared resource is already bound to the requested region.
    Otherwise a fresh resource is built for ``region`` on each call.

    :param region: AWS region name, or None to accept the shared resource.
    :return: A boto3 S3 resource.
    """
    # Only read access to the shared resource is needed here.
    if region and s3_resource.meta.client.meta.region_name != region:
        return boto3.resource('s3', region_name=region)
    return s3_resource

In [5]:
def create_bucket(name, region=None):
    """Create an S3 bucket and block until it exists.

    :param name: Name of the bucket to create.
    :param region: Region to create the bucket in. When given, it is sent as
                   the bucket's ``LocationConstraint`` and the resource is
                   obtained via ``get_s3(region)``. When omitted, the bucket
                   is created without a bucket configuration.
    :return: The bucket resource returned by ``create_bucket``.
    :raises ClientError: Re-raised, after logging, when the create request
                         fails.
    """
    s3 = get_s3(region)

    try:
        if region:
            bucket = s3.create_bucket(
                Bucket=name,
                CreateBucketConfiguration={
                    'LocationConstraint': region
                }
            )
        else:
            bucket = s3.create_bucket(Bucket=name)

        bucket.wait_until_exists()

        logger.info("Created bucket '%s' in region=%s", bucket.name,
                    s3.meta.client.meta.region_name)
    except ClientError as error:
        logger.exception("Couldn't create bucket name '%s' in region='%s'",
                         name, region)
        # The error code must be spelled exactly as the service returns it;
        # otherwise this explanatory hint is never logged.
        if error.response['Error']['Code'] == 'IllegalLocationConstraintException':
            logger.error("When the session Region is anything other than us-east-1, "
                         "you must specify a LocationConstraint that matches the "
                         "session Region. The current session Region is %s and the "
                         "LocationConstraint Region is %s.",
                         s3.meta.client.meta.region_name, region)
        # Bare raise re-raises the active exception with its original traceback.
        raise
    else:
        return bucket

In [6]:
# Create the demo bucket in us-west-2; the returned Bucket resource is shown as the cell output.
create_bucket('settlers-of-catan-data-2', region='us-west-2')

s3.Bucket(name='settlers-of-catan-data-2')

In [7]:
def bucket_exists(bucket_name):
    """Report whether a bucket can be reached with a HEAD request.

    A ``ClientError`` from ``head_bucket`` is treated as "not available",
    which covers both a missing bucket and one the caller cannot access.

    :param bucket_name: Name of the bucket to probe.
    :return: True when ``head_bucket`` succeeds, False when it raises
             ``ClientError``.
    """
    s3 = get_s3()
    try:
        s3.meta.client.head_bucket(Bucket=bucket_name)
    except ClientError:
        logger.warning("Bucket %s doesn't exist or you don't have access to it.",
                       bucket_name)
        return False
    else:
        logger.info("Bucket %s exists.", bucket_name)
        return True

In [8]:
# Check that the bucket created earlier is reachable with the current credentials.
bucket_exists('settlers-of-catan-data-2')

True

In [9]:
import os

In [10]:
# This helper is not used by the upload cell below; it is kept for reference only.
def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    :param file_name: Path of the local file to upload.
    :param bucket: Name of the destination bucket.
    :param object_name: Destination object key. Defaults to the base name of
                        ``file_name`` when not given.
    :return: True if the upload succeeded, False if S3 reported a failure
             (the error is logged).
    """
    # Imported locally so this helper stays self-contained in its cell.
    from boto3.exceptions import S3UploadFailedError

    # If S3 object_name was not specified, use the file's base name.
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, S3UploadFailedError) as e:
        # The managed transfer reports failed uploads as S3UploadFailedError,
        # so both types are needed to honour the "return False" contract.
        logger.error(e)
        return False
    return True

In [11]:
# Low-level S3 client used by the upload cell that follows.
# This is a client, not the resource object returned by get_s3().
s3 = boto3.client('s3')

In [12]:
# The file is opened in binary mode ('rb') because upload_fileobj reads
# bytes from the file-like object it is given.
with open('employee.csv', 'rb') as f:
    s3.upload_fileobj(f, 'settlers-of-catan-data-2', 'employee.csv')

In [13]:
# DDL for an external table over the comma-delimited text data stored in the
# 'settlers-of-catan-data-2' bucket (col0 is bigint, col1..col12 are strings).
# NOTE(review): this string is not passed to Athena in the visible cells, and
# `query` is reassigned in the next cell. Presumably the table was created
# separately and this is kept for reference; confirm.
query = '''CREATE EXTERNAL TABLE settlers_of_catan_data_3(col0 bigint, 
    col1 string, col2 string, col3 string, col4 string, col5 string, col6 string, 
    col7 string, col8 string, col9 string, col10 string, col11 string, col12 string) 
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT
    'org.apache.hadoop.mapred.TextInputFormat' 
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    LOCATION 's3://settlers-of-catan-data-2/';'''


In [14]:
# Submit a query to Athena against the external table; the results are written
# to a separate S3 bucket given by OutputLocation.
import boto3
ath = boto3.client('athena')

query = 'SELECT * FROM "catan_data"."settlers_of_catan_data_3" limit 10;'

# start_query_execution only submits the query. The response displayed below
# carries a QueryExecutionId and request metadata, not the result rows.
ath.start_query_execution(
    QueryString= query,

    QueryExecutionContext={
            'Database': 'catan_data',
            # NOTE(review): this value looks like an AWS account ID rather than
            # a data catalog name; confirm it is intended.
            'Catalog': '982765153403'
        },

    # NOTE(review): 'qury' may be a misspelling of 'query'; verify against the
    # real bucket name before changing it.
    ResultConfiguration={'OutputLocation': 's3://settlers-of-catan-qury-results-2'},
    WorkGroup='primary'
  
)

{'QueryExecutionId': 'b4aa2f27-147f-48fa-a3f8-03f19abaac0e',
 'ResponseMetadata': {'RequestId': '333a2bd0-b16e-4c34-aab5-4c23c2ce7b71',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Thu, 04 Nov 2021 19:51:21 GMT',
   'x-amzn-requestid': '333a2bd0-b16e-4c34-aab5-4c23c2ce7b71',
   'content-length': '59',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}