code block 3.1

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": ["s3:ListBucket"],
      "Resource": ["arn:aws:s3:::ec2-testing-for-s3-permissions"]
    },
    {
      "Effect": "Allow",
      "Action": [
        "s3:PutObject",
        "s3:GetObject",
        "s3:DeleteObject"
      ],
      "Resource": ["arn:aws:s3:::ec2-testing-for-s3-permissions/*"]
    }
  ]
}
```

In [3]:
# Code block 3.2: creating S3 bucket

import logging
import boto3
from botocore.exceptions import ClientError

def create_bucket(bucket_name, region=None, ACL_type='private'):
    '''Create an S3 bucket and report whether the request succeeded.

    Parameters
    ----------
    bucket_name : str
        Name of the bucket to create.
    region : str or None
        Region to create the bucket in, e.g. 'us-east-2'. When None or
        'us-east-1', no location constraint is sent with the request.
    ACL_type : str
        Pick an ACL from 'private'|'public-read'|'public-read-write'|'authenticated-read'.

    Returns
    -------
    bool
        True when the bucket was created, False when boto3 raised a ClientError
        (the error text is printed).
    '''
    try:
        s3_client = boto3.client('s3', region_name=region)
        request = {'ACL': ACL_type, 'Bucket': bucket_name}
        # S3 rejects an explicit LocationConstraint of 'us-east-1' (and there is
        # nothing to send when no region was given), so only add the
        # configuration for the other regions.
        if region not in (None, 'us-east-1'):
            request['CreateBucketConfiguration'] = {'LocationConstraint': region}
        s3_client.create_bucket(**request)
    except ClientError as e:
        print(str(e))
        return False
    return True
create_bucket("test-jmp-book", 'us-east-2', 'private')


True

In [4]:
# Code block 3.3: uploading an object to S3
import boto3
def S3_upload(S3_bucket_name, local_filename, S3_keyname, max_attempts=5):
    """Upload a local file to S3, retrying on failure.

    Parameters
    ----------
    S3_bucket_name : str
        Destination bucket.
    local_filename : str
        Path of the local file to upload.
    S3_keyname : str
        Object key to store the file under.
    max_attempts : int
        Maximum number of upload attempts (default 5).

    Returns
    -------
    bool
        True as soon as one attempt succeeds, False when every attempt raised.
        Each failure is printed, not re-raised.
    """
    S3 = boto3.client('s3')

    for attempt in range(1, max_attempts + 1):
        try:
            S3.upload_file(local_filename, S3_bucket_name, S3_keyname)
        except Exception as e:
            print(str(e))
        else:
            print("finished uploading to s3 in attempt ", attempt)
            return True
    # Every attempt failed; tell the caller instead of returning silently.
    return False
S3_upload("test-jmp-book", "test.pdf", "upload_test.pdf")


finished uploading to s3 in attempt  1


In [5]:
# Code block 3.4: get last modified file from a bucket

from datetime import datetime

def get_last_mod_file(s3bucketname, file_type = None, substring_to_match = ''):
    """Return the key of the most recently modified object in a bucket.

    Parameters
    ----------
    s3bucketname : str
        Bucket to scan.
    file_type : str or None
        When given, only keys whose text after the last '.' equals this value
        are considered (e.g. 'pdf').
    substring_to_match : str
        Only keys containing this substring are considered; the default ''
        matches every key.

    Returns
    -------
    str
        Key of the newest matching object, or the string 'None' when the
        bucket is empty or nothing matches the filters.
    """
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket(s3bucketname)

    # Sentinel older than any real object; timestamps are compared as naive datetimes.
    last_modified_date = datetime(1939, 9, 1)
    # Initialised up front so a bucket with objects but no match still returns
    # a value instead of raising UnboundLocalError.
    last_modified_file = 'None'

    for file in my_bucket.objects.all():
        file_date = file.last_modified.replace(tzinfo=None)
        file_name = file.key
        print(file_date, file_name)

        if substring_to_match not in file_name:
            continue
        if file_type is not None and file_type != file_name.split('.')[-1]:
            continue
        if last_modified_date < file_date:
            last_modified_date = file_date
            last_modified_file = file_name
    return last_modified_file
get_last_mod_file("test-jmp-book")

2020-04-29 11:54:28 upload_test.pdf


'upload_test.pdf'

In [6]:
# code block 3.5: downloading an object from S3

def download_file_from_s3(s3bucketname, S3_keyname, local_filename, max_attempts=5):
    """Download an object from S3 to a local file, retrying on failure.

    Parameters
    ----------
    s3bucketname : str
        Bucket holding the object.
    S3_keyname : str
        Key of the object to download.
    local_filename : str
        Local path to write the object to.
    max_attempts : int
        Maximum number of download attempts (default 5).

    Returns
    -------
    bool
        True when a download attempt succeeded; False when the object does not
        exist (404) or every attempt failed.
    """
    s3 = boto3.resource('s3')

    for attempt in range(1, max_attempts + 1):
        try:
            s3.meta.client.download_file(s3bucketname, S3_keyname, local_filename)
        # Only `ClientError` is imported in this notebook, not the `botocore`
        # package itself, so the exception class is referenced by that name.
        except ClientError as e:
            if e.response['Error']['Code'] == "404":
                print("The object does not exist.")
                # A missing object will not appear by retrying.
                return False
            print(e)
            logging.info(str(e))
        except Exception as e:
            print(e)
            logging.info(str(e))
        else:
            print("downloaded successfully in attempt ", attempt)
            return True
    return False
download_file_from_s3("s3-jmp-upload-test","upload_test.pdf", "download_test.pdf")
#output

downloaded successfully in attempt  1


In [7]:
# code block 3.6: deleting an object from bucket and bucket itself
# Delete a single object, identified by bucket name and key, through the
# low-level S3 client. The service reply is kept in `response`.
S3 = boto3.client('s3')
bucket_name = 'test-jmp-book'
key_name = 'upload_test.pdf'

response = S3.delete_object(
    Bucket=bucket_name,
    Key=key_name,
)

# deleting a bucket

def delete_all_objects(bucket_name):
    """Delete every object version listed for a bucket.

    Collects the key and version id of each entry returned by
    ``bucket.object_versions.all()``, prints the collected list, and removes
    the entries with batched ``delete_objects`` calls.

    Parameters
    ----------
    bucket_name : str
        Bucket to empty.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    result = [{'Key': obj_version.object_key, 'VersionId': obj_version.id}
              for obj_version in bucket.object_versions.all()]
    print(result)

    # DeleteObjects takes at most 1000 keys per request and rejects an empty
    # list, so send the keys in chunks and send nothing when there are none.
    batch_size = 1000
    for start in range(0, len(result), batch_size):
        bucket.delete_objects(Delete={'Objects': result[start:start + batch_size]})
    
def delete_bucket(bucket_name):
    """Empty a bucket if needed, then delete it.

    Parameters
    ----------
    bucket_name : str
        Bucket to delete.

    Returns
    -------
    bool
        Always True when no exception was raised.
    """
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket(bucket_name)
    # Look at object versions, not just current objects: a versioned bucket
    # can hold only old versions or delete markers, and the bucket cannot be
    # deleted while any of those remain.
    if any(my_bucket.object_versions.all()):
        delete_all_objects(bucket_name)
    my_bucket.delete()
    return True
delete_bucket('test-jmp-book')

True

In [None]:
# code block 3.7: list all buckets

# Retrieve the list of existing buckets

def list_buckets():
    """Print the name of every bucket returned by ``list_buckets``.

    Each name is followed by a line of ten asterisks as a separator.
    """
    s3 = boto3.client('s3')
    response = s3.list_buckets()

    for bucket in response['Buckets']:
        # Print the bare name; wrapping it in braces built a one-element set
        # and printed it as {'name'}.
        print(bucket["Name"])
        print('*'*10)
list_buckets()

In [None]:
# Code block 3.8: creating sns topic and sqs queue
import boto3
import json
import sys
import time



def CreateTopicandQueue(topic_name, email_address=None):
    """Create an SNS topic and an SQS queue and subscribe the queue to the topic.

    Both resources are named ``topic_name`` plus the current time in
    milliseconds. When an email address is given it is subscribed to the topic
    as well.

    Parameters
    ----------
    topic_name : str
        Prefix for the topic and queue names.
    email_address : str or None
        Address to subscribe with the 'email' protocol; None skips the email
        subscription.

    Returns
    -------
    dict
        Keys 'snsTopicArn', 'sqsQueueArn', 'sqsQueueUrl' and 'emailArn'
        ('emailArn' is None when no address was given).
    """
    sqs = boto3.client('sqs')
    sns = boto3.client('sns')
    millis = str(int(round(time.time() * 1000)))

    # Create SNS topic
    snsTopicName = topic_name + millis
    snsTopicArn = sns.create_topic(Name=snsTopicName)['TopicArn']

    # Subscribe email_address to the SNS topic
    if email_address is not None:
        email_response = sns.subscribe(TopicArn=snsTopicArn, Protocol='email',
                                       Endpoint=email_address,
                                       ReturnSubscriptionArn=True)
        emailArn = email_response['SubscriptionArn']
    else:
        emailArn = None

    # Create SQS queue; create_queue already returns the queue URL, so no
    # separate get_queue_url round trip is needed.
    sqsQueueName = topic_name + millis
    sqsQueueUrl = sqs.create_queue(QueueName=sqsQueueName)['QueueUrl']

    attribs = sqs.get_queue_attributes(QueueUrl=sqsQueueUrl,
                                       AttributeNames=['QueueArn'])['Attributes']
    sqsQueueArn = attribs['QueueArn']

    # Authorize SNS to write to the SQS queue. The policy is serialised with
    # json.dumps so the ARNs are always emitted as valid JSON strings.
    policy = json.dumps({
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "MyPolicy",
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": "SQS:SendMessage",
                "Resource": sqsQueueArn,
                "Condition": {
                    "ArnEquals": {
                        "aws:SourceArn": snsTopicArn
                    }
                }
            }
        ]
    })
    sqs.set_queue_attributes(QueueUrl=sqsQueueUrl, Attributes={'Policy': policy})

    # Subscribe the queue only after the policy is in place, so the topic is
    # already allowed to deliver when the subscription becomes active.
    sns.subscribe(
        TopicArn=snsTopicArn,
        Protocol='sqs',
        Endpoint=sqsQueueArn)

    return {"snsTopicArn": snsTopicArn, "sqsQueueArn": sqsQueueArn,
            "sqsQueueUrl": sqsQueueUrl, 'emailArn': emailArn}

response_dict = CreateTopicandQueue("test_topic", "jay@jaympatel.com")

In [None]:
#Code block 3.9: sending a message through SNS

# Publish one plain-text message with a subject to the topic whose ARN is
# stored in `response_dict` (the dict returned by CreateTopicandQueue above).
client = boto3.client('sns')
response = client.publish(
    TopicArn=response_dict["snsTopicArn"],
    Message='this is a test of SNS and SQS',
    Subject='test_SNS_SQS',
    MessageStructure='string',    
)

In [None]:
# Code block 3.10: retrieving messages through SQS

sqs = boto3.client('sqs')
# 'All' is the documented wildcard for returning every message attribute;
# the name is case-sensitive.
sqsResponse = sqs.receive_message(QueueUrl=response_dict['sqsQueueUrl'], MessageAttributeNames=['All'],
                                  MaxNumberOfMessages=10, WaitTimeSeconds = 10)

# parsing sqs messages
# The response has no "Messages" key when nothing was received within the
# wait time, so fall back to an empty list instead of indexing the key.
for message in sqsResponse.get("Messages", []):
    # The body of a message delivered through SNS is a JSON envelope.
    message_dict = json.loads(message["Body"])
    message_text = message_dict["Message"]
    # "Subject" is only present when the publisher supplied one.
    subject_text = message_dict.get("Subject")
    message_id = message_dict["MessageId"]
    receipt_handle = message["ReceiptHandle"]
    #print("receipt_handle: ", receipt_handle)
    print("message_id: ", message_id)
    print("subject_text: ", subject_text)
    print("message_text: ", message_text)

In [None]:
#Code block 3.11: deleting message by receipt handle and deleting SNS and SQS queue

# Delete every message returned by the last receive call, each by its own
# receipt handle. Iterating the response (instead of relying on a leftover
# `receipt_handle` variable) also works when no message was received.
for message in sqsResponse.get("Messages", []):
    response = sqs.delete_message(
        QueueUrl=response_dict['sqsQueueUrl'],
        ReceiptHandle=message["ReceiptHandle"]
    )

# DELETE SQS queue and SNS topic
# No SNS client named `sns` exists at notebook scope, so create one here.
sns = boto3.client('sns')
sqs.delete_queue(QueueUrl=response_dict['sqsQueueUrl'])
sns.delete_topic(TopicArn=response_dict['snsTopicArn'])

In [None]:
#Code block 3.12: Sample script to run on EC2 server
# Copy this cell into a file named us_fda_script.py; the shebang line below must be the first line of that file to take effect.
    
#! /opt/conda/bin/python3

import requests
import numpy as np
import pandas as pd
import io
from bs4 import BeautifulSoup
import boto3

def S3_upload(S3_bucket_name, local_filename, S3_keyname, max_attempts=5):
    """Upload a local file to S3, retrying on failure.

    Parameters
    ----------
    S3_bucket_name : str
        Destination bucket.
    local_filename : str
        Path of the local file to upload.
    S3_keyname : str
        Object key to store the file under.
    max_attempts : int
        Maximum number of upload attempts (default 5).

    Returns
    -------
    bool
        True as soon as one attempt succeeds, False when every attempt raised.
        Each failure is printed, not re-raised.
    """
    S3 = boto3.client('s3')

    for attempt in range(1, max_attempts + 1):
        try:
            S3.upload_file(local_filename, S3_bucket_name, S3_keyname)
        except Exception as e:
            print(str(e))
        else:
            print("finished uploading to s3 in attempt ", attempt)
            return True
    # Every attempt failed; tell the caller instead of returning silently.
    return False

def get_abs_url(html_tag):
    """Extract the absolute FDA URL and link text from an HTML anchor snippet.

    Parses ``html_tag`` with the 'lxml' parser, takes the first ``<a>``
    element, prefixes its ``href`` with 'https://www.fda.gov' and returns that
    together with the anchor's text as an ``(abs_url, company_name)`` tuple.
    """
    anchor = BeautifulSoup(html_tag, 'lxml').find('a')
    return 'https://www.fda.gov' + anchor['href'], anchor.get_text()

if __name__ == "__main__":  # run only when executed as a script, not on import

    # Browser-like User-Agent sent with the request.
    my_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' + ' (KHTML, like Gecko) Chrome/61.0.3163.100Safari/537.36'
    }

    test_url = 'https://web.archive.org/save/_embed/https://www.fda.gov/files/api/datatables/static/warning-letters.json?_=1586319220541'

    # NOTE(review): these ARNs are hard-coded for one specific topic/queue;
    # replace them with the values for your own resources before running.
    response_dict = {'emailArn': 'arn:aws:sns:us-east-2:896493407642:test_topic1586487525592:40239d22-7025-40b4-ac4b-bd36e3a1f9cc',
    'snsTopicArn': 'arn:aws:sns:us-east-2:896493407642:test_topic1586487525592',
    'sqsQueueArn': 'arn:aws:sqs:us-east-2:896493407642:test_topic1586487525592',
    'sqsQueueUrl': 'https://us-east-2.queue.amazonaws.com/896493407642/test_topic1586487525592'}

    # Bound the request time and stop on an HTTP error status, so an error
    # page is never parsed as if it were the JSON table.
    r = requests.get(url = test_url, headers = my_headers, timeout = 60)
    r.raise_for_status()
    #print("request code: ", r.status_code)
    html_response = r.text
    string_json2 = io.StringIO(html_response)
    df = pd.read_json(string_json2)
    # Each cell of this column is an HTML anchor; split it into URL and name.
    df["abs_url"], df["company_name"] = zip(*df["field_company_name_warning_lette"].apply(get_abs_url))

    S3_keyname = "warning_letters_table.csv"
    local_filename = "warning_letters_table.csv"
    S3bucket_name = 'test-jmp-book'
    df.to_csv(local_filename)
    upload_ok = S3_upload(S3bucket_name,local_filename,S3_keyname)

    # sending a message through SNS
    # Only an explicit False counts as a failed upload; an uploader that
    # reports no status (returns None) is treated as before.
    if upload_ok is False:
        subject_text = 's3 upload failed'
        message_text = S3_keyname + " could not be uploaded to " + S3bucket_name
    else:
        subject_text = 's3 upload successful'
        message_text = S3_keyname + " successfully uploaded to " + S3bucket_name
    client = boto3.client('sns', 'us-east-2')
    response = client.publish(
        TopicArn=response_dict["snsTopicArn"],
        Message=message_text,
        Subject=subject_text,
        MessageStructure='string',
    )