In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
import boto3
import botocore

# Posting messages to an AWS queue
### URL in the MoreInfo property to an SQS queue:

In [3]:
# create sqs client
sqs = boto3.client('sqs')

In [4]:
# create / open the SQS queue
queue = sqs.create_queue(QueueName="PlanetMoreInfo")
print (queue)

{'QueueUrl': 'https://queue.amazonaws.com/057062933592/PlanetMoreInfo', 'ResponseMetadata': {'RequestId': '5e27e4a1-cacb-5df7-9a8b-b3a324d832d3', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '5e27e4a1-cacb-5df7-9a8b-b3a324d832d3', 'date': 'Sat, 04 Apr 2020 20:19:32 GMT', 'content-type': 'text/xml', 'content-length': '326'}, 'RetryAttempts': 0}}


In [6]:
# read and parse the planets HTML
html = requests.get("http://localhost:8080/planets.html")
planet_soup = BeautifulSoup(html.content, "lxml")

In [7]:
planets = []
planet_rows = planet_soup.select("tr.planet")

In [8]:

for i in planet_rows:
    tds = i.select("td")
    # get the URL
    more_info_url = tds[5].select("a")[0]["href"].strip()
    # send the URL to the queue
    sqs.send_message(QueueUrl=queue["QueueUrl"],
    MessageBody = more_info_url)
    print("Sent %s to %s" % (more_info_url, queue["QueueUrl"] ))

Sent https://en.wikipedia.org/wiki/Mercury_(planet) to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Venus to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Earth to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Mars to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Jupiter to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Saturn to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Uranus to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Neptune to https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Sent https://en.wikipedia.org/wiki/Pluto to https://queue.amazonaws.com/057062933592/PlanetMoreInfo


# Reading and processing messages
### To process the messages:

In [None]:
print("Starting")

# create sqs client
sqs = boto3.client('sqs')

print("Created client")

# create / open the SQS queue
queue = sqs.create_queue(QueueName="PlanetMoreInfo")
queue_url = queue["QueueUrl"]
print ("Opened queue: %s" % queue_url)

while True:
	print ("Attempting to receive messages")
	response = sqs.receive_message(QueueUrl=queue_url,
								   MaxNumberOfMessages=1,
								   WaitTimeSeconds=1)
	if not 'Messages' in response:
		print ("No messages")
		continue

	message = response['Messages'][0]
	receipt_handle = message['ReceiptHandle']
	url = message['Body']

	# parse the page
	html = requests.get(url)
	bsobj = BeautifulSoup(html.text, "lxml")

	# now find the planet name and albedo info
	planet=bsobj.findAll("h1", {"id": "firstHeading"} )[0].text
	albedo_node = bsobj.findAll("a", {"href": "/wiki/Geometric_albedo"})[0]
	root_albedo = albedo_node.parent
	albedo = root_albedo.text.strip()

	# delete the message from the queue
	sqs.delete_message(
		QueueUrl=queue_url,
		ReceiptHandle=receipt_handle
	)

	# print the planets name and albedo info
	print("%s: %s" % (planet, albedo))

Starting
Created client
Opened queue: https://queue.amazonaws.com/057062933592/PlanetMoreInfo
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Attempting to receive messages
No messages
Att