## Example on how to get data from Amazon API and how to handle XML files

In case one needs to get data from Amazon API. Here the classical procedure with a simple example.

In [1]:
# Copyright 2010-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# This file is licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License. A copy of the
# License is located at
#
# http://aws.amazon.com/apache2.0/
#
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
#
# ABOUT THIS PYTHON SAMPLE: This sample is part of the AWS General Reference 
# Signing AWS API Requests top available at
# https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html
#

# AWS Version 4 signing example

# EC2 API (DescribeRegions)

# See: http://docs.aws.amazon.com/general/latest/gr/sigv4_signing.html
# This version makes a GET request and passes the signature
# in the Authorization header.
import sys, os, base64, datetime, hashlib, hmac 
import requests # pip install requests

# ************* REQUEST VALUES *************
method = 'GET'
service = 'ec2'
host = 'ec2.amazonaws.com'
region = 'us-east-1'
endpoint = 'https://ec2.amazonaws.com'
request_parameters = 'Action=DescribeRegions&Version=2013-10-15'

# Key derivation functions. See:
# http://docs.aws.amazon.com/general/latest/gr/signature-v4-examples.html#signature-v4-examples-python
def sign(key, msg):
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

def getSignatureKey(key, dateStamp, regionName, serviceName):
    kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
    kRegion = sign(kDate, regionName)
    kService = sign(kRegion, serviceName)
    kSigning = sign(kService, 'aws4_request')
    return kSigning

# Read AWS access key from env. variables or configuration file. Best practice is NOT
# to embed credentials in code.
access_key = os.environ.get('AWS_ACCESS_KEY_ID')
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
if access_key is None or secret_key is None:
    print('No access key is available.')
    sys.exit()

# Create a date for headers and the credential string
t = datetime.datetime.utcnow()
amzdate = t.strftime('%Y%m%dT%H%M%SZ')
datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope


# ************* TASK 1: CREATE A CANONICAL REQUEST *************
# http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html

# Step 1 is to define the verb (GET, POST, etc.)--already done.

# Step 2: Create canonical URI--the part of the URI from domain to query 
# string (use '/' if no path)
canonical_uri = '/' 

# Step 3: Create the canonical query string. In this example (a GET request),
# request parameters are in the query string. Query string values must
# be URL-encoded (space=%20). The parameters must be sorted by name.
# For this example, the query string is pre-formatted in the request_parameters variable.
canonical_querystring = request_parameters

# Step 4: Create the canonical headers and signed headers. Header names
# must be trimmed and lowercase, and sorted in code point order from
# low to high. Note that there is a trailing \n.
canonical_headers = 'host:' + host + '\n' + 'x-amz-date:' + amzdate + '\n'

# Step 5: Create the list of signed headers. This lists the headers
# in the canonical_headers list, delimited with ";" and in alpha order.
# Note: The request can include any headers; canonical_headers and
# signed_headers lists those that you want to be included in the 
# hash of the request. "Host" and "x-amz-date" are always required.
signed_headers = 'host;x-amz-date'

# Step 6: Create payload hash (hash of the request body content). For GET
# requests, the payload is an empty string ("").
payload_hash = hashlib.sha256(('').encode('utf-8')).hexdigest()

# Step 7: Combine elements to create canonical request
canonical_request = method + '\n' + canonical_uri + '\n' + canonical_querystring + '\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash


# ************* TASK 2: CREATE THE STRING TO SIGN*************
# Match the algorithm to the hashing algorithm you use, either SHA-1 or
# SHA-256 (recommended)
algorithm = 'AWS4-HMAC-SHA256'
credential_scope = datestamp + '/' + region + '/' + service + '/' + 'aws4_request'
string_to_sign = algorithm + '\n' +  amzdate + '\n' +  credential_scope + '\n' +  hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()

# ************* TASK 3: CALCULATE THE SIGNATURE *************
# Create the signing key using the function defined above.
signing_key = getSignatureKey(secret_key, datestamp, region, service)

# Sign the string_to_sign using the signing_key
signature = hmac.new(signing_key, (string_to_sign).encode('utf-8'), hashlib.sha256).hexdigest()
#print(signature)

# ************* TASK 4: ADD SIGNING INFORMATION TO THE REQUEST *************
# The signing information can be either in a query string value or in 
# a header named Authorization. This code shows how to use a header.
# Create authorization header and add to request headers
authorization_header = algorithm + ' ' + 'Credential=' + access_key + '/' + credential_scope + ', ' +  'SignedHeaders=' + signed_headers + ', ' + 'Signature=' + signature

# The request can include any headers, but MUST include "host", "x-amz-date", 
# and (for this scenario) "Authorization". "host" and "x-amz-date" must
# be included in the canonical_headers and signed_headers, as noted
# earlier. Order here is not significant.
# Python note: The 'host' header is added automatically by the Python 'requests' library.
headers = {'x-amz-date':amzdate, 'Authorization':authorization_header}


# ************* SEND THE REQUEST *************
request_url = endpoint + '?' + canonical_querystring

print('\nBEGIN REQUEST++++++++++++++++++++++++++++++++++++')
print('Request URL = ' + request_url)
r = requests.get(request_url, headers=headers)

print('\nRESPONSE++++++++++++++++++++++++++++++++++++')
print('Response code: %d\n' % r.status_code)
print(r.text)

# Hide my access key and secret key for this demo
access_key = 'XXXACCESS_KEYXXX'
secret_key = 'XXXSECRET_KEYXXX'


BEGIN REQUEST++++++++++++++++++++++++++++++++++++
Request URL = https://ec2.amazonaws.com?Action=DescribeRegions&Version=2013-10-15

RESPONSE++++++++++++++++++++++++++++++++++++
Response code: 200

<?xml version="1.0" encoding="UTF-8"?>
<DescribeRegionsResponse xmlns="http://ec2.amazonaws.com/doc/2013-10-15/">
    <requestId>e5091dd5-427e-4858-9cca-205858047b77</requestId>
    <regionInfo>
        <item>
            <regionName>eu-north-1</regionName>
            <regionEndpoint>ec2.eu-north-1.amazonaws.com</regionEndpoint>
        </item>
        <item>
            <regionName>ap-south-1</regionName>
            <regionEndpoint>ec2.ap-south-1.amazonaws.com</regionEndpoint>
        </item>
        <item>
            <regionName>eu-west-3</regionName>
            <regionEndpoint>ec2.eu-west-3.amazonaws.com</regionEndpoint>
        </item>
        <item>
            <regionName>eu-west-2</regionName>
            <regionEndpoint>ec2.eu-west-2.amazonaws.com</regionEndpoint>
        </item

**Giving simple example of keywords and indicies to search on Amazon API**

In [2]:
import datetime

associate_id = 'XXXASSOCIATE_IDXXX'

# keywords we are going to search on Amazon API
# Blankets, Mugs, Watches, Towels, Cushion, Batteries, Deodorants, Curtains, Routers, Adapters. 10 selected categories.
keywords_list =['Blankets','Mugs','Watches','Towels','Cushion','Batteries','Deodorants','Curtains','Routers','Adapters']

# indicies we are going to search on Amazon API
# https://docs.aws.amazon.com/AWSECommerceService/latest/DG/SearchIndices.html
index_list = ['Apparel','Apparel','Watch','Apparel','Apparel','Electronics','Apparel','Apparel','Electronics','Electronics']
data_in_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")


In [3]:
url_list = []
for i_i in range(len(keywords_list)):
    url_list.append(('http://webservices.amazon.com/onca/xml?Service=AWSECommerceService'
                     '&AWSAccessKeyId={}'
                     '&AssociateTag={}'
                     '&Operation=ItemSearch'
                     '&Keywords={}'
                     '&SearchIndex={}'
                     '&Timestamp={}'
                     '&Signature={}'.format(access_key,
                                            associate_id,
                                            keywords_list[i_i],
                                            index_list[i_i],
                                            data_in_str,
                                            signature)))
print(url_list[-1])

http://webservices.amazon.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=XXXACCESS_KEYXXX&AssociateTag=XXXASSOCIATE_IDXXX&Operation=ItemSearch&Keywords=Adapters&SearchIndex=Electronics&Timestamp=2020-05-10T18:47:58Z&Signature=5c41b874af9e94e2905ed4b5081d193c8bd09d50ad5f53016f61c1de8d68dd1e


In [4]:
r= requests.get(url_list[-1])
r

<Response [410]>

In [11]:
import xml.etree.ElementTree as ET

tree = ET.parse('example3.xml')
root = tree.getroot()
root

<Element 'CATALOG' at 0x00000018DE17AF98>

**Just gives a example on how to change a simple XML file to a dataframe**

In [51]:
import pandas as pd
import numpy as np
doi = ET.parse('example3.xml')
cds = doi.findall('CD')
columns_cd = ['Title', 'Artist', 'Country','Company','Price','Year']
df_cd = pd.DataFrame(columns = columns_cd)

for c in cds:
    title_cd = c.find('TITLE').text
    artist_cd = c.find('ARTIST').text
    country_cd = c.find('COUNTRY').text
    company_cd = c.find('COMPANY').text
    price_cd = c.find('PRICE').text
    year_cd = c.find('YEAR').text
    df_cd = df_cd.append(pd.Series([title_cd, artist_cd, country_cd, company_cd, price_cd, year_cd], index = columns_cd), ignore_index = True)
                                                                                

df_cd

Unnamed: 0,Title,Artist,Country,Company,Price,Year
0,Empire Burlesque,Bob Dylan,USA,Columbia,10.9,1985
1,Hide your heart,Bonnie Tyler,UK,CBS Records,9.9,1988
2,Greatest Hits,Dolly Parton,USA,RCA,9.9,1982
3,Still got the blues,Gary Moore,UK,Virgin records,10.2,1990
4,Eros,Eros Ramazzotti,EU,BMG,9.9,1997
5,One night only,Bee Gees,UK,Polydor,10.9,1998
6,Sylvias Mother,Dr.Hook,UK,CBS,8.1,1973
7,Maggie May,Rod Stewart,UK,Pickwick,8.5,1990
8,Romanza,Andrea Bocelli,EU,Polydor,10.8,1996
9,When a man loves a woman,Percy Sledge,USA,Atlantic,8.7,1987
