In [1]:
#Handling Web Response Status Codes:

import requests
r1 = requests.get('https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/the-html5-breakfast-site.html')
print(r1.status_code)


200


In [2]:
r2 = requests.get('https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/forbidden')
print(r2.status_code)

403


In [3]:
r3 = requests.get('http://google.com')
print(r3.history)

[<Response [301]>]


In [4]:
print(r3.history[0].text)

<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>301 Moved</TITLE></HEAD><BODY>
<H1>301 Moved</H1>
The document has moved
<A HREF="http://www.google.com/">here</A>.
</BODY></HTML>



In [2]:
#Condition to check if the statuss is successful:

url = 'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/the-html5-breakfast-site.html'

import requests

r = requests.get(url) # `url` has been defined before
if r.status_code < 300:
    print('request was successful')
elif r.status_code >= 400 and r.status_code < 500:
    print('request failed because the resource either does not exist or is forbidden')
else:
    print('request failed because the response server encountered an error')

request was successful


In [4]:
#Try url for exceptions:

try:
    r = requests.get('http://google.com', timeout=10)
except requests.exceptions.Timeout:
    print('Timeout error')
except requests.exceptions.TooManyRedirects:
    print('Redirect error')
except requests.exceptions.SSLError:
    print('Ssl error')
except requests.exceptions.RequestException as e:
    print('Other unknown errors')

In [7]:
#User Agent:
#Fool website:

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
response = requests.get(url, headers=headers)
print(response.content)

b'<!DOCTYPE html>\n<html>\n  <head>\n    <title>The HTML5 Breakfast Site</title>\n    <meta charset="UTF-8">\n\n    <style>\n      body {\n        background-color: limegreen;\n        margin: 40px;\n      } \n    </style>\n  </head>\n  <body>\n    <div id="container">\n\n      <nav id="topnav">\n        <a href="https://www.ironhack.com" target="_blank">HOME</a> |\n        <a href="https://www.ironhack.com/en/team" target="_blank">ABOUT US</a> |\n        <a href="https://www.ironhack.com/en/contact" target="_blank">CONTACT US</a>\n      </nav>\n\n      <section id="content">\n        <h1>The Ironhack Breakfast Place</h1>\n        <p>Here you will find all sorts of delicious treats</p>\n        <figure>\n          <img src="breakfast.jpg" width="400" alt="healthy breakfast">\n          <figcaption>CC Image courtesy of Ruth Hartnup on Flickr</figcaption>\n        </figure>\n      </section>\n\n      <footer>\n        <p class="love">Made with love by Ironhack</p>\n      </footer>\n\n   

In [10]:
#Making Asynchronous Requests
import asyncio, requests

urls = [
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/breakfast.jpg',
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/forbidden',
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/the-html5-breakfast-site.html'
]

async def main():
    loop = asyncio.get_event_loop()
    futures = [loop.run_in_executor(None, requests.get, url) for url in urls]
    for response in await asyncio.gather(*futures):
        print(response.status_code)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

RuntimeError: This event loop is already running

200
403
200


In [11]:
#Dealing with Throttling and Rate Limiting:

import requests, time

urls = [
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/breakfast.jpg',
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/forbidden',
    'https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/data-static/documents/the-html5-breakfast-site.html'
]

for url in urls:
    response = requests.get(url)
    print(response.status_code)
    time.sleep(1)

200
403
200
