## Serial

In [9]:
import requests

# Pages downloaded by the serial example, in request order. The final host
# fails name resolution in the recorded output, exercising the error path.
URLS = [
    "http://www.foxnews.com/",
    "http://www.cnn.com/",
    "http://europe.wsj.com/",
    "http://www.bbc.co.uk/",
    "http://nonexistent-subdomain.python.org/",
]

def load_url(url):
    """Download ``url`` and print how many bytes its body contains.

    The request is issued with ``timeout=60``. Nothing is returned and no
    exception escapes: any failure is caught and printed next to the URL.
    """
    try:
        reply = requests.get(url, timeout=60)
        size = len(reply.content)
        print(f'{url} page is {size} bytes')
    except Exception as error:
        # Deliberately broad: report the failure and let the caller carry on.
        print(f'{url} generated an exception: {error}')

def main():
    """Fetch every address in ``URLS`` one after another, in list order."""
    for address in URLS:
        load_url(address)

# Entry guard: call main() only when this code runs as the top-level module
# (the recorded output below shows it firing in the notebook session).
if __name__ == "__main__":
    main()


http://www.foxnews.com/ page is 728206 bytes
http://www.cnn.com/ page is 3021208 bytes
http://europe.wsj.com/ page is 549532 bytes
http://www.bbc.co.uk/ page is 583210 bytes
http://nonexistent-subdomain.python.org/ generated an exception: HTTPConnectionPool(host='nonexistent-subdomain.python.org', port=80): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x0000029CFBE6FB50>: Failed to resolve 'nonexistent-subdomain.python.org' ([Errno 11001] getaddrinfo failed)"))


## Parallel

In [10]:
import concurrent.futures
import urllib.request

# Pages downloaded concurrently by the thread-pool example. The final host
# fails name resolution in the recorded output, exercising the error path.
URLS = [
    'http://www.foxnews.com/',
    'http://www.cnn.com/',
    'http://europe.wsj.com/',
    'http://www.bbc.co.uk/',
    'http://nonexistent-subdomain.python.org/',
]

def load_url(url, timeout):
    """Open ``url`` and return the complete response body as bytes.

    ``timeout`` is forwarded unchanged to ``urllib.request.urlopen``. Nothing
    is printed here, and errors raised by ``urlopen`` propagate to the caller.
    """
    opener = urllib.request.urlopen
    with opener(url, timeout=timeout) as response:
        body = response.read()
    return body

# The with-block owns the pool of up to five worker threads, so the pool is
# shut down as soon as every result has been reported.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # Submit one download per URL and remember which future belongs to which URL.
    future_to_url = {}
    for url in URLS:
        future_to_url[executor.submit(load_url, url, 60)] = url
    # as_completed yields futures as they finish, so the report order follows
    # completion rather than submission.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            print(f'{url!r} generated an exception: {exc}')
            continue
        print(f'{url!r} page is {len(data)} bytes')

'http://nonexistent-subdomain.python.org/' generated an exception: <urlopen error [Errno 11001] getaddrinfo failed>
'http://europe.wsj.com/' generated an exception: HTTP Error 403: Forbidden
'http://www.bbc.co.uk/' page is 583210 bytes
'http://www.foxnews.com/' page is 728206 bytes
'http://www.cnn.com/' page is 3021208 bytes


## Async

In [None]:
import asyncio
import aiohttp

# Pages downloaded concurrently by the asyncio example. The final host cannot
# be connected to in the recorded output, exercising the error path.
URLS = [
    "http://www.foxnews.com/",
    "http://www.cnn.com/",
    "http://europe.wsj.com/",
    "http://www.bbc.co.uk/",
    "http://nonexistent-subdomain.python.org/",
]

async def load_url(session, url):
    """Fetch ``url`` through ``session`` and print the body size or the failure.

    Args:
        session: Client session whose ``get`` method issues the request.
        url: Address to download.

    Returns:
        None. Every exception is caught and reported on stdout rather than
        propagated to the caller.
    """
    try:
        # aiohttp documents ``timeout`` as a ClientTimeout instance; a bare
        # number is only accepted for backward compatibility.
        timeout = aiohttp.ClientTimeout(total=60)
        async with session.get(url, timeout=timeout) as response:
            data = await response.read()
            print(f'{url} page is {len(data)} bytes')
    except Exception as exc:
        # Some exceptions (notably asyncio.TimeoutError) have an empty str();
        # fall back to the class name so the report is never blank.
        print(f'{url} generated an exception: {str(exc) or type(exc).__name__}')

async def main():
    """Download every address in ``URLS`` concurrently over one shared session.

    A single ``aiohttp.ClientSession`` is opened, one ``load_url`` coroutine is
    created per URL, and the session is left only after ``asyncio.gather`` has
    awaited all of them.
    """
    async with aiohttp.ClientSession() as client:
        downloads = (load_url(client, address) for address in URLS)
        await asyncio.gather(*downloads)

# Entry guard. A bare top-level ``await`` is not valid in a plain Python script.
# NOTE(review): this presumably relies on the notebook kernel's top-level await
# support — confirm before moving this code into a .py module.
if __name__ == "__main__":
    await main()


http://nonexistent-subdomain.python.org/ generated an exception: Cannot connect to host nonexistent-subdomain.python.org:80 ssl:default [getaddrinfo failed]
http://www.bbc.co.uk/ page is 583210 bytes
http://www.foxnews.com/ page is 728206 bytes
http://www.cnn.com/ page is 3021208 bytes
http://europe.wsj.com/ page is 549532 bytes
