## Requests

In [1]:
# Show the Requests documentation page inside the notebook
from IPython.display import IFrame

requests_documentation_url = "https://requests.readthedocs.io/en/latest/"

# The IFrame is the last expression of the cell, so the notebook renders it
IFrame(src=requests_documentation_url, width='100%', height='30%')

In [2]:
# Import
import requests

# URL of the cat-fact endpoint
url = 'https://catfact.ninja/fact'

# Make a request. Without a timeout, requests waits indefinitely on an
# unresponsive server, which would hang the notebook kernel.
response = requests.get(url, timeout=10)

# Raise an HTTPError right here on a 4xx/5xx status instead of carrying
# an error response forward to the code that decodes the body.
response.raise_for_status()

In [3]:
# Raw body of the response, exactly as received (bytes, not yet decoded)
response_content = response.content

# Display
display(response_content)

b'{"fact":"A cat\'s cerebral cortex contains about twice as many neurons as that of dogs. Cats have 300 million neurons, whereas dogs have about 160 million. See, cats rule, dogs drool!","length":173}'

In [4]:
# Parse the response body as JSON into plain Python objects
response_json = response.json()

# Display
display(response_json)

{'fact': "A cat's cerebral cortex contains about twice as many neurons as that of dogs. Cats have 300 million neurons, whereas dogs have about 160 million. See, cats rule, dogs drool!",
 'length': 173}

## API which requires parameters

In [5]:
# URL with the query string (?country=Finland) written directly into it
url = 'http://universities.hipolabs.com/search?country=Finland'

# Make a request; the timeout keeps an unresponsive server from hanging the cell
response = requests.get(url, timeout=10)

# Raise HTTPError on a 4xx/5xx status before trying to decode the body
response.raise_for_status()

# Decode JSON
response_json = response.json()

# Display only the first two entries to keep the output short
display(response_json[:2])

[{'country': 'Finland',
  'domains': ['abo.fi'],
  'web_pages': ['http://www.abo.fi/'],
  'alpha_two_code': 'FI',
  'name': 'Abo Akademi University',
  'state-province': None},
 {'country': 'Finland',
  'domains': ['cou.fi'],
  'web_pages': ['http://www.cou.fi/'],
  'alpha_two_code': 'FI',
  'name': 'Central Ostrobothnia University of Applied Sciences',
  'state-province': None}]

In [6]:
# URL of the search endpoint, without any query string
url = 'http://universities.hipolabs.com/search'

# Make the parameter dictionary; requests encodes it into the query string
parameters = {'country' : 'Finland'}

# Get response; the timeout keeps an unresponsive server from hanging the cell
response = requests.get(url, params=parameters, timeout=10)

# Raise HTTPError on a 4xx/5xx status before trying to decode the body
response.raise_for_status()

# Decode JSON
response_json = response.json()

# Display only the first two entries to keep the output short
display(response_json[:2])

[{'country': 'Finland',
  'domains': ['abo.fi'],
  'web_pages': ['http://www.abo.fi/'],
  'alpha_two_code': 'FI',
  'name': 'Abo Akademi University',
  'state-province': None},
 {'country': 'Finland',
  'domains': ['cou.fi'],
  'web_pages': ['http://www.cou.fi/'],
  'alpha_two_code': 'FI',
  'name': 'Central Ostrobothnia University of Applied Sciences',
  'state-province': None}]

## Exercise 1

In [1]:
# Import module
import requests

# URL of the activity API end point
url = "http://www.boredapi.com/api/activity/"

# Send the request using the get() function. The timeout stops the cell
# from waiting forever if the server never answers.
response = requests.get(url, timeout=10)

# Raise HTTPError on a 4xx/5xx status so a failed request is noticed here
response.raise_for_status()

In [2]:
# Decode the JSON body of the response, then show it
activity = response.json()
display(activity)

{'activity': 'Take a caffeine nap',
 'type': 'relaxation',
 'participants': 1,
 'price': 0.1,
 'link': '',
 'key': '5092652',
 'accessibility': 0.08}

In [3]:
# Query parameters that narrow down which activity is returned
params = {
    'type' : 'education',
    'participants' : 1,
}

# Send the request using get() with parameters. params is passed by keyword
# so its role is explicit; the timeout stops the cell from waiting forever.
response = requests.get(url, params=params, timeout=10)

# Raise HTTPError on a 4xx/5xx status so a failed request is noticed here
response.raise_for_status()

In [4]:
# Output a label first, then the decoded JSON body of the response
display("Response")
activity = response.json()
display(activity)

'Response'

{'activity': "Research a topic you're interested in",
 'type': 'education',
 'participants': 1,
 'price': 0,
 'link': '',
 'key': '3561421',
 'accessibility': 0.9}

In [5]:
# Query parameters: activity type, group size and an accepted price range
params = {
    'type' : 'social',
    'participants' : 2,
    'minprice' : 0,
    'maxprice' : 1000,
}

# Send the request using get() with parameters. params is passed by keyword
# so its role is explicit; the timeout stops the cell from waiting forever.
response = requests.get(url, params=params, timeout=10)

# Raise HTTPError on a 4xx/5xx status so a failed request is noticed here
response.raise_for_status()

In [6]:
# Decode the JSON body and show it, followed by an empty-string output
activity = response.json()
display(activity)
display("")

{'activity': "Text a friend you haven't talked to in a long time",
 'type': 'social',
 'participants': 2,
 'price': 0.05,
 'link': '',
 'key': '6081071',
 'accessibility': 0.2}

''

## Exercise 2

In [8]:
# Import modules
import requests

# URL of the activity API end point
url = "http://www.boredapi.com/api/activity/"

# Make the request using the get() function. The timeout stops the cell
# from waiting forever if the server never answers.
response = requests.get(url, timeout=10)

# Raise HTTPError on a 4xx/5xx status so a failed request is noticed here
response.raise_for_status()

In [9]:
# Headers attached to the outgoing request, copied into a plain dict
display("Request headers")
request_headers = dict(response.request.headers)
display(request_headers)

'Request headers'

{'User-Agent': 'python-requests/2.28.1',
 'Accept-Encoding': 'gzip, deflate',
 'Accept': '*/*',
 'Connection': 'keep-alive'}

In [10]:
# Headers the server sent back, copied into a plain dict
display("Response headers")
response_headers = dict(response.headers)
display(response_headers)

'Response headers'

{'Server': 'Cowboy',
 'Connection': 'keep-alive',
 'X-Powered-By': 'Express',
 'Access-Control-Allow-Origin': '*',
 'Access-Control-Allow-Headers': 'Origin, X-Requested-With, Content-Type, Accept',
 'Content-Type': 'application/json; charset=utf-8',
 'Content-Length': '128',
 'Etag': 'W/"80-5bUQ0QKp2O72cxy7Bl5XTVBYaos"',
 'Date': 'Thu, 24 Nov 2022 09:47:49 GMT',
 'Via': '1.1 vegur'}

## Exercise 3

In [12]:
# Import module
import requests

# Define webpage to scrape
url = "http://www.example.com/"

# Make a request for the URL. The timeout stops the cell from waiting
# forever if the server never answers.
response = requests.get(url, timeout=10)

# Raise HTTPError on a 4xx/5xx status so an error page is not examined
# as if it were the requested document
response.raise_for_status()

# Examine the response (raw bytes of the body)
display(response.content)

b'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    

In [13]:
# Looks like HTML :) Let's access it using the text attribute,
# which gives the body as a decoded string rather than bytes
html = response.text

# print() writes the string itself, so its newlines show up as line breaks
print(html)

<!doctype html>
<html>
<head>
    <title>Example Domain</title>

    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>    
</head>

<body>
<div>
    <h1>Example Domain</h1>
    <p>This domai

In [14]:
# Bring in the BeautifulSoup class from the bs4 package
from bs4 import BeautifulSoup

# Parse the HTML string with the 'html.parser' backend
soup = BeautifulSoup(html, features='html.parser')

In [15]:
# Read the text of the <title> element from the parsed page
page_title = soup.title.text
print(f"Found title: {page_title}")

Found title: Example Domain


In [16]:
# Collect the href attribute of every <a> element (None when it is absent)
hrefs = [anchor.get('href') for anchor in soup.find_all('a')]

# Report each one on its own line
for href in hrefs:
    print(f"Found link: {href}")

Found link: https://www.iana.org/domains/example


In [17]:
# Gather the text content of the whole parsed document
page_text = soup.get_text()
print(f"Found text: {page_text}")

Found text: 


Example Domain







Example Domain
This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.
More information...






## After exercises: Saving retrieved data to disk

In [20]:
# Import
import time

import jsonlines
import requests

# URL
url = 'https://catfact.ninja/fact'

# Number of facts to fetch
n_requests = 3

# Open the jsonlines writer once for the whole loop instead of once per
# request. 'append' mode keeps whatever the file already holds, so running
# this cell again adds further lines rather than replacing the file.
with jsonlines.open('catfacts.jsonl', mode='a') as writer:

    # Fetch the facts one by one and write each as its own line
    for i in range(n_requests):

        # Logging
        print(f"Make request {i}")

        # Make a request; the timeout keeps an unresponsive server from
        # hanging the loop
        response = requests.get(url, timeout=10)

        # Raise HTTPError on a 4xx/5xx status so an error body is never
        # written to the file
        response.raise_for_status()

        # Decode to JSON
        response_json = response.json()

        # Write
        writer.write(response_json)

        # Sleep for one second between requests; nothing follows the last
        # request, so there is no need to wait after it
        if i < n_requests - 1:
            time.sleep(1)

Make request 0
Make request 1
Make request 2


In [21]:
# Read the saved facts back; jsonlines.open uses read mode by default
with jsonlines.open('catfacts.jsonl') as reader:

    # Show every stored object in file order
    for fact_record in reader:
        display(fact_record)

{'fact': "A cat's nose is as unique as a human's fingerprint.", 'length': 51}

{'fact': "A cat's jaw has only up and down motion; it does not have any lateral, side to side motion, like dogs and humans.",
 'length': 113}

{'fact': 'In just seven years, a single pair of cats and their offspring could produce a staggering total of 420,000 kittens.',
 'length': 115}