# API REQUEST

In [1]:
import requests
import json
from bs4 import BeautifulSoup   

# Set up the parameters we want to pass to the API.
# This is the latitude and longitude of New York City.
parameters = {"lat": 40.71, "lon": -74}
# Make a get request with the pahttp://api.open-notify.orgrameters.
response = requests.get("http://api.open-notify.org/iss-pass.json", params=parameters)

#is the response positive?
print(response)

# Print the content of the response (the data the server returned)
# print(response.content)

# save response into a much nicer format
a = response.json()
#print(response.json())

pretty_json = json.dumps(a, indent=4, sort_keys=True)
print(pretty_json)


<Response [200]>
{
    "message": "success",
    "request": {
        "altitude": 100,
        "datetime": 1646994710,
        "latitude": 40.71,
        "longitude": -74.0,
        "passes": 5
    },
    "response": [
        {
            "duration": 566,
            "risetime": 1646995613
        },
        {
            "duration": 610,
            "risetime": 1647049957
        },
        {
            "duration": 647,
            "risetime": 1647055735
        },
        {
            "duration": 578,
            "risetime": 1647061616
        },
        {
            "duration": 575,
            "risetime": 1647067487
        }
    ]
}


In [2]:
# Make the same request we did earlier, but with the coordinates of San Francisco instead.
parameters = {"lat": 37.78, "lon": -122.41}
response = requests.get("http://api.open-notify.org/iss-pass.json", params=parameters)
# Get the response data as a python object. Verify that it's a dictionary.
data = response.json()
#print(type(data))
#print(data)



pretty_json = json.dumps(data, indent=4, sort_keys=True)
print(pretty_json)


{
    "message": "success",
    "request": {
        "altitude": 100,
        "datetime": 1646996484,
        "latitude": 37.78,
        "longitude": -122.41,
        "passes": 5
    },
    "response": [
        {
            "duration": 639,
            "risetime": 1647000948
        },
        {
            "duration": 606,
            "risetime": 1647006762
        },
        {
            "duration": 624,
            "risetime": 1647061049
        },
        {
            "duration": 632,
            "risetime": 1647066848
        },
        {
            "duration": 521,
            "risetime": 1647072763
        }
    ]
}


In [3]:
with open("latlong_ny.json", "w") as fp:
    json.dump(a,fp) 

with open('latlong.json') as json_file:
    data = json.load(json_file)
    for p in data['response']:
        print(f"Duration: {p['duration']}")
        print(f"RiseTime: {p['risetime']}")
        print('')

Duration: 594
RiseTime: 1607524821

Duration: 648
RiseTime: 1607530588

Duration: 581
RiseTime: 1607536467

Duration: 569
RiseTime: 1607542344

Duration: 638
RiseTime: 1607548163



# CSV

In [5]:
import csv

with open('./data/beers.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        else:
            print(f'\t The beer {row[0]} is {row[1]}, and it is from {row[2]}.')
            line_count += 1
    print(f'Processed {line_count} lines.')


Column names are Name, Appearance, Origin
	 The beer Edelweiss is White, and it is from Austria.
	 The beer CuvÃ©e des Trolls is Blond, and it is from Belgium.
	 The beer Choulette AmbrÃ©e is Amber, and it is from France.
	 The beer Gulden Draak is Dark, and it is from Belgium.
	 The beer Water is Crystal Clear, and it is from Anywhere.
Processed 6 lines.


# CSV into Dictionary

In [6]:
import csv

with open('./data/beers.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        print(f'\tThe beer {row["Name"]} is {row["Appearance"]}, and it is from {row["Origin"]}.')
        line_count += 1
    print(f'Processed {line_count} lines.')


Column names are Name, Appearance, Origin
	The beer Edelweiss is White, and it is from Austria.
	The beer CuvÃ©e des Trolls is Blond, and it is from Belgium.
	The beer Choulette AmbrÃ©e is Amber, and it is from France.
	The beer Gulden Draak is Dark, and it is from Belgium.
	The beer Water is Crystal Clear, and it is from Anywhere.
Processed 6 lines.


# The Power of Pandas

In [7]:
import pandas

df = pandas.read_csv('./data/beers.csv')

print(df)


               Name     Appearance    Origin
0         Edelweiss          White   Austria
1  Cuvée des Trolls          Blond   Belgium
2  Choulette Ambrée          Amber    France
3      Gulden Draak           Dark   Belgium
4             Water  Crystal Clear  Anywhere


# downloading html with requests library

In [8]:
import requests
page = requests.get("http://dataquestio.github.io/web-scraping-pages/simple.html")
page

<Response [200]>

# We can print out the HTML content of the page using the content property: 

In [9]:
import requests
page = requests.get("http://dataquestio.github.io/web-scraping-pages/simple.html")
page

page.status_code
page.content

b'<!DOCTYPE html>\n<html>\n    <head>\n        <title>A simple example page</title>\n    </head>\n    <body>\n        <p>Here is some simple content for this page.</p>\n    </body>\n</html>'

# Beautiful Soup

In [10]:
# Weather.gov
    
import requests
from bs4 import BeautifulSoup

page = requests.get("https://forecast.weather.gov/MapClick.php?lat=37.777120000000025&lon=-122.41963999999996#.X9DVpBakolQ")
soup = BeautifulSoup(page.content, 'html.parser')
html = list(soup.children)[2]
body = list(html.children)[3]

print(body)

#target parents
#go deeper into childen
#find p element or relevant class

    #target day
    #target temperature
    #target summary 
    
#save into a list
#save into a dictionary?

{"day1":["sunday", "54", 'today was a great sunny day '], "day2":["monday", "54", 'today was a great sunny day ']}

#load dict into pandas
# create column names if needed

<body>
<main class="container">
<header class="row clearfix" id="page-header">
<a class="pull-left" href="//www.noaa.gov" id="header-noaa"><img alt="National Oceanic and Atmospheric Administration" src="/css/images/header_noaa.png"/></a>
<a class="pull-left" href="//www.weather.gov" id="header-nws"><img alt="National Weather Service" src="/css/images/header_nws.png"/></a>
<a class="pull-right" href="//www.commerce.gov" id="header-doc"><img alt="United States Department of Commerce" src="/css/images/header_doc.png"/></a>
</header>
<nav class="navbar navbar-default row" role="navigation">
<div class="container-fluid">
<div class="navbar-header">
<button class="navbar-toggle collapsed" data-target="#top-nav" data-toggle="collapse" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="collapse navbar-collapse" id="top-nav">
<ul class="nav navbar-nav">
<li><a hre

{'day1': ['sunday', '54', 'today was a great sunny day '],
 'day2': ['monday', '54', 'today was a great sunny day ']}

In [11]:
# For further information
# https://www.dataquest.io/blog/python-api-tutorial/

import requests
from bs4 import BeautifulSoup

page = requests.get("http://dataquestio.github.io/web-scraping-pages/simple.html")
# Print the status code of the response.

soup = BeautifulSoup(page.content, 'html.parser')
#print(soup.prettify())

print()
print("This are the children")
print()

html = list(soup.children)[2]
#print(html)

body = list(html.children)[3]
#print(body)

p = list(body.children)[1]
#print(p)

#print(p.get_text())

#Faster Way

p = soup.find('p')

print(p.get_text())



This are the children

Here is some simple content for this page.


In [12]:
l = ['html', '\n', "<html> <head> <title>A simple example page</title></head><body><p>Here is some simple content for this page.</p></body></html>"]

In [42]:
l[2]

'<html> <head> <title>A simple example page</title></head><body><p>Here is some simple content for this page.</p></body></html>'

# FindAll with BS

In [1]:
# https://www.dataquest.io/blog/python-api-tutorial/

import requests
from bs4 import BeautifulSoup

page =requests.get("http://dataquestio.github.io/web-scraping-pages/ids_and_classes.html")
# Print the status code of the response.

soup =BeautifulSoup(page.content, 'html.parser')
#print(soup.prettify())

outer = soup.find_all('p', class_='outer-text')[0]
#outer = soup.find(id='first').string
print(outer)


<p class="outer-text first-item" id="second">
<b>
                First outer paragraph.
            </b>
</p>


In [2]:
outer.find_all('b')[0]

<b>
                First outer paragraph.
            </b>