# CSV

In [1]:
import csv
# Read a headerless, tab-separated file: every row comes back as a list of
# strings in column order (date, symbol, closing price).
# newline='' hands line-ending handling to the csv module, which is what its
# documentation asks for whenever a file object is passed to reader/writer.
with open('ch6-files/tab_delimited_stock_prices.txt', 'r', newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    for row in reader:
        date = row[0]
        symbol = row[1]
        # csv yields text only, so the price has to be converted explicitly.
        closing_price = float(row[2])
        print(date, symbol, closing_price)

6/20/2014 AAPL 90.91
6/20/2014 MSFT 41.68
6/20/2014 FB 64.5
6/19/2014 AAPL 91.86
6/19/2014 MSFT 41.51
6/19/2014 FB 64.34


- For a CSV file with a header row, use `csv.DictReader`: each row is returned as a dict keyed by column name.

In [2]:
# Read a colon-separated file that starts with a header row. No field names
# are passed to DictReader, so it takes them from that first row and yields
# each following row as a dict keyed by column name.
# newline='' hands line-ending handling to the csv module, which is what its
# documentation asks for whenever a file object is passed to reader/writer.
with open('ch6-files/colon_delimited_stock_prices.txt', 'r', newline='') as f:
    reader = csv.DictReader(f, delimiter=':')
    for row in reader:
        date = row["date"]
        symbol = row["symbol"]
        # csv yields text only, so the price has to be converted explicitly.
        closing_price = float(row["closing_price"])
        print(date, symbol, closing_price)

6/20/2014 AAPL 90.91
6/20/2014 MSFT 41.68
6/20/2014 FB 64.5


In [3]:
# Write one "symbol,price" row per stock to a comma-separated file.
today_prices = { 'AAPL' : 90.91, 'MSFT' : 41.68, 'FB' : 64.5 }
# newline='' is required when writing: csv.writer emits its own '\r\n' row
# terminator, and without this flag text-mode newline translation on Windows
# turns it into '\r\r\n', which shows up as a blank line after every row.
with open('ch6-files/comma_delimited_stock_prices.txt', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    for stock, price in today_prices.items():
        writer.writerow([stock, price])

# Scraping the Web

Python's built-in HTML parser doesn't always cope well with HTML that's not perfectly formed.  
Thus, using **html5lib** is recommended.

In [4]:
from bs4 import BeautifulSoup
import requests
# Download the page and parse it with the html5lib parser.
# - timeout: requests waits indefinitely by default, which would hang the
#   notebook if the server never answers; 10 seconds is ample for one page.
# - raise_for_status(): fail loudly on a 4xx/5xx reply instead of silently
#   parsing an error page as if it were the real content.
page_response = requests.get("http://www.example.com", timeout=10)
page_response.raise_for_status()
html = page_response.text
soup = BeautifulSoup(html, 'html5lib')

In [5]:
# find('p') returns a single <p> tag (the first one on the page, as the
# output below shows); the bare name on the last line echoes it.
first_paragraph = soup.find('p')
first_paragraph

<p>This domain is established to be used for illustrative examples in documents. You may use this
    domain in examples without prior coordination or asking for permission.</p>

In [6]:
# soup.p reaches a <p> tag by attribute access; .text gives its text content
# with the surrounding markup stripped (compare with the tag echoed earlier).
first_paragraph_text = soup.p.text
first_paragraph_text

'This domain is established to be used for illustrative examples in documents. You may use this\n    domain in examples without prior coordination or asking for permission.'

In [7]:
# find_all('p') collects every <p> tag into a list-like result rather than
# stopping at one; the page fetched here yields two.
all_paragraphs = soup.find_all('p')
all_paragraphs

[<p>This domain is established to be used for illustrative examples in documents. You may use this
     domain in examples without prior coordination or asking for permission.</p>,
 <p><a href="http://www.iana.org/domains/example">More information...</a></p>]

In [8]:
# Three spellings of "select the <p> tags whose class is 'important'".
# Each assignment overwrites the previous one, so only the result of the last
# form is displayed.

# 1. Call the soup object with a tag name and an attribute dict.
important_paragraphs = soup('p', {'class' : 'important'})
# 2. Pass the class name as the second positional argument
#    (presumably matched against the CSS class; see the BeautifulSoup docs).
important_paragraphs = soup('p', 'important')
# 3. Filter by hand; get('class', []) supplies an empty list for tags that
#    have no class attribute, so the membership test never fails on them.
important_paragraphs = [p for p in soup('p')
                         if 'important' in p.get('class', [])]
# The result is empty for this page (see output).
important_paragraphs

[]

# Using APIs

## Unauthenticated API 

In [9]:
import requests, json
# Fetch the public repository list for one GitHub user (no authentication).
endpoint = "https://api.github.com/users/Lee-W/repos"
# - timeout: requests waits indefinitely by default; 10 seconds keeps a stalled
#   connection from hanging the notebook.
# - raise_for_status(): an error reply (e.g. rate limiting) is a JSON object,
#   not a list of repositories, and would otherwise only surface later as a
#   confusing TypeError when the result is indexed as a list of dicts.
repos_response = requests.get(endpoint, timeout=10)
repos_response.raise_for_status()
# Response.json() decodes the body and parses it in one step.
repos = repos_response.json()

- Python doesn’t come with a great date parser.
    - **python-dateutil** is recommended

In [10]:
from collections import Counter
from dateutil.parser import parse

# Turn every repository's "created_at" string into a datetime object.
dates = [parse(repo_info["created_at"]) for repo_info in repos]

# Tally how many repositories were created in each month / on each weekday.
month_counts = Counter(map(lambda created: created.month, dates))
weekday_counts = Counter(map(lambda created: created.weekday(), dates))

# The five most recently created repositories. The sort compares the raw
# "created_at" strings, which orders chronologically only if they share a
# fixed, sortable format (presumably ISO-8601 from the GitHub API).
last_5_repositories = sorted(
    repos,
    key=lambda repo_info: repo_info["created_at"],
    reverse=True,
)[:5]

# Language recorded for each of those five, in the same order.
last_5_languages = [recent["language"] for recent in last_5_repositories]

In [11]:
# Echo the parsed creation datetimes. This prints the whole list; slice it
# (e.g. dates[:5]) if the account has many repositories.
dates

[datetime.datetime(2014, 9, 7, 2, 44, 34, tzinfo=tzutc()),
 datetime.datetime(2015, 6, 13, 2, 7, 15, tzinfo=tzutc()),
 datetime.datetime(2014, 2, 3, 12, 28, 38, tzinfo=tzutc()),
 datetime.datetime(2016, 1, 22, 13, 8, 28, tzinfo=tzutc()),
 datetime.datetime(2016, 2, 21, 10, 23, 6, tzinfo=tzutc()),
 datetime.datetime(2014, 6, 3, 3, 10, 19, tzinfo=tzutc()),
 datetime.datetime(2014, 1, 29, 16, 5, 31, tzinfo=tzutc()),
 datetime.datetime(2016, 4, 6, 5, 50, 36, tzinfo=tzutc()),
 datetime.datetime(2014, 12, 29, 13, 4, 36, tzinfo=tzutc()),
 datetime.datetime(2015, 4, 10, 17, 54, 27, tzinfo=tzutc()),
 datetime.datetime(2015, 9, 6, 9, 15, 57, tzinfo=tzutc()),
 datetime.datetime(2015, 2, 1, 8, 14, 6, tzinfo=tzutc()),
 datetime.datetime(2015, 2, 10, 2, 54, 6, tzinfo=tzutc()),
 datetime.datetime(2016, 3, 16, 8, 30, 57, tzinfo=tzutc()),
 datetime.datetime(2016, 4, 5, 3, 6, 44, tzinfo=tzutc()),
 datetime.datetime(2015, 7, 3, 4, 34, 27, tzinfo=tzutc()),
 datetime.datetime(2014, 2, 18, 4, 1, 42, tzinfo=tzutc()),
 ...]

In [12]:
# Echo the "language" field of the five most recently created repositories.
last_5_languages

['Jupyter Notebook',
 'Jupyter Notebook',
 'Jupyter Notebook',
 'Jupyter Notebook',
 'Jupyter Notebook']

# Further Exploration
- pandas
- scrapy