In [48]:
import requests
from bs4 import BeautifulSoup
import json
import math
import re
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

### Using JSON

JSON - JavaScript Object Notation, is currently becoming the most common form of data interchange. Python includes a json module for working with data in that format, and can easily ajust between the raw string of the JSON stream, and assorted python data types.

In [50]:
r = requests.get("https://www.statbureau.org/get-data-json?country=united-states")

In [51]:
inflation = json.loads(r.text)
type(inflation)

list

In [41]:
def get_adj_price(year, price):
    req_url = "https://www.statbureau.org/calculate-inflation-price-json?country=united-states&start={0!s}/1/1&end=2016/8/1&amount={1!s}"
    response = requests.get(req_url.format(year, price))
    if response.status_code == 200:
        adj_val = float(response.json().replace('$','').replace(',','').replace(' ', ''))
    else:
        adj_val= math.nan
    return adj_val

print(get_adj_price(1967, 3.50))

25.6


In [42]:
moviepage = requests.get("http://www.the-numbers.com/movie/records/All-Time-Domestic-Box-Office")
moviesoup = BeautifulSoup(moviepage.text, "lxml")

nondecimal = re.compile(r'[^\d]+')
yearlist = []
titlelist = []
domlist = []
interlist = []
worldlist = []

movierows = moviesoup.find_all('tr')
movierows = movierows[1:]

for row in movierows:
    cols = row.find_all('td')
    cols = cols[1:]
    
    yearlist.append(int(cols[0].text))
    titlelist.append(cols[1].text)
    domlist.append(int(nondecimal.sub('', cols[2].text)))
    interlist.append(int(nondecimal.sub('', cols[3].text)))
    worldlist.append(int(nondecimal.sub('', cols[4].text)))
    
movieDataDict = {}
movieDataDict['year'] = yearlist
movieDataDict['title'] = titlelist
movieDataDict['domestic'] = domlist
movieDataDict['international'] = interlist
movieDataDict['worldwide'] = worldlist

movieframe = pd.DataFrame(movieDataDict)
movieframe.head()

Unnamed: 0,domestic,international,title,worldwide,year
0,936662225,1122000000,Star Wars Ep. VII: The Force Awakens,2058662225,2015
1,760507625,2023411357,Avatar,2783918982,2009
2,658672302,1548943366,Titanic,2207615668,1997
3,652198010,1018130015,Jurassic World,1670328025,2015
4,623279547,896200000,The Avengers,1519479547,2012


In [62]:
def price_wrapper(the_row):
    return get_adj_price(the_row.year, the_row.domestic)

movieframe['adjusted domestic'] = movieframe.apply(price_wrapper, axis=1)
movieframe.head()

Unnamed: 0,domestic,international,title,worldwide,year,adjusted_domestic,adjusted domestic
0,936662225,1122000000,Star Wars Ep. VII: The Force Awakens,2058662225,2015,960759700.0,960759700.0
1,760507625,2023411357,Avatar,2783918982,2009,871294700.0,871294700.0
2,658672302,1548943366,Titanic,2207615668,1997,1000272000.0,1000272000.0
3,652198010,1018130015,Jurassic World,1670328025,2015,668977100.0,668977100.0
4,623279547,896200000,The Avengers,1519479547,2012,665207700.0,665207700.0


In [63]:
movieframe.sort_values(by=['adjusted domestic'], ascending=False).head(10)

Unnamed: 0,domestic,international,title,worldwide,year,adjusted_domestic,adjusted domestic
8,460998007,325600000,Star Wars Ep. IV: A New Hope,786598007,1977,1907779000.0,1907779000.0
89,260000000,210700000,Jaws,470700000,1975,1206585000.0,1206585000.0
12,435110554,357854772,ET: The Extra-Terrestrial,792965326,1982,1114869000.0,1114869000.0
2,658672302,1548943366,Titanic,2207615668,1997,1000272000.0,1000272000.0
0,936662225,1122000000,Star Wars Ep. VII: The Force Awakens,2058662225,2015,960759700.0,960759700.0
73,290271960,243900000,Star Wars Ep. V: The Empire Strikes Back,534171960,1980,911510700.0,911510700.0
1,760507625,2023411357,Avatar,2783918982,2009,871294700.0,871294700.0
56,309205079,263500000,Star Wars Ep. VI: Return of the Jedi,572705079,1983,763042700.0,763042700.0
15,422780140,564700000,The Lion King,987480140,1994,698407900.0,698407900.0
7,474544677,552500000,Star Wars Ep. I: The Phantom Menace,1027044677,1999,697349000.0,697349000.0


### Using a database API
For this, the OMDb API will be used, to pull additional data from their API based on a query string.
4. The data returned from the API will be in JSON format.
8. An appropriate column will be added to the pandas dataframe, and the additional data inserted.
2. The data will be resorted based on the rating from rotten tomatoes, for the sake of testing.

In [76]:
def get_rating(title):
    req_url = "http://www.omdbapi.com/?t={0!s}&tomatoes=true"
    response = requests.get(req_url.format(title))
    
    if response.status_code == 200:
        try:
            rating = response.json()['tomatoMeter']
            if rating == 'N/A':
                rating = math.nan
        except KeyError:
            rating = math.nan
    else:
        rating = math.nan
    
    return rating

print(get_rating("Titanic"))

88


In [77]:
def rating_wrapper(the_row):
    return get_rating(the_row.title)

movieframe['tomato rating'] = movieframe.apply(rating_wrapper, axis=1)
movieframe.head()

Unnamed: 0,domestic,international,title,worldwide,year,adjusted_domestic,adjusted domestic,tomato rating
0,936662225,1122000000,Star Wars Ep. VII: The Force Awakens,2058662225,2015,960759700.0,960759700.0,
1,760507625,2023411357,Avatar,2783918982,2009,871294700.0,871294700.0,83.0
2,658672302,1548943366,Titanic,2207615668,1997,1000272000.0,1000272000.0,88.0
3,652198010,1018130015,Jurassic World,1670328025,2015,668977100.0,668977100.0,71.0
4,623279547,896200000,The Avengers,1519479547,2012,665207700.0,665207700.0,92.0


In [78]:
movieframe.sort_values(by=['tomato rating'], ascending=False).head(10)

Unnamed: 0,domestic,international,title,worldwide,year,adjusted_domestic,adjusted domestic,tomato rating
25,380529370,555900000,Finding Nemo,936429370,2003,506642600.0,506642600.0,99
16,415004880,654813349,Toy Story 3,1069818229,2010,462864700.0,462864700.0,99
39,341268248,680500000,Zootopia,1021768248,2016,347512900.0,347512900.0,98
34,356461711,497461374,Inside Out,853923085,2015,365632400.0,365632400.0,98
12,435110554,357854772,ET: The Extra-Terrestrial,792965326,1982,1114869000.0,1114869000.0,98
67,293004164,438538457,Up,731542621,2009,335687600.0,335687600.0,98
89,260000000,210700000,Jaws,470700000,1975,1206585000.0,1206585000.0,97
75,289423425,270334294,"Monsters, Inc.",559757719,2001,400623600.0,400623600.0,96
38,342548984,592154195,The Lord of the Rings: The Two Towers,934703179,2002,466915400.0,466915400.0,96
92,257784718,199944670,The Lego Movie,457729388,2014,266417000.0,266417000.0,96
