In [2]:
#Import Dependencies
import pandas as pd
import requests
import json
import time
import numpy as np

In [3]:
#Query API and Convert to DataFrame

#Read csv with proper encoding
path = "U.S. Released Movies_ 1972-2016.csv"
movies = pd.read_csv(path, encoding='latin1')
movies.head()

Unnamed: 0,Position,Const,Created,Modified,Description,Title,URL,Title Type,IMDb Rating,Runtime (mins),Year,Genres,Num Votes,Release Date,Directors
0,1,tt0110912,4/23/2013,4/23/2013,,Pulp Fiction,https://www.imdb.com/title/tt0110912/,movie,8.9,154.0,1994.0,"Crime, Drama",1607823.0,5/21/1994,Quentin Tarantino
1,2,tt1872181,4/23/2013,4/23/2013,,The Amazing Spider-Man 2,https://www.imdb.com/title/tt1872181/,movie,6.6,142.0,2014.0,"Action, Adventure, Sci-Fi",381550.0,4/10/2014,Marc Webb
2,3,tt0111161,4/23/2013,4/23/2013,,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,movie,9.3,142.0,1994.0,Drama,2057262.0,9/10/1994,Frank Darabont
3,4,tt0076759,4/23/2013,4/23/2013,,Star Wars,https://www.imdb.com/title/tt0076759/,movie,8.6,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",1102354.0,5/25/1977,George Lucas
4,5,tt0088763,4/23/2013,4/23/2013,,Back to the Future,https://www.imdb.com/title/tt0088763/,movie,8.5,116.0,1985.0,"Adventure, Comedy, Sci-Fi",915281.0,7/3/1985,Robert Zemeckis


In [4]:
#Extract relevant columns
movies = movies[['Const', 'Title', 'Runtime (mins)', 'Year', 'Genres', 'Release Date']]
movies.head()

Unnamed: 0,Const,Title,Runtime (mins),Year,Genres,Release Date
0,tt0110912,Pulp Fiction,154.0,1994.0,"Crime, Drama",5/21/1994
1,tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
2,tt0111161,The Shawshank Redemption,142.0,1994.0,Drama,9/10/1994
3,tt0076759,Star Wars,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",5/25/1977
4,tt0088763,Back to the Future,116.0,1985.0,"Adventure, Comedy, Sci-Fi",7/3/1985


In [5]:
#Rename Columns
movies = movies.rename(columns= {'Const': 'IMDb ID'})
movies.head()

Unnamed: 0,IMDb ID,Title,Runtime (mins),Year,Genres,Release Date
0,tt0110912,Pulp Fiction,154.0,1994.0,"Crime, Drama",5/21/1994
1,tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
2,tt0111161,The Shawshank Redemption,142.0,1994.0,Drama,9/10/1994
3,tt0076759,Star Wars,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",5/25/1977
4,tt0088763,Back to the Future,116.0,1985.0,"Adventure, Comedy, Sci-Fi",7/3/1985


In [6]:
#Drop rows with incomplete data
movies = movies.dropna()

#Exract rows from years 2010 and later
movies = movies[movies.iloc[:,3] >= 2010]
movies = movies.set_index(['IMDb ID'])
movies.head()

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
tt1323594,Despicable Me,95.0,2010.0,"Animation, Comedy, Family, Fantasy",6/20/2010
tt1375670,Grown Ups,102.0,2010.0,Comedy,6/24/2010
tt0892769,How to Train Your Dragon,98.0,2010.0,"Animation, Action, Adventure, Family, Fantasy",3/18/2010
tt1375666,Inception,148.0,2010.0,"Action, Adventure, Sci-Fi, Thriller",7/8/2010


In [7]:
#Change Year and Runtime to integers
movies[['Year','Runtime (mins)']] = movies[['Year','Runtime (mins)']].applymap(np.int64)
movies.head()

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tt1872181,The Amazing Spider-Man 2,142,2014,"Action, Adventure, Sci-Fi",4/10/2014
tt1323594,Despicable Me,95,2010,"Animation, Comedy, Family, Fantasy",6/20/2010
tt1375670,Grown Ups,102,2010,Comedy,6/24/2010
tt0892769,How to Train Your Dragon,98,2010,"Animation, Action, Adventure, Family, Fantasy",3/18/2010
tt1375666,Inception,148,2010,"Action, Adventure, Sci-Fi, Thriller",7/8/2010


In [20]:
# Pull: Rating, Metascore, and BoxOffice from omdb
movies['Rating'] = ''
movies['Metascore'] = ''
movies['Box Office'] = ''
count=0

movies.index

Index(['tt1872181', 'tt1323594', 'tt1375670', 'tt0892769', 'tt1375666',
       'tt1228705', 'tt1250777', 'tt1245526', 'tt1130884', 'tt1104001',
       ...
       'tt1658801', 'tt1881109', 'tt3297330', 'tt3707106', 'tt3640424',
       'tt2322517', 'tt3450900', 'tt0775440', 'tt3748172', 'tt4425200'],
      dtype='object', name='IMDb ID', length=1564)

In [30]:
for imdb in movies.index:
    try:
        count += 1
        url = 'http://www.omdbapi.com/?i={0}&apikey=trilogy'.format(imdb)
        results = requests.get(url).json()
        movies.loc[imdb,'Rating'] = results['Rated']
        movies.loc[imdb,'Metascore'] = results['Metascore']
        movies.loc[imdb,'Box Office'] = results['BoxOffice']
        print(f'{imdb} processed: {count} / {len(movies.index)}')
        time.sleep(.1)
    except:
        print(f'Error processing {imdb}, skipping: {count} / {len(movies.index)}')

tt1872181 processed: 3135 / 1564
tt1323594 processed: 3136 / 1564
tt1375670 processed: 3137 / 1564
tt0892769 processed: 3138 / 1564
tt1375666 processed: 3139 / 1564
tt1228705 processed: 3140 / 1564
tt1250777 processed: 3141 / 1564
tt1245526 processed: 3142 / 1564
tt1130884 processed: 3143 / 1564
tt1104001 processed: 3144 / 1564
tt0435761 processed: 3145 / 1564
tt0780504 processed: 3146 / 1564
tt1596343 processed: 3147 / 1564
tt1201607 processed: 3148 / 1564
tt1219289 processed: 3149 / 1564
tt1298650 processed: 3150 / 1564
tt1270798 processed: 3151 / 1564
tt1568346 processed: 3152 / 1564
tt1454029 processed: 3153 / 1564
tt1637688 processed: 3154 / 1564
tt1232829 processed: 3155 / 1564
tt0948470 processed: 3156 / 1564
tt1605630 processed: 3157 / 1564
tt0848228 processed: 3158 / 1564
tt1440129 processed: 3159 / 1564
tt1194173 processed: 3160 / 1564
tt1217209 processed: 3161 / 1564
tt1259521 processed: 3162 / 1564
tt1790886 processed: 3163 / 1564
tt1371111 processed: 3164 / 1564
tt1345836 

tt0938283 processed: 3385 / 1564
tt1001526 processed: 3386 / 1564
tt1424381 processed: 3387 / 1564
tt0892791 processed: 3388 / 1564
tt0477080 processed: 3389 / 1564
tt1502404 processed: 3390 / 1564
tt0990407 processed: 3391 / 1564
tt0993842 processed: 3392 / 1564
tt1591095 processed: 3393 / 1564
tt1302011 processed: 3394 / 1564
tt1486185 processed: 3395 / 1564
tt1401152 processed: 3396 / 1564
tt1240982 processed: 3397 / 1564
tt1657507 processed: 3398 / 1564
tt0477302 processed: 3399 / 1564
tt1448755 processed: 3400 / 1564
tt1204342 processed: 3401 / 1564
tt0448694 processed: 3402 / 1564
tt0471042 processed: 3403 / 1564
tt1568911 processed: 3404 / 1564
tt1591479 processed: 3405 / 1564
tt1366365 processed: 3406 / 1564
tt1142977 processed: 3407 / 1564
tt1838544 processed: 3408 / 1564
tt1397514 processed: 3409 / 1564
tt1327194 processed: 3410 / 1564
tt1667353 processed: 3411 / 1564
tt2109184 processed: 3412 / 1564
tt0431021 processed: 3413 / 1564
tt1899353 processed: 3414 / 1564
tt2083383 

tt1555064 processed: 3634 / 1564
tt1321509 processed: 3635 / 1564
tt1075747 processed: 3636 / 1564
tt1477076 processed: 3637 / 1564
tt1302067 processed: 3638 / 1564
tt1270761 processed: 3639 / 1564
tt1305591 processed: 3640 / 1564
tt2306745 processed: 3641 / 1564
tt0359950 processed: 3642 / 1564
tt1413495 processed: 3643 / 1564
tt0835418 processed: 3644 / 1564
tt1641975 processed: 3645 / 1564
tt2136808 processed: 3646 / 1564
tt1655416 processed: 3647 / 1564
tt1928330 processed: 3648 / 1564
tt1815708 processed: 3649 / 1564
tt2112293 processed: 3650 / 1564
tt1975249 processed: 3651 / 1564
tt1435513 processed: 3652 / 1564
tt1742336 processed: 3653 / 1564
tt1606392 processed: 3654 / 1564
tt1723124 processed: 3655 / 1564
tt1441326 processed: 3656 / 1564
tt1440161 processed: 3657 / 1564
tt0997152 processed: 3658 / 1564
tt1464580 processed: 3659 / 1564
tt1529572 processed: 3660 / 1564
tt1423995 processed: 3661 / 1564
tt1440292 processed: 3662 / 1564
tt1742334 processed: 3663 / 1564
tt2771372 

tt1319704 processed: 3883 / 1564
tt1320291 processed: 3884 / 1564
tt1533013 processed: 3885 / 1564
tt0959329 processed: 3886 / 1564
tt1268987 processed: 3887 / 1564
tt1307873 processed: 3888 / 1564
tt1196204 processed: 3889 / 1564
tt0775489 processed: 3890 / 1564
tt2692904 processed: 3891 / 1564
tt1013860 processed: 3892 / 1564
tt1547230 processed: 3893 / 1564
tt1985019 processed: 3894 / 1564
tt1967545 processed: 3895 / 1564
tt1827354 processed: 3896 / 1564
tt1410051 processed: 3897 / 1564
tt1407065 processed: 3898 / 1564
tt1100051 processed: 3899 / 1564
tt1486190 processed: 3900 / 1564
tt1431181 processed: 3901 / 1564
tt1241017 processed: 3902 / 1564
tt1679332 processed: 3903 / 1564
tt1153546 processed: 3904 / 1564
tt1173687 processed: 3905 / 1564
tt1629242 processed: 3906 / 1564
tt2326612 processed: 3907 / 1564
tt1462901 processed: 3908 / 1564
tt2204379 processed: 3909 / 1564
tt1884318 processed: 3910 / 1564
tt1730687 processed: 3911 / 1564
tt1833879 processed: 3912 / 1564
tt1740047 

tt1602472 processed: 4132 / 1564
tt1757742 processed: 4133 / 1564
tt0893412 processed: 4134 / 1564
tt1612774 processed: 4135 / 1564
tt1592873 processed: 4136 / 1564
tt1720616 processed: 4137 / 1564
tt1068242 processed: 4138 / 1564
tt1598828 processed: 4139 / 1564
tt1175709 processed: 4140 / 1564
tt1758692 processed: 4141 / 1564
tt1384927 processed: 4142 / 1564
tt1411238 processed: 4143 / 1564
tt1648208 processed: 4144 / 1564
tt1527788 processed: 4145 / 1564
tt1588170 processed: 4146 / 1564
tt1614989 processed: 4147 / 1564
tt1885265 processed: 4148 / 1564
tt1855401 processed: 4149 / 1564
tt1204340 processed: 4150 / 1564
tt1452628 processed: 4151 / 1564
tt1403988 processed: 4152 / 1564
tt1220888 processed: 4153 / 1564
tt1787759 processed: 4154 / 1564
tt1682940 processed: 4155 / 1564
tt1183923 processed: 4156 / 1564
tt1606390 processed: 4157 / 1564
tt1653700 processed: 4158 / 1564
tt1582271 processed: 4159 / 1564
tt1456060 processed: 4160 / 1564
tt1605777 processed: 4161 / 1564
tt1356864 

tt2448374 processed: 4378 / 1564
tt2641874 processed: 4379 / 1564
tt1684927 processed: 4380 / 1564
tt1783413 processed: 4381 / 1564
tt1709157 processed: 4382 / 1564
tt1724982 processed: 4383 / 1564
tt2024354 processed: 4384 / 1564
tt1790658 processed: 4385 / 1564
tt2236182 processed: 4386 / 1564
tt1733125 processed: 4387 / 1564
tt1935749 processed: 4388 / 1564
tt2282719 processed: 4389 / 1564
tt1772373 processed: 4390 / 1564
tt1637652 processed: 4391 / 1564
tt2510998 processed: 4392 / 1564
tt2170371 processed: 4393 / 1564
tt1699225 processed: 4394 / 1564
tt2151885 processed: 4395 / 1564
tt1744825 processed: 4396 / 1564
tt1724572 processed: 4397 / 1564
tt1535565 processed: 4398 / 1564
tt2100380 processed: 4399 / 1564
tt1825918 processed: 4400 / 1564
tt2305700 processed: 4401 / 1564
tt1117646 processed: 4402 / 1564
tt1714196 processed: 4403 / 1564
tt2086853 processed: 4404 / 1564
tt1691338 processed: 4405 / 1564
tt2049576 processed: 4406 / 1564
tt2342339 processed: 4407 / 1564
tt2381287 

tt4302938 processed: 4625 / 1564
tt3152624 processed: 4626 / 1564
tt3168230 processed: 4627 / 1564
tt3715320 processed: 4628 / 1564
tt1856101 processed: 4629 / 1564
tt1758810 processed: 4630 / 1564
tt1179933 processed: 4631 / 1564
tt1365519 processed: 4632 / 1564
tt1126590 processed: 4633 / 1564
tt1316546 processed: 4634 / 1564
tt0437086 processed: 4635 / 1564
Error processing tt1399664, skipping: 4636 / 1564
tt1226837 processed: 4637 / 1564
tt1648190 processed: 4638 / 1564
tt1846589 processed: 4639 / 1564
tt2141773 processed: 4640 / 1564
tt2378507 processed: 4641 / 1564
tt1366338 processed: 4642 / 1564
tt1798603 processed: 4643 / 1564
tt0825283 processed: 4644 / 1564
Error processing tt1074206, skipping: 4645 / 1564
tt0491203 processed: 4646 / 1564
tt1502407 processed: 4647 / 1564
tt0460890 processed: 4648 / 1564
tt1517451 processed: 4649 / 1564
tt1791528 processed: 4650 / 1564
tt0491175 processed: 4651 / 1564
tt0837156 processed: 4652 / 1564
tt1273221 processed: 4653 / 1564
tt1374989

In [31]:
movies.head()

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date,Rating,Metascore,Box Office
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tt1872181,The Amazing Spider-Man 2,142,2014,"Action, Adventure, Sci-Fi",4/10/2014,PG-13,53,"$183,277,573"
tt1323594,Despicable Me,95,2010,"Animation, Comedy, Family, Fantasy",6/20/2010,PG,72,"$251,476,985"
tt1375670,Grown Ups,102,2010,Comedy,6/24/2010,PG-13,30,"$162,001,186"
tt0892769,How to Train Your Dragon,98,2010,"Animation, Action, Adventure, Family, Fantasy",3/18/2010,PG,74,"$216,900,000"
tt1375666,Inception,148,2010,"Action, Adventure, Sci-Fi, Thriller",7/8/2010,PG-13,74,"$292,568,851"


In [None]:
#Clean the genre column

In [None]:
#Critics Ratings

#subquery

#Scatterplots to visualize correlation

#test for significance 

In [None]:
#Genres

#Paredo bar Chart to identify top Genres

#Plot changes over time with a Line graph

In [None]:
#MPAA Rating

#Paredo bar Chart

In [None]:
#Runtime

#Scatterplot

#test for significance 