In [1]:
#Import Dependencies
import pandas as pd
import numpy as np
import requests
import time

In [2]:
# Load the movie list from CSV, decoding the file as Latin-1
path = "U.S. Released Movies_ 1972-2016.csv"
movies = pd.read_csv(filepath_or_buffer=path, encoding="latin1")
movies.head(5)

Unnamed: 0,Position,Const,Created,Modified,Description,Title,URL,Title Type,IMDb Rating,Runtime (mins),Year,Genres,Num Votes,Release Date,Directors
0,1,tt0110912,4/23/2013,4/23/2013,,Pulp Fiction,https://www.imdb.com/title/tt0110912/,movie,8.9,154.0,1994.0,"Crime, Drama",1607823.0,5/21/1994,Quentin Tarantino
1,2,tt1872181,4/23/2013,4/23/2013,,The Amazing Spider-Man 2,https://www.imdb.com/title/tt1872181/,movie,6.6,142.0,2014.0,"Action, Adventure, Sci-Fi",381550.0,4/10/2014,Marc Webb
2,3,tt0111161,4/23/2013,4/23/2013,,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,movie,9.3,142.0,1994.0,Drama,2057262.0,9/10/1994,Frank Darabont
3,4,tt0076759,4/23/2013,4/23/2013,,Star Wars,https://www.imdb.com/title/tt0076759/,movie,8.6,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",1102354.0,5/25/1977,George Lucas
4,5,tt0088763,4/23/2013,4/23/2013,,Back to the Future,https://www.imdb.com/title/tt0088763/,movie,8.5,116.0,1985.0,"Adventure, Comedy, Sci-Fi",915281.0,7/3/1985,Robert Zemeckis


In [3]:
# Keep only the identifier, title, runtime, year, genre and release-date columns
wanted_columns = ['Const', 'Title', 'Runtime (mins)', 'Year', 'Genres', 'Release Date']
movies = movies.loc[:, wanted_columns]
movies.head(5)

Unnamed: 0,Const,Title,Runtime (mins),Year,Genres,Release Date
0,tt0110912,Pulp Fiction,154.0,1994.0,"Crime, Drama",5/21/1994
1,tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
2,tt0111161,The Shawshank Redemption,142.0,1994.0,Drama,9/10/1994
3,tt0076759,Star Wars,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",5/25/1977
4,tt0088763,Back to the Future,116.0,1985.0,"Adventure, Comedy, Sci-Fi",7/3/1985


In [4]:
# Give the identifier column a descriptive name
column_renames = {'Const': 'IMDb ID'}
movies = movies.rename(column_renames, axis='columns')
movies.head(5)

Unnamed: 0,IMDb ID,Title,Runtime (mins),Year,Genres,Release Date
0,tt0110912,Pulp Fiction,154.0,1994.0,"Crime, Drama",5/21/1994
1,tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
2,tt0111161,The Shawshank Redemption,142.0,1994.0,Drama,9/10/1994
3,tt0076759,Star Wars,121.0,1977.0,"Action, Adventure, Fantasy, Sci-Fi",5/25/1977
4,tt0088763,Back to the Future,116.0,1985.0,"Adventure, Comedy, Sci-Fi",7/3/1985


In [5]:
# Drop rows with incomplete data
movies = movies.dropna()

# Extract rows from years 2010 and later.
# The column is selected by label rather than by position so the filter
# keeps targeting 'Year' even if the column order changes.
movies = movies[movies['Year'] >= 2010]

# Use the IMDb identifier as the row index
movies = movies.set_index(['IMDb ID'])
movies.head()

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tt1872181,The Amazing Spider-Man 2,142.0,2014.0,"Action, Adventure, Sci-Fi",4/10/2014
tt1323594,Despicable Me,95.0,2010.0,"Animation, Comedy, Family, Fantasy",6/20/2010
tt1375670,Grown Ups,102.0,2010.0,Comedy,6/24/2010
tt0892769,How to Train Your Dragon,98.0,2010.0,"Animation, Action, Adventure, Family, Fantasy",3/18/2010
tt1375666,Inception,148.0,2010.0,"Action, Adventure, Sci-Fi, Thriller",7/8/2010


In [6]:
# Change Year and Runtime to integers.
# astype converts each column in a single vectorized step, replacing the
# element-by-element applymap(np.int64) call; the resulting dtype is int64.
movies = movies.astype({'Year': 'int64', 'Runtime (mins)': 'int64'})
movies.head()

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tt1872181,The Amazing Spider-Man 2,142,2014,"Action, Adventure, Sci-Fi",4/10/2014
tt1323594,Despicable Me,95,2010,"Animation, Comedy, Family, Fantasy",6/20/2010
tt1375670,Grown Ups,102,2010,Comedy,6/24/2010
tt0892769,How to Train Your Dragon,98,2010,"Animation, Action, Adventure, Family, Fantasy",3/18/2010
tt1375666,Inception,148,2010,"Action, Adventure, Sci-Fi, Thriller",7/8/2010


In [7]:
# Query the OMDb API once per movie and store the 'Rated', 'Metascore' and
# 'BoxOffice' fields of the response in the 'Rating', 'Metascore' and
# 'Box Office' columns. Movies that cannot be fetched or that lack any of
# those fields are reported and skipped.
# NOTE(review): the API key is hardcoded in the request below; consider
# loading it from an environment variable instead of saving it in the notebook.
count = 0
total = len(movies)  # the loop only adds columns, so the row count is fixed

for count, imdb in enumerate(movies.index, start=1):
    try:
        # The request lives inside the try so that a timeout, connection
        # error or non-JSON body skips this movie instead of aborting the
        # whole loop.
        response = requests.get(
            'http://www.omdbapi.com/',
            params={'i': imdb, 'apikey': 'trilogy'},
            timeout=10,
        )
        results = response.json()

        # Read all three fields before writing any of them, so a missing key
        # never leaves a partially filled row behind.
        rated = results['Rated']
        metascore = results['Metascore']
        box_office = results['BoxOffice']

        movies.loc[imdb, 'Rating'] = rated
        movies.loc[imdb, 'Metascore'] = metascore
        movies.loc[imdb, 'Box Office'] = box_office
        print(f'{imdb} processed: {count} / {total}')
    except (requests.RequestException, ValueError, KeyError) as err:
        # Best-effort: report the reason and move on to the next movie.
        print(f'Error processing {imdb}, skipping: {count} / {total} ({type(err).__name__}: {err})')

    # Brief pause between requests, applied after failures as well
    time.sleep(.1)
        

tt1872181 processed: 1 / 1564
tt1323594 processed: 2 / 1564
tt1375670 processed: 3 / 1564
tt0892769 processed: 4 / 1564
tt1375666 processed: 5 / 1564
tt1228705 processed: 6 / 1564
tt1250777 processed: 7 / 1564
tt1245526 processed: 8 / 1564
tt1130884 processed: 9 / 1564
tt1104001 processed: 10 / 1564
tt0435761 processed: 11 / 1564
tt0780504 processed: 12 / 1564
tt1596343 processed: 13 / 1564
tt1201607 processed: 14 / 1564
tt1219289 processed: 15 / 1564
tt1298650 processed: 16 / 1564
tt1270798 processed: 17 / 1564
tt1568346 processed: 18 / 1564
tt1454029 processed: 19 / 1564
tt1637688 processed: 20 / 1564
tt1232829 processed: 21 / 1564
tt0948470 processed: 22 / 1564
tt1605630 processed: 23 / 1564
tt0848228 processed: 24 / 1564
tt1440129 processed: 25 / 1564
tt1194173 processed: 26 / 1564
tt1217209 processed: 27 / 1564
tt1259521 processed: 28 / 1564
tt1790886 processed: 29 / 1564
tt1371111 processed: 30 / 1564
tt1345836 processed: 31 / 1564
tt1077368 processed: 32 / 1564
tt1853728 process

tt1401152 processed: 262 / 1564
tt1240982 processed: 263 / 1564
tt1657507 processed: 264 / 1564
tt0477302 processed: 265 / 1564
tt1448755 processed: 266 / 1564
tt1204342 processed: 267 / 1564
tt0448694 processed: 268 / 1564
tt0471042 processed: 269 / 1564
tt1568911 processed: 270 / 1564
tt1591479 processed: 271 / 1564
tt1366365 processed: 272 / 1564
tt1142977 processed: 273 / 1564
tt1838544 processed: 274 / 1564
tt1397514 processed: 275 / 1564
tt1327194 processed: 276 / 1564
tt1667353 processed: 277 / 1564
tt2109184 processed: 278 / 1564
tt0431021 processed: 279 / 1564
tt1899353 processed: 280 / 1564
tt2083383 processed: 281 / 1564
tt1496025 processed: 282 / 1564
tt1596365 processed: 283 / 1564
tt1959332 processed: 284 / 1564
tt1213663 processed: 285 / 1564
tt1645155 processed: 286 / 1564
tt1132449 processed: 287 / 1564
tt1990314 processed: 288 / 1564
tt1999995 processed: 289 / 1564
tt1308729 processed: 290 / 1564
tt0404978 processed: 291 / 1564
tt1748179 processed: 292 / 1564
tt176418

tt1742336 processed: 519 / 1564
tt1606392 processed: 520 / 1564
tt1723124 processed: 521 / 1564
tt1441326 processed: 522 / 1564
tt1440161 processed: 523 / 1564
tt0997152 processed: 524 / 1564
tt1464580 processed: 525 / 1564
tt1529572 processed: 526 / 1564
tt1423995 processed: 527 / 1564
tt1440292 processed: 528 / 1564
tt1742334 processed: 529 / 1564
tt2771372 processed: 530 / 1564
tt1273678 processed: 531 / 1564
tt0808510 processed: 532 / 1564
tt1666186 processed: 533 / 1564
tt1032751 processed: 534 / 1564
tt1464174 processed: 535 / 1564
tt1604171 processed: 536 / 1564
tt1449283 processed: 537 / 1564
tt1767382 processed: 538 / 1564
tt1630036 processed: 539 / 1564
tt1517489 processed: 540 / 1564
tt1870529 processed: 541 / 1564
tt1714206 processed: 542 / 1564
tt1160996 processed: 543 / 1564
tt0455323 processed: 544 / 1564
tt1990216 processed: 545 / 1564
tt1438173 processed: 546 / 1564
tt2061712 processed: 547 / 1564
tt1867093 processed: 548 / 1564
tt2119474 processed: 549 / 1564
tt188649

tt1884318 processed: 776 / 1564
tt1730687 processed: 777 / 1564
tt1833879 processed: 778 / 1564
tt1740047 processed: 779 / 1564
tt1386925 processed: 780 / 1564
tt1196340 processed: 781 / 1564
tt1390535 processed: 782 / 1564
tt1381505 processed: 783 / 1564
tt1518812 processed: 784 / 1564
tt1409004 processed: 785 / 1564
tt1130969 processed: 786 / 1564
tt1106860 processed: 787 / 1564
tt1726592 processed: 788 / 1564
tt1937388 processed: 789 / 1564
tt1709652 processed: 790 / 1564
tt1838722 processed: 791 / 1564
tt1414378 processed: 792 / 1564
tt1575539 processed: 793 / 1564
tt1125929 processed: 794 / 1564
tt1092634 processed: 795 / 1564
tt1278379 processed: 796 / 1564
tt1411232 processed: 797 / 1564
tt1600524 processed: 798 / 1564
tt1325723 processed: 799 / 1564
tt1233192 processed: 800 / 1564
tt2112277 processed: 801 / 1564
tt2062661 processed: 802 / 1564
tt1300159 processed: 803 / 1564
tt1563704 processed: 804 / 1564
tt1038685 processed: 805 / 1564
tt1407049 processed: 806 / 1564
tt119613

tt1232838 processed: 1032 / 1564
tt1462054 processed: 1033 / 1564
tt1758575 processed: 1034 / 1564
tt1555149 processed: 1035 / 1564
tt2011971 processed: 1036 / 1564
tt0850677 processed: 1037 / 1564
tt1447793 processed: 1038 / 1564
tt1613062 processed: 1039 / 1564
tt1728196 processed: 1040 / 1564
tt1646974 processed: 1041 / 1564
tt1278449 processed: 1042 / 1564
tt1650453 processed: 1043 / 1564
tt1303803 processed: 1044 / 1564
tt1535491 processed: 1045 / 1564
tt1583753 processed: 1046 / 1564
tt1692500 processed: 1047 / 1564
tt2521086 processed: 1048 / 1564
tt1830497 processed: 1049 / 1564
tt1303235 processed: 1050 / 1564
tt2009538 processed: 1051 / 1564
tt1285016 processed: 1052 / 1564
tt1675434 processed: 1053 / 1564
tt0975645 processed: 1054 / 1564
tt1210166 processed: 1055 / 1564
tt1655420 processed: 1056 / 1564
tt1979320 processed: 1057 / 1564
tt1542344 processed: 1058 / 1564
tt0964517 processed: 1059 / 1564
tt0993846 processed: 1060 / 1564
tt1426329 processed: 1061 / 1564
tt1571222 

Error processing tt2670492, skipping: 1278 / 1564
tt2374835 processed: 1279 / 1564
tt1717578 processed: 1280 / 1564
tt1747960 processed: 1281 / 1564
tt1087470 processed: 1282 / 1564
tt1591585 processed: 1283 / 1564
tt2426110 processed: 1284 / 1564
tt1632679 processed: 1285 / 1564
tt1719681 processed: 1286 / 1564
tt1844811 processed: 1287 / 1564
tt2345525 processed: 1288 / 1564
tt1684934 processed: 1289 / 1564
tt1705115 processed: 1290 / 1564
tt2137742 processed: 1291 / 1564
tt1595411 processed: 1292 / 1564
tt1831611 processed: 1293 / 1564
tt1685518 processed: 1294 / 1564
tt2302925 processed: 1295 / 1564
tt2246779 processed: 1296 / 1564
tt1844641 processed: 1297 / 1564
Error processing tt0944947, skipping: 1298 / 1564
tt1392190 processed: 1299 / 1564
tt1945228 processed: 1300 / 1564
tt0478970 processed: 1301 / 1564
tt1289401 processed: 1302 / 1564
tt1825683 processed: 1303 / 1564
tt2094766 processed: 1304 / 1564
tt1340138 processed: 1305 / 1564
tt1029360 processed: 1306 / 1564
tt1964418

tt1564777 processed: 1524 / 1564
tt2180351 processed: 1525 / 1564
tt2402101 processed: 1526 / 1564
tt0835775 processed: 1527 / 1564
tt2017486 processed: 1528 / 1564
tt1837636 processed: 1529 / 1564
tt2179136 processed: 1530 / 1564
tt1355644 processed: 1531 / 1564
tt1881002 processed: 1532 / 1564
tt1469304 processed: 1533 / 1564
tt2402927 processed: 1534 / 1564
tt2374684 processed: 1535 / 1564
tt1703957 processed: 1536 / 1564
tt2494376 processed: 1537 / 1564
Error processing tt2879552, skipping: 1538 / 1564
tt2062700 processed: 1539 / 1564
tt2671706 processed: 1540 / 1564
tt2788710 processed: 1541 / 1564
tt1754656 processed: 1542 / 1564
tt2802144 processed: 1543 / 1564
tt2397535 processed: 1544 / 1564
tt3353060 processed: 1545 / 1564
tt2884018 processed: 1546 / 1564
tt2937898 processed: 1547 / 1564
tt3397884 processed: 1548 / 1564
tt2180411 processed: 1549 / 1564
tt0790770 processed: 1550 / 1564
tt3829266 processed: 1551 / 1564
tt2494362 processed: 1552 / 1564
tt3289728 processed: 1553 

In [8]:
# Display the frame, which now carries the 'Rating', 'Metascore' and
# 'Box Office' columns written by the OMDb lookup loop
movies

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date,Rating,Metascore,Box Office
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tt1872181,The Amazing Spider-Man 2,142,2014,"Action, Adventure, Sci-Fi",4/10/2014,PG-13,53,"$183,277,573"
tt1323594,Despicable Me,95,2010,"Animation, Comedy, Family, Fantasy",6/20/2010,PG,72,"$251,476,985"
tt1375670,Grown Ups,102,2010,Comedy,6/24/2010,PG-13,30,"$162,001,186"
tt0892769,How to Train Your Dragon,98,2010,"Animation, Action, Adventure, Family, Fantasy",3/18/2010,PG,74,"$216,900,000"
tt1375666,Inception,148,2010,"Action, Adventure, Sci-Fi, Thriller",7/8/2010,PG-13,74,"$292,568,851"
tt1228705,Iron Man 2,124,2010,"Action, Adventure, Sci-Fi",4/26/2010,PG-13,57,"$312,057,433"
tt1250777,Kick-Ass,117,2010,"Action, Comedy",3/12/2010,R,66,"$20,000,000"
tt1245526,RED,111,2010,"Action, Comedy, Crime, Thriller",9/29/2010,PG-13,60,"$88,900,000"
tt1130884,Shutter Island,138,2010,"Mystery, Thriller",2/13/2010,R,63,"$125,001,000"
tt1104001,Tron,125,2010,"Action, Adventure, Fantasy, Sci-Fi",11/30/2010,PG,49,"$172,051,787"


In [9]:
# Discard rows that are missing a value in any column
movies = movies.dropna()

# Then discard rows where an OMDb-sourced column contains the "N/A" marker,
# checking one column at a time
for column in ('Box Office', 'Rating', 'Metascore'):
    has_marker = movies[column].str.contains("N/A")
    movies = movies[~has_marker]

ValueError: invalid literal for int() with base 10: '181591.00'

In [10]:
# Convert 'Box Office' from a currency string (e.g. "$183,277,573") to an integer.
# - The column is selected by name instead of the positional slice
#   movies.columns[7:], which depended on the exact column order.
# - The pattern is a raw string so that '\$' is not an invalid escape sequence.
# - Values pass through float first because int() cannot parse strings with a
#   decimal part such as '181591.00'.
# - The final cast names int64 explicitly rather than the platform-dependent int.
box_office_digits = movies['Box Office'].replace(r'[\$,]', '', regex=True)
movies['Box Office'] = box_office_digits.astype(float).astype('int64')

In [12]:
# Display the cleaned frame; 'Box Office' now holds integers instead of
# '$'-formatted strings
movies

Unnamed: 0_level_0,Title,Runtime (mins),Year,Genres,Release Date,Rating,Metascore,Box Office
IMDb ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tt1872181,The Amazing Spider-Man 2,142,2014,"Action, Adventure, Sci-Fi",4/10/2014,PG-13,53,183277573
tt1323594,Despicable Me,95,2010,"Animation, Comedy, Family, Fantasy",6/20/2010,PG,72,251476985
tt1375670,Grown Ups,102,2010,Comedy,6/24/2010,PG-13,30,162001186
tt0892769,How to Train Your Dragon,98,2010,"Animation, Action, Adventure, Family, Fantasy",3/18/2010,PG,74,216900000
tt1375666,Inception,148,2010,"Action, Adventure, Sci-Fi, Thriller",7/8/2010,PG-13,74,292568851
tt1228705,Iron Man 2,124,2010,"Action, Adventure, Sci-Fi",4/26/2010,PG-13,57,312057433
tt1250777,Kick-Ass,117,2010,"Action, Comedy",3/12/2010,R,66,20000000
tt1245526,RED,111,2010,"Action, Comedy, Crime, Thriller",9/29/2010,PG-13,60,88900000
tt1130884,Shutter Island,138,2010,"Mystery, Thriller",2/13/2010,R,63,125001000
tt1104001,Tron,125,2010,"Action, Adventure, Fantasy, Sci-Fi",11/30/2010,PG,49,172051787
