In [1]:
# Library imports
import requests
import json
import pandas as pd
from movies_io import *
import time

In [3]:
# Demonstration of API calls and scrapes for a single movie.
# The four helpers are pulled in by the star-import of `movies_io`; their
# implementations are not visible in this notebook. Judging by the captured
# output, running this cell makes live requests to TMDB, Box Office Mojo and IMDb.
tmdb_id = 475303  # TMDB ID of the example movie ('A Rainy Day in New York' per the printed details)
imdb_id = get_imdb_ID(tmdb_id)  # IMDb title ID for that movie, e.g. 'tt7139936'
# Indexed later as: title, release_date, runtime, budget, language, studios, genres
movie_details = get_movie_details(tmdb_id)
# Indexed later as: domestic, international, worldwide gross (strings; '–' appears when a figure is absent)
box_office = get_box_office(imdb_id)
# Indexed later as: metacritic score, average vote, vote count, popularity,
# content rating, director, creator, actor
imdb_info = get_imdb_info(imdb_id)

Scraping: https://api.themoviedb.org/3/movie/475303?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt7139936
Getting IMDb information on: https://www.imdb.com/title/tt7139936




In [4]:
# Show the raw result of every call from the single-movie demonstration,
# one value per line, in the order the values were obtained
print(tmdb_id, imdb_id, movie_details, box_office, imdb_info, sep='\n')

475303
tt7139936
('A Rainy Day in New York', '2019-07-26', 92, 0, 'en', ['Gravier Productions', 'Perdido Productions', 'FilmNation Entertainment'], ['Comedy', 'Romance'])
['–', '$20,810,600', '$20,810,600']
['48', '6.6', '15,896', '349', 'PG-13', 'Woody Allen', ['Woody Allen'], ['Timothée Chalamet', 'Elle Fanning', 'Liev Schreiber', 'Suzanne Smith']]


In [3]:
# Empty results table. The scrapes and API calls further down fill it in,
# one row per movie. Columns are listed by the source that supplies them.
_id_and_tmdb_columns = [
    'tmdb_id', 'imdb_id',
    'title', 'runtime', 'release_date', 'budget', 'language', 'studios', 'genres',
]
_box_office_columns = ['domestic_gross', 'international_gross', 'worldwide_gross']
_imdb_columns = [
    'metacritic_score', 'average_imdb_vote', 'imdb_vote_count', 'imdb_popularity',
    'content_rating', 'director', 'creator', 'actor',
]
movies_df = pd.DataFrame(
    columns=[*_id_and_tmdb_columns, *_box_office_columns, *_imdb_columns]
)

In [87]:
# Build the request URLs for TMDB's "Top Rated" section, one URL per result page.
# range(320, 340) stops before 340, so this covers pages 320 through 339.
# NOTE(review): the API key is embedded in the URL literal and therefore also
# shows up in saved cell output; consider loading it from configuration instead.
top_rated_url_prefix = 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page='
urls_320_340 = [top_rated_url_prefix + str(page) for page in range(320, 340)]
# We eventually exhausted the Top Rated movie list after ~340 pages

['https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=320', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=321', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=322', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=323', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=324', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=325', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=326', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US&page=327', 'https://api.themoviedb.org/3/movie/top_rated?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language

In [88]:
# Collect the TMDB IDs found on each "Top Rated" page.
# The result is a list of lists: one inner list of IDs per page URL.
tmdb_ids = []
for page_url in urls_320_340:
    page_ids = get_tmdb_ids(page_url)
    tmdb_ids.append(page_ids)
    # Half-second pause after every page request
    time.sleep(.5)

In [89]:
# For every TMDB ID gathered above, run the API call and the two web scrapes,
# then add one row per movie to movies_df.
#
# Rows are buffered in a plain list and added to the dataframe in one step.
# DataFrame.append was removed in pandas 2.0, and calling it per movie copied
# the whole frame on every iteration.
# The try/finally keeps whatever was scraped so far if a request fails or the
# cell is interrupted part-way, as the row-by-row version did.
scraped_rows = []
try:
    for page_ids in tmdb_ids:
        for tmdb_id in page_ids:
            imdb_id = get_imdb_ID(tmdb_id)
            if not imdb_id:
                # No usable IMDb ID (any falsy result, including 0): skip this movie
                continue
            movie_details = get_movie_details(tmdb_id)
            box_office = get_box_office(imdb_id)
            imdb_info = get_imdb_info(imdb_id)
            # Half-second pause per movie between rounds of requests
            time.sleep(.5)
            scraped_rows.append({
                'tmdb_id': tmdb_id,
                'imdb_id': imdb_id,
                'title': movie_details[0],
                'runtime': movie_details[2],
                'release_date': movie_details[1],
                'budget': movie_details[3],
                'language': movie_details[4],
                'studios': movie_details[5],
                'genres': movie_details[6],
                'domestic_gross': box_office[0],
                'international_gross': box_office[1],
                'worldwide_gross': box_office[2],
                'metacritic_score': imdb_info[0],
                'average_imdb_vote': imdb_info[1],
                'imdb_vote_count': imdb_info[2],
                'imdb_popularity': imdb_info[3],
                'content_rating': imdb_info[4],
                'director': imdb_info[5],
                'creator': imdb_info[6],
                'actor': imdb_info[7]})
finally:
    if scraped_rows:
        new_rows = pd.DataFrame(scraped_rows)
        if movies_df.empty:
            # First batch: take the new rows as the table, keeping the column
            # order movies_df was declared with (plus any extra keys at the end).
            # This also avoids concatenating with an empty frame.
            all_columns = movies_df.columns.union(new_rows.columns, sort=False)
            movies_df = new_rows.reindex(columns=all_columns)
        else:
            movies_df = pd.concat([movies_df, new_rows], ignore_index=True)
# This entire process is repeated for multiple pages


Scraping: https://api.themoviedb.org/3/movie/203835?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1935897
Getting IMDb information on: https://www.imdb.com/title/tt1935897
Scraping: https://api.themoviedb.org/3/movie/10885?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0482599
Getting IMDb information on: https://www.imdb.com/title/tt0482599
Scraping: https://api.themoviedb.org/3/movie/19824?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1276434
Getting IMDb information on: https://www.imdb.com/title/tt1276434
Scraping: https://api.themoviedb.org/3/movie/612152?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt9419834
Getting IMDb information on: https://www.imdb.co

Getting IMDb information on: https://www.imdb.com/title/tt2474976
Scraping: https://api.themoviedb.org/3/movie/127493?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1656186
Getting IMDb information on: https://www.imdb.com/title/tt1656186
Scraping: https://api.themoviedb.org/3/movie/290637?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt3733778
Getting IMDb information on: https://www.imdb.com/title/tt3733778
Scraping: https://api.themoviedb.org/3/movie/19840?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1032815
Getting IMDb information on: https://www.imdb.com/title/tt1032815
Scraping: https://api.themoviedb.org/3/movie/171424?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.c

Scraping: https://api.themoviedb.org/3/movie/20069?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0847745
Getting IMDb information on: https://www.imdb.com/title/tt0847745
Scraping: https://api.themoviedb.org/3/movie/122857?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1659343
Getting IMDb information on: https://www.imdb.com/title/tt1659343
Scraping: https://api.themoviedb.org/3/movie/10012?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0257516
Getting IMDb information on: https://www.imdb.com/title/tt0257516
Scraping: https://api.themoviedb.org/3/movie/201088?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2717822
Getting IMDb information on: https://www.imdb.co

Getting IMDb information on: https://www.imdb.com/title/tt0331468
Scraping: https://api.themoviedb.org/3/movie/13493?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0997047
Getting IMDb information on: https://www.imdb.com/title/tt0997047
Scraping: https://api.themoviedb.org/3/movie/186759?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1468846
Getting IMDb information on: https://www.imdb.com/title/tt1468846
Scraping: https://api.themoviedb.org/3/movie/9796?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0454970
Getting IMDb information on: https://www.imdb.com/title/tt0454970
Scraping: https://api.themoviedb.org/3/movie/346910?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com

Getting IMDb information on: https://www.imdb.com/title/tt1472584
Scraping: https://api.themoviedb.org/3/movie/33107?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1391034
Getting IMDb information on: https://www.imdb.com/title/tt1391034
Scraping: https://api.themoviedb.org/3/movie/277355?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1945084
Getting IMDb information on: https://www.imdb.com/title/tt1945084
Scraping: https://api.themoviedb.org/3/movie/146301?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2473510
Getting IMDb information on: https://www.imdb.com/title/tt2473510
Scraping: https://api.themoviedb.org/3/movie/354282?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.c

Scraping: https://api.themoviedb.org/3/movie/37430?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0816462
Getting IMDb information on: https://www.imdb.com/title/tt0816462
Scraping: https://api.themoviedb.org/3/movie/42425?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0115446
Getting IMDb information on: https://www.imdb.com/title/tt0115446
Scraping: https://api.themoviedb.org/3/movie/11375?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0329717
Getting IMDb information on: https://www.imdb.com/title/tt0329717
Scraping: https://api.themoviedb.org/3/movie/13121?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0780622
Getting IMDb information on: https://www.imdb.com/

Getting IMDb information on: https://www.imdb.com/title/tt0337689
Scraping: https://api.themoviedb.org/3/movie/11231?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0110657
Getting IMDb information on: https://www.imdb.com/title/tt0110657
Scraping: https://api.themoviedb.org/3/movie/102207?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1723642
Getting IMDb information on: https://www.imdb.com/title/tt1723642
Scraping: https://api.themoviedb.org/3/movie/11237?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0366174
Getting IMDb information on: https://www.imdb.com/title/tt0366174
Scraping: https://api.themoviedb.org/3/movie/42888?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com

Scraping: https://api.themoviedb.org/3/movie/10588?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0312528
Getting IMDb information on: https://www.imdb.com/title/tt0312528
Scraping: https://api.themoviedb.org/3/movie/9731?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0089173
Getting IMDb information on: https://www.imdb.com/title/tt0089173
Scraping: https://api.themoviedb.org/3/movie/277558?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2124787
Getting IMDb information on: https://www.imdb.com/title/tt2124787
Scraping: https://api.themoviedb.org/3/movie/210908?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2527186
Getting IMDb information on: https://www.imdb.com

Getting IMDb information on: https://www.imdb.com/title/tt1288637
Scraping: https://api.themoviedb.org/3/movie/47933?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1628841
Getting IMDb information on: https://www.imdb.com/title/tt1628841
Scraping: https://api.themoviedb.org/3/movie/475210?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt5246700
Getting IMDb information on: https://www.imdb.com/title/tt5246700
Scraping: https://api.themoviedb.org/3/movie/11158?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0104437
Getting IMDb information on: https://www.imdb.com/title/tt0104437
Scraping: https://api.themoviedb.org/3/movie/9612?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/

Scraping: https://api.themoviedb.org/3/movie/133469?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1850394
Getting IMDb information on: https://www.imdb.com/title/tt1850394
Scraping: https://api.themoviedb.org/3/movie/8814?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0419706
Getting IMDb information on: https://www.imdb.com/title/tt0419706
Scraping: https://api.themoviedb.org/3/movie/10032?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0480271
Getting IMDb information on: https://www.imdb.com/title/tt0480271
Scraping: https://api.themoviedb.org/3/movie/9713?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0120701
Getting IMDb information on: https://www.imdb.com/t

Getting IMDb information on: https://www.imdb.com/title/tt0110216
Scraping: https://api.themoviedb.org/3/movie/481375?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2386237
Getting IMDb information on: https://www.imdb.com/title/tt2386237
Scraping: https://api.themoviedb.org/3/movie/50357?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1772240
Getting IMDb information on: https://www.imdb.com/title/tt1772240
Scraping: https://api.themoviedb.org/3/movie/193612?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt2235779
Getting IMDb information on: https://www.imdb.com/title/tt2235779
Scraping: https://api.themoviedb.org/3/movie/301608?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.c

Scraping: https://api.themoviedb.org/3/movie/6589?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0467110
Getting IMDb information on: https://www.imdb.com/title/tt0467110
Scraping: https://api.themoviedb.org/3/movie/12158?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0114825
Getting IMDb information on: https://www.imdb.com/title/tt0114825
Scraping: https://api.themoviedb.org/3/movie/29426?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1134854
Getting IMDb information on: https://www.imdb.com/title/tt1134854
Scraping: https://api.themoviedb.org/3/movie/6116?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0120185
Getting IMDb information on: https://www.imdb.com/ti

Getting IMDb information on: https://www.imdb.com/title/tt1564585
Scraping: https://api.themoviedb.org/3/movie/33282?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1504362
Getting IMDb information on: https://www.imdb.com/title/tt1504362
Scraping: https://api.themoviedb.org/3/movie/3093?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt0430912
Getting IMDb information on: https://www.imdb.com/title/tt0430912
Scraping: https://api.themoviedb.org/3/movie/241254?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/title/tt1085492
Getting IMDb information on: https://www.imdb.com/title/tt1085492
Scraping: https://api.themoviedb.org/3/movie/10283?api_key=d34a7ae8e00fac4590a4aee2a6e5d4a5&language=en-US
Getting box office information on: https://www.boxofficemojo.com/

In [92]:
# Verification of dataframe size, displayed as (rows, columns).
# The column count should equal the 20 columns movies_df was created with;
# the row count is the number of movies added by the scraping runs so far.
movies_df.shape

(6759, 20)

In [91]:
# Save the dataframe down to a CSV in the current working directory.
# No `index` argument is passed, so to_csv also writes the row index as an
# unnamed first column; readers of this file need to account for that.
# NOTE(review): the recorded execution count (In [91]) shows this cell was run
# before the shape check above it (In [92]); confirm the saved file is current.
movies_df.to_csv('dataframe.csv')