# Practicing TMDB API Calls

*Christina Brockway*

## Imports

In [1]:
#Import packages
import os, time, json
import tmdbsimple as tmdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm_notebook




## Create Folder


In [2]:
#Create the folder for saving files
FOLDER = 'MovieData/'
os.makedirs(FOLDER, exist_ok = True)
os.listdir (FOLDER)

['.ipynb_checkpoints',
 'basics-filter.csv',
 'ratings-filter.csv',
 'tmdb_api_results 2010.json']

## Load API Key

In [3]:

with open('/Users/csbro/.secret/tmdb_api.json', 'r') as f:
    login = json.load(f)
login.keys()

dict_keys(['api_key'])

In [4]:
tmdb.API_KEY = login['api_key']

## Define Functions

In [5]:
def get_movie_with_rating(movie_id):
    #Get movie object using movie_id
    movie= tmdb.Movies(movie_id)
    #Save the dictionaries 
    movie_info = movie.info()
    releases = movie.releases()
    #Loop through countries for only US
    for c in releases['countries']:
        if c['iso_3166_1'] == 'US':
            movie_info['certification']= c['certification']
    return movie_info


In [6]:
def write_json(new_data, filename):
    """Appends a list of records (new_data) into a json file (filename).
    Adapted from: https://www.geeksforgeeks.org/append-to-json-file-using-python/"""

    with open(filename, 'r+') as file:
        #Load existing data into dictionary
        file_data = json.load(file)
        #choose to extend or append
        if (type(new_data) == list) & (type(file_data) == list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)
        #set file's current position at offset
        file.seek(0)
        #convert back to json
        json.dump(file_data, file)

## Confirm API Function works

In [7]:
test= ["tt0848228", "tt0332280"]
results= []
for movie_id in test:
    movie_info = get_movie_with_rating(movie_id)
    results.append(movie_info)
pd.DataFrame(results)

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,False,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.marvel.com/movies/the-avengers,24428,tt0848228,en,The Avengers,...,1518815515,143,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Some assembly required.,The Avengers,False,7.71,29274,PG-13
1,False,/qom1SZSENdmHFNZBXbtJAU0WTlC.jpg,,29000000,"[{'id': 10749, 'name': 'Romance'}, {'id': 18, ...",http://www.newline.com/properties/notebookthe....,11036,tt0332280,en,The Notebook,...,115603229,123,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Behind every great love is a great story.,The Notebook,False,7.88,10688,PG-13


## Load in data 

In [8]:
basics = pd.read_csv("MovieData/basics-filter.csv")

basics

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020.0,,70,Drama
2,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018.0,,122,Drama
3,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005.0,,100,"Comedy,Horror,Sci-Fi"
4,tt0096056,movie,Crime and Punishment,Crime and Punishment,0,2002.0,,126,Drama
...,...,...,...,...,...,...,...,...,...
86974,tt9914942,movie,Life Without Sara Amat,La vida sense la Sara Amat,0,2019.0,,74,Drama
86975,tt9915872,movie,The Last White Witch,My Girlfriend is a Wizard,0,2019.0,,97,"Comedy,Drama,Fantasy"
86976,tt9916170,movie,The Rehearsal,O Ensaio,0,2019.0,,51,Drama
86977,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller"


## Define variables

In [9]:
#Set the Year to filter for
YEAR = 2010
#Create an empty list for errors
errors = []

In [10]:
#Prepare DF for json file
JSON_MOVIE= f'{FOLDER}tmdb_api_results {YEAR}.json'

#Check if file exists
file_exists = os.path.isfile(JSON_MOVIE)
if file_exists == False:
    print(f'Creating json file for API results for {YEAR}')
    with open(JSON_MOVIE, 'w') as f:
        json.dump([{'imdb_id':0}], f)
else: 
    print(f'{JSON_MOVIE} already exists.')

MovieData/tmdb_api_results 2010.json already exists.


In [11]:
#Saving 2010  as the current df
df = basics.loc[basics['startYear'] == YEAR].copy()
#saving movie_id to separate variable
movie_ids = df['tconst']

In [12]:
#Load exisiting data from json into DF called previous_df
previous_df = pd.read_json(JSON_MOVIE)

In [13]:
#filter out any ids that are already in the file
needed_mids = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]

In [14]:
# Loop through movie_ids with a progress bar
for movie_id in tqdm_notebook(needed_mids, f'Movies from {YEAR}'):
    # Get index and movie id from the list:
    try:
        temp = get_movie_with_rating(movie_id)
        write_json(temp, JSON_MOVIE)
        time.sleep(0.02)
    except Exception as e:
        errors.append([movie_id, e])

print(f' - Total Errors: {len(errors)}')

Movies from 2010:   0%|          | 0/3862 [00:00<?, ?it/s]

 - Total Errors: 1126


In [15]:
print(errors)

[['tt0230212', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0230212?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0464032', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0464032?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0465637', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0465637?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0465760', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0465760?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0473343', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0473343?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0483839', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.org/3/movie/tt0483839?api_key=35a7b33196852a5816731c8b4196d66c')], ['tt0490199', HTTPError('404 Client Error: Not Found for url: https://api.themoviedb.or

In [16]:
final_2010_df = pd.read_json(JSON_MOVIE)
final_2010_df.to_csv (f'{FOLDER}final_tmdb_data_{YEAR}.csv.gz',
                      compression = 'gzip', index = False)