## API Calls

* Your stakeholder wants you to extract the budget, revenue, and MPAA Rating (G/PG/PG-13/R), which is also called "Certification".

### Import libraries

In [77]:
# Install tmdbsimple (only need to run once)
!pip install tmdbsimple

# imports
import json, os, time
import tmdbsimple as tmdb
import pandas as pd
from tqdm.notebook import tqdm_notebook
import glob



In [78]:
# Load the TMDB credentials from a JSON file kept outside the project folder.
# The location can be overridden with the TMDB_API_KEY_FILE environment
# variable; when it is not set, the original hard-coded path is used.
api_key_path = os.environ.get('TMDB_API_KEY_FILE',
                              '/Users/ROSA/.secret/tmdb_api.json')
with open(api_key_path) as f:
    login = json.load(f)

# Display only the key names, so the secret value is not written to the output
login.keys()

dict_keys(['api-key'])

In [79]:
# Hand the loaded key to tmdbsimple through its module-level API_KEY attribute
# (tmdbsimple is already imported in the first cell; the import is repeated here)
import tmdbsimple as tmdb
tmdb.API_KEY =  login['api-key']

In [80]:
# Create a tmdbsimple Movies object for the numeric id 603
movie = tmdb.Movies(603)

In [81]:
# Calling .info() on the movie object returns its details as a dictionary;
# keep it in `info` and display it
info = movie.info()
info

{'adult': False,
 'backdrop_path': '/oMsxZEvz9a708d49b6UdZK1KAo5.jpg',
 'belongs_to_collection': {'id': 2344,
  'name': 'The Matrix Collection',
  'poster_path': '/bV9qTVHTVf0gkW0j7p7M0ILD4pG.jpg',
  'backdrop_path': '/bRm2DEgUiYciDw3myHuYFInD7la.jpg'},
 'budget': 63000000,
 'genres': [{'id': 28, 'name': 'Action'},
  {'id': 878, 'name': 'Science Fiction'}],
 'homepage': 'http://www.warnerbros.com/matrix',
 'id': 603,
 'imdb_id': 'tt0133093',
 'original_language': 'en',
 'original_title': 'The Matrix',
 'overview': 'Set in the 22nd century, The Matrix tells the story of a computer hacker who joins a group of underground insurgents fighting the vast and powerful computers who now rule the earth.',
 'popularity': 79.098,
 'poster_path': '/f89U3ADr1oiB1s9GkdPOEpXUk5H.jpg',
 'production_companies': [{'id': 79,
   'logo_path': '/tpFpsqbleCzEE2p5EgvUq6ozfCA.png',
   'name': 'Village Roadshow Pictures',
   'origin_country': 'US'},
  {'id': 372,
   'logo_path': None,
   'name': 'Groucho II Film

In [82]:
# Budget value stored in the info dictionary
info['budget']

63000000

In [83]:
# Revenue value stored in the info dictionary
info['revenue']

463517383

In [84]:
# IMDb id stored in the info dictionary
info['imdb_id']

'tt0133093'

In [85]:
# Same lookup, this time passing an IMDb id string instead of a numeric id,
# then display the budget from the returned info dictionary
movie = tmdb.Movies('tt1361336')
info = movie.info()
info['budget']

50000000

## Saving the Movie Certification/MPAA Rating

In [86]:
# .releases() returns a dictionary; its 'countries' list has one entry per
# release, each carrying a 'certification' and an 'iso_3166_1' country code
response = movie.releases()
response

{'id': 587807,
 'countries': [{'certification': '',
   'descriptors': [],
   'iso_3166_1': 'CO',
   'primary': False,
   'release_date': '2021-02-12'},
  {'certification': 'PG',
   'descriptors': [],
   'iso_3166_1': 'US',
   'primary': False,
   'release_date': '2021-02-26'},
  {'certification': 'ALL',
   'descriptors': [],
   'iso_3166_1': 'KR',
   'primary': False,
   'release_date': '2021-02-24'},
  {'certification': '',
   'descriptors': [],
   'iso_3166_1': 'ID',
   'primary': False,
   'release_date': '2021-03-10'},
  {'certification': '6',
   'descriptors': [],
   'iso_3166_1': 'NL',
   'primary': False,
   'release_date': '2021-06-09'},
  {'certification': 'G',
   'descriptors': [],
   'iso_3166_1': 'IE',
   'primary': False,
   'release_date': '2021-05-07'},
  {'certification': 'M/6',
   'descriptors': [],
   'iso_3166_1': 'PT',
   'primary': False,
   'release_date': '2021-03-04'},
  {'certification': '6+',
   'descriptors': [],
   'iso_3166_1': 'RU',
   'primary': False,
  

In [87]:
# Request the release records and print the certification of every US entry.
# The loop reads the dictionary that was just returned instead of relying on
# the attributes tmdbsimple copies onto the movie object after the call.
response = movie.releases()
for country in response['countries']:
    if country['iso_3166_1'] == 'US':
        print(country['certification'])
        

PG
PG
PG


In [88]:
# Look the movie up by its IMDb id
movie = tmdb.Movies('tt1361336')
# Keep the details dictionary and the release records
info = movie.info()
releases = movie.releases()
# Walk through the US release entries only and copy each certification onto
# the details dictionary (with several US entries, the last one is kept)
us_entries = (entry for entry in releases['countries']
              if entry['iso_3166_1'] == 'US')
for entry in us_entries:
    info['certification'] = entry['certification']

In [89]:
# function to add certification to movie.info dict
def get_movie_with_rating(movie_id):
    """Return a movie's info dictionary with its US certification added.

    Adapted from source = https://github.com/celiao/tmdbsimple

    Parameters
    ----------
    movie_id : int or str
        Identifier handed to ``tmdb.Movies``; this notebook passes both a
        numeric id and IMDb ids such as ``"tt0848228"``.

    Returns
    -------
    dict
        The dictionary returned by ``movie.info()``. A ``'certification'``
        key is added when the release data has at least one US entry;
        without a US entry the key is absent.

    Notes
    -----
    The release data may list several US entries. A non-empty certification
    is never replaced by an empty one, so a rating found on one US entry is
    not lost because another US entry has none. When every US entry carries
    a rating, the last one is kept.

    Any exception raised by the tmdbsimple calls propagates to the caller.
    """
    movie = tmdb.Movies(movie_id)

    # Details and release records come from two separate calls
    info = movie.info()
    releases = movie.releases()

    for release in releases['countries']:
        # Only US releases carry the MPAA rating we are after
        if release['iso_3166_1'] != 'US':
            continue

        certification = release['certification']
        # Store the first value seen, then overwrite only with real ratings
        if certification or 'certification' not in info:
            info['certification'] = certification

    return info

In [90]:
# Try the helper on a single IMDb id and display the dictionary it returns
test = get_movie_with_rating("tt0848228") # sample IMDb id
test

{'adult': False,
 'backdrop_path': '/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg',
 'belongs_to_collection': {'id': 86311,
  'name': 'The Avengers Collection',
  'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
  'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'},
 'budget': 220000000,
 'genres': [{'id': 878, 'name': 'Science Fiction'},
  {'id': 28, 'name': 'Action'},
  {'id': 12, 'name': 'Adventure'}],
 'homepage': 'https://www.marvel.com/movies/the-avengers',
 'id': 24428,
 'imdb_id': 'tt0848228',
 'original_language': 'en',
 'original_title': 'The Avengers',
 'overview': 'When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!',
 'popularity': 100.8,
 'poster_path': '/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg',
 'production_companies': [{'id': 420,
   'logo_path':

## Creating folder

In [91]:
import json, os, time
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook
# Folder (relative to the working directory) used for the data files
FOLDER = "Data/"
# Create the folder if needed; exist_ok=True means no error when it is already there
os.makedirs(FOLDER, exist_ok=True)
# Show what the folder currently contains
os.listdir(FOLDER)



['final_tmdb_data_by_year',
 'title_akas.csv.gz',
 'title_basics.csv.gz',
 'title_ratings.csv.gz']

In [92]:
# Years to request from the API. The download loop iterates over each value,
# so these are individual years rather than the two ends of a range.
YEARS_TO_GET = (2000,2001)

In [93]:
# Start with an empty list meant for recording errors from the API calls
errors = [ ]

In [94]:
#load in the dataframe from project part 1 as basics
basics = pd.read_csv('/Users/Rosa/Documents/Data/Movie-Profitability/Data/title_basics.csv.gz')

basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
2,tt0068865,movie,Lives of Performers,Lives of Performers,0,2016,,90,Drama
3,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
4,tt0082328,movie,Embodiment of Evil,Encarnação do Demônio,0,2008,,94,Horror


In [95]:
def write_json(new_data,filename):
    """Add ``new_data`` to the JSON list stored in ``filename``.

    Adapted from source = https://geeksforgeeks.org/append-to-json-file-using-python/

    Parameters
    ----------
    new_data : list or other JSON-serialisable object
        A list is merged item by item (``extend``); anything else is added
        as one new element (``append``).
    filename : str
        Path of an existing file whose content is a JSON list. The file is
        opened in ``'r+'`` mode, so it must already exist.

    Notes
    -----
    The file is rewritten from the beginning and then truncated, so it stays
    valid JSON even when the previous text was longer than the new text (for
    example when the file had been pretty-printed).
    """
    with open(filename, 'r+') as file:
        # Load what is already stored so the new records can be added to it
        file_data = json.load(file)
        if isinstance(new_data, list) and isinstance(file_data, list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)
        # Go back to the start of the file and write the combined data
        file.seek(0)
        json.dump(file_data, file)
        # Drop any bytes of the old content that lie beyond the new end
        file.truncate()


In [96]:
# Start of OUTER loop
for YEAR in tqdm_notebook(YEARS_TO_GET, desc='YEARS', position=0):
    #Defining the JSON file to store results for year
    JSON_FILE = f'{FOLDER}tmdb_api_results_{YEAR}.json'
    # check if file exists
    file_exists = os.path.isfile(JSON_FILE)
    
    # If it does not exist: create it
    if file_exists == False:
    # save an empty dict with just "imdb_id" to the new json file.
        with open(JSON_FILE,'w') as f:
            json.dump([{'imdb_id':0}],f)

    #Saving new year as the current df
    df = basics.loc[ basics['startYear']==YEAR].copy()

    # saving movie ids to list
    movie_ids = df['tconst'].copy()
    movie_ids

    # Load existing data from json into a dataframe called "previous_df"
    previous_df = pd.read_json(JSON_FILE)
    previous_df

    # filter out any ids that are already in the JSON_FILE
    movie_ids_to_get = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]

    #Get index and movie id from list
    # INNER Loop
    
    for movie_id in tqdm_notebook(movie_ids_to_get,
                                  desc=f'Movies from {YEAR}',
                                  position=1,
                                  leave=True):
        try:
            # Retrieve then data for the movie id
            temp = get_movie_with_rating(movie_id)  
            # Append/extend results to existing file using a pre-made function
            write_json(temp,JSON_FILE)
            # Short 20 ms sleep to prevent overwhelming server
            time.sleep(0.02)
            
         # if it fails, make a dict with just the id and None for certification.   
        except Exception as e:
            continue 

    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmdb_data_{YEAR}.csv.gz", compression="gzip", index=False)

YEARS:   0%|          | 0/2 [00:00<?, ?it/s]

Movies from 2000:   0%|          | 0/1558 [00:00<?, ?it/s]

Movies from 2001:   0%|          | 0/1666 [00:00<?, ?it/s]

In [97]:
# Display the frame left over from the last pass of the loop above,
# i.e. the results for the final year that was processed
final_year_df

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0035423,0.0,/hfeiSfWYujh6MKhtGTXyK3DD4nN.jpg,,48000000.0,"[{'id': 10749, 'name': 'Romance'}, {'id': 14, ...",,11232.0,en,Kate & Leopold,...,76019048.0,118.0,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,"If they lived in the same century, they'd be p...",Kate & Leopold,0.0,6.331,1225.0,PG-13
2,tt0114447,0.0,,,0.0,"[{'id': 53, 'name': 'Thriller'}, {'id': 28, 'n...",,151007.0,en,The Silent Force,...,0.0,90.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,They left him for dead... They should have fin...,The Silent Force,0.0,5.000,3.0,
3,tt0116916,0.0,/rFpHBidSlhjflmnLu7BZilyKeQR.jpg,,0.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,73549.0,en,The Dark Mist,...,0.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,The Dark Mist,0.0,3.500,2.0,PG
4,tt0118154,0.0,,,0.0,"[{'id': 99, 'name': 'Documentary'}]",,292980.0,en,White Hotel,...,0.0,90.0,"[{'english_name': 'Amharic', 'iso_639_1': 'am'...",Released,,White Hotel,0.0,0.000,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,tt8665056,0.0,,,0.0,"[{'id': 37, 'name': 'Western'}]",http://skeletoncreekproductions.com/p-movie-br...,885436.0,en,Guns Along The Bravo,...,0.0,85.0,[],Released,Evil came to the Southwest until three blazing...,Guns Along The Bravo,0.0,0.000,0.0,
1408,tt8795764,0.0,,,0.0,"[{'id': 27, 'name': 'Horror'}]",https://www.utahwolf.com/films/coming-soon-new...,871624.0,en,New Breed,...,0.0,57.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,New Breed,0.0,0.000,0.0,NR
1409,tt9071078,0.0,,,0.0,"[{'id': 28, 'name': 'Action'}]",,201706.0,cn,致命密函,...,0.0,90.0,"[{'english_name': 'Cantonese', 'iso_639_1': 'c...",Released,,Chinese Heroes,0.0,3.000,2.0,
1410,tt9212730,0.0,,,0.0,"[{'id': 27, 'name': 'Horror'}, {'id': 53, 'nam...",,382630.0,ja,実録外伝 ゾンビ極道,...,0.0,88.0,"[{'english_name': 'Japanese', 'iso_639_1': 'ja...",Released,,Yakuza Zombie,0.0,0.000,0.0,
