In [1]:
# imports
import json
import tmdbsimple as tmdb 
import pandas as pd
from tqdm.notebook import tqdm_notebook

In [2]:
# Read the TMDB credentials from the local secrets file and hand the API key
# to the tmdbsimple client so that subsequent requests are authenticated.
with open(".secret/tmdb_api.json", "r") as secret_file:
    login = json.loads(secret_file.read())
tmdb.API_KEY = login["api_key"]

In [3]:
# Release years to request from the API: 2010 through 2019 inclusive
# (the upper bound of range() is exclusive).
years_to_get = [*range(2010, 2020)]

In [4]:
# define function to get movie information together with its US (MPAA) certification
def get_with_rating(API, movie_id):
    """Fetch a movie's details and attach its US certification.

    Parameters
    ----------
    API : object
        Client exposing ``Movies(movie_id)``; the returned object must provide
        ``info()`` and ``releases()``.
    movie_id
        Identifier passed unchanged to ``API.Movies``.

    Returns
    -------
    dict
        The mapping returned by ``info()``. A ``"certification"`` key is added
        only when the release list contains at least one entry whose
        ``"iso_3166_1"`` is ``"US"``; otherwise the mapping is returned as is.

    Raises
    ------
    KeyError
        If the releases payload has no ``"countries"`` key, or an entry lacks
        ``"iso_3166_1"`` / ``"certification"``.
    """
    movie = API.Movies(movie_id)
    movie_info = movie.info()
    releases = movie.releases()

    us_certifications = [
        country["certification"]
        for country in releases["countries"]
        if country["iso_3166_1"] == "US"
    ]
    if us_certifications:
        # The list may hold several US entries. A blank certification on a
        # later entry must not overwrite a real rating found earlier, so keep
        # the last non-empty value and fall back to the last entry only when
        # every US certification is blank.
        rated = [cert for cert in us_certifications if cert]
        movie_info["certification"] = (rated or us_certifications)[-1]
    return movie_info
# define function to save records in a json file
def write_json(new_data, filename):
    """Add record(s) to the JSON array stored in an existing file.

    The file is read in full, updated in memory and rewritten from the start.

    Parameters
    ----------
    new_data
        A single record, or a list of records. A list is merged element by
        element when the stored value is also a list; anything else is
        appended as one element.
    filename : str
        Path of an existing file containing a JSON array (opened in ``"r+"``
        mode, so it is never created here).

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    AttributeError
        If the stored JSON value is not a list.
    """
    with open(filename, "r+") as file:
        file_data = json.load(file)
        if isinstance(new_data, list) and isinstance(file_data, list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)
        file.seek(0)
        json.dump(file_data, file)
        # The compact re-serialization can be shorter than what was on disk
        # (e.g. a pretty-printed or padded file); cut off the stale tail so
        # the file stays valid JSON.
        file.truncate()

In [None]:
# Extract movie information year by year.
# Results are stored in one JSON file per year; ids already present in that
# file are skipped, so the cell can be re-run to resume an interrupted download.
df_basics = pd.read_csv("Data/title_basics_filtred.csv")
errors = []  # [movie_id, exception] pairs for every request that failed
for year in tqdm_notebook(years_to_get, "Years", position=0):
    results_path = f"Data/tmdb_api_results_{year}.json"
    movie_ids = df_basics[df_basics["startYear"] == float(year)]["tconst"]

    # "a+" creates the file when it is missing without clobbering existing
    # results. A brand-new (or empty) file is initialised with an empty array
    # so that write_json can load it.
    with open(results_path, "a+") as f:
        f.seek(0)
        has_previous_results = f.read(1) != ""
        if not has_previous_results:
            f.write("[]")

    if has_previous_results:
        try:
            previous_df = pd.read_json(results_path)
        except ValueError:
            # The stored content is not parseable JSON: start this year over.
            # Only parse failures trigger the reset, so an interrupt or an
            # unrelated error can no longer wipe results already collected.
            with open(results_path, "w") as f:
                f.write("[]")
        else:
            # An empty array yields a frame without columns: nothing to skip.
            if "imdb_id" in previous_df.columns:
                movie_ids = movie_ids[~movie_ids.isin(previous_df["imdb_id"])]

    for movie_id in tqdm_notebook(movie_ids, f"Movies from {year}", position=0):
        try:
            record = get_with_rating(tmdb, movie_id)
            write_json(record, results_path)
        except Exception as e:
            # Keep going on per-movie failures; they are collected for review.
            errors.append([movie_id, e])

In [11]:
# Convert each year's JSON results into a gzip-compressed CSV file
# (the DataFrame index is not written).
for year in years_to_get:
    json_path = f"Data/tmdb_api_results_{year}.json"
    csv_path = f"Data/final_tmdb_data_{year}.csv.gz"
    year_df = pd.read_json(json_path)
    year_df.to_csv(csv_path, index=False, compression="gzip")