In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os, time, json, math
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook   # to make a progress bar from tqdm_notebook
# Base data directory; the download loop further down builds its per-year
# JSON and CSV file paths from this prefix (note the trailing slash).
# NOTE(review): absolute, machine-specific path — consider a repo-relative one.
FOLDER = "/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/"
# Create the directory if it is missing; no error when it already exists.
os.makedirs(FOLDER, exist_ok=True)
# Last expression of the cell: lists the directory's current contents.
os.listdir(FOLDER)

['.ipynb_checkpoints',
 'Data',
 'Final_AKAS.csv.gz',
 'Final_BASICS.csv.gz',
 'Final_RATINGS.csv.gz',
 'final_tmdb_data_2000.csv.gz',
 'Movies_in_2000.csv.gz',
 'Movies_in_2001.csv.gz',
 'Part_2.ipynb',
 'Project 3 - Part 1.ipynb',
 'Project 3 - Part 2.ipynb',
 'title-akas-us-only.csv',
 'title.akas.tsv.gz',
 'title.basics.tsv.gz',
 'title.ratings.tsv.gz']

In [2]:
def write_json(new_data, filename):
    """Append one record, or a list of records, to the JSON array stored in a file.

    Adapted from: https://www.geeksforgeeks.org/append-to-json-file-using-python/

    Parameters
    ----------
    new_data : dict or list
        When both ``new_data`` and the stored value are lists, the stored list
        is extended element by element; otherwise ``new_data`` is appended as a
        single element.
    filename : str or path-like
        Path of an existing JSON file whose top-level value is a list.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist (the file is opened in ``'r+'`` mode).
    json.JSONDecodeError
        If the current file content is not valid JSON.
    AttributeError
        If the stored top-level value has no ``append`` method (e.g. a dict).
    """
    with open(filename, 'r+') as file:
        # Load what is already stored so the new records can be merged in.
        file_data = json.load(file)

        # A list of records is merged; a single record is added as one element.
        if isinstance(new_data, list) and isinstance(file_data, list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)

        # Rewrite the whole document from the start of the file ...
        file.seek(0)
        json.dump(file_data, file)
        # ... and cut off any bytes left over from the previous content. Without
        # this, a shorter serialization (e.g. the file was pretty-printed before)
        # would leave trailing garbage and make the file unparseable.
        file.truncate()

In [3]:
def get_movie_with_rating(movie_id):
    """Fetch a movie's TMDB info dict and add rating, budget and revenue keys.

    Parameters
    ----------
    movie_id
        Identifier handed straight to ``tmdb.Movies``.

    Returns
    -------
    dict
        The dictionary returned by ``.info()``, extended with:

        * ``'MPAA_Rating'`` - certification of the US release entry; the key is
          only added when the releases contain a ``'US'`` country entry (if
          there are several, the last one is used).
        * ``'Movie_Budget'`` / ``'Movie_Revenue'`` - copies of the ``'budget'``
          and ``'revenue'`` values.
    """
    tmdb_movie = tmdb.Movies(movie_id)

    # Two API lookups: general details first, then the per-country releases.
    details = tmdb_movie.info()
    release_data = tmdb_movie.releases()

    # Collect the certification of every release entry whose country code is US.
    us_certifications = [
        entry['certification']
        for entry in release_data['countries']
        if entry['iso_3166_1'] == 'US'
    ]
    if us_certifications:
        # The last US entry wins, matching assignment inside a plain loop.
        details['MPAA_Rating'] = us_certifications[-1]

    # Duplicate budget and revenue under the descriptive column names.
    for new_key, source_key in (('Movie_Budget', 'budget'),
                                ('Movie_Revenue', 'revenue')):
        details[new_key] = details[source_key]

    return details

In [4]:
# Load the title-basics table produced in project part 1 (Final_BASICS.csv.gz)
# into `basics_df`; the compression is inferred from the .gz extension.
# NOTE(review): this repeats the directory stored in FOLDER as a literal.
basics_df = pd.read_csv('/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/Final_BASICS.csv.gz')

In [5]:
# Display the loaded table (the notebook renders a truncated preview).
basics_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020.0,,70,Drama
2,tt0068865,movie,Lives of Performers,Lives of Performers,0,2016.0,,90,Drama
3,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018.0,,122,Drama
4,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005.0,,100,"Comedy,Horror,Sci-Fi"
...,...,...,...,...,...,...,...,...,...
81898,tt9914942,movie,Life Without Sara Amat,La vida sense la Sara Amat,0,2019.0,,74,Drama
81899,tt9915872,movie,The Last White Witch,Boku no kanojo wa mahoutsukai,0,2019.0,,97,"Comedy,Drama,Fantasy"
81900,tt9916170,movie,The Rehearsal,O Ensaio,0,2019.0,,51,Drama
81901,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller"


# Movies that started in the year 2000.

In [6]:
# Keep only the rows whose startYear equals 2000, then display the subset.
movies_in_2000_df = basics_df.loc[basics_df['startYear'].eq(2000)]
movies_in_2000_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
9,tt0113026,movie,The Fantasticks,The Fantasticks,0,2000.0,,86,"Musical,Romance"
10,tt0113092,movie,For the Cause,For the Cause,0,2000.0,,100,"Action,Adventure,Drama"
12,tt0115937,movie,Consequence,Consequence,0,2000.0,,91,Drama
13,tt0116391,movie,Gang,Gang,0,2000.0,,167,"Action,Crime,Drama"
14,tt0116628,movie,The Incorporated,The Incorporated,0,2000.0,,86,"Action,Thriller"
...,...,...,...,...,...,...,...,...,...
77254,tt8327752,movie,Unknown the Great: The Life & Times of Buddy S...,Unknown the Great: The Life & Times of Buddy S...,0,2000.0,,77,"Comedy,Music"
77998,tt8553964,movie,Cotton Fleece,Cotton Fleece,0,2000.0,,90,Drama
79088,tt8907070,movie,Lost in the Wilderness,Lost in the Wilderness,0,2000.0,,77,Comedy
79230,tt8954964,movie,Good Luck,Good Luck,0,2000.0,,142,Drama


In [7]:
# Save `movies_in_2000_df` as a gzip-compressed CSV, without the index column.
# NOTE(review): this repeats the directory stored in FOLDER as a literal.
movies_in_2000_df.to_csv('/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/Movies_in_2000.csv.gz', compression='gzip', index=False)

# Movies that started in the year 2001.

In [8]:
# Keep only the rows whose startYear equals 2001, then display the subset.
movies_in_2001_df = basics_df.loc[basics_df['startYear'].eq(2001)]
movies_in_2001_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance"
11,tt0114447,movie,The Silent Force,The Silent Force,0,2001.0,,90,Action
15,tt0116916,movie,The Lord Protector,The Lord Protector,0,2001.0,,101,"Action,Adventure,Fantasy"
19,tt0118589,movie,Glitter,Glitter,0,2001.0,,104,"Drama,Music,Romance"
20,tt0118652,movie,The Attic Expeditions,The Attic Expeditions,0,2001.0,,100,"Comedy,Horror,Mystery"
...,...,...,...,...,...,...,...,...,...
79662,tt9071078,movie,Dragon Hero,Mo ren kuang dao,0,2001.0,,100,"Action,Drama,Thriller"
80118,tt9212730,movie,Yakuza Zombie,Zonbi gokudo,0,2001.0,,87,"Horror,Thriller"
80151,tt9228234,movie,The Narc Enigma,The Narc Enigma,0,2001.0,,93,Action
80938,tt9555974,movie,Haunted School,Gui xue xiao,0,2001.0,,85,Horror


In [9]:
# Save `movies_in_2001_df` as a gzip-compressed CSV, without the index column.
# NOTE(review): this repeats the directory stored in FOLDER as a literal.
movies_in_2001_df.to_csv('/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/Movies_in_2001.csv.gz', compression='gzip', index=False)

In [10]:
# Plain Python lists of the IMDb title ids (tconst) for each year.
list_2000 = movies_in_2000_df['tconst'].tolist()
list_2001 = movies_in_2001_df['tconst'].tolist()

In [11]:
# Credentials for the TMDB API: read them from a JSON file kept outside the
# repository so the key itself never appears in the notebook.
# NOTE(review): the path is tied to one user's home directory.
with open('/Users/Rashad/.secret/tmdb_api_key.json','r') as rc:
    login = json.load(rc)

In [12]:
# Quick check of which entries the credentials dict holds — only the key
# names are displayed, not the secret values.
login.keys()

dict_keys(['api_key', 'api_read_token'])

In [13]:
# Hand the API key to tmdbsimple by setting its module-level API_KEY attribute.
tmdb.API_KEY = login['api_key']

In [14]:
# File name (may include a folder) for the in-progress JSON results.
# NOTE(review): the download loop later in this notebook assigns JSON_FILE
# again for every year before using it, so this initial value is never read
# by that loop.

# Earlier relative-path variant, kept for reference:
#JSON_FILE = "Data/results_in_progress_movie_year.json"
JSON_FILE = "/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/Data/results_in_progress_movie_year.json"

In [15]:
# Release years (matched against basics_df['startYear']) to download from TMDB.
YEARS_TO_GET = [2000,2001]

In [16]:
# Collects [movie_id, exception] pairs for every movie the download loop fails on.
# Kept in its own cell, so re-running only the loop cell keeps adding to this list.
errors = [ ]

In [17]:
# OUTER loop: process one release year at a time, with a progress bar over the years.
# For each year the results are accumulated in a JSON file and finally exported
# to a gzip-compressed CSV. Ids already present in the JSON file are skipped, so
# re-running the cell resumes an interrupted download instead of starting over.
for YEAR in tqdm_notebook(YEARS_TO_GET, desc='YEARS', position=0):
    # Per-year JSON file that stores the API results (rebinds the global JSON_FILE).
    JSON_FILE = f'{FOLDER}tmdb_api_results_{YEAR}.json'
    # Check if file exists
    file_exists = os.path.isfile(JSON_FILE)

    # If it does not exist: create it
    if file_exists == False:
        # Seed the new file with a one-element list holding a placeholder record.
        # write_json needs an existing file containing a list, and the
        # 'imdb_id' column must exist when the file is read back below.
        with open(JSON_FILE,'w') as f:
            json.dump([{'imdb_id':0}],f)

    # Rows of basics_df for the current year (copied, so basics_df is untouched).
    df = basics_df.loc[ basics_df['startYear']==YEAR].copy()
    # IMDb title ids (tconst) of those rows, as a Series.
    movie_ids = df['tconst'].copy()

    # Load existing data from json into a dataframe called "previous_df"
    previous_df = pd.read_json(JSON_FILE)

    # Filter out any ids that are already in the JSON_FILE. Ids that raised an
    # error on an earlier run were never written, so they are attempted again.
    movie_ids_to_get = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]

    # INNER loop: one API lookup per remaining movie id, with its own progress bar.
    for movie_id in tqdm_notebook(movie_ids_to_get,
                                  desc=f'Movies from {YEAR}',
                                  position=1,
                                  leave=True):
        try:
            # Retrieve the data for the movie id
            temp = get_movie_with_rating(movie_id)  
            # Append the record to the year's JSON file right away, so progress
            # is kept even if the loop is interrupted later.
            write_json(temp,JSON_FILE)
            # Short 40 ms sleep to prevent overwhelming the server
            # (only reached when the fetch and the write both succeeded).
            time.sleep(0.04)

        except Exception as e:
            # Record the failing id together with its exception and carry on
            # with the next movie.
            errors.append([movie_id, e])

    # Export everything collected for the year as a gzip-compressed CSV.
    # NOTE(review): the placeholder record ({'imdb_id': 0}) is still part of the
    # JSON file at this point, so it ends up as a row in the exported CSV.
    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmdb_data_{YEAR}.csv.gz", compression="gzip", index=False)

YEARS:   0%|          | 0/2 [00:00<?, ?it/s]

Movies from 2000:   0%|          | 0/1457 [00:00<?, ?it/s]

Movies from 2001:   0%|          | 0/1577 [00:00<?, ?it/s]

In [18]:
# Number of [movie_id, exception] pairs collected in `errors` by the download loop.
print(f"- Total errors: {len(errors)}")

- Total errors: 442


In [19]:
# Check the exported CSV for 2000: read it back into a DataFrame and display it.
# The displayed first row (imdb_id 0, everything else NaN) is the placeholder
# record that seeded the JSON file.
the2000s_df = pd.read_csv('/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/final_tmdb_data_2000.csv.gz')
the2000s_df

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,MPAA_Rating,Movie_Budget,Movie_Revenue
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0113026,0.0,/vMFs7nw6P0bIV1jDsQpxAieAVnH.jpg,,10000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10402, '...",,62127.0,en,The Fantasticks,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Try to remember the first time magic happened,The Fantasticks,0.0,5.50,22.0,,10000000.0,0.0
2,tt0113092,0.0,,,0.0,"[{'id': 878, 'name': 'Science Fiction'}]",,110977.0,en,For the Cause,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The ultimate showdown on a forbidden planet.,For the Cause,0.0,5.45,10.0,,0.0,0.0
3,tt0116391,0.0,,,0.0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,442869.0,hi,Gang,...,"[{'english_name': 'Hindi', 'iso_639_1': 'hi', ...",Released,,Gang,0.0,4.00,1.0,,0.0,0.0
4,tt0118694,0.0,/n4GJFGzsc7NinI1VeGDXIcQjtU2.jpg,,150000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,843.0,cn,花樣年華,...,"[{'english_name': 'Cantonese', 'iso_639_1': 'c...",Released,"Feel the heat, keep the feeling burning, let t...",In the Mood for Love,0.0,8.11,2281.0,PG,150000.0,14204632.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1244,tt6174238,0.0,,,0.0,"[{'id': 80, 'name': 'Crime'}]",,223878.0,cn,冷战,...,"[{'english_name': 'Cantonese', 'iso_639_1': 'c...",Released,,Cold War,0.0,2.00,2.0,,0.0,0.0
1245,tt7029820,0.0,,,7000.0,[],,604889.0,en,Scream For Christmas,...,[],Released,,Scream For Christmas,0.0,0.00,0.0,,7000.0,0.0
1246,tt7197642,0.0,,,0.0,"[{'id': 35, 'name': 'Comedy'}]",,872676.0,en,"Goodbye, Merry-Go-Round",...,[],Released,,"Goodbye, Merry-Go-Round",0.0,0.00,0.0,,0.0,0.0
1247,tt7631368,0.0,/sF0gUHE0YzZNXYugTB2LFxJIppf.jpg,,10000000.0,"[{'id': 27, 'name': 'Horror'}]",,97186.0,fr,"I, Vampire",...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,"I, Vampire",0.0,6.40,4.0,NR,10000000.0,0.0


In [21]:
# Check the exported CSV for 2001: read it back into a DataFrame and display it.
# The displayed first row (imdb_id 0, everything else NaN) is the placeholder
# record that seeded the JSON file.
the2001s_df = pd.read_csv('/Users/Rashad/Documents/GitHub/Test Repo/Project 3/Project-3/Data/final_tmdb_data_2001.csv.gz')
the2001s_df

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,MPAA_Rating,Movie_Budget,Movie_Revenue
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0035423,0.0,/hfeiSfWYujh6MKhtGTXyK3DD4nN.jpg,,48000000.0,"[{'id': 10749, 'name': 'Romance'}, {'id': 14, ...",,11232.0,en,Kate & Leopold,...,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,"If they lived in the same century, they'd be p...",Kate & Leopold,0.0,6.330,1215.0,PG-13,48000000.0,76019048.0
2,tt0114447,0.0,,,0.0,"[{'id': 53, 'name': 'Thriller'}, {'id': 28, 'n...",,151007.0,en,The Silent Force,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,They left him for dead... They should have fin...,The Silent Force,0.0,5.000,3.0,,0.0,0.0
3,tt0116916,0.0,/rFpHBidSlhjflmnLu7BZilyKeQR.jpg,,0.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,73549.0,en,The Dark Mist,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,The Dark Mist,0.0,3.500,2.0,PG,0.0,0.0
4,tt0118589,0.0,/9NZAirJahVilTiDNCHLFcdkwkiy.jpg,,22000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10402, 'n...",,10696.0,en,Glitter,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,"In music she found her dream, her love, herself.",Glitter,0.0,4.438,129.0,PG-13,22000000.0,5271666.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1340,tt7797790,0.0,,,0.0,"[{'id': 27, 'name': 'Horror'}]",,956219.0,en,Edmund Kemper Part 3: La mort sévit,...,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,,Edmund Kemper Part 3: La mort sévit,0.0,0.000,0.0,,0.0,0.0
1341,tt8665056,0.0,,,0.0,"[{'id': 37, 'name': 'Western'}]",http://skeletoncreekproductions.com/p-movie-br...,885436.0,en,Guns Along The Bravo,...,[],Released,Evil came to the Southwest until three blazing...,Guns Along The Bravo,0.0,0.000,0.0,,0.0,0.0
1342,tt8795764,0.0,,,0.0,"[{'id': 27, 'name': 'Horror'}]",https://www.utahwolf.com/films/coming-soon-new...,871624.0,en,New Breed,...,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,New Breed,0.0,0.000,0.0,NR,0.0,0.0
1343,tt9071078,0.0,,,0.0,"[{'id': 28, 'name': 'Action'}]",,201706.0,cn,致命密函,...,"[{'english_name': 'Cantonese', 'iso_639_1': 'c...",Released,,Chinese Heroes,0.0,3.000,2.0,,0.0,0.0
