## Part 1

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np

In [2]:
title_basics = "https://datasets.imdbws.com/title.basics.tsv.gz"

title_akas = "https://datasets.imdbws.com/title.akas.tsv.gz"


title_ratings = "https://datasets.imdbws.com/title.ratings.tsv.gz"






In [3]:
basics = pd.read_csv(title_basics, sep = "\t", low_memory = False)

akas = pd.read_csv(title_akas, sep = "\t", low_memory = False)

ratings = pd.read_csv(title_ratings, sep = "\t", low_memory = False)

### Cleaning the basics table

In [4]:
basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"


In [5]:
basics.dtypes

tconst            object
titleType         object
primaryTitle      object
originalTitle     object
isAdult           object
startYear         object
endYear           object
runtimeMinutes    object
genres            object
dtype: object

In [6]:
basics.isna().sum()

tconst             0
titleType          0
primaryTitle      11
originalTitle     11
isAdult            0
startYear          0
endYear            0
runtimeMinutes     0
genres            10
dtype: int64

In [7]:
basics.replace({'\\N':np.nan}, inplace = True)

In [8]:
basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,,1,"Comedy,Short"


In [9]:
basics.dropna(subset = 'runtimeMinutes',inplace = True)

In [10]:
basics.isna().sum()

tconst                  0
titleType               0
primaryTitle            1
originalTitle           1
isAdult                 1
startYear          151915
endYear           2733691
runtimeMinutes          0
genres              75346
dtype: int64

In [11]:
basics.dropna(subset = 'genres',inplace = True)

In [12]:
basics.isna().sum()

tconst                  0
titleType               0
primaryTitle            1
originalTitle           1
isAdult                 0
startYear          146998
endYear           2659929
runtimeMinutes          0
genres                  0
dtype: int64

In [13]:
print(basics['titleType'].value_counts())

tvEpisode       1340772
short            589695
movie            376037
video            178499
tvMovie           90678
tvSeries          88925
tvSpecial         17583
tvMiniSeries      16760
tvShort            8635
videoGame           317
Name: titleType, dtype: int64


In [14]:
basics = basics[basics['titleType'] == 'movie']

In [35]:
print(basics['titleType'].value_counts())

movie    146276
Name: titleType, dtype: int64


In [37]:
basics = basics.loc[(basics['startYear'] >= 2000) & (basics['startYear'] <= 2022)]


In [38]:
print(basics['startYear'].value_counts())

2018    9626
2017    9431
2019    9367
2016    9021
2022    8613
2015    8594
2021    8198
2014    8169
2013    7789
2020    7561
2012    7306
2011    6774
2010    6370
2009    5973
2008    5214
2007    4629
2006    4391
2005    3900
2004    3535
2003    3231
2002    2994
2001    2858
2000    2732
Name: startYear, dtype: int64


In [16]:
#creating a value that selects all the documentary types from the dataset
is_documentary = basics['genres'].str.contains('documentary', case = False)

#now using the inverse of is_documentary to deselect it in the basics_t_s dataset. 
basics = basics[~is_documentary]

basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
34803,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
61116,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
67669,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
77964,tt0079644,movie,November 1828,November 1828,0,2001,,140,"Drama,War"
86801,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005,,100,"Comedy,Horror,Sci-Fi"


### Akas cleaning 

In [39]:
akas.head()

Unnamed: 0,titleId,ordering,title,region,language,types,attributes,isOriginalTitle
0,tt0000001,6,Carmencita,US,,imdbDisplay,,0.0
1,tt0000002,7,The Clown and His Dogs,US,,,literal English title,0.0
2,tt0000005,10,Blacksmith Scene,US,,imdbDisplay,,0.0
3,tt0000005,1,Blacksmithing Scene,US,,alternative,,0.0
4,tt0000005,6,Blacksmith Scene #1,US,,alternative,,0.0


In [42]:
akas['region'].value_counts()

US    1409527
Name: region, dtype: int64

In [43]:
akas.replace({'\\N': np.nan}, inplace=True)


In [44]:
akas.head()

Unnamed: 0,titleId,ordering,title,region,language,types,attributes,isOriginalTitle
0,tt0000001,6,Carmencita,US,,imdbDisplay,,0.0
1,tt0000002,7,The Clown and His Dogs,US,,,literal English title,0.0
2,tt0000005,10,Blacksmith Scene,US,,imdbDisplay,,0.0
3,tt0000005,1,Blacksmithing Scene,US,,alternative,,0.0
4,tt0000005,6,Blacksmith Scene #1,US,,alternative,,0.0


### Cleaning Ratings dataset

In [45]:
# replaceing \N with np.NaN
ratings.replace({'\\N': np.nan}, inplace = True)


In [46]:
ratings.head()

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1947
1,tt0000002,5.8,264
2,tt0000005,6.2,2580
3,tt0000006,5.1,177
4,tt0000007,5.4,810


In [47]:
ratings.isna().sum()

tconst           0
averageRating    0
numVotes         0
dtype: int64

### Keepers for rating and basics

In [51]:
#keeping on the US region movies
#Rating
keepers =ratings['tconst'].isin(akas['titleId'])
ratings = ratings[keepers]
ratings.head()

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1947
1,tt0000002,5.8,264
2,tt0000005,6.2,2580
3,tt0000006,5.1,177
4,tt0000007,5.4,810


In [52]:
#keeping on the US region movies
#basics
keepers =basics['tconst'].isin(akas['titleId'])
basics = basics[keepers]
basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
2,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
4,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005,,100,"Comedy,Horror,Sci-Fi"
7,tt0096056,movie,Crime and Punishment,Crime and Punishment,0,2002,,126,Drama


In [54]:
# example making new folder with os
import os
os.makedirs('Data/',exist_ok=True) 
# Confirm folder created
os.listdir("Data/")

['title_basics.csv.gz', 'title_akas.csv.gz', 'title_ratings.csv.gz']

In [55]:
## Save current dataframe to file.
basics.to_csv("Data/title_basics.csv.gz",compression='gzip',index=False)


In [56]:
akas.to_csv("Data/title_akas.csv.gz",compression='gzip',index=False)
ratings.to_csv("Data/title_ratings.csv.gz",compression='gzip',index=False)


In [57]:
basics = pd.read_csv("Data/title_basics.csv.gz", low_memory = False)
akas = pd.read_csv("Data/title_akas.csv.gz", low_memory = False)
ratings = pd.read_csv("Data/title_ratings.csv.gz", low_memory = False)

In [58]:
basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
2,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
3,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005,,100,"Comedy,Horror,Sci-Fi"
4,tt0096056,movie,Crime and Punishment,Crime and Punishment,0,2002,,126,Drama


In [59]:
ratings.head()

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1947
1,tt0000002,5.8,264
2,tt0000005,6.2,2580
3,tt0000006,5.1,177
4,tt0000007,5.4,810


## Part 2

In [2]:
import json
with open('/Users/navnoorsingh/.secret/tmdb_api.json', 'r') as f:
    login = json.load(f)
## Display the keys of the loaded dict
login.keys()

dict_keys(['api-key'])

In [3]:
import tmdbsimple as tmdb
tmdb.API_KEY =  login['api-key']
 

In [4]:
## make a movie object using the .Movies function from tmdb
movie = tmdb.Movies(603)

info = movie.info()
info

{'adult': False,
 'backdrop_path': '/waCRuAW5ocONRehP556vPexVXA9.jpg',
 'belongs_to_collection': {'id': 2344,
  'name': 'The Matrix Collection',
  'poster_path': '/bV9qTVHTVf0gkW0j7p7M0ILD4pG.jpg',
  'backdrop_path': '/bRm2DEgUiYciDw3myHuYFInD7la.jpg'},
 'budget': 63000000,
 'genres': [{'id': 28, 'name': 'Action'},
  {'id': 878, 'name': 'Science Fiction'}],
 'homepage': 'http://www.warnerbros.com/matrix',
 'id': 603,
 'imdb_id': 'tt0133093',
 'original_language': 'en',
 'original_title': 'The Matrix',
 'overview': 'Set in the 22nd century, The Matrix tells the story of a computer hacker who joins a group of underground insurgents fighting the vast and powerful computers who now rule the earth.',
 'popularity': 77.213,
 'poster_path': '/f89U3ADr1oiB1s9GkdPOEpXUk5H.jpg',
 'production_companies': [{'id': 79,
   'logo_path': '/tpFpsqbleCzEE2p5EgvUq6ozfCA.png',
   'name': 'Village Roadshow Pictures',
   'origin_country': 'US'},
  {'id': 174,
   'logo_path': '/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png'

In [5]:
info['budget']

63000000

In [6]:
info['revenue']

463517383

In [7]:
info['imdb_id']

'tt0133093'

In [8]:
movie = tmdb.Movies('tt1361336')
info = movie.info()
info['budget']

50000000

In [10]:
response = movie.releases()

response

{'id': 587807,
 'countries': [{'certification': '',
   'descriptors': [],
   'iso_3166_1': 'CO',
   'primary': False,
   'release_date': '2021-02-12'},
  {'certification': 'PG',
   'descriptors': [],
   'iso_3166_1': 'US',
   'primary': False,
   'release_date': '2021-02-26'},
  {'certification': 'ALL',
   'descriptors': [],
   'iso_3166_1': 'KR',
   'primary': False,
   'release_date': '2021-02-24'},
  {'certification': '',
   'descriptors': [],
   'iso_3166_1': 'ID',
   'primary': False,
   'release_date': '2021-03-10'},
  {'certification': '6',
   'descriptors': [],
   'iso_3166_1': 'NL',
   'primary': False,
   'release_date': '2021-06-09'},
  {'certification': 'G',
   'descriptors': [],
   'iso_3166_1': 'IE',
   'primary': False,
   'release_date': '2021-05-07'},
  {'certification': 'M/6',
   'descriptors': [],
   'iso_3166_1': 'PT',
   'primary': False,
   'release_date': '2021-03-04'},
  {'certification': '6+',
   'descriptors': [],
   'iso_3166_1': 'RU',
   'primary': False,
  

In [9]:
# example from package README
# source = https://github.com/celiao/tmdbsimple
releases = movie.releases()
for c in releases['countries']:
    if c['iso_3166_1'] == 'US':
        print(c['certification'])

PG
PG
PG


In [11]:
def get_movie_with_rating(movie_id):
    # Get the movie object for the current id
    movie = tmdb.Movies('tt1361336')
    # save the .info .releases dictionaries
    info = movie.info()
    releases = movie.releases()
    # Loop through countries in releases
    for c in releases['countries']:
        # if the country abbreviation==US
        if c['iso_3166_1' ] =='US':
            ## save a "certification" key in the info dict with the certification
           info['certification'] = c['certification']
    return info

In [12]:
test = get_movie_with_rating("tt0848228") #put your function name here
test


{'adult': False,
 'backdrop_path': '/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg',
 'belongs_to_collection': None,
 'budget': 50000000,
 'genres': [{'id': 35, 'name': 'Comedy'},
  {'id': 10751, 'name': 'Family'},
  {'id': 16, 'name': 'Animation'}],
 'homepage': 'https://www.tomandjerrymovie.com',
 'id': 587807,
 'imdb_id': 'tt1361336',
 'original_language': 'en',
 'original_title': 'Tom & Jerry',
 'overview': 'Tom the cat and Jerry the mouse get kicked out of their home and relocate to a fancy New York hotel, where a scrappy employee named Kayla will lose her job if she can’t evict Jerry before a high-class wedding at the hotel. Her solution? Hiring Tom to get rid of the pesky mouse.',
 'popularity': 62.527,
 'poster_path': '/8XZI9QZ7Pm3fVkigWJPbrXCMzjq.jpg',
 'production_companies': [{'id': 174,
   'logo_path': '/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png',
   'name': 'Warner Bros. Pictures',
   'origin_country': 'US'},
  {'id': 8922,
   'logo_path': '/yZWehAyjfKi4KvKeg1bkJ1bm5H8.png',
   'name': 'Turner Ent

In [13]:
## testing our function by looping through a list of ids
import pandas as pd
test_ids = ["tt0848228", "tt0115937","tt0848228","tt0332280"]
results = []
errors = []
for movie_id in test_ids:
    
    try:
        movie_info = get_movie_with_rating(movie_id)
        results.append(movie_info)
        
    except Exception as e: 
        errors.append([movie_id, e])
    
pd.DataFrame(results)

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807,tt1361336,en,Tom & Jerry,...,132000000,101,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164,PG
1,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807,tt1361336,en,Tom & Jerry,...,132000000,101,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164,PG
2,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807,tt1361336,en,Tom & Jerry,...,132000000,101,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164,PG
3,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807,tt1361336,en,Tom & Jerry,...,132000000,101,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164,PG


- Number of errors: 0


[]

In [41]:
#designating the folder
import os, time,json
import tmdbsimple as tmdb 
from tqdm.notebook import tqdm_notebook
FOLDER = "Data/"
os.makedirs(FOLDER, exist_ok=True)
os.listdir(FOLDER)

['tmdb_api_results_2000.json',
 'title_basics.csv.gz',
 'title_akas.csv.gz',
 'title_ratings.csv.gz']

In [42]:
YEARS_TO_GET = [2000,2001]

In [43]:
import pandas as pd
basics = pd.read_csv('Data/title_basics.csv.gz')

basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
2,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
3,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005,,100,"Comedy,Horror,Sci-Fi"
4,tt0096056,movie,Crime and Punishment,Crime and Punishment,0,2002,,126,Drama


In [None]:
#this following code will not be used but has it as a reference

#'Adapted from: https: //www.geeksforgeeks.org/append-to-json-file-using-python"
def write_json (new_data, filename):
    with open (filename, 'r+') as file:
        # First we load existing data into a dict.
        file_data = json.load(file)
        ## Choose extend or append
        if (type (new_data) == list) & (type(file_data)== list):
            file_data.extend(new_data)
        else:
            file_data.append(new_data)
        # sets file's current position at offset.
        file.seek(0)
        #convert back to json.
        json.dump(file_data, file)

In [44]:
#OUTER LOOP  
for YEAR in tqdm_notebook(YEARS_TO_GET, desc = 'YEARS', position = 0):
    #Defining the JSON file to store results for year
    JSON_FILE = f'{FOLDER}tmdb_api_results_{YEAR}.json'
    
    # Check if file exists
    file_exists = os.path.isfile(JSON_FILE)

    # If it does not exist: create it
    if file_exists == False:
    # save an empty dict with just "imdb_id" to the new json file.
        with open(JSON_FILE,'w') as f:
            json.dump([{'imdb_id':0}],f)

    #Saving new year as the current df
    df = basics.loc[basics['startYear'] == YEAR].copy()

    #saving movie ids as list
    movie_ids = df['tconst'].copy() #.to_list()

    # Load existing data from json into a dataframe called "previous_df"
    previous_df = pd.read_json(JSON_FILE)

    # filter out any ids that are already in the JSON_FILE
    movie_ids_to_get = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]


    #Get index and movie id from list
    # INNER Loop
    for movie_id in tqdm_notebook(movie_ids_to_get,
                                  desc=f'Movies from {YEAR}',
                                  position=1,
                                  leave=True):
        try: 
            # Retrieve then data for the movie id
            temp = get_movie_with_rating(movie_id)  
            # Append/extend results to existing file using a pre-made function
            write_json(temp,JSON_FILE)
            # Short 20 ms sleep to prevent overwhelming server
            time.sleep(0.02)
            
        except Exception as e:
            errors.append([movie_id, e])


    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmdb_data_{YEAR}.csv.gz", compression="gzip", index=False)


YEARS:   0%|          | 0/2 [00:00<?, ?it/s]

Movies from 2000:   0%|          | 0/1428 [00:00<?, ?it/s]

Movies from 2001:   0%|          | 0/1542 [00:00<?, ?it/s]

## EDA Analysis

In [57]:
final_year_df

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
2,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
3,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
4,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1537,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
1538,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
1539,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG
1540,tt1361336,0.0,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,0.0,6.923,2164.0,PG


In [50]:
with open('Data/tmdb_api_results_2000.json') as f:
    file_2000 = json.load(f)
## Display the keys of the loaded dict
file_2000

[{'imdb_id': 0},
 {'adult': False,
  'backdrop_path': '/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg',
  'belongs_to_collection': None,
  'budget': 50000000,
  'genres': [{'id': 35, 'name': 'Comedy'},
   {'id': 10751, 'name': 'Family'},
   {'id': 16, 'name': 'Animation'}],
  'homepage': 'https://www.tomandjerrymovie.com',
  'id': 587807,
  'imdb_id': 'tt1361336',
  'original_language': 'en',
  'original_title': 'Tom & Jerry',
  'overview': 'Tom the cat and Jerry the mouse get kicked out of their home and relocate to a fancy New York hotel, where a scrappy employee named Kayla will lose her job if she can’t evict Jerry before a high-class wedding at the hotel. Her solution? Hiring Tom to get rid of the pesky mouse.',
  'popularity': 62.527,
  'poster_path': '/8XZI9QZ7Pm3fVkigWJPbrXCMzjq.jpg',
  'production_companies': [{'id': 174,
    'logo_path': '/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png',
    'name': 'Warner Bros. Pictures',
    'origin_country': 'US'},
   {'id': 8922,
    'logo_path': '/yZWehAyjfKi4KvKeg1

In [53]:
data_2000 = pd.DataFrame(file_2000)

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
2,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
3,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
4,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG


In [58]:
data_2000.tail()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
1422,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1423,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1424,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1425,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1426,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG


In [59]:
with open('Data/tmdb_api_results_2001.json') as f:
    file_2001 = json.load(f)
## Display the keys of the loaded dict
file_2001

[{'imdb_id': 0},
 {'adult': False,
  'backdrop_path': '/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg',
  'belongs_to_collection': None,
  'budget': 50000000,
  'genres': [{'id': 35, 'name': 'Comedy'},
   {'id': 10751, 'name': 'Family'},
   {'id': 16, 'name': 'Animation'}],
  'homepage': 'https://www.tomandjerrymovie.com',
  'id': 587807,
  'imdb_id': 'tt1361336',
  'original_language': 'en',
  'original_title': 'Tom & Jerry',
  'overview': 'Tom the cat and Jerry the mouse get kicked out of their home and relocate to a fancy New York hotel, where a scrappy employee named Kayla will lose her job if she can’t evict Jerry before a high-class wedding at the hotel. Her solution? Hiring Tom to get rid of the pesky mouse.',
  'popularity': 62.527,
  'poster_path': '/8XZI9QZ7Pm3fVkigWJPbrXCMzjq.jpg',
  'production_companies': [{'id': 174,
    'logo_path': '/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png',
    'name': 'Warner Bros. Pictures',
    'origin_country': 'US'},
   {'id': 8922,
    'logo_path': '/yZWehAyjfKi4KvKeg1

In [60]:
data_2001 = pd.DataFrame(file_2001)
data_2001

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
2,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
3,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
4,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1537,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1538,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1539,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
1540,tt1361336,False,/9ns9463dwOeo1CK1JU2wirL5Yi1.jpg,,50000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",https://www.tomandjerrymovie.com,587807.0,en,Tom & Jerry,...,132000000.0,101.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best of enemies. Worst of friends.,Tom & Jerry,False,6.923,2164.0,PG
