# Imports

In [1]:
import csv
import os
from utils import get_id_list, get_data, write_file
import json 

from dotenv import load_dotenv
# Load settings from a local .env file (if any) so os.getenv can see them.
load_dotenv()

# JSON files are UTF-8 by specification; pass the encoding explicitly rather
# than relying on the platform's default text encoding.
with open('./config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Grabbing All Movie IDs

In [2]:
# API key is read from the environment (never hard-coded in the notebook).
TMBD_API_KEY = os.getenv('TMBD_API_KEY')
if not TMBD_API_KEY:
    # Fail fast: without a key the API answers with an error payload and the
    # failure only shows up later as an unrelated-looking KeyError.
    raise RuntimeError(
        "Environment variable 'TMBD_API_KEY' is not set; "
        "define it in the environment or in a .env file."
    )

# Inclusive range of years, from the first to the last entry of config["years"].
YEARS = range(config["years"][0], config["years"][-1] + 1)

# Column names written as the first row of every per-year CSV file.
CSV_HEADER = ['Title', 'Runtime (minutes)', 'Language', 'Overview',
              'Release Year', 'Genre', 'Keywords',
              'Actors', 'Directors', 'Stream', 'Buy', 'Rent',
              'Production Companies']

# Writing movie data for all IDs to per-year CSV files

In [3]:
# Make sure the output directory exists before any file is opened for writing.
os.makedirs('./data', exist_ok=True)

for year in YEARS:
    # Grab list of ids for all films made in {year}; set() drops duplicates.
    movie_list = list(set(get_id_list(TMBD_API_KEY, year)))

    FILE_NAME = f'./data/{year}_movie_collection_data.csv'

    # Create (or truncate) the file and write the header row.
    # newline='' is required by the csv module so that no blank lines are
    # inserted between rows on platforms that translate line endings.
    with open(FILE_NAME, 'w', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerow(CSV_HEADER)

    # Fetch each movie's data and hand it to write_file for this year's CSV.
    # Names avoid shadowing the builtins `id` and `dict`.
    for movie_id in movie_list:
        movie_data = get_data(TMBD_API_KEY, movie_id)
        write_file(FILE_NAME, movie_data)

{'status_code': 7, 'status_message': 'Invalid API key: You must be granted a valid key.', 'success': False}


KeyError: 'results'

# Random Testing

In [7]:
import requests
import time
import json
from iso639 import languages

max_retries = 5

# Movie used for this manual test
Movie_ID = '545611'

API_key = os.getenv('TMBD_API_KEY')
if not API_key:
    # Without this check a missing key fails below with an unhelpful TypeError
    # (str + None) while the URL is being built.
    raise RuntimeError("Environment variable 'TMBD_API_KEY' is not set.")

# Single request that also pulls keywords, watch providers and credits.
query = (
    'https://api.themoviedb.org/3/movie/' + Movie_ID
    + '?api_key=' + API_key
    + '&append_to_response=keywords,watch/providers,credits&language=en-US'
)

# Issue the request inside the loop so that a 429 is actually retried.
for i in range(max_retries):
    response = requests.get(query, timeout=10)
    if response.status_code != 429:
        break
    # Rate limited: wait and then try again
    print(f"Request limit reached. Waiting and retrying ({i+1}/{max_retries})")
    time.sleep(2 ** i)  # Exponential backoff
else:
    # Every attempt was rate limited; stop here instead of continuing with
    # an undefined (or stale) payload.
    raise RuntimeError(f"Still rate limited after {max_retries} attempts")

# Surface other HTTP errors (e.g. an invalid API key) right away.
response.raise_for_status()

# NOTE: the name `dict` shadows the builtin, but it is kept because the rest
# of this cell reads the payload through it.
dict = response.json()

# Pretty print the JSON data
pretty_json = json.dumps(dict, indent=4)

# print(pretty_json)

def is_english(s):
    """Return True when ``s`` consists solely of ASCII characters.

    Despite its name this is a character-set check, not language detection:
    any string containing a non-ASCII character is rejected.

    Args:
        s: The string to check.

    Returns:
        bool: True if every character is ASCII (the empty string included),
        False otherwise.
    """
    # str.isascii() never raises, unlike an encode/decode round trip, which
    # fails with UnicodeEncodeError on lone surrogate code points.
    return s.isascii()

# Pull the top-level fields out of the movie payload.
title = dict['title']
runtime = f"{dict['runtime']} minutes"
language_code = dict['original_language']
release_date = dict['release_date']
overview = dict['overview']
all_genres = dict['genres']

# Use a placeholder value when the payload's homepage is an empty string.
if dict['homepage'] == '':
    website = 'movie_collection_data.csv'
else:
    website = dict['homepage']

prod_companies = dict['production_companies']

# Parsing release date: keep only the part before the first '-'
release_year = release_date.split('-')[0]

# Converting language code to a readable name
if language_code == 'xx':
    # 'xx' is handled as "no usable language code"
    language = 'Unknown'
else:
    try:
        language = languages.get(alpha2=language_code).name
    except KeyError:
        # The lookup raises KeyError for a code that is not in its table;
        # fall back to 'Unknown' instead of aborting the whole cell.
        language = 'Unknown'

# Parsing genres: comma-separated genre names (empty string when there are none)
genre_str = ", ".join(entry['name'] for entry in all_genres)

# Parsing keywords: keep only the names accepted by is_english
all_keywords = dict['keywords']['keywords']
accepted_keywords = [
    entry['name'] for entry in all_keywords if is_english(entry['name'])
]

# "None" marks a movie without any accepted keyword
if accepted_keywords:
    keyword_str = ", ".join(accepted_keywords)
else:
    keyword_str = "None"
print(keyword_str)

# Parsing watch providers (US entries only)
watch_providers = dict['watch/providers']['results']

# One text per offer type; stays empty when the type (or 'US') is absent
offers_text = {'flatrate': "", 'buy': "", 'rent': ""}
if 'US' in watch_providers:
    watch_providers = watch_providers['US']
    for offer_type in offers_text:
        if offer_type in watch_providers:
            provider_names = [
                offer['provider_name'] for offer in watch_providers[offer_type]
            ]
            # Comma-separated names followed by a single trailing space
            offers_text[offer_type] = ", ".join(provider_names) + " "

stream_str = offers_text['flatrate']
buy_str = offers_text['buy']
rent_str = offers_text['rent']


credits = dict['credits']

# Parsing cast: only the first NUM_ACTORS entries of the cast list are kept
cast = credits['cast']
NUM_ACTORS = 5
actor_list = [member["name"] for member in cast[:NUM_ACTORS]]

# Parsing crew: keep only the members whose job is 'Director'
crew = credits['crew']
director_list = [member["name"] for member in crew if member['job'] == 'Director']

# De-duplicate while preserving list order. A set would return the names in
# arbitrary order; dict keys keep insertion order. A dict comprehension is
# used because the name `dict` is rebound to the movie payload in this cell.
actor_str = ', '.join({name: None for name in actor_list})
director_str = ', '.join({name: None for name in director_list})

print(f"Actors: {actor_str}")
print(f"Director: {director_str}")

# Parsing production companies: comma-separated company names
prod_str = ", ".join(studio['name'] for studio in prod_companies)
print(prod_str)

2022
mother, martial arts, kung fu, philosophy, generations conflict, chinese woman, laundromat, chinese, east asian lead, divorce, family, hot dog, asian woman, chinese immigrant, mother daughter relationship, asian american, intergenerational trauma, internal revenue service
Actors: James Hong, Michelle Yeoh, Ke Huy Quan, Stephanie Hsu, Jamie Lee Curtis
Director: Daniel Kwan, Daniel Scheinert
A24, IAC Films, AGBO, Ley Line Entertainment, TPC, Year of the Rat
