# Imports

In [9]:
import csv
import os
from utils import get_id_list, get_data, write_file

from dotenv import load_dotenv
load_dotenv()

True

# Grabbing All Movie IDs

In [10]:
# API key for themoviedb.org requests, read from the process environment
# (load_dotenv() is called in the imports cell before this one runs).
TMBD_API_KEY = os.environ.get('TMBD_API_KEY')

# Release years for which movie data is collected.
YEARS = [2022, 2023]

# Column names written as the first row of every output CSV file.
CSV_HEADER = [
    'Title',
    'Runtime (minutes)',
    'Language',
    'Overview',
    'Release Date',
    'Genre',
    'Keywords',
    'Recommendation',
    'Actors',
    'Director',
    'Stream',
    'Buy',
    'Rent',
    'Production Companies',
    'Website',
]

# Fetching Movie IDs and Writing Movie Data to CSV Files

In [11]:
# For every configured year: fetch the movie IDs, create a fresh CSV file
# containing only the header row, then fetch each movie's data and hand it
# to write_file together with the CSV path.
for year in YEARS:
    # Grab list of ids for all films made in {year}; set() drops duplicate
    # ids (the resulting order is arbitrary).
    movie_list = list(set(get_id_list(TMBD_API_KEY, year)))
    print(movie_list)

    FILE_NAME = f'./data/{year}_movie_collection_data.csv'

    # Make sure the output directory exists so open() below cannot fail
    # with FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(FILE_NAME), exist_ok=True)

    # Create (or truncate) the file and write the header row.
    # newline='' is required by the csv module; without it rows can end in
    # "\r\r\n" on platforms that translate "\n" on write.
    with open(FILE_NAME, 'w', newline='') as f:
        csv.writer(f).writerow(CSV_HEADER)

    # Iterate through the ids to get each movie's data.
    # Names deliberately avoid `id` and `dict`: rebinding those builtins at
    # notebook-global scope would break them for every later cell.
    for movie_id in movie_list:
        movie_data = get_data(TMBD_API_KEY, movie_id)
        # presumably appends one row to FILE_NAME — verify in utils.write_file
        write_file(FILE_NAME, movie_data)

['951546', '785084', '851644', '1028703', '829560', '610150', '818647', '942012', '744276', '619979', '949697', '718930', '810171', '804095', '628900', '760741', '833326', '508947', '965571', '616820', '836225', '763285', '930596', '916224', '934194', '615777', '762504', '545611', '606870', '1015908', '453395', '414906', '985939', '436270', '829557', '338947', '539681', '783675', '818893', '823625', '646385', '799379', '338953', '885303', '616037', '779782', '843380', '507086', '899112', '760161', '931954', '766507', '585511', '752623', '338958', '361743', '879957', '774825', '965150', '882598', '438148', '675353', '629542', '76600', '526896', '315162', '799155', '949423', '872542', '966220', '756999', '536554', '335787', '601796', '717728', '610411', '505642', '949567', '49046', '983507', '900667', '760104', '593643', '663712', '893712', '705996', '718789', '624091', '995906', '615173', '1140233', '942881', '585083']
['753342', '899445', '893723', '1029575', '1010581', '982940', '8483

# Random Testing

In [12]:
import unicodedata
import requests
import time
import json

# Upper bound on request attempts when the API answers with HTTP 429.
max_retries = 5


def is_english(s):
    """Return True if `s` consists solely of ASCII characters.

    Despite the name, this is a character-set check, not language
    detection: any string containing a non-ASCII character (accented
    letters, CJK, emoji, ...) yields False, and the empty string yields
    True.

    Args:
        s (str): Text to check.

    Returns:
        bool: True when every character of `s` is ASCII, else False.
    """
    # str.isascii() replaces the encode/decode round trip. It also returns
    # False for lone surrogates, which previously raised an uncaught
    # UnicodeEncodeError.
    return s.isascii()


# Scratch cell: fetch a single movie from the TMDB API (api.themoviedb.org)
# and parse the fields of interest into display strings.


def _join_names(items, key='name'):
    """Return the `key` value of every item joined with ', ' ('' if empty)."""
    return ', '.join(item[key] for item in items)


def _unique_in_order(values):
    """Drop duplicates from `values` while keeping first-seen order."""
    seen = set()
    unique = []
    for value in values:
        if value not in seen:
            seen.add(value)
            unique.append(value)
    return unique


Movie_ID = '545611'
API_key = os.getenv('TMBD_API_KEY')
if not API_key:
    # Fail with a clear message rather than a TypeError from the string
    # concatenation below.
    raise RuntimeError("TMBD_API_KEY is not set in the environment")

query = 'https://api.themoviedb.org/3/movie/' + Movie_ID + \
    '?api_key=' + API_key + '&append_to_response=keywords,recommendations,' + \
    'watch/providers,credits&language=en-US'

# Seconds to wait for the server before giving up on a single attempt.
REQUEST_TIMEOUT_SECONDS = 10

# The request is issued INSIDE the loop so that a 429 actually triggers a new
# attempt after the backoff; the loop stops at the first non-429 response.
for attempt in range(max_retries):
    response = requests.get(query, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 429:
        break
    # If the response was a 429, wait and then try again
    print(f"Request limit reached. Waiting and retrying ({attempt + 1}/{max_retries})")
    time.sleep(2 ** attempt)  # Exponential backoff
else:
    # Every attempt was rate limited: stop here instead of continuing with
    # an undefined (or stale, from an earlier cell) payload.
    raise RuntimeError(
        f"Still rate limited (HTTP 429) after {max_retries} attempts")

# Surface other HTTP errors (bad key, unknown id, ...) immediately.
response.raise_for_status()
movie = response.json()

# Pretty-printed JSON, kept for ad-hoc inspection of the raw payload.
pretty_json = json.dumps(movie, indent=4)

title = movie['title']
runtime = str(movie['runtime']) + " minutes"
language = movie['original_language']
release_date = movie['release_date']
overview = movie['overview']
all_genres = movie['genres']
# NOTE(review): the fallback for a missing homepage is a CSV file name, which
# looks unintended — confirm the desired placeholder.
website = 'movie_collection_data.csv' if movie['homepage'] == '' else movie['homepage']
prod_companies = movie['production_companies']

# Parsing genres
genre_str = _join_names(all_genres)

# Parsing keywords: only ASCII-only keywords are kept (see is_english);
# "None" is used when nothing remains.
all_keywords = movie['keywords']['keywords']
keyword_str = ', '.join(
    keyword['name'] for keyword in all_keywords if is_english(keyword['name']))
if keyword_str == "":
    keyword_str = "None"
print(keyword_str)

# Parsing recommendations
recommendations = movie['recommendations']['results']
recommendation_str = _join_names(recommendations, key='title')

# Parsing watch providers (US only). Each non-missing offer type becomes a
# comma-separated provider list followed by a single trailing space.
watch_providers = movie['watch/providers']['results']
provider_text = {'flatrate': "", 'buy': "", 'rent': ""}
if 'US' in watch_providers:
    watch_providers = watch_providers['US']
    for offer_type in provider_text:
        if offer_type in watch_providers:
            provider_text[offer_type] = _join_names(
                watch_providers[offer_type], key='provider_name') + " "
stream_str = provider_text['flatrate']
buy_str = provider_text['buy']
rent_str = provider_text['rent']

credits = movie['credits']

# Parsing cast: keep only the first NUM_ACTORS entries of the cast list.
cast = credits['cast']
NUM_ACTORS = 5
actor_list = [member["name"] for member in cast[:NUM_ACTORS]]

# Parsing crew: keep only members whose job is exactly 'Director'.
crew = credits['crew']
director_list = [member["name"] for member in crew if member['job'] == 'Director']

# De-duplicate while preserving API order; a plain set() would shuffle the
# names differently on every run.
actor_str = ', '.join(_unique_in_order(actor_list))
director_str = ', '.join(_unique_in_order(director_list))

print(f"Actors: {actor_str}")
print(f"Director: {director_str}")

# Parsing production companies
prod_str = _join_names(prod_companies)
print(prod_str)

mother, martial arts, kung fu, philosophy, generations conflict, chinese woman, laundromat, chinese, east asian lead, divorce, family, hot dog, asian woman, chinese immigrant, mother daughter relationship, asian american, intergenerational trauma, internal revenue service
Actors: Michelle Yeoh, Stephanie Hsu, Ke Huy Quan, James Hong, Jamie Lee Curtis
Director: Daniel Scheinert, Daniel Kwan
A24, IAC Films, AGBO, Ley Line Entertainment, TPC, Year of the Rat
