# Imports

In [1]:
import csv
import os
from utils import get_id_list, get_data, write_file

from dotenv import load_dotenv
load_dotenv()

True

# Grabbing All Movie IDs

In [2]:
TMBD_API_KEY = os.getenv('TMBD_API_KEY')
YEARS = [2022, 2023]
CSV_HEADER = ['Title', 'Runtime (minutes)', 'Language', 'Overview',
              'Release Date', 'Genre', 'Keywords',
              'Actors', 'Director', 'Stream', 'Buy', 'Rent', 
              'Production Companies', 'Website']

# Writing all IDs to CSV file

In [3]:
for year in YEARS:
    # Grab list of ids for all films made in {YEAR}
    movie_list = list(set(get_id_list(TMBD_API_KEY, year)))
    print(movie_list)

    FILE_NAME = f'./data/{year}_movie_collection_data.csv'

    # Creating file
    with open(FILE_NAME, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)

    # Iterate through list of ids to get data
    for id in movie_list:
        dict = get_data(TMBD_API_KEY, id)
        write_file(FILE_NAME, dict)

['774825', '744276', '783675', '1015908', '505642', '790519', '997317', '601796', '879957', '801526', '810171', '725201', '545611', '406759', '717728', '642885', '436270', '965571', '682075', '985939', '799155', '453395', '335787', '661374', '610150', '942881', '507086', '836225', '779782', '539681', '624091', '556694', '872542', '675353', '76600', '829560', '949567', '760161', '930596', '785084', '1028703', '988591', '951546', '315162', '829557', '593643', '560057', '760104', '843380', '934194', '338953', '882598', '851644', '526896', '705996', '760741', '942012', '949697', '619979', '818893', '49046', '799379', '698508', '965150', '508947', '1006540', '616037', '585083', '752623', '916224', '361743', '1001835', '983526', '536554', '995906', '438148', '619803', '663712', '818647', '718930', '610411', '629542', '724495', '414906', '854006', '885303', '756999', '766507', '615777', '899112', '876792', '585511', '966220']
['467244', '1040148', '940551', '1075794', '838240', '1183905', '50

# Random Testing

In [None]:
import locale
import unicodedata
import requests
import time
import json
from iso639 import languages

max_retries = 5

def is_english(s):
    try:
        s.encode(encoding='utf-8').decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True 


# Test the function
Movie_ID = '545611'
# Create empty index
API_key = os.getenv('TMBD_API_KEY')
query = 'https://api.themoviedb.org/3/movie/' + Movie_ID + \
    '?api_key='+API_key +'&append_to_response=keywords,' + \
        'watch/providers,credits&language=en-US'

response = requests.get(query)
for i in range(max_retries):
    if response.status_code == 429:
        # If the response was a 429, wait and then try again
        print(
            f"Request limit reached. Waiting and retrying ({i+1}/{
                max_retries})")
        time.sleep(2 ** i)  # Exponential backoff
    else:
        dict = response.json()

# Pretty print the JSON data
pretty_json = json.dumps(dict, indent=4)

# print(pretty_json)

title = dict['title']
runtime = str(dict['runtime']) + " minutes"
language_code = dict['original_language']
release_date = dict['release_date']
overview = dict['overview']
all_genres = dict['genres']
website = 'movie_collection_data.csv' if dict['homepage'] == '' else dict['homepage']
prod_companies = dict['production_companies']

# Converting language
full_name = languages.get(alpha2=language_code).name

# Parsing genres
genre_str = ""
for genre in all_genres:
    genre_str += genre['name'] + ", "
genre_str = genre_str[:-2]

# Parsing keywords
all_keywords = dict['keywords']['keywords']
keyword_str = ""
for keyword in all_keywords:
    if is_english(keyword['name']):
        keyword_str += keyword['name'] + ", "
if keyword_str == "":
    keyword_str = "None" 
else:
    keyword_str = keyword_str[:-2]
print(keyword_str)

# Parsing watch providers
watch_providers = dict['watch/providers']['results']
stream_str, buy_str, rent_str = "", "", ""
if 'US' in watch_providers:
    watch_providers = watch_providers['US']
    provider_strings = ['flatrate', 'buy', 'rent']
    for string in provider_strings:
        if string not in watch_providers:
            continue

        _str = ""

        for element in watch_providers[string]:
            _str += element['provider_name'] + ", "
        _str = _str[:-2] + " "

        if string == 'flatrate':
            stream_str += _str
        elif string == 'buy':
            buy_str += _str
        else:
            rent_str += _str


credits = dict['credits']
actor_list, director_list = [], []

# Parsing cast
cast = credits['cast']
NUM_ACTORS = 5
for member in cast[:NUM_ACTORS]:
    actor_list.append(member["name"])

# Parsing crew
crew = credits['crew']
for member in crew:
    if member['job'] == 'Director':
        director_list.append(member["name"])

actor_str = ', '.join(list(set(actor_list)))
director_str = ', '.join(list(set(director_list)))

print(f"Actors: {actor_str}")
print(f"Director: {director_str}")

# Parsing production companies
prod_str = ""
for company in prod_companies:
    prod_str += company['name'] + ", "
prod_str = prod_str[:-2]
print(prod_str)