# Imports

In [1]:
import csv
import os
from utils import get_id_list, get_data, write_file

from dotenv import load_dotenv
load_dotenv()

True

# Grabbing All Movie IDs

In [2]:
TMBD_API_KEY = os.getenv('TMBD_API_KEY')
YEARS = [2022, 2023]
CSV_HEADER = ['Title', 'Runtime', 'Language', 'Overview',
              'Release Date', 'Genre', 'Keyword',
              'Recommendation', 'Cast', 'Crew', 
              'Stream', 'Buy', 'Rent']

# Writing all IDs to CSV file

In [3]:
for year in YEARS:
    # Grab list of ids for all films made in {YEAR}
    movie_list = get_id_list(TMBD_API_KEY, year)
    print(movie_list)

    FILE_NAME = f'./data/{year}_movie_collection_data.csv'

    # Creating file
    with open(FILE_NAME, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)

    # Iterate through list of ids to get data
    for id in movie_list:
        text = get_data(TMBD_API_KEY, id)
        write_file(FILE_NAME, text)

['885303', '1028703', '872542', '799155', '76600', '965571', '843380', '315162', '949567', '949697', '951546', '361743', '624091', '610150', '934194', '508947', '414906', '507086', '942012', '505642', '436270', '818893', '818647', '851644', '545611', '942012', '438148', '453395', '616037', '829557', '916224', '406759', '829560', '585083', '985939', '995906', '766507', '610411', '585511', '983507', '718930', '879957', '779782', '783675', '1001835', '545611', '1015908', '774825', '966220', '615777', '888001', '836225', '942881', '818893', '661374', '856433', '663712', '760104', '988591', '900667', '744276', '833326', '335787', '799379', '619979', '615173', '937895', '864101', '606870', '1020969', '49046', '601796', '854006', '717728', '646385', '536554', '830784', '629542', '876792', '539681', '629542', '648579', '1140233', '882598', '518896', '717728', '752623', '760741', '698508', '338958', '646385', '893712', '945675', '1020969', '539681', '560057', '457232', '830784', '829280', '6291

# Random Testing

In [5]:
import requests
import time
import json

max_retries = 5

Movie_ID = '885303'
# Create empty index
API_key = os.getenv('TMBD_API_KEY')
query = 'https://api.themoviedb.org/3/movie/' + Movie_ID + \
    '?api_key='+API_key +'&append_to_response=keywords,recommendations,' + \
        'watch/providers,credits&language=en-US'

response = requests.get(query)
for i in range(max_retries):
    if response.status_code == 429:
        # If the response was a 429, wait and then try again
        print(
            f"Request limit reached. Waiting and retrying ({i+1}/{
                max_retries})")
        time.sleep(2 ** i)  # Exponential backoff
    else:
        dict = response.json()

# Pretty print the JSON data
pretty_json = json.dumps(dict, indent=4)

print(pretty_json)

title = dict['title']
runtime = str(dict['runtime']) + " minutes"
language = dict['original_language']
release_date = dict['release_date']
overview = dict['overview']
all_genres = dict['genres']

# Parsing genres
genre_str = ""
for genre in all_genres:
    genre_str += genre['name'] + ", "
genre_str = genre_str[:-2]

# Parsing keywords
all_keywords = dict['keywords']['keywords']
keyword_str = ""
for keyword in all_keywords:
    keyword_str += keyword['name'] + ", "

keyword_str = keyword_str[:-2]

# Parsing recommendations
recommendations = dict['recommendations']['results']
recommendation_str = ""
for recommendation in recommendations:
    recommendation_str += recommendation['title'] + ", "
recommendation_str = recommendation_str[:-2]

# Parsing watch providers
watch_providers = dict['watch/providers']['results']
stream_str, buy_str, rent_str = "", "", ""
if 'US' in watch_providers:
    watch_providers = watch_providers['US']
    provider_strings = ['flatrate', 'buy', 'rent']
    for string in provider_strings:
        if string not in watch_providers:
            continue

        _str = ""

        for element in watch_providers[string]:
            _str += element['provider_name'] + ", "
        _str = _str[:-2] + " "

        if string == 'flatrate':
            stream_str += _str
        elif string == 'buy':
            buy_str += _str
        else:
            rent_str += _str


credits = dict['credits']
cast_list, crew_list = [], []

# Parsing cast
cast = credits['cast']
for member in cast:
    cast_list.append(member["name"])

# Parsing crew
crew = credits['crew']
for member in crew:
    crew_list.append(member["name"])

cast_str = ', '.join(list(set(cast_list)))
crew_str = ', '.join(list(set(crew_list)))

print(f"cast: {cast_str}")
print(f"crew: {crew_str}")

{
    "adult": false,
    "backdrop_path": "/xStFwzqBWDRyUz3BldisuUY7kjW.jpg",
    "belongs_to_collection": null,
    "budget": 0,
    "genres": [
        {
            "id": 14,
            "name": "Fantasy"
        },
        {
            "id": 12,
            "name": "Adventure"
        },
        {
            "id": 10770,
            "name": "TV Movie"
        }
    ],
    "homepage": "",
    "id": 885303,
    "imdb_id": "tt15426722",
    "original_language": "it",
    "original_title": "I viaggiatori",
    "overview": "Time jump to 1939 with teenager Max and his friends as they try to navigate Mussolini's Rome and find Max's missing brother.",
    "popularity": 462.769,
    "poster_path": "/38N7GRJLu7IWyEdil96YobJ5LWM.jpg",
    "production_companies": [
        {
            "id": 86177,
            "logo_path": "/n3bX5SOkpXwL6xOCetwlCDIUF1p.png",
            "name": "Groenlandia",
            "origin_country": "IT"
        },
        {
            "id": 179289,
            "log