In [14]:
import requests
import pandas as pd
import time
from dotenv import load_dotenv
import os

load_dotenv()  # reads the .env file

API_KEY = os.getenv("TMDB_API_KEY")  
BASE_URL = "https://api.themoviedb.org/3"

rotten_df = pd.read_csv("datasets/movie_info.csv")
movies = []

for i, title in enumerate(rotten_df["title"]):
    if len(movies) >= 700:  # only need 500 movies
        break

    clean_title = title.strip().lower()
    response = requests.get(f"{BASE_URL}/search/movie", params={
        "api_key": API_KEY,
        "query": clean_title,
        "language": "en-US"
    })
    results = response.json().get("results", [])

    if results:
        movies.append(results[0])
        print(f"[{len(movies)}/700] Found: {title}")  # only prints on success
    else:
        print(f"  Skipped (no match): {title}")       # see misses

    time.sleep(0.25)

print(f"\nDone! Fetched {len(movies)} movies.")

df = pd.DataFrame(movies)
df = df[["id", "title", "release_date", "vote_average", "vote_count", "popularity", "overview"]]
df.to_csv("datasets/tmdb_movies.csv", index=False)

print(f"Fetched {len(df)} movies")

[1/700] Found: Love Story
[2/700] Found: Airport
[3/700] Found: M*A*S*H
[4/700] Found: Patton
[5/700] Found: The Aristocats
[6/700] Found: The Aristocats
[7/700] Found: Woodstock
[8/700] Found: Little Big Man
[9/700] Found: Ryan's Daughter
[10/700] Found: Tora! Tora! Tora!
[11/700] Found: Catch-22
[12/700] Found: Scream and Scream Again
[13/700] Found: Jenny
[14/700] Found: ... Tick... Tick... Tick...
[15/700] Found: Beyond the Dunwich Horror
[16/700] Found: The Dunwich Horror
[17/700] Found: Days and Nights in the Forest
[18/700] Found: The Only Game in Town
[19/700] Found: Rider on the Rain
[20/700] Found: M*A*S*H
[21/700] Found: The Molly Maguires
[22/700] Found: The Honeymoon Killers
[23/700] Found: Start the Revolution Without Me
[24/700] Found: Patton
[25/700] Found: Zabriskie Point
[26/700] Found: End of the Road
[27/700] Found: King of the Grizzlies
[28/700] Found: The Bird With the Crystal Plumage
[29/700] Found: Hercules in New York
[30/700] Found: The Butcher
[31/700] Found: