In [1]:
import requests
import time
import requests.exceptions as RequestException
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment configuration before reading any settings. load_dotenv() comes
# from python-dotenv and is expected to populate environment variables from a
# local .env file (assumption based on that library's documented behaviour).
load_dotenv()

# TMDB credentials are read from the environment rather than hard-coded.
# os.getenv returns None when TMDB_API_KEY is not set.
API_KEY = os.getenv("TMDB_API_KEY")
# Root of the TMDB v3 REST API; endpoint paths are appended to this.
BASE_URL = "https://api.themoviedb.org/3"

def get_movie_ids(start_page=1, end_page=501, output_path="../data/movie_ids.csv",
                  delay=0.25, timeout=10):
  """Collect movie IDs from TMDB's /discover/movie endpoint, one page at a time.

  Pages ``start_page`` .. ``end_page - 1`` are requested in order. After every
  successful page the IDs gathered so far are written to ``output_path`` as a
  CSV with columns ``id`` (running 0-based index) and ``Movie_id``, so a
  partial crawl is still saved if a later page fails.

  The crawl stops early (without raising) when a page returns a non-200
  status, when the JSON payload has no ``"results"`` key, or when ``requests``
  raises a ``RequestException``.

  Args:
    start_page: First page number to request (inclusive).
    end_page: Page number to stop before (exclusive, as in ``range``).
    output_path: Destination CSV path; its parent directory is created if
      it does not exist.
    delay: Seconds to sleep after each successful page (rate limiting).
    timeout: Per-request timeout in seconds passed to ``requests.get``.

  Returns:
    The number of movie IDs collected.
  """
  all_ids = []
  url = f"{BASE_URL}/discover/movie"

  # Make sure the checkpoint file can be written before spending any requests.
  out_dir = os.path.dirname(output_path)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)

  for page in range(start_page, end_page):
    params = {
      "api_key": API_KEY,
      "language": "en-US",
      # TMDB's discover endpoint names this parameter "sort_by".
      "sort_by": "popularity.desc",
      "page": page
    }

    try:
      # A timeout keeps a stalled connection from hanging the cell forever.
      response = requests.get(url, params=params, timeout=timeout)

      if response.status_code != 200:
        print(f"Page {page} returned {response.status_code}. Stopping.")
        break
      data = response.json()

      if "results" not in data:
        print(f"No results key on page {page}")
        print(data)
        break

      all_ids.extend(movie["id"] for movie in data["results"])

      print(f"Collected page {page}")

      # Checkpoint: rewrite the full CSV after each page so progress survives
      # an interruption on a later page.
      id_series = pd.DataFrame({"id": list(range(len(all_ids))), "Movie_id": all_ids})
      id_series.to_csv(output_path, index=False)

      print(f"Total IDs collected so far: {len(all_ids)}")

      time.sleep(delay)  # To respect API rate limits

    # Reference the exception class through the `requests` package: the
    # module-level name `RequestException` in this file is bound to the
    # `requests.exceptions` module itself, which cannot be used in `except`.
    except requests.exceptions.RequestException as e:
      print(f"Error fetching page {page}: {e}")
      print("Stopping program")
      break

  return len(all_ids)



In [2]:
# Driver cell: run the crawl with the default page range and report the count.
# Note: inside a notebook kernel __name__ is "__main__", so this guard passes
# and the crawl runs whenever the cell is executed.
if __name__ == "__main__":
    total = get_movie_ids()
    print(f"Final total IDs collected: {total}")

Collected page 1
Total IDs collected so far: 20
Collected page 2
Total IDs collected so far: 40
Collected page 3
Total IDs collected so far: 60
Collected page 4
Total IDs collected so far: 80
Collected page 5
Total IDs collected so far: 100
Collected page 6
Total IDs collected so far: 120
Collected page 7
Total IDs collected so far: 140
Collected page 8
Total IDs collected so far: 160
Collected page 9
Total IDs collected so far: 180
Collected page 10
Total IDs collected so far: 200
Collected page 11
Total IDs collected so far: 220
Collected page 12
Total IDs collected so far: 240
Collected page 13
Total IDs collected so far: 260
Collected page 14
Total IDs collected so far: 280
Collected page 15
Total IDs collected so far: 300
Collected page 16
Total IDs collected so far: 320
Collected page 17
Total IDs collected so far: 340
Collected page 18
Total IDs collected so far: 360
Collected page 19
Total IDs collected so far: 380
Collected page 20
Total IDs collected so far: 400
Collected pag