In [1]:
import pandas as pd
import requests
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os

In [14]:
# Run python-dotenv's loader first so the key can be supplied outside the
# notebook, then read it from the process environment (None when unset).
load_dotenv()
TMDB_API_KEY = os.environ.get("TMDB_API_KEY")

In [17]:
# Prefix that get_poster_url joins with a search result's `poster_path` to
# build the full poster URL.
# NOTE(review): the trailing "w500" looks like a TMDB image-size segment —
# confirm against the TMDB image documentation before changing it.
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"

In [18]:
def get_poster_url(title, timeout=10):
    """Return the poster URL of the first TMDB search hit for ``title``.

    Parameters
    ----------
    title : str
        Movie title sent as the ``query`` parameter of TMDB's
        ``/3/search/movie`` endpoint. ``None``, NaN and blank titles are
        skipped without issuing a request.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection cannot hang a long batch run. Defaults to 10.

    Returns
    -------
    str or None
        ``IMAGE_BASE_URL`` concatenated with the first result's
        ``poster_path``; ``None`` when the title is missing, the request
        fails, the response status is not successful, the search has no
        results, or the first result has no poster.

    Notes
    -----
    Lookups are best-effort: any failure is printed and converted to ``None``
    instead of being raised, so one bad title does not abort a whole
    ``apply`` over the dataset.
    """
    # Missing values coming out of pandas (None / NaN) would otherwise be
    # stringified into the query and could match an unrelated movie.
    if not isinstance(title, str) and pd.isna(title):
        return None
    query = str(title).strip()
    if not query:
        return None

    search_url = "https://api.themoviedb.org/3/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "query": query,
    }

    try:
        response = requests.get(search_url, params=params, timeout=timeout)
        if not response.ok:
            # Report only the status code: the request URL carries the API
            # key, so it must not end up in the notebook output.
            print(f"Error fetching poster for '{title}': HTTP {response.status_code}")
            return None
        results = response.json().get("results")
        if results:
            poster_path = results[0].get("poster_path")
            if poster_path:
                return IMAGE_BASE_URL + poster_path
    except Exception as e:
        # Exception text from the HTTP stack can embed the request URL
        # (including ``api_key``); redact the key before printing.
        message = str(e)
        if TMDB_API_KEY:
            message = message.replace(TMDB_API_KEY, "***")
        print(f"Error fetching poster for '{title}': {message}")
    return None

In [19]:
# Enable `progress_apply` with a labelled bar before starting the slow step:
# every row triggers one get_poster_url call (the recorded run below took
# about 1h07m for 9006 rows).
tqdm.pandas(desc="Fetching posters")

movies = pd.read_csv("../data/movies_cleaned_with_genres.csv")

# Look up a poster for each original title and store it as a new column.
movies = movies.assign(
    poster_url=movies["original_title"].progress_apply(get_poster_url)
)


Fetching posters: 100%|██████████| 9006/9006 [1:06:36<00:00,  2.25it/s]


In [21]:
# Spot-check the fetched URL for the row labelled 1.
movies.loc[1, "poster_url"]

'https://image.tmdb.org/t/p/w500/9cqNxx0GxF0bflZmeSMuL5tnGzr.jpg'

In [22]:
# Persist the enriched table next to the input data, without the index column.
posters_csv_path = "../data/movies_with_posters.csv"
movies.to_csv(posters_csv_path, index=False)