#### Data Preparation

In [1]:
import os

import requests
import pandas as pd
from tqdm import tqdm

In [2]:
# API key and base URLs
# The TMDB key is a credential, so it is read from the TMDB_API_KEY environment
# variable instead of being stored in the notebook. Any key that was previously
# committed in plain text should be treated as leaked and regenerated.
API_KEY = os.environ.get("TMDB_API_KEY", "")
if not API_KEY:
    # Surface the misconfiguration in the config cell, before any request is made.
    print("TMDB_API_KEY is not set; requests to the TMDB API will not be authenticated.")
MOVIE_URL = "https://api.themoviedb.org/3/movie/top_rated"
GENRE_URL = "https://api.themoviedb.org/3/genre/movie/list"

In [3]:
# 1. Get Genre Mapping
# Download the movie-genre list once and build an {id: name} lookup so the
# numeric genre_ids attached to each movie can be translated to readable names.
genre_response = requests.get(
    GENRE_URL,
    params={"api_key": API_KEY, "language": "en-US"},
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Stop here with the HTTP status instead of an opaque KeyError on "genres" below.
# The status is checked by hand (not raise_for_status) because that error text
# contains the full request URL, including the api_key query parameter.
if genre_response.status_code != 200:
    raise RuntimeError(
        f"Genre request failed (HTTP {genre_response.status_code})"
    )
genres_data = genre_response.json()["genres"]
genre_map = {g["id"]: g["name"] for g in genres_data}

In [4]:
# Show the genre id -> genre name lookup.
genre_map

{28: 'Action',
 12: 'Adventure',
 16: 'Animation',
 35: 'Comedy',
 80: 'Crime',
 99: 'Documentary',
 18: 'Drama',
 10751: 'Family',
 14: 'Fantasy',
 36: 'History',
 27: 'Horror',
 10402: 'Music',
 9648: 'Mystery',
 10749: 'Romance',
 878: 'Science Fiction',
 10770: 'TV Movie',
 53: 'Thriller',
 10752: 'War',
 37: 'Western'}

In [5]:
# 2. Fetch Movie Data from All Pages
LAST_PAGE = 471       # last page of the top-rated listing that is requested
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely without a timeout

movies = []        # one dict per movie, in the order the API returned them
failed_pages = []  # page numbers that could not be fetched, kept for a retry

# A Session reuses the underlying connection across the sequential requests.
with requests.Session() as session:
    for page in tqdm(range(1, LAST_PAGE + 1)):  # Pages 1 to LAST_PAGE
        try:
            response = session.get(
                MOVIE_URL,
                params={"api_key": API_KEY, "language": "en-US", "page": page},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # One network error should not abort the whole crawl. Only the
            # exception type is printed: the full message can include the
            # request URL, which carries the api_key query parameter.
            print(f"Failed to fetch page {page} ({type(exc).__name__})")
            failed_pages.append(page)
            continue

        if response.status_code != 200:
            # Only a 200 response is parsed; any other status is logged and skipped.
            print(f"Failed to fetch page {page} (HTTP {response.status_code})")
            failed_pages.append(page)
            continue

        for movie in response.json().get("results", []):
            # Map genre_ids to genre names; ids missing from genre_map become "Unknown"
            genres = [genre_map.get(gid, "Unknown") for gid in movie.get("genre_ids", [])]
            movies.append({
                "id": movie["id"],
                "title": movie["title"],
                "overview": movie["overview"],
                "rating": movie["vote_average"],
                "genre_names": genres
            })

100%|██████████| 471/471 [05:57<00:00,  1.32it/s]


In [7]:
# 3. Convert to DataFrame
# Columns are listed explicitly so the frame keeps the expected schema even when
# `movies` is empty (e.g. every page request failed); without them an empty list
# yields a frame with no columns and later column access raises KeyError.
df = pd.DataFrame(
    movies,
    columns=["id", "title", "overview", "rating", "genre_names"],
)

# 4. Preview
df.head()

Unnamed: 0,id,title,overview,rating,genre_names
0,278,The Shawshank Redemption,Imprisoned in the 1940s for the double murder ...,8.7,"[Drama, Crime]"
1,238,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",8.688,"[Drama, Crime]"
2,803796,KPop Demon Hunters,"When K-pop superstars Rumi, Mira and Zoey aren...",8.599,"[Animation, Fantasy, Action, Comedy, Music, Fa..."
3,240,The Godfather Part II,In the continuing saga of the Corleone crime f...,8.571,"[Drama, Crime]"
4,424,Schindler's List,The true story of how businessman Oskar Schind...,8.6,"[Drama, History, War]"


In [8]:
# (rows, columns) of the movie DataFrame.
df.shape

(9420, 5)