In [57]:
import requests
import pandas as pd
import time
API_KEY = ''  # replace with your actual API key
BASE_URL = 'https://api.themoviedb.org/3'
base_image_url = 'https://image.tmdb.org/t/p/w500'

In [15]:
movie_id = 27205  # Inception

url = f'{BASE_URL}/movie/{movie_id}'
params = {
    'api_key': API_KEY,
    'language': 'en-US'
}

print("Requesting:", url)
response = requests.get(url, params=params)
print("Status Code:", response.status_code)

if response.status_code == 200:
    movie = response.json()
    print("Title:", movie['title'])
    print("Release Date:", movie['release_date'])
    print("Overview:", movie['overview'])
else:
    print("Error:", response.status_code)
    print("Full response:", response.text)

Requesting: https://api.themoviedb.org/3/movie/27205
Status Code: 200
Title: Inception
Release Date: 2010-07-15
Overview: Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: "inception", the implantation of another person's idea into a target's subconscious.


In [17]:
def get_movie_details(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}"
    params = {'api_key': API_KEY, 'language': 'en-US'}
    response = requests.get(url, params=params)
    return response.json() if response.status_code == 200 else None

In [19]:
def get_movie_credits(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}/credits"
    params = {'api_key': API_KEY}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        cast = [member['name'] for member in data['cast'][:5]]  # Top 5 cast
        director = next((c['name'] for c in data['crew'] if c['job'] == 'Director'), None)
        return cast, director
    return [], None

In [21]:
def get_movie_keywords(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}/keywords"
    params = {'api_key': API_KEY}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return [k['name'] for k in data.get('keywords', [])]
    return []

In [23]:
movie_ids = [27205, 157336, 155, 299534, 603]  # Inception, Interstellar, Dark Knight, Endgame, The Matrix

In [25]:
movie_data = []

for movie_id in movie_ids:
    print(f"Fetching ID {movie_id}...")
    details = get_movie_details(movie_id)
    if not details:
        continue

    cast, director = get_movie_credits(movie_id)
    keywords = get_movie_keywords(movie_id)

    movie = {
        'id': details['id'],
        'title': details['title'],
        'overview': details.get('overview'),
        'genres': [g['name'] for g in details.get('genres', [])],
        'vote_average': details.get('vote_average'),
        'vote_count': details.get('vote_count'),
        'poster_path': details.get('poster_path'),
        'release_date': details.get('release_date'),
        'original_language': details.get('original_language'),
        'popularity': details.get('popularity'),
        'adult': details.get('adult'),
        'runtime': details.get('runtime'),
        'budget': details.get('budget'),
        'revenue': details.get('revenue'),
        'cast': cast,
        'director': director,
        'keywords': keywords
    }

    movie_data.append(movie)
    time.sleep(0.25)  # Be kind to the API

Fetching ID 27205...
Fetching ID 157336...
Fetching ID 155...
Fetching ID 299534...
Fetching ID 603...


In [41]:
df = pd.DataFrame(movie_data)

In [43]:
base_image_url = 'https://image.tmdb.org/t/p/w500'
df['poster_url'] = df['poster_path'].apply(lambda path: base_image_url + path if pd.notnull(path) else None)

In [45]:
df.head()

Unnamed: 0,id,title,overview,genres,vote_average,vote_count,poster_path,release_date,original_language,popularity,adult,runtime,budget,revenue,cast,director,keywords,poster_url
0,27205,Inception,"Cobb, a skilled thief who commits corporate es...","[Action, Science Fiction, Adventure]",8.369,37298,/ljsZTbVsrQSqZgWeep2B1QiDKuh.jpg,2010-07-15,en,27.8694,False,148,160000000,839030630,"[Leonardo DiCaprio, Joseph Gordon-Levitt, Ken ...",Christopher Nolan,"[rescue, mission, dreams, airplane, paris, fra...",https://image.tmdb.org/t/p/w500/ljsZTbVsrQSqZg...
1,157336,Interstellar,The adventures of a group of explorers who mak...,"[Adventure, Drama, Science Fiction]",8.453,36880,/gEU2QniE6E77NI6lCU6MxlNBvIx.jpg,2014-11-05,en,46.0951,False,169,165000000,746606706,"[Matthew McConaughey, Anne Hathaway, Michael C...",Christopher Nolan,"[rescue, future, spacecraft, race against time...",https://image.tmdb.org/t/p/w500/gEU2QniE6E77NI...
2,155,The Dark Knight,Batman raises the stakes in his war on crime. ...,"[Drama, Action, Crime, Thriller]",8.519,33667,/qJ2tW6WMUDux911r6m7haRef0WH.jpg,2008-07-16,en,31.0077,False,152,185000000,1004558444,"[Christian Bale, Heath Ledger, Aaron Eckhart, ...",Christopher Nolan,"[joker, sadism, chaos, secret identity, crime ...",https://image.tmdb.org/t/p/w500/qJ2tW6WMUDux91...
3,299534,Avengers: Endgame,After the devastating events of Avengers: Infi...,"[Adventure, Science Fiction, Action]",8.238,26202,/ulzhLuWrPK07P1YkdWQLZnQh1JL.jpg,2019-04-24,en,22.6136,False,181,356000000,2799439100,"[Robert Downey Jr., Chris Evans, Mark Ruffalo,...",Joe Russo,"[superhero, time travel, space travel, time ma...",https://image.tmdb.org/t/p/w500/ulzhLuWrPK07P1...
4,603,The Matrix,"Set in the 22nd century, The Matrix tells the ...","[Action, Science Fiction]",8.2,26231,/dXNAPwY7VrqMAo51EKhhCJfaGb5.jpg,1999-03-31,en,28.7297,False,136,63000000,463517383,"[Keanu Reeves, Laurence Fishburne, Carrie-Anne...",Lana Wachowski,"[man vs machine, martial arts, dreams, artific...",https://image.tmdb.org/t/p/w500/dXNAPwY7VrqMAo...


In [59]:
def discover_movies(page=1, release_from='2000-01-01', release_to='2025-12-31'):
    url = f"{BASE_URL}/discover/movie"
    params = {
        'api_key': API_KEY,
        'language': 'en-US',
        'sort_by': 'popularity.desc',
        'include_adult': False,
        'include_video': False,
        'page': page,
        'release_date.gte': release_from,
        'release_date.lte': release_to,
        'vote_count.gte': 100
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return response.json()['results']
    else:
        print("Error:", response.status_code, response.text)
        return []

In [61]:
def get_movie_details(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}"
    params = {'api_key': API_KEY, 'language': 'en-US'}
    response = requests.get(url, params=params)
    return response.json() if response.status_code == 200 else None

def get_movie_credits(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}/credits"
    params = {'api_key': API_KEY}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        cast = [member['name'] for member in data['cast'][:5]]
        director = next((c['name'] for c in data['crew'] if c['job'] == 'Director'), None)
        return cast, director
    return [], None

def get_movie_keywords(movie_id):
    url = f"{BASE_URL}/movie/{movie_id}/keywords"
    params = {'api_key': API_KEY}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return [k['name'] for k in data.get('keywords', [])]
    return []

In [63]:
all_movies_raw = []
for page in range(1, 6):  # Adjust page range for more movies (up to 500)
    print(f"Fetching page {page}...")
    results = discover_movies(page=page)
    all_movies_raw.extend(results)
    time.sleep(0.25)

Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...


In [65]:
def enrich_movie_data(movie):
    movie_id = movie['id']
    print(f"Enriching: {movie['title']} ({movie_id})")

    details = get_movie_details(movie_id)
    cast, director = get_movie_credits(movie_id)
    keywords = get_movie_keywords(movie_id)

    return {
        'id': movie['id'],
        'title': movie['title'],
        'overview': movie.get('overview'),
        'genres': [g['name'] for g in details.get('genres', [])] if details else [],
        'vote_average': movie.get('vote_average'),
        'vote_count': movie.get('vote_count'),
        'release_date': movie.get('release_date'),
        'original_language': movie.get('original_language'),
        'popularity': movie.get('popularity'),
        'adult': movie.get('adult'),
        'runtime': details.get('runtime') if details else None,
        'budget': details.get('budget') if details else None,
        'revenue': details.get('revenue') if details else None,
        'cast': cast,
        'director': director,
        'keywords': keywords,
        'poster_url': base_image_url + movie['poster_path'] if movie.get('poster_path') else None
    }

In [67]:
enriched_movies = []
for movie in all_movies_raw:
    enriched = enrich_movie_data(movie)
    enriched_movies.append(enriched)
    time.sleep(0.25)

Enriching: A Minecraft Movie (950387)
Enriching: Cleaner (1125899)
Enriching: Captain America: Brave New World (822119)
Enriching: A Working Man (1197306)
Enriching: Ask Me What You Want (1252309)
Enriching: Mickey 17 (696506)
Enriching: Moana 2 (1241982)
Enriching: Mufasa: The Lion King (762509)
Enriching: Black Bag (1233575)
Enriching: Sonic the Hedgehog 3 (939243)
Enriching: The Life List (1254786)
Enriching: Flight Risk (1126166)
Enriching: Snow White (447273)
Enriching: Batman Ninja vs. Yakuza League (1297763)
Enriching: Counterattack (1356039)
Enriching: The Gorge (950396)
Enriching: Demon City (1405338)
Enriching: Holland (257094)
Enriching: Kraven the Hunter (539972)
Enriching: Amaran (927342)
Enriching: Venom: The Last Dance (912649)
Enriching: Gladiator II (558449)
Enriching: Memoir of a Snail (1064486)
Enriching: Companion (1084199)
Enriching: The Monkey (1124620)
Enriching: Flow (823219)
Enriching: Panda Plan (1160956)
Enriching: Despicable Me 4 (519182)
Enriching: Deadpool

In [71]:
df = pd.DataFrame(enriched_movies)
df = df[[
    'id', 'title', 'overview', 'genres', 'vote_average', 'vote_count',
    'release_date', 'original_language', 'popularity', 'adult',
    'runtime', 'budget', 'revenue', 'cast', 'director', 'keywords', 'poster_url'
]]
df

Unnamed: 0,id,title,overview,genres,vote_average,vote_count,release_date,original_language,popularity,adult,runtime,budget,revenue,cast,director,keywords,poster_url
0,950387,A Minecraft Movie,Four misfits find themselves struggling with o...,"[Family, Comedy, Adventure, Fantasy]",6.100,295,2025-03-31,en,1022.7906,False,101,150000000,313453003,"[Jason Momoa, Jack Black, Sebastian Eugene Han...",Jared Hess,"[friendship, surrealism, exploration, portal, ...",https://image.tmdb.org/t/p/w500/yFHHfHcUgGAxzi...
1,1125899,Cleaner,When a group of radical activists take over an...,"[Action, Thriller]",6.517,174,2025-02-19,en,343.3057,False,96,0,0,"[Daisy Ridley, Clive Owen, Taz Skylar, Flavia ...",Martin Campbell,[],https://image.tmdb.org/t/p/w500/mwzDApMZAGeYCE...
2,822119,Captain America: Brave New World,After meeting with newly elected U.S. Presiden...,"[Action, Thriller, Science Fiction]",6.091,1232,2025-02-12,en,339.0809,False,119,180000000,411409721,"[Anthony Mackie, Harrison Ford, Danny Ramirez,...",Julius Onah,"[hero, superhero, revenge, aftercreditsstinger...",https://image.tmdb.org/t/p/w500/pzIddUEMWhWzfv...
3,1197306,A Working Man,Levon Cade left behind a decorated military ca...,"[Action, Crime, Thriller]",6.467,151,2025-03-26,en,293.7838,False,116,40000000,44417000,"[Jason Statham, David Harbour, Michael Peña, J...",David Ayer,"[based on novel or book, kidnapping, vigilante...",https://image.tmdb.org/t/p/w500/6FRFIogh3zFnVW...
4,1252309,Ask Me What You Want,"After his father's death, Eric Zimmerman trave...","[Romance, Drama]",5.681,119,2024-11-29,es,286.5930,False,0,0,0,"[Gabriela Andrada, Mario Ermito, Celia Freijei...",Lucía Alemany,"[spain, based on novel or book, woman director...",https://image.tmdb.org/t/p/w500/76qnVxU2rPdVvi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1195506,Novocaine,"When the girl of his dreams is kidnapped, ever...","[Action, Comedy, Thriller]",7.014,176,2025-03-12,en,29.7101,False,109,18000000,32254000,"[Jack Quaid, Amber Midthunder, Ray Nicholson, ...",Dan Berk,"[pain, dark comedy, bank robbery, held hostage...",https://image.tmdb.org/t/p/w500/xmMHGz9dVRaMY6...
96,603692,John Wick: Chapter 4,"With the price on his head ever increasing, Jo...","[Action, Thriller, Crime]",7.721,6956,2023-03-22,en,30.5829,False,170,90000000,440157245,"[Keanu Reeves, Donnie Yen, Bill Skarsgård, Ian...",Chad Stahelski,"[new york city, martial arts, berlin, germany,...",https://image.tmdb.org/t/p/w500/vZloFAK7NmvMGK...
97,872585,Oppenheimer,The story of J. Robert Oppenheimer's role in t...,"[Drama, History]",8.067,10063,2023-07-19,en,27.7046,False,181,100000000,952000000,"[Cillian Murphy, Emily Blunt, Matt Damon, Robe...",Christopher Nolan,"[husband wife relationship, based on novel or ...",https://image.tmdb.org/t/p/w500/8Gxv8gSFCU0XGD...
98,98,Gladiator,"After the death of Emperor Marcus Aurelius, hi...","[Action, Drama, Adventure]",8.218,19545,2000-05-04,en,29.9042,False,155,103000000,465361176,"[Russell Crowe, Joaquin Phoenix, Connie Nielse...",Ridley Scott,"[epic, gladiator, rome, italy, arena, senate, ...",https://image.tmdb.org/t/p/w500/ty8TGRuvJLPUmA...


In [73]:
all_batches = []

for batch_num in range(1, 6):  # 5 batches = 5000 movies
    print(f"\n🚀 Batch {batch_num}: Fetching pages {((batch_num - 1) * 50 + 1)} to {batch_num * 50}")
    
    all_movies_raw = []
    for page in range((batch_num - 1) * 50 + 1, batch_num * 50 + 1):
        print(f"Fetching page {page}...")
        results = discover_movies(page=page)
        all_movies_raw.extend(results)
        time.sleep(0.25)

    enriched_movies = []
    for movie in all_movies_raw:
        enriched = enrich_movie_data(movie)
        enriched_movies.append(enriched)
        time.sleep(0.25)
    
    df_batch = pd.DataFrame(enriched_movies)
    
    df_batch = df_batch[[
        'id', 'title', 'overview', 'genres', 'vote_average', 'vote_count',
        'release_date', 'original_language', 'popularity', 'adult',
        'runtime', 'budget', 'revenue', 'cast', 'director', 'keywords', 'poster_url'
    ]]
    
    # Save this batch to CSV
    filename = f"movies_batch_{batch_num}.csv"
    df_batch.to_csv(filename, index=False)
    print(f"✅ Saved batch {batch_num} to {filename}")
    
    # Save in memory too if you want to combine later
    all_batches.append(df_batch)


🚀 Batch 1: Fetching pages 1 to 50
Fetching page 1...
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
Fetching page 

In [75]:
df_all = pd.concat(all_batches, ignore_index=True)
df_all.to_csv("all_movies_combined.csv", index=False)
print("🎉 Combined all batches into all_movies_combined.csv")

🎉 Combined all batches into all_movies_combined.csv


In [78]:
df_all

Unnamed: 0,id,title,overview,genres,vote_average,vote_count,release_date,original_language,popularity,adult,runtime,budget,revenue,cast,director,keywords,poster_url
0,950387,A Minecraft Movie,Four misfits find themselves struggling with o...,"[Family, Comedy, Adventure, Fantasy]",6.100,295,2025-03-31,en,1022.7906,False,101,150000000,313453003,"[Jason Momoa, Jack Black, Sebastian Eugene Han...",Jared Hess,"[friendship, surrealism, exploration, portal, ...",https://image.tmdb.org/t/p/w500/yFHHfHcUgGAxzi...
1,1125899,Cleaner,When a group of radical activists take over an...,"[Action, Thriller]",6.517,174,2025-02-19,en,343.3057,False,96,0,0,"[Daisy Ridley, Clive Owen, Taz Skylar, Flavia ...",Martin Campbell,[],https://image.tmdb.org/t/p/w500/mwzDApMZAGeYCE...
2,822119,Captain America: Brave New World,After meeting with newly elected U.S. Presiden...,"[Action, Thriller, Science Fiction]",6.091,1232,2025-02-12,en,339.0809,False,119,180000000,411409721,"[Anthony Mackie, Harrison Ford, Danny Ramirez,...",Julius Onah,"[hero, superhero, revenge, aftercreditsstinger...",https://image.tmdb.org/t/p/w500/pzIddUEMWhWzfv...
3,1197306,A Working Man,Levon Cade left behind a decorated military ca...,"[Action, Crime, Thriller]",6.467,151,2025-03-26,en,293.7838,False,116,40000000,44417000,"[Jason Statham, David Harbour, Michael Peña, J...",David Ayer,"[based on novel or book, kidnapping, vigilante...",https://image.tmdb.org/t/p/w500/6FRFIogh3zFnVW...
4,1252309,Ask Me What You Want,"After his father's death, Eric Zimmerman trave...","[Romance, Drama]",5.681,119,2024-11-29,es,286.5930,False,0,0,0,"[Gabriela Andrada, Mario Ermito, Celia Freijei...",Lucía Alemany,"[spain, based on novel or book, woman director...",https://image.tmdb.org/t/p/w500/76qnVxU2rPdVvi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,49948,Fantasia 2000,"Blending lively music and brilliant animation,...","[Animation, Family, Music]",7.000,1289,2000-01-01,en,2.5349,False,74,80000000,60655420,"[Steve Martin, Itzhak Perlman, Quincy Jones, B...",James Algar,"[magic, classical music, orchestra, musical, l...",https://image.tmdb.org/t/p/w500/5rwAtUaKEK48CP...
4996,38358,The Last Exorcism,After years of performing “exorcisms” and taki...,"[Horror, Thriller, Mystery]",5.700,1029,2010-08-27,en,2.5348,False,87,1800000,69400000,"[Patrick Fabian, Ashley Bell, Iris Bahr, Louis...",Daniel Stamm,"[farm, exorcism, louisiana, fraud, mockumentar...",https://image.tmdb.org/t/p/w500/eOo9tgGjxdsQEg...
4997,11860,Sabrina,"Sabrina Fairchild, a chauffeur's daughter, gre...","[Romance, Drama]",6.205,642,1995-12-15,en,2.5346,False,127,58000000,53672080,"[Harrison Ford, Julia Ormond, Greg Kinnear, Na...",Sydney Pollack,"[chauffeur, sibling relationship, paris, franc...",https://image.tmdb.org/t/p/w500/i8PbLJDPU7vCww...
4998,10617,Once Upon a Time in China,"Set in late 19th century Canton, this martial ...","[Action, Drama]",7.158,427,1991-08-15,cn,2.5342,False,133,0,3826459,"[Jet Li, Yuen Biao, Jacky Cheung, Rosamund Kwa...",Tsui Hark,"[martial arts, kung fu, hero, china, colonisat...",https://image.tmdb.org/t/p/w500/dkBQC0jmkmTOJJ...
