In [1]:
import csv
import os
import time

import requests

# TMDB credentials and crawl settings.
# The key is read from the TMDB_API_KEY environment variable so a real credential never has
# to be saved in the notebook; the original placeholder remains the fallback when it is unset.
API_KEY = os.environ.get('TMDB_API_KEY') or 'your_tmdb_api_key'
BASE_URL = 'https://api.themoviedb.org/3'
MOVIE_COUNT = 10000  # number of movie records fetch_top_movies tries to collect
REQUESTS_PER_SECOND = 40  # request budget used to derive the pause below
SLEEP_TIME = 1.0 / REQUESTS_PER_SECOND  # seconds to pause per request

In [8]:

def fetch_movie_details(movie_id, timeout=10):
    """Fetch the details and credits of one TMDB movie and flatten them into a dict.

    Two requests are made: ``/movie/{movie_id}`` and ``/movie/{movie_id}/credits``.
    Each one is preceded by a ``SLEEP_TIME`` pause so the throttle applies per
    request, and the credits request is skipped when the details request has
    already failed.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URLs.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A dict with the keys ``title``, ``description``, ``genres``, ``actors``
        and ``director`` (list-valued fields are joined with ``', '``), or
        ``None`` when a request fails, times out, does not answer with HTTP 200,
        or returns a body that cannot be decoded as JSON.
    """
    movie_url = f"{BASE_URL}/movie/{movie_id}?api_key={API_KEY}"
    credits_url = f"{BASE_URL}/movie/{movie_id}/credits?api_key={API_KEY}"

    try:
        time.sleep(SLEEP_TIME)
        movie_response = requests.get(movie_url, timeout=timeout)
        if movie_response.status_code != 200:
            # No point spending a second request on a movie we cannot describe.
            return None

        time.sleep(SLEEP_TIME)
        credits_response = requests.get(credits_url, timeout=timeout)
        if credits_response.status_code != 200:
            return None

        movie_data = movie_response.json()
        credits_data = credits_response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout or undecodable body: report it like any other
        # failed lookup so one bad movie does not abort a long crawl.
        return None

    # `or []` also covers keys that are present but null in the payload.
    genre_entries = movie_data.get('genres') or []
    cast_entries = credits_data.get('cast') or []
    crew_entries = credits_data.get('crew') or []

    title = movie_data.get('title', 'N/A')
    description = movie_data.get('overview', 'N/A')
    genres = ', '.join(genre['name'] for genre in genre_entries if genre.get('name'))

    # Only the first five cast entries are kept.
    actors = ', '.join(actor['name'] for actor in cast_entries[:5] if actor.get('name'))
    # First crew member whose job is 'Director'; entries without a job are ignored.
    director = next(
        (
            member['name']
            for member in crew_entries
            if member.get('job') == 'Director' and member.get('name')
        ),
        'N/A',
    )

    return {
        'title': title,
        'description': description,
        'genres': genres,
        'actors': actors,
        'director': director
    }

In [10]:
def fetch_top_movies(movie_count=None, timeout=10):
    """Collect detail records for the most popular movies from the discover endpoint.

    Pages of ``/discover/movie`` (sorted by ``popularity.desc``) are requested
    one after another and every listed movie is passed to
    ``fetch_movie_details``; movies whose lookup returns nothing are skipped.

    Args:
        movie_count: Number of records to collect. Defaults to the module-level
            ``MOVIE_COUNT`` when omitted.
        timeout: Seconds to wait for each discover response before giving up.

    Returns:
        A list of the dicts returned by ``fetch_movie_details``. It holds fewer
        than ``movie_count`` entries when a page request fails or the results
        run out first.
    """
    if movie_count is None:
        movie_count = MOVIE_COUNT

    time.sleep(SLEEP_TIME)
    movies = []
    seen_ids = set()  # ids already looked up, so a movie repeated on a later page is not fetched twice
    page = 1
    while len(movies) < movie_count:
        discover_url = f"{BASE_URL}/discover/movie?api_key={API_KEY}&sort_by=popularity.desc&page={page}"
        try:
            response = requests.get(discover_url, timeout=timeout)
            data = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError):
            # Network failure, timeout or undecodable body counts as a failed page.
            data = None

        if data is None:
            print(f"Failed to fetch page {page}")
            break

        results = data.get('results') or []
        if not results:
            # A successful but empty page means there is nothing left to read;
            # without this check the loop would keep requesting pages forever.
            break

        for movie in results:
            if len(movies) >= movie_count:
                break
            movie_id = movie.get('id')
            if movie_id is None or movie_id in seen_ids:
                continue
            seen_ids.add(movie_id)
            movie_details = fetch_movie_details(movie_id)
            if movie_details:
                movies.append(movie_details)

        # Stop at the last page when the response reports how many pages exist.
        total_pages = data.get('total_pages')
        if total_pages is not None and page >= total_pages:
            break

        page += 1
        time.sleep(0.5)  # To avoid hitting rate limits

    return movies

# Run the crawl: this issues live HTTP requests (one discover call per page, plus the
# per-movie lookups made by fetch_movie_details) and keeps the collected records in memory.
# NOTE(review): the recorded "Failed to fetch page 1" output means the first discover request
# did not return HTTP 200 — presumably because API_KEY was still the placeholder; confirm.
top_movies = fetch_top_movies()

Failed to fetch page 1


In [21]:
import csv

# Write every record in top_movies to movies_so_far.csv, one row per movie.
# The column list has to cover the keys of each record dict.
fieldnames = ['title', 'description', 'genres', 'actors', 'director']
with open('movies_so_far.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(top_movies)  # header first, then all records in list order

In [None]:
top_movies[0]

{'title': 'Kingdom of the Planet of the Apes',
 'description': "Several generations in the future following Caesar's reign, apes are now the dominant species and live harmoniously while humans have been reduced to living in the shadows. As a new tyrannical ape leader builds his empire, one young ape undertakes a harrowing journey that will cause him to question all that he has known about the past and to make choices that will define a future for apes and humans alike.",
 'genres': 'Science Fiction, Adventure, Action',
 'actors': 'Owen Teague, Freya Allan, Kevin Durand, Peter Macon, William H. Macy',
 'director': 'Wes Ball'}

In [None]:
# Save the results to a CSV file.
# 'genres' has to be listed: every record built by fetch_movie_details carries that key, and
# csv.DictWriter raises ValueError for dict keys missing from fieldnames (extrasaction='raise'
# is the default).
with open('top_10000_movies.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['title', 'description', 'genres', 'actors', 'director'])
    writer.writeheader()
    for movie in top_movies:
        writer.writerow(movie)

print(f"Fetched details of {len(top_movies)} movies and saved to CSV.")


In [1]:
import pandas as pd

In [2]:
# NOTE(review): 'movies.csv' is not written by any cell shown in this notebook (the export
# cells write 'movies_so_far.csv' and 'top_10000_movies.csv') — presumably it comes from a
# separate export; confirm its origin before relying on it.
df = pd.read_csv('movies.csv')

In [3]:
df.head()

Unnamed: 0,id,title,description,release_date,rating,vote_count,genres,actors,director
0,27205,Inception,"Cobb, a skilled thief who commits corporate es...",2010-07-15,8.368,35811,"Action, Science Fiction, Adventure","Leonardo DiCaprio, Joseph Gordon-Levitt, Ken W...",Christopher Nolan
1,157336,Interstellar,The adventures of a group of explorers who mak...,2014-11-05,8.434,34465,"Adventure, Drama, Science Fiction","Matthew McConaughey, Anne Hathaway, Michael Ca...",Christopher Nolan
2,155,The Dark Knight,Batman raises the stakes in his war on crime. ...,2008-07-16,8.515,32012,"Drama, Action, Crime, Thriller","Christian Bale, Heath Ledger, Michael Caine, G...",Christopher Nolan
3,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...",2009-12-15,7.581,30907,"Action, Adventure, Fantasy, Science Fiction","Sam Worthington, Zoe Saldaña, Sigourney Weaver...",James Cameron
4,24428,The Avengers,When an unexpected enemy emerges and threatens...,2012-04-25,7.714,30090,"Science Fiction, Action, Adventure","Robert Downey Jr., Chris Evans, Mark Ruffalo, ...",Joss Whedon


In [5]:
# Number of distinct movie ids; if it equals the row count from df.shape, no id is duplicated.
df['id'].nunique()

5315

In [6]:
df.shape

(5315, 9)