In [4]:
import requests
import pandas as pd
import time
import os

In [None]:
API_KEY = 'eecbe712cda930d6aa221ec491896b9d'
BASE_URL = 'https://api.themoviedb.org/3'

# Function to get movies from a specific page
def get_movies(api_key, page):
    url = f'{BASE_URL}/movie/popular'
    params = {'api_key': api_key, 'page': page}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return response.json()['results']
    else:
        print(f'Error on page {page}: {response.status_code}')
        return None

# Function to get movie details (budget, actors, etc.)
def get_movie_details(api_key, movie_id):
    # Get basic movie details
    url = f'{BASE_URL}/movie/{movie_id}'
    params = {'api_key': api_key}
    response = requests.get(url, params=params)
    if response.status_code != 200:
        print(f'Error fetching details for movie {movie_id}: {response.status_code}')
        return None

    movie_details = response.json()

    # Get credits (actors and crew)
    credits_url = f'{BASE_URL}/movie/{movie_id}/credits'
    credits_response = requests.get(credits_url, params=params)
    if credits_response.status_code == 200:
        credits = credits_response.json()
        movie_details['actors'] = [actor['name'] for actor in credits['cast'][:5]]  # Top 5 actors
        movie_details['director'] = next((member['name'] for member in credits['crew'] if member['job'] == 'Director'), None)
    else:
        print(f'Error fetching credits for movie {movie_id}: {credits_response.status_code}')
        movie_details['actors'] = []
        movie_details['director'] = None

    return movie_details

# Function to save data to CSV
def save_to_csv(data, filename):
    df = pd.DataFrame(data)
    # Append data to the file if it exists
    try:
        existing_df = pd.read_csv(filename)
        df = pd.concat([existing_df, df], ignore_index=True)
    except FileNotFoundError:
        pass
    df.to_csv(filename, index=False)

# Function to collect data from multiple pages
def collect_movies(api_key, total_pages=1000, delay=0.25, output_file='movies.csv'):
    all_movies = []
    for page in range(1, total_pages + 1):
        print(f'Collecting page {page}...')
        movies = get_movies(api_key, page)
        if movies:
            for movie in movies:
                movie_id = movie['id']
                print(f'Fetching details for movie {movie_id}...')
                details = get_movie_details(api_key, movie_id)
                if details:
                    # Combine basic info and details
                    full_data = {**movie, **details}
                    all_movies.append(full_data)
                    save_to_csv([full_data], output_file)  # Save each movie
                time.sleep(delay)  # Delay to avoid API rate limits
    return all_movies

# Main function
if __name__ == '__main__':
    # Create a 'data' folder if it doesn't exist
    if not os.path.exists('data'):
        os.makedirs('data')

    # Path to the output file inside the 'data' folder
    output_file_path = os.path.join('data', 'movies.csv')

    # Collect data from 500 pages (10,000 movies if 20 per page)
    collect_movies(API_KEY, total_pages=500, delay=0.25, output_file=output_file_path)
    print('Data collection complete!')

Collecting page 1...
Fetching details for movie 950396...
Fetching details for movie 1126166...
Fetching details for movie 1064213...
Fetching details for movie 1241982...
Fetching details for movie 762509...
Fetching details for movie 939243...
Fetching details for movie 822119...
Fetching details for movie 927342...
Fetching details for movie 1084199...
Fetching details for movie 823219...
Fetching details for movie 426889...
Fetching details for movie 1160956...
Fetching details for movie 539972...
Fetching details for movie 912649...
Fetching details for movie 549509...
Fetching details for movie 933260...
Fetching details for movie 696506...
Fetching details for movie 558449...
Fetching details for movie 1064486...
Fetching details for movie 1188104...
Collecting page 2...
Fetching details for movie 1188104...
Fetching details for movie 926670...
Fetching details for movie 1249289...
Fetching details for movie 774370...
Fetching details for movie 402431...
Fetching details for mov

KeyboardInterrupt: 