Lab 1 Project

---

TMDB Movie Data Analysis Project - Phase 1: Data Collection

This script fetches movie data from the TMDB API for analysis.


In [1]:

# ============================================================================
# All the libraries needed
# ============================================================================
import json  # For handling JSON data
import os  # For reading the API key from the environment
import time  # For adding delays between API calls

import pandas as pd  # For data manipulation
import requests  # For making API calls


In [None]:

# ============================================================================
# CONFIGURATION WITH THE API KEY
# ============================================================================

# SECURITY: a real API key should not be committed to source control.
# Prefer exporting TMDB_API_KEY in the environment; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
API_KEY = os.environ.get('TMDB_API_KEY') or 'eb77b42bff2056cee45702849bbf307f'

# TMDB API base URL (endpoints are appended directly, so keep the trailing '/')
BASE_URL = 'https://api.themoviedb.org/3/'

# List of movie IDs we need to fetch.
# NOTE(review): ID 0 comes back as 404 in the recorded run; presumably it is
# included on purpose to exercise the "not found" path -- confirm.
MOVIE_IDS = [
    0, 299534, 19995, 140607, 299536, 597, 135397, 420818,
    24428, 168259, 99861, 284054, 12445, 181808, 330457,
    351286, 109445, 321612, 260513,
]


In [3]:

# ============================================================================
# FUNCTION 1: Fetch Basic Movie Data
# ============================================================================

def fetch_movie_details(movie_id, api_key, timeout=10):
    """
    Fetches detailed information about a single movie from TMDB.

    Sends a GET request to ``BASE_URL + 'movie/<movie_id>'`` with the API key
    as a query parameter and prints a one-line status message for every
    outcome.

    Parameters:
    -----------
    movie_id : int
        The TMDB movie ID
    api_key : str
        Your TMDB API key
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout ``requests`` may block indefinitely.

    Returns:
    --------
    dict or None
        Movie data as a dictionary, or None if the request fails
        (non-200 status, network error, timeout, or unparsable body)
    """

    full_url = f'{BASE_URL}movie/{movie_id}'
    params = {'api_key': api_key}

    try:
        response = requests.get(full_url, params=params, timeout=timeout)

        if response.status_code == 200:
            # Parse first so "Successfully fetched" is only printed when the
            # body really is usable JSON.
            movie = response.json()
            print(f" Successfully fetched movie ID: {movie_id}")
            return movie

        if response.status_code == 404:
            print(f" Movie ID {movie_id} not found (404)")
            return None

        print(f" Error fetching movie ID {movie_id}: Status {response.status_code}")
        return None

    # RequestException covers connection errors and timeouts; ValueError
    # covers a response body that is not valid JSON.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f" Exception occurred for movie ID {movie_id}: {str(e)}")
        return None


In [4]:
# ============================================================================
# FUNCTION 2: Fetch Movie Credits (Cast & Crew)
# ============================================================================

def fetch_movie_credits(movie_id, api_key, timeout=10):
    """
    Fetches cast and crew information for a movie.

    Sends a GET request to ``BASE_URL + 'movie/<movie_id>/credits'`` with the
    API key as a query parameter.

    Parameters:
    -----------
    movie_id : int
        The TMDB movie ID
    api_key : str
        Your TMDB API key
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout ``requests`` may block indefinitely.

    Returns:
    --------
    dict or None
        Credits data as a dictionary, or None if the request fails
        (non-200 status, network error, timeout, or unparsable body)
    """

    full_url = f'{BASE_URL}movie/{movie_id}/credits'
    params = {'api_key': api_key}

    try:
        response = requests.get(full_url, params=params, timeout=timeout)

        if response.status_code == 200:
            return response.json()

        # Report the failure instead of swallowing it silently, mirroring
        # the messages printed by the movie-details fetcher.
        print(f" Error fetching credits for movie ID {movie_id}: Status {response.status_code}")
        return None

    # RequestException covers connection errors and timeouts; ValueError
    # covers a response body that is not valid JSON.
    except (requests.exceptions.RequestException, ValueError) as e:
        # The original literal held mojibake for the U+2717 ballot-x mark.
        print(f"✗ Error fetching credits for movie ID {movie_id}: {str(e)}")
        return None


In [5]:
# ============================================================================
# FUNCTION 3: Combine Movie Details + Credits
# ============================================================================

def fetch_complete_movie_data(movie_id, api_key):
    """
    Builds one dictionary holding a movie's details plus its cast and crew.

    Parameters:
    -----------
    movie_id : int
        The TMDB movie ID
    api_key : str
        Your TMDB API key

    Returns:
    --------
    dict or None
        The details dictionary extended with 'cast' and 'crew' keys, or
        None when the details request itself fails. If only the credits
        lookup fails, both keys are still present as empty lists.
    """

    details = fetch_movie_details(movie_id, api_key)
    if details is None:
        # Without the base record there is nothing to attach credits to.
        return None

    # A failed (or empty) credits lookup degrades to "no cast, no crew".
    credit_info = fetch_movie_credits(movie_id, api_key) or {}

    for section in ('cast', 'crew'):
        details[section] = credit_info.get(section, [])

    return details



In [6]:
# ============================================================================
# FUNCTION 4: Fetch All Movies
# ============================================================================

def fetch_all_movies(movie_ids, api_key, delay=0.25):
    """
    Fetches data for all movies in the list.

    Each ID is passed to ``fetch_complete_movie_data``; IDs for which that
    call returns nothing are skipped. Progress is printed as it goes.

    Parameters:
    -----------
    movie_ids : list
        List of TMDB movie IDs
    api_key : str
        Your TMDB API key
    delay : float, optional
        Seconds to pause between consecutive movies (default 0.25).
        Pass 0 to disable the pause.

    Returns:
    --------
    list
        List of dictionaries containing movie data (failed IDs omitted)
    """

    all_movies = []
    total = len(movie_ids)
    banner = '=' * 60

    print(f"\n{banner}")
    print(f"Starting to fetch {total} movies...")
    print(f"{banner}\n")

    for i, movie_id in enumerate(movie_ids, 1):
        print(f"[{i}/{total}] Fetching movie ID: {movie_id}...")

        # Fetch complete data for this movie
        movie_data = fetch_complete_movie_data(movie_id, api_key)

        if movie_data:
            all_movies.append(movie_data)

        # Be polite to the API: pause between movies. Note that one movie
        # costs up to two HTTP requests (details + credits), so raise
        # `delay` if the service enforces a strict per-request rate limit.
        # No pause is needed after the final movie.
        if delay > 0 and i < total:
            time.sleep(delay)

    print(f"\n{banner}")
    print(f" Successfully fetched {len(all_movies)} out of {total} movies")
    print(f"{banner}\n")

    return all_movies


In [None]:
# ============================================================================
# TEST for fetching ONE movie first
# ============================================================================

# Script entry point: smoke-test the API with a single movie, then optionally
# fetch every ID in MOVIE_IDS and dump the raw result to a CSV file.
if __name__ == "__main__":

    # Check if API key is set (i.e. the placeholder was replaced)
    if API_KEY == 'YOUR_API_KEY_HERE':
        print("\n  WARNING: Please add your API key at the top of the file!")
        print("Find this line: API_KEY = 'YOUR_API_KEY_HERE'")
        print("Replace YOUR_API_KEY_HERE with your actual TMDB API key\n")

    else:
        # Test with ONE movie first: Avengers Endgame (ID: 299534)
        print("\n" + "="*60)
        print("TEST: Fetching Avengers: Endgame (ID: 299534)")
        print("="*60 + "\n")

        test_movie = fetch_complete_movie_data(299534, API_KEY)

        if test_movie:
            # Fall back to 0 so the ',' format spec cannot raise TypeError
            # when a monetary field is missing or None.
            budget = test_movie.get('budget') or 0
            revenue = test_movie.get('revenue') or 0

            print("\n SUCCESS! Here's what we got:\n")
            print(f"Title: {test_movie.get('title')}")
            print(f"Release Date: {test_movie.get('release_date')}")
            print(f"Budget: ${budget:,}")
            print(f"Revenue: ${revenue:,}")
            print(f"Rating: {test_movie.get('vote_average')}/10")
            print(f"Number of cast members: {len(test_movie.get('cast', []))}")
            print(f"Number of crew members: {len(test_movie.get('crew', []))}")

            print("\n" + "="*60)
            print(" API is working! Ready to fetch all movies!")
            print("="*60)

            # Ask user if they want to fetch all movies; the count is derived
            # from MOVIE_IDS so the prompt stays correct if the list changes.
            print(f"\nNext step: Fetch all {len(MOVIE_IDS)} movies")
            user_input = input("Do you want to fetch all movies now? (yes/no): ")

            if user_input.strip().lower() in ['yes', 'y']:
                # Fetch all movies
                all_movies_data = fetch_all_movies(MOVIE_IDS, API_KEY)

                # Convert to DataFrame
                df = pd.DataFrame(all_movies_data)

                # Save raw data to CSV
                df.to_csv('movies_raw_data.csv', index=False)
                print("\n All movies saved to 'movies_raw_data.csv'")
                print(f" DataFrame shape: {df.shape}")

                # Only preview columns that actually exist: if every fetch
                # failed the DataFrame is empty and a fixed column list
                # would raise KeyError.
                wanted = ['id', 'title', 'release_date', 'budget', 'revenue']
                preview_cols = [col for col in wanted if col in df.columns]
                print("\nFirst few columns:")
                print(df[preview_cols].head())

        else:
            print("\n Failed to fetch test movie. Please check:")
            print("1. Your API key is correct")
            print("2. You have internet connection")
            print("3. TMDB API is accessible")


TEST: Fetching Avengers: Endgame (ID: 299534)

 Successfully fetched movie ID: 299534

 SUCCESS! Here's what we got:

Title: Avengers: Endgame
Release Date: 2019-04-24
Budget: $356,000,000
Revenue: $2,799,439,100
Rating: 8.237/10
Number of cast members: 106
Number of crew members: 607

 API is working! Ready to fetch all movies!

Next step: Fetch all 19 movies

Starting to fetch 19 movies...

[1/19] Fetching movie ID: 0...
 Movie ID 0 not found (404)
[2/19] Fetching movie ID: 299534...
 Successfully fetched movie ID: 299534
[3/19] Fetching movie ID: 19995...
 Successfully fetched movie ID: 19995
[4/19] Fetching movie ID: 140607...
 Successfully fetched movie ID: 140607
[5/19] Fetching movie ID: 299536...
 Successfully fetched movie ID: 299536
[6/19] Fetching movie ID: 597...
 Successfully fetched movie ID: 597
[7/19] Fetching movie ID: 135397...
 Successfully fetched movie ID: 135397
[8/19] Fetching movie ID: 420818...
 Successfully fetched movie ID: 420818
[9/19] Fetching movie ID: 2