# In [1]:
import concurrent.futures
from datetime import datetime, timedelta
import requests
import csv
#Fetches important details like budget and revenue
def get_movie_details(movie_id, api_key, timeout=10):
    """Fetch the TMDB details record for a single movie.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URL.
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` for any other status code or when the request itself
        fails (connection error, timeout, ...).
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}"
    params = {"api_key": api_key}
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # Treat transport failures like a non-200 reply: best-effort skip.
        return None
    if response.status_code != 200:
        return None
    return response.json()
#Fetch keywords for the movie
def get_movie_keywords(movie_id, api_key, timeout=10):
    """Fetch the keyword names attached to a single movie on TMDB.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URL.
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``name`` of every entry under the response's
        ``keywords`` key. The list is empty when that key is absent, when
        the status code is not 200, or when the request itself fails.
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"
    params = {"api_key": api_key}
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # Treat transport failures like a non-200 reply: best-effort skip.
        return []
    if response.status_code != 200:
        return []
    return [keyword['name'] for keyword in response.json().get('keywords', [])]
# Fetch the cast for the movie
def get_movie_cast(movie_id, api_key, timeout=10):
    """Fetch the cast member names of a single movie from TMDB.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URL.
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``name`` of every entry under the ``cast`` key of
        the credits response. The list is empty when that key is absent,
        when the status code is not 200, or when the request itself fails.
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits"
    params = {"api_key": api_key}
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        # Treat transport failures like a non-200 reply: best-effort skip.
        return []
    if response.status_code != 200:
        return []
    return [cast['name'] for cast in response.json().get('cast', [])]
# Fetch movie details, keywords, and cast concurrently for more efficient fetching
def get_movie_data(movie_id, api_key):
    """Run the details, keywords and cast lookups for one movie in parallel.

    Each lookup is submitted to a short-lived thread pool. When the details
    lookup yields a truthy record, the keyword and cast lists are stored on
    it under ``'keywords'`` and ``'cast'``.

    Returns:
        The details record (with the two extra keys) or, when the details
        lookup produced a falsy value, that value unchanged.
    """
    fetchers = (get_movie_details, get_movie_keywords, get_movie_cast)
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(fetch, movie_id, api_key) for fetch in fetchers]

    # Leaving the ``with`` block waits for all three tasks; collect in order.
    movie, keyword_names, cast_names = (task.result() for task in pending)

    if not movie:
        return movie

    movie['keywords'] = keyword_names
    movie['cast'] = cast_names
    return movie
# Fetch movies released within the given date range from TMDB
def get_movies(api_key, start_date, end_date, timeout=10):
    """Fetch movies released within a specified date range from TMDB.

    Walks the pages of the ``discover/movie`` endpoint and, for every movie
    listed on a page, calls ``get_movie_data`` in a thread pool to collect
    its full record.

    Args:
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        start_date: Lower bound, sent as ``primary_release_date.gte``.
        end_date: Upper bound, sent as ``primary_release_date.lte``.
        timeout: Seconds to wait for each discover page before giving up.

    Returns:
        A list of the truthy records returned by ``get_movie_data``.
        Paging stops at the first page that cannot be fetched, so the list
        then holds only the movies gathered up to that point.
    """
    movies = []
    url = "https://api.themoviedb.org/3/discover/movie"
    page = 1
    total_pages = 1  # placeholder; replaced after each successful page

    while page <= total_pages:
        params = {
            "api_key": api_key,
            "primary_release_date.gte": start_date,
            "primary_release_date.lte": end_date,
            "page": page,
        }
        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as error:
            # Same outcome as a non-200 reply: report it and stop paging.
            print(f"Error: Unable to fetch data for page {page} ({error})")
            break
        if response.status_code != 200:
            print(f"Error: Unable to fetch data for page {page}")
            break

        data = response.json()
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_to_movie_id = {
                executor.submit(get_movie_data, movie['id'], api_key): movie['id']
                for movie in data['results']
            }
            for future in concurrent.futures.as_completed(future_to_movie_id):
                try:
                    movie_data = future.result()
                except requests.RequestException as error:
                    # One failed movie must not abort the whole crawl.
                    movie_id = future_to_movie_id[future]
                    print(f"Error: Unable to fetch data for movie {movie_id} ({error})")
                    continue
                if movie_data:
                    movies.append(movie_data)
        total_pages = data['total_pages']
        page += 1

    return movies
#Save the list of movies into a csv file
def save_movies_to_csv(movies, filename):
    """Save a list of movie dicts to a CSV file.

    The header is the union of the keys of all movies, in first-seen order,
    so records whose key sets differ are written instead of making
    ``csv.DictWriter`` raise ``ValueError``. A field that a record lacks is
    written as an empty cell.

    Args:
        movies: List of dicts, one per movie. When empty, nothing is
            written and no file is created.
        filename: Path of the CSV file to create or overwrite.
    """
    if not movies:
        return

    # dict.fromkeys de-duplicates while keeping first-seen column order.
    fieldnames = list(dict.fromkeys(key for movie in movies for key in movie))

    # newline='' is what the csv module expects; utf-8-sig prepends a BOM.
    with open(filename, 'w', newline='', encoding='utf-8-sig') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(movies)

# TMDB API key
api_key = ""

# Define date ranges and data update intervals
year = 2023
initial_start_date = datetime(year, 1, 1)
initial_end_date = initial_start_date + timedelta(days=59)  # Initial end date is 59 days later
date_update_interval = timedelta(days=60)
# Last day of the target year; no query window may extend past it.
year_end_date = datetime(year, 12, 31)

# Create an empty list to store all movies
all_movies = []

# Walk the year in consecutive, non-overlapping windows of up to 60 days.
while initial_start_date <= year_end_date:
    # Clamp the window to the year: for 2023 the last window starts on
    # Dec 27 and would otherwise run until Feb 24 of the following year,
    # pulling next year's releases into this year's file.
    window_end = min(initial_start_date + timedelta(days=59), year_end_date)
    end_date = window_end.strftime("%Y-%m-%d")
    start_date = initial_start_date.strftime("%Y-%m-%d")

    movies = get_movies(api_key, start_date, end_date)
    print(f"Movies found from {start_date} to {end_date}: {len(movies)}")

    all_movies.extend(movies)

    # Update start date for the next interval
    initial_start_date += date_update_interval

# Save all movies to CSV
csv_filename = 'movies2023.csv'
save_movies_to_csv(all_movies, csv_filename)
print(f"Total movies saved to {csv_filename}: {len(all_movies)}")


# KeyboardInterrupt: (notebook output captured with the cell)