In [6]:
# 🎬 Flop Score Analysis: Data Gathering by Title + Year

# Imports
import getpass
import io
import time

import pandas as pd
import requests
from google.colab import files

# Upload your worst_200_movies.csv file
print("⬆️ Please upload your worst_200_movies.csv file")
uploaded = files.upload()
if not uploaded:
    # A cancelled upload returns an empty dict; stop here with a clear message
    # rather than failing later (or silently reading a stale file).
    raise RuntimeError("No file was uploaded; re-run this cell and choose worst_200_movies.csv")

# Load the CSV straight from the bytes that were just uploaded instead of a
# hard-coded path. Colab saves a re-uploaded file under a new name on disk
# (e.g. "worst_200_movies (2).csv"), so reading 'worst_200_movies.csv' would
# silently pick up an older copy of the data.
uploaded_name = next(iter(uploaded))
worst_movies_df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))

# Show columns & sample rows to verify
print("Columns in CSV:", worst_movies_df.columns.tolist())
print(worst_movies_df.head())

# Input your TMDb API key securely (strip stray whitespace from copy/paste)
API_KEY = getpass.getpass("Enter your TMDb API key:").strip()

# Function to search TMDb by title and year, then fetch movie details
def get_tmdb_data_by_title_year(title, year):
    """Search TMDb for a movie by title (and year, when usable) and fetch its details.

    The first hit returned by the TMDb movie search is taken as the match;
    a second request then fetches that movie's detail record.

    Args:
        title: Movie title used as the search query.
        year: Release year used to narrow the search. Ints, floats such as
            ``2017.0`` and numeric strings are accepted. Anything that is not
            a whole number (``None``, NaN, non-numeric text) is ignored and
            the search runs on the title alone.

    Returns:
        A dict with the keys ``title``, ``year``, ``tmdb_id``, ``budget``,
        ``revenue``, ``release_date``, ``popularity``, ``vote_average`` and
        ``vote_count``; or ``None`` when the search has no results or a
        request fails (a message is printed in both cases).
    """
    timeout_seconds = 10  # never let one stalled connection hang the whole run

    # Normalise the year. A column containing missing values is loaded as
    # float, so the year may arrive as 2017.0 rather than 2017.
    try:
        year_value = float(year)
    except (TypeError, ValueError):
        year_value = None

    params = {'api_key': API_KEY, 'query': title}
    if year_value is not None and year_value.is_integer():  # False for NaN/inf
        params['year'] = int(year_value)

    try:
        search_url = 'https://api.themoviedb.org/3/search/movie'
        search_resp = requests.get(search_url, params=params, timeout=timeout_seconds)
        # Surface HTTP errors (bad API key, rate limiting) as errors instead of
        # letting them look like "no match" because the body has no 'results'.
        search_resp.raise_for_status()
        results = search_resp.json().get('results', [])
        if not results:
            print(f"❌ No TMDb match for: {title} ({year})")
            return None

        tmdb_id = results[0]['id']
        movie_url = f'https://api.themoviedb.org/3/movie/{tmdb_id}'
        movie_http = requests.get(
            movie_url, params={'api_key': API_KEY}, timeout=timeout_seconds
        )
        movie_http.raise_for_status()
        movie_resp = movie_http.json()

        return {
            'title': title,
            'year': year,
            'tmdb_id': tmdb_id,
            'budget': movie_resp.get('budget'),
            'revenue': movie_resp.get('revenue'),
            'release_date': movie_resp.get('release_date'),
            'popularity': movie_resp.get('popularity'),
            'vote_average': movie_resp.get('vote_average'),
            'vote_count': movie_resp.get('vote_count')
        }

    except (requests.RequestException, ValueError, KeyError) as e:
        # Network/HTTP failure, undecodable JSON, or a result without an 'id':
        # report it and let the caller continue with the next movie.
        print(f"⚠️ Error fetching data for {title} ({year}): {e}")
        return None

# Loop through movies and gather TMDb data
results = []
total_movies = len(worst_movies_df)
title_year_pairs = zip(worst_movies_df['primaryTitle'], worst_movies_df['startYear'])

# Count processed rows with enumerate (starting at 1) rather than using the
# DataFrame index label, so the progress figure is the real number of movies
# handled and does not depend on the frame having a default 0..n-1 index.
for processed, (title, year) in enumerate(title_year_pairs, start=1):
    result = get_tmdb_data_by_title_year(title, year)
    if result:
        results.append(result)
    time.sleep(0.25)  # Rate limiting

    if processed % 20 == 0:
        print(f"Processed {processed}/{total_movies} movies")

# Save results as CSV (only movies with a TMDb match are included)
tmdb_df = pd.DataFrame(results)
tmdb_df.to_csv('tmdb_movie_data_by_title.csv', index=False)
print(f"Matched {len(results)}/{total_movies} movies on TMDb")
print("✅ Saved TMDb data to 'tmdb_movie_data_by_title.csv'")


⬆️ Please upload your worst_200_movies.csv file


Saving worst_200_movies.csv to worst_200_movies (2).csv
Columns in CSV: ['primaryTitle', 'startYear', 'averageRating', 'numVotes', 'genres']
                           primaryTitle  startYear  averageRating  numVotes  \
0                                  Reis       2017            1.0     74331   
1                          Cumali Ceber       2017            1.0     39587   
2                   A Business Proposal       2025            1.0     22512   
3                            321 Action       2020            1.0     10246   
4  2025 - The World enslaved by a Virus       2021            1.0      2773   

                 genres  
0       Biography,Drama  
1                Comedy  
2  Comedy,Drama,Romance  
3                 Drama  
4      Adventure,Sci-Fi  
Enter your TMDb API key:··········
❌ No TMDb match for: 321 Action (2020)
❌ No TMDb match for: Nyay: The Justice (2021)
❌ No TMDb match for: A Cosmic Adventure on Earth (2002)
❌ No TMDb match for: 15/07: Break of Dawn (2021)
Pro

# 🎬 Flop Score Analysis: Data Gathering

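This notebook looks up each of IMDb's 200 lowest-rated movies on TMDb by title and release year, then saves the returned metadata (budget, revenue, release date, popularity, and vote statistics) to `tmdb_movie_data_by_title.csv`.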
