In [5]:
# 📦 Install and import libraries
!pip install ipywidgets

from google.colab import drive
drive.mount('/content/drive')

import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output

# 🎯 Connect to DB and load CSVs
conn = sqlite3.connect('imdb_filtered.db')

df_movies = pd.read_csv('/content/drive/MyDrive/IMDB_Data/filtered/merged_movies.csv')
df_movies.to_sql('movies', conn, if_exists='replace', index=False)

df_principals = pd.read_csv('/content/drive/MyDrive/IMDB_Data/filtered/principals_filtered.csv')
df_principals.to_sql('filtered_principals', conn, if_exists='replace', index=False)

df_names = pd.read_csv('/content/drive/MyDrive/IMDB_Data/filtered/names_filtered.csv')
df_names.to_sql('filtered_names', conn, if_exists='replace', index=False)

# 🧱 Create temporary table for bottom 200 movies
conn.execute("""
CREATE TEMP TABLE worst_200_movies AS
SELECT * FROM movies
ORDER BY averageRating ASC, numVotes DESC
LIMIT 200
""")

# 👥 Frequent contributors
query_frequent = """
SELECT
    n.primaryName,
    COUNT(*) AS appearances
FROM worst_200_movies w
JOIN filtered_principals p ON w.tconst = p.tconst
JOIN filtered_names n ON p.nconst = n.nconst
GROUP BY n.primaryName
ORDER BY appearances DESC
LIMIT 50;
"""
df_frequent = pd.read_sql(query_frequent, conn)

# ⚖️ Weighted contributors by role impact
query_weighted = """
WITH role_weights AS (
  SELECT 'producer' AS role, 4 AS weight UNION ALL
  SELECT 'director', 3 UNION ALL
  SELECT 'writer', 2 UNION ALL
  SELECT 'actor', 1 UNION ALL
  SELECT 'actress', 1
),
merged AS (
  SELECT
    n.primaryName,
    LOWER(p.category) AS role,
    w.numVotes,
    COUNT(*) as appearances,
    rw.weight
  FROM worst_200_movies w
  JOIN filtered_principals p ON w.tconst = p.tconst
  JOIN filtered_names n ON p.nconst = n.nconst
  JOIN role_weights rw ON LOWER(p.category) = rw.role
  GROUP BY n.primaryName, p.category
),
scored AS (
  SELECT
    primaryName,
    SUM(appearances * weight * numVotes) AS weighted_score
  FROM merged
  GROUP BY primaryName
)
SELECT primaryName, weighted_score
FROM scored
ORDER BY weighted_score DESC
LIMIT 50;
"""
df_weighted = pd.read_sql(query_weighted, conn)

# ➖ People with high weight but low visibility
diff_df = df_weighted[~df_weighted.primaryName.isin(df_frequent.primaryName)]
appearances_map = df_frequent.set_index('primaryName')['appearances'].to_dict()
diff_df['appearances'] = diff_df['primaryName'].map(appearances_map).fillna(0).astype(int)

# 📊 Plot weighted vs appearances
diff_df_sorted = diff_df.sort_values('weighted_score')
chart_output = widgets.Output()
with chart_output:
    plt.figure(figsize=(12,8))
    plt.hlines(y=diff_df_sorted['primaryName'], xmin=0, xmax=diff_df_sorted['weighted_score'], color='red', alpha=0.6, linewidth=4)
    plt.plot(diff_df_sorted['weighted_score'], diff_df_sorted['primaryName'], "o", color='crimson', label='Weighted Score', markersize=8)
    plt.plot(diff_df_sorted['appearances'], diff_df_sorted['primaryName'], "o", color='blue', label='Appearances', markersize=5)

    plt.xlabel('Score / Appearances')
    plt.title('Weighted Culprits NOT in Top 50 Frequent People\nRed=Weighted Score, Blue=Appearances')
    plt.legend()
    plt.grid(axis='x', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

# 📋 Output container for project results
table_output = widgets.Output()

# 🔍 Define dropdown callback
def show_culprit_projects(name):
    table_output.clear_output(wait=True)
    with table_output:
        sql = f"""
        WITH role_weights AS (
          SELECT 'producer' AS role, 4 AS weight UNION ALL
          SELECT 'director', 3 UNION ALL
          SELECT 'writer', 2 UNION ALL
          SELECT 'actor', 1 UNION ALL
          SELECT 'actress', 1
        ),
        culprit_movies AS (
          SELECT
            n.primaryName,
            w.tconst,
            t.primaryTitle,
            t.startYear,
            w.averageRating,
            w.numVotes,
            p.category,
            rw.weight,
            rw.weight * w.numVotes AS impact_score
          FROM worst_200_movies w
          JOIN filtered_principals p ON w.tconst = p.tconst
          JOIN filtered_names n ON p.nconst = n.nconst
          JOIN role_weights rw ON LOWER(p.category) = rw.role
          JOIN movies t ON w.tconst = t.tconst
          WHERE n.primaryName = ?
        )
        SELECT
          primaryTitle,
          startYear,
          averageRating,
          numVotes,
          category AS role,
          impact_score
        FROM culprit_movies
        ORDER BY impact_score DESC
        LIMIT 5;
        """
        df_projects = pd.read_sql(sql, conn, params=(name,))
        if df_projects.empty:
            print(f"No data found for {name}.")
        else:
            print(f"Top bad projects for {name}:")
            display(df_projects)

# 🧠 Dropdown setup
selector = widgets.Dropdown(
    options=diff_df['primaryName'].tolist(),
    description='Select Culprit:',
    layout=widgets.Layout(width='70%')
)
selector.observe(lambda change: show_culprit_projects(change['new']), names='value')

# 📌 Display all components
display(chart_output)
display(selector)
display(table_output)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  diff_df['appearances'] = diff_df['primaryName'].map(appearances_map).fillna(0).astype(int)


Output()

Dropdown(description='Select Culprit:', layout=Layout(width='70%'), options=('Jared LeBoff', 'Marc Platt', 'Ma…

Output()

# ⚠️ Interactive Notebook Notice

This notebook contains **interactive widgets** that unfortunately **do not render properly on GitHub** due to notebook widget metadata limitations.

To fully experience the interactive features, please open this notebook directly in Google Colab:

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Data-Matt0/imdb-lowest-rated-analysis/blob/main/notebooks/top_culprits/top_culprits_interactive.ipynb)

---

**How to use:**

- Click the "Open In Colab" badge above.
- Interact with the dropdowns and visuals in Colab.
- Enjoy the full interactive experience!

---

*This message is added here to help users understand why GitHub shows an error and how to proceed.*
