In [3]:
# 📌 Mount Google Drive
# Makes the Drive contents available under the mount point so the database
# file referenced later in this cell can be opened from the Colab runtime.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# 📌 Import libraries
import os
import sqlite3

import pandas as pd

# 📌 Connect to filtered IMDb SQLite database
db_path = '/content/drive/MyDrive/IMDB_Data/imdb_filtered.db'

# sqlite3.connect() silently creates a brand-new, empty database when the
# file does not exist, which would only surface later as a confusing
# "no such table" error. Fail fast with a clear message instead.
if not os.path.isfile(db_path):
    raise FileNotFoundError(
        f"SQLite database not found at {db_path!r}; "
        "check that Google Drive is mounted and the path is correct."
    )

# Module-level connection shared by every query in this cell.
conn = sqlite3.connect(db_path)

# 📌 Helper function to run SQL
def run_query(query):
    """Execute *query* on the module-level ``conn`` and return the rows.

    Args:
        query: SQL text to execute.

    Returns:
        Whatever ``pd.read_sql`` produces for the query (a pandas DataFrame
        of the result set).
    """
    result_frame = pd.read_sql(sql=query, con=conn)
    return result_frame

# 📌 Step 1: Get the 200 worst-rated movies (1950 onward, 1000+ votes)
# Rows are ordered by lowest rating first, then by most votes. tconst is
# added as a final tie-breaker so that movies tied on both rating and vote
# count are cut off by LIMIT the same way on every run
# (presumably tconst is unique per movie -- confirm against the schema).
query_worst_movies = """
SELECT tconst, primaryTitle, startYear, averageRating, numVotes, genres
FROM movies
WHERE startYear >= 1950 AND numVotes >= 1000
ORDER BY averageRating ASC, numVotes DESC, tconst ASC
LIMIT 200;
"""

df_worst_movies = run_query(query_worst_movies)

# Save results: the rows as CSV, plus the exact SQL text that produced them.
df_worst_movies.to_csv('/content/worst_200_movies.csv', index=False)
# Explicit encoding so the saved file does not depend on the platform default.
with open('/content/worst_200_movies.sql', 'w', encoding='utf-8') as f:
    f.write(query_worst_movies)

# 📌 Step 2: Find frequent people involved in those worst movies
# The subquery re-selects the 200 worst movies (same filter and ordering as
# Step 1, with tconst as a deterministic tie-breaker), then joins them to
# their principals and the principals' names.
#
# Counting rules:
#   * Grouping includes p.nconst, so two different people who happen to
#     share a primaryName are reported as separate rows, not merged.
#   * COUNT(DISTINCT w.tconst) counts movies, not principal rows, so a person
#     credited more than once in the same category on one title is counted
#     once for that title.
#   * Only person/category pairs appearing in at least 2 of the movies are
#     kept; the top 50 are returned, with name/category/nconst as tie-breakers
#     so the LIMIT cut-off is reproducible.
query_frequent_people = """
SELECT
    n.primaryName,
    p.category,
    COUNT(DISTINCT w.tconst) AS appearances
FROM (SELECT tconst FROM movies
      WHERE startYear >= 1950 AND numVotes >= 1000
      ORDER BY averageRating ASC, numVotes DESC, tconst ASC
      LIMIT 200) w
JOIN principals p ON w.tconst = p.tconst
JOIN names n ON p.nconst = n.nconst
GROUP BY p.nconst, n.primaryName, p.category
HAVING COUNT(DISTINCT w.tconst) >= 2
ORDER BY appearances DESC, n.primaryName ASC, p.category ASC, p.nconst ASC
LIMIT 50;
"""

df_frequent_people = run_query(query_frequent_people)

# Save results: the rows as CSV, plus the exact SQL text that produced them.
df_frequent_people.to_csv('/content/top_50_frequent_people.csv', index=False)
# Explicit encoding so the saved file does not depend on the platform default.
with open('/content/top_50_frequent_people.sql', 'w', encoding='utf-8') as f:
    f.write(query_frequent_people)

# 📌 Preview output
# Last expression of the cell, so the notebook renders the first rows.
df_frequent_people.head()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,primaryName,category,appearances
0,David Michael Latt,producer,12
1,Jane Alexander,actress,10
2,Anna Mazzotti,actress,6
3,Gregory Snegoff,actor,6
4,Anthony Cardoza,actor,5
