In [5]:
import pandas as pd
import sqlite3

# Open the SQLite database file; sqlite3 creates it if it does not exist yet.
conn = sqlite3.connect('imdb_filtered.db')

# Table name -> CSV file that backs that table (adjust paths as needed).
file_paths = dict(
    movies='/content/drive/MyDrive/IMDB_Data/filtered/merged_movies.csv',
    principals='/content/drive/MyDrive/IMDB_Data/filtered/principals_filtered.csv',
    names='/content/drive/MyDrive/IMDB_Data/filtered/names_filtered.csv',
)

# Load every CSV into a table of the same name. Any existing table is
# dropped and rewritten, so re-running this cell is safe.
for table_name in file_paths:
    path = file_paths[table_name]
    print(f"Loading {table_name} from {path}")
    df = pd.read_csv(path)
    # index=False: the DataFrame's positional index is not stored as a column.
    df.to_sql(name=table_name, con=conn, index=False, if_exists='replace')

print("✅ All filtered CSVs loaded into the SQLite database.")



def run_query(query, params=None, connection=None):
    """Run a SQL query and return the result set as a DataFrame.

    Args:
        query: SQL text to execute. Prefer placeholders (``?`` or
            ``:name`` with sqlite3) plus ``params`` over building the SQL
            with string formatting.
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query``; forwarded to ``pandas.read_sql``.
        connection: Optional database connection to query. When omitted,
            the module-level ``conn`` is used.

    Returns:
        A ``pandas.DataFrame`` with one row per result row.
    """
    if connection is None:
        # Fall back to the notebook-wide connection so existing
        # ``run_query(sql)`` calls keep working unchanged.
        connection = conn
    return pd.read_sql(query, connection, params=params)


# People credited most often on the 200 lowest-rated movies.
#
# Notes on the SQL:
# * `averageRating IS NOT NULL`: SQLite sorts NULL before every other value
#   in ascending order, so unrated rows would otherwise rank as "worst".
# * `tconst` / name / category tie-breakers make both LIMITs deterministic
#   when ratings, vote counts or appearance counts are tied.
# * Grouping includes `p.nconst` so two different people who share a
#   primaryName are not merged into one row (they appear as separate rows
#   with the same name).
# * COUNT(DISTINCT w.tconst) counts movies rather than joined rows, so a
#   repeated credit row for the same person/movie/category is counted once.
# The result columns are unchanged: primaryName, category, appearances.
query = """
WITH worst_200_movies AS (
    SELECT tconst
    FROM movies
    WHERE averageRating IS NOT NULL
    ORDER BY averageRating ASC, numVotes DESC, tconst ASC
    LIMIT 200
)
SELECT
    n.primaryName,
    p.category,
    COUNT(DISTINCT w.tconst) AS appearances
FROM worst_200_movies w
JOIN principals p ON w.tconst = p.tconst
JOIN names n ON p.nconst = n.nconst
GROUP BY p.nconst, n.primaryName, p.category
HAVING appearances >= 2
ORDER BY appearances DESC, n.primaryName ASC, p.category ASC
LIMIT 50;
"""

df_top_people = run_query(query)
# Bare expression so the notebook renders the DataFrame as the cell output.
df_top_people



Loading movies from /content/drive/MyDrive/IMDB_Data/filtered/merged_movies.csv
Loading principals from /content/drive/MyDrive/IMDB_Data/filtered/principals_filtered.csv
Loading names from /content/drive/MyDrive/IMDB_Data/filtered/names_filtered.csv
✅ All filtered CSVs loaded into the SQLite database.


Unnamed: 0,primaryName,category,appearances
0,David Michael Latt,producer,12
1,Jane Alexander,actress,10
2,Anna Mazzotti,actress,6
3,Gregory Snegoff,actor,6
4,Anthony Cardoza,actor,5
5,Kamil Çetin,director,5
6,Uwe Boll,director,5
7,Uwe Boll,producer,5
8,Uwe Boll,writer,5
9,Anthony Cardoza,producer,4
