# In [None]:
import sqlite3
import pandas as pd
import os

# Connect to the SQLite database.
#
# sqlite3.connect() creates a new, empty database file when the path does not
# exist, which would only surface later as "no such table" errors from the
# queries. Check for the file first so a wrong path or working directory fails
# immediately with a clear message and no stray empty file is left behind.
DB_PATH = "movie_data.db"
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError("SQLite database not found: " + DB_PATH)
conn = sqlite3.connect(DB_PATH)
print("Connected to database:", DB_PATH)


# Query 1: Movie with the highest average rating
#
# Rows are ranked by the unrounded average so that two movies whose averages
# merely round to the same value are still ordered correctly. SQLite does not
# define the order of rows that compare equal in ORDER BY, so ties are broken
# explicitly (more ratings first, then the lowest movieId) to make the single
# row kept by LIMIT 1 deterministic.

query1 = """
SELECT
    m.movieId,
    m.title AS title,
    ROUND(AVG(r.rating), 2) AS avg_rating,
    COUNT(r.rating) AS rating_count
FROM movies m
JOIN ratings r ON m.movieId = r.movieId
GROUP BY m.movieId, m.title
ORDER BY AVG(r.rating) DESC, rating_count DESC, m.movieId ASC
LIMIT 1;
"""

print("\nQuery 1: Movie with the highest average rating")
df1 = pd.read_sql_query(query1, conn)
print(df1)


# Query 2: Top 5 genres with the highest average rating
#
# movies.genres holds a '|'-separated list (e.g. 'Action|Comedy'). It is turned
# into a JSON array of strings and expanded with json_each so that each genre
# becomes its own row. The array text must start with '["' and end with '"]';
# without those outer quotes the result (e.g. [Action","Comedy]) is not valid
# JSON and json_each rejects it.
#
# The CTE yields one row per (movie, genre) with that movie's unrounded average
# rating; rounding happens only once, in the final SELECT, so the genre average
# is not computed from already-rounded values.

query2 = """
WITH split_genres AS (
    SELECT
        m.movieId,
        trim(json_each.value) AS genre,
        AVG(r.rating) AS avg_rating
    FROM movies m
    JOIN ratings r ON m.movieId = r.movieId,
         json_each('["' || replace(m.genres, '|', '","') || '"]')
    GROUP BY m.movieId, genre
)
SELECT
    genre,
    ROUND(AVG(avg_rating), 2) AS avg_rating,
    COUNT(*) AS movie_count
FROM split_genres
GROUP BY genre
ORDER BY avg_rating DESC
LIMIT 5;
"""

print("\nQuery 2: Top 5 genres with the highest average rating")
df2 = pd.read_sql_query(query2, conn)
print(df2)


# Query 3: Director with the most movies
#
# Groups the movies table by its Director column, ignoring NULL and empty
# values, and keeps the single director with the largest movie count.

query3 = """
SELECT 
    Director,
    COUNT(*) AS movie_count
FROM movies
WHERE Director IS NOT NULL AND Director != ''
GROUP BY Director
ORDER BY movie_count DESC
LIMIT 1;
"""

print("\nQuery 3: Director with the most movies")
# Run the statement on the shared connection and show the resulting frame.
df3 = pd.read_sql_query(sql=query3, con=conn)
print(df3)


# Query 4: Average rating of movies released each year
#
# avg_rating is taken over every individual rating of the movies released in
# that year. The join produces one row per rating, so movie_count must count
# distinct movieIds; counting the joined rows would report the number of
# ratings under a column named movie_count.

query4 = """
SELECT
    m.year,
    ROUND(AVG(r.rating), 2) AS avg_rating,
    COUNT(DISTINCT m.movieId) AS movie_count
FROM movies m
JOIN ratings r ON m.movieId = r.movieId
WHERE m.year IS NOT NULL
GROUP BY m.year
ORDER BY m.year;
"""

print("\nQuery 4: Average rating of movies released each year")
df4 = pd.read_sql_query(query4, conn)
# Only the first ten rows (earliest years) are printed to keep the output short.
print(df4.head(10))

# Close the connection
# Releases the SQLite connection opened at the top of the script, then prints
# a final status line.
# NOTE(review): this is only reached when every query above succeeds; an
# exception raised earlier in the script skips the explicit close.
conn.close()
print("\nAll queries executed successfully.")
