In [1]:
from sqlalchemy import create_engine, text
import pandas as pd

In [2]:
engine = create_engine('sqlite:///movies.db')

# Tasks

In [None]:
'''Task 1: Calculate the Average Budget Growth Rate for Each Production Company
Calculate the average budget growth rate for each production company across all movies
 they have produced. Use window functions to determine the budget growth rate and then calculate the average growth rate.'''


pd.read_sql_query('''
WITH company_budgets AS (
    SELECT pc.company_name, m.budget, m.release_date,
           LAG(m.budget) OVER (PARTITION BY pc.company_name ORDER BY m.release_date) AS previous_budget
    FROM movie AS m
    JOIN movie_company AS mc ON m.movie_id = mc.movie_id
    JOIN production_company AS pc ON mc.company_id = pc.company_id
    WHERE m.budget IS NOT NULL
)
SELECT company_name, 
       AVG(CASE WHEN previous_budget IS NOT NULL THEN (budget - previous_budget) / previous_budget END) AS avg_budget_growth_rate
FROM company_budgets
GROUP BY company_name
ORDER BY avg_budget_growth_rate DESC;
                  ''', engine)  

Unnamed: 0,company_name,avg_budget_growth_rate
0,Artists Production Group (APG),1428570.50
1,Homegrown Pictures,466665.00
2,Focus Films,357142.00
3,Muse Productions,250000.50
4,Paramount Animation,104166.25
...,...,...
5012,10th Hole Productions,
5013,101st Street Films,
5014,1019 Entertainment,
5015,10 West Studios,


In [4]:
'''Task 2: Determine the Most Consistently High-Rated Actor
Identify the actor who has appeared in the most movies that are rated above the average 
rating of all movies. Use window functions and CTEs to calculate the average rating and filter the actors based on this criterion.'''

pd.read_sql_query('''
WITH movie_avg_rating AS (
    SELECT AVG(m.vote_average) AS overall_avg_rating FROM movie AS m
),
actor_high_rated_movies AS (
    SELECT p.person_name, COUNT(*) AS high_rated_count
    FROM movie_cast AS mc
    JOIN person AS p ON mc.person_id = p.person_id
    JOIN movie AS m ON mc.movie_id = m.movie_id,
    movie_avg_rating
    WHERE m.vote_average > overall_avg_rating
    GROUP BY p.person_name
)
SELECT person_name, high_rated_count
FROM actor_high_rated_movies
ORDER BY high_rated_count DESC
LIMIT 1;
                    ''', engine)

Unnamed: 0,person_name,high_rated_count
0,Samuel L. Jackson,45


In [7]:
'''Task 3: Calculate the Rolling Average Revenue for Each Genre
Calculate the rolling average revenue for movies within each genre, considering only 
the last three movies released in the genre. Use window functions with the ROWS frame specification to achieve this.'''

pd.read_sql_query('''
WITH last_three_movies AS (
    SELECT g.genre_name, m.revenue, m.release_date,
           ROW_NUMBER() OVER (PARTITION BY g.genre_name ORDER BY m.release_date DESC) AS row_num
    FROM movie AS m
    JOIN movie_genres AS mg ON m.movie_id = mg.movie_id
    JOIN genre AS g ON mg.genre_id = g.genre_id
    WHERE m.revenue IS NOT NULL
)
SELECT genre_name, AVG(revenue) AS rolling_avg_revenue
FROM last_three_movies
WHERE row_num <= 3
GROUP BY genre_name
ORDER BY rolling_avg_revenue DESC;
                    ''', engine)

Unnamed: 0,genre_name,rolling_avg_revenue
0,Animation,455496800.0
1,Action,412495000.0
2,Science Fiction,388490600.0
3,Fantasy,372614300.0
4,Family,339884500.0
5,Adventure,296231800.0
6,Crime,281381400.0
7,Western,230036000.0
8,Thriller,158342900.0
9,Mystery,139536200.0


In [6]:
''' Task 4: Identify the Highest-Grossing Movie Series
Identify the movie series (based on shared keywords) with the highest total revenue. 
Use window functions and CTEs to group movies by their series and calculate the total revenue.'''

pd.read_sql_query('''
WITH series_revenue AS (
    SELECT k.keyword_id, SUM(m.revenue) AS total_revenue
    FROM movie_keywords AS mk
    JOIN movie AS m ON mk.movie_id = m.movie_id
    JOIN keyword AS k ON mk.keyword_id = k.keyword_id
    WHERE m.revenue IS NOT NULL
    GROUP BY k.keyword_id
)
SELECT k.keyword_id, k.keyword_name, total_revenue
FROM series_revenue
JOIN keyword AS k ON series_revenue.keyword_id = k.keyword_id
ORDER BY total_revenue DESC
LIMIT 1;
                    ''', engine)


Unnamed: 0,keyword_id,keyword_name,total_revenue
0,179431,duringcreditsstinger,57827617707
