In [3]:
from sqlalchemy import create_engine, text
import pandas as pd

In [4]:
engine = create_engine('sqlite:///movies.db')

In [7]:
pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", engine)

Unnamed: 0,name
0,country
1,gender
2,genre
3,language
4,language_role
5,department
6,keyword
7,person
8,production_company
9,movie


# Exercise 1: Movie Rankings and Analysis

In [None]:
'''Task 1: Rank Movies by Popularity within Each Genre

Use the RANK() function to rank movies by their popularity within each genre. 
Display the genre name, movie title, and their rank based on popularity.'''

pd.read_sql_query(''' 
                    SELECT g.genre_name, m.title, 
                        RANK() OVER (PARTITION BY g.genre_name ORDER BY m.popularity DESC) AS popularity_rank
                    FROM movie AS m
                    JOIN movie_genres AS mg ON m.movie_id = mg.movie_id
                    JOIN genre AS g ON mg.genre_id = g.genre_id
                    ORDER BY g.genre_name, popularity_rank;               
                  ''', engine)

Unnamed: 0,genre_name,title,popularity_rank
0,Action,Deadpool,1
1,Action,Guardians of the Galaxy,2
2,Action,Mad Max: Fury Road,3
3,Action,Jurassic World,4
4,Action,Pirates of the Caribbean: The Curse of the Bla...,5
...,...,...,...
12155,Western,The Ballad of Gregorio Cortez,78
12156,Western,Western Religion,79
12157,Western,Doc Holliday's Revenge,80
12158,Western,All Hat,81


In [13]:
'''Task 2: Identify the Top 3 Movies by Revenue within Each Production Company

Use the NTILE() function to divide the movies produced by each production company into 
quartiles based on revenue. Display the company name, movie title, revenue, and quartile.'''

pd.read_sql_query(''' 
                    SELECT company_name, title, revenue, revenue_quartile FROM (
                        SELECT pc.company_name, m.title, m.revenue, 
                            NTILE(4) OVER (PARTITION BY pc.company_name ORDER BY m.revenue DESC) AS revenue_quartile
                        FROM movie AS m
                        JOIN movie_company AS mc ON m.movie_id = mc.movie_id
                        JOIN production_company AS pc ON mc.company_id = pc.company_id
                    ) AS ranked_movies
                    WHERE revenue_quartile <= 3
                    ORDER BY company_name, revenue_quartile;             
                  ''', engine)

Unnamed: 0,company_name,title,revenue,revenue_quartile
0,1.85 Films,Rubber,98017,1
1,10 West Studios,Do You Believe?,0,1
2,100 Bares,El secreto de sus ojos,33965843,1
3,100 Bares,Metegol,24000000,2
4,1019 Entertainment,Captive,2801508,1
...,...,...,...,...
11798,uFilm,The Adventurer: The Curse of the Midas Box,6399,3
11799,unafilm,Heli,0,1
11800,verture Films,Mad Money,0,1
11801,warner bross Turkey,The Conjuring 2,320170008,1


In [9]:
'''Task 3: Calculate the Running Total of Movie Budgets for Each Genre

Use the SUM() function with the ROWS frame specification to calculate the running total of movie budgets within each genre. 
Display the genre name, movie title, budget, and running total budget.'''

pd.read_sql_query(''' 
                    SELECT g.genre_name, m.title, m.budget, 
                        SUM(m.budget) OVER (PARTITION BY g.genre_name ORDER BY m.title ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total_budget
                    FROM movie AS m
                    JOIN movie_genres AS mg ON m.movie_id = mg.movie_id
                    JOIN genre AS g ON mg.genre_id = g.genre_id
                    ORDER BY g.genre_name, running_total_budget;             
                  ''', engine)

Unnamed: 0,genre_name,title,budget,running_total_budget
0,Action,10th & Wolf,8000000,8000000
1,Action,12 Rounds,20000000,28000000
2,Action,13 Hours: The Secret Soldiers of Benghazi,50000000,78000000
3,Action,15 Minutes,60000000,138000000
4,Action,16 Blocks,55000000,193000000
...,...,...,...,...
12155,Western,Western Religion,0,1964453601
12156,Western,Wild Wild West,170000000,2134453601
12157,Western,Wyatt Earp,63000000,2197453601
12158,Western,Young Guns,13000000,2210453601


In [12]:
'''Task 4: Identify the Most Recent Movie for Each Genre

Use the FIRST_VALUE() function to find the most recent movie within each genre based on the release date. 
Display the genre name, movie title, and release date'''

pd.read_sql_query(''' 
                    SELECT genre_name, title, release_date
                    FROM (
                        SELECT g.genre_name, m.title, m.release_date,
                            ROW_NUMBER() OVER (PARTITION BY g.genre_name ORDER BY m.release_date DESC) AS row_num
                        FROM movie AS m
                        JOIN movie_genres AS mg ON m.movie_id = mg.movie_id
                        JOIN genre AS g ON mg.genre_id = g.genre_id
                    ) AS ranked_movies
                    WHERE row_num = 1
                    ORDER BY genre_name;           
                  ''', engine)

Unnamed: 0,genre_name,title,release_date
0,Action,Suicide Squad,2016-08-02
1,Adventure,Kicks,2016-09-09
2,Animation,Sausage Party,2016-07-11
3,Comedy,Growing Up Smith,2017-02-03
4,Crime,Suicide Squad,2016-08-02
5,Documentary,"To Be Frank, Sinatra at 100",2015-12-12
6,Drama,Growing Up Smith,2017-02-03
7,Family,Growing Up Smith,2017-02-03
8,Fantasy,Pete's Dragon,2016-08-10
9,Foreign,Burn,2012-11-01


# Exercise 2: Cast and Crew Performance Analysis

In [None]:
'''Task 1: Rank Actors by Their Appearance in Movies

Use the DENSE_RANK() function to rank actors based on the number of movies they have appeared in. 
Display the actor’s name and their rank.'''

pd.read_sql_query('''
SELECT p.person_name, 
       DENSE_RANK() OVER (ORDER BY COUNT(mc.movie_id) DESC) AS appearance_rank
FROM movie_cast AS mc
JOIN person AS p ON mc.person_id = p.person_id
GROUP BY p.person_name
ORDER BY appearance_rank;
''', engine)

Unnamed: 0,person_name,appearance_rank
0,Samuel L. Jackson,1
1,Robert De Niro,2
2,Bruce Willis,3
3,Matt Damon,4
4,Morgan Freeman,5
...,...,...
54196,A. J. Benza,47
54197,A. David Burleigh,47
54198,'Wild Bill' Laczko,47
54199,Larry Mullen Jr.,47


In [15]:
'''Task 2: Identify the Top Director by Average Movie Rating

Use a CTE and the RANK() function to find the director with the highest average movie rating. 
Display the director’s name and their average rating.'''

pd.read_sql_query('''
WITH director_avg_ratings AS (
    SELECT p.person_name, AVG(m.vote_average) AS avg_rating
    FROM movie_crew AS mc
    JOIN person AS p ON mc.person_id = p.person_id
    JOIN movie AS m ON mc.movie_id = m.movie_id
    WHERE mc.job = 'Director'
    GROUP BY p.person_name
)
SELECT person_name, avg_rating
FROM director_avg_ratings
ORDER BY avg_rating DESC
LIMIT 1;
''', engine)

Unnamed: 0,person_name,avg_rating
0,Gary Sinyor,10.0


In [16]:
'''Task 3: Calculate the Cumulative Revenue of Movies Acted by Each Actor

Use the SUM() function to calculate the cumulative revenue of movies acted by each actor. 
Display the actor’s name and the cumulative revenue.'''

pd.read_sql_query('''
SELECT p.person_name, SUM(m.revenue) AS cumulative_revenue
FROM movie_cast AS mc
JOIN person AS p ON mc.person_id = p.person_id
JOIN movie AS m ON mc.movie_id = m.movie_id
GROUP BY p.person_name
ORDER BY cumulative_revenue DESC;
''', engine)

Unnamed: 0,person_name,cumulative_revenue
0,Stan Lee,17364063582
1,Samuel L. Jackson,14806065788
2,Frank Welker,11614837160
3,John Ratzenberger,11038044745
4,Hugo Weaving,10822190781
...,...,...
54196,AJ Meijer,0
54197,AJ Bowen,0
54198,A.J. DeLucia,0
54199,A.D. Johnson,0


In [17]:
'''Task 4: Identify the Director Whose Movies Have the Highest Total Budget

Use a CTE and a window function to find the director whose movies have the highest total budget. 
Display the director’s name and the total budget.'''

pd.read_sql_query('''
WITH director_total_budgets AS (
    SELECT p.person_name, SUM(m.budget) AS total_budget
    FROM movie_crew AS mc
    JOIN person AS p ON mc.person_id = p.person_id
    JOIN movie AS m ON mc.movie_id = m.movie_id
    WHERE mc.job = 'Director'
    GROUP BY p.person_name
)
SELECT person_name, total_budget
FROM director_total_budgets
ORDER BY total_budget DESC
LIMIT 1;
                  ''', engine)


Unnamed: 0,person_name,total_budget
0,Steven Spielberg,1667500000
