## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data):
    try:
        df = pd.read_csv(input_data)
        engine = sqlite3.connect("movies_data.db")
        df.to_sql('movies',con=engine, index = False)
    except Exception as e:
            print(f"Error: {e}")

data = "/content/movies_dataset.csv"
add_data_to_database(data)

In [3]:
# Loading SQL extension
%load_ext sql

In [4]:
# Connecting to the database
%sql sqlite:///movies_data.db

In [5]:
# Default to duplicated style
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [6]:
%%sql
SELECT * FROM movies;

 * sqlite:///movies_data.db
Done.


rank,title,year,director,genre,imdb_rating,academy_awards,box_office_gross
1,The Shawshank Redemption,1994,Frank Darabont,Drama,9.3,7,58.7
2,The Godfather,1972,Francis Ford Coppola,Crime,9.2,7,245.7
3,The Dark Knight,2008,Christopher Nolan,Action,9.0,8,534.8
4,12 Angry Men,1957,Sidney Lumet,Drama,8.9,3,4.5
5,Schindler's List,1993,Steven Spielberg,Historical Drama,8.9,7,321.2
6,Pulp Fiction,1994,Quentin Tarantino,Crime,8.9,7,213.9
7,The Lord of the Rings: The Return of the King,2003,Peter Jackson,Fantasy,8.9,11,555.8
8,Fight Club,1999,David Fincher,Thriller,8.8,2,109.5
9,The Good the Bad and the Ugly,1966,Sergio Leone,Western,8.8,2,161.0
10,Citizen Kane,1941,Orson Welles,Drama,8.7,9,5.9


## Question 2: **Answer**

In [7]:
%%sql
-- CTE for calculating each director average
WITH director_avg_rating AS (
    SELECT
        director,
        ROUND(AVG(imdb_rating), 2) AS avg_imdb_rating
    FROM movies
    GROUP BY director
)
-- main query to return directors and average ratings
SELECT
    director,
    avg_imdb_rating
FROM director_avg_rating
WHERE avg_imdb_rating = (SELECT MAX(avg_imdb_rating) FROM director_avg_rating)
      OR avg_imdb_rating = (SELECT MIN(avg_imdb_rating) FROM director_avg_rating);

 * sqlite:///movies_data.db
Done.


director,avg_imdb_rating
Frank Darabont,9.3
Terry Gilliam,8.1


## Question 3: **Answer**

In [8]:
%%sql
SELECT
    title,
    year,
    imdb_rating,
    box_office_gross
FROM movies
WHERE imdb_rating > 8.5
    AND box_office_gross < 5
ORDER BY imdb_rating DESC, box_office_gross ASC;

 * sqlite:///movies_data.db
Done.


title,year,imdb_rating,box_office_gross
12 Angry Men,1957,8.9,4.5
Seven Samurai,1954,8.6,2.6
Casablanca,1942,8.6,3.4


## Question 4: **Answer**

In [9]:
%%sql
-- Ranking the top three movies
WITH ranked_movies AS (
    SELECT
        title,
        year AS year_of_release,
        director,
        box_office_gross,
        academy_awards,
        ROUND(box_office_gross * 100.0 / (SELECT SUM(box_office_gross)
                                          FROM movies), 2) || '%'
        AS percentage_of_total_gross,
        DENSE_RANK() OVER (ORDER BY academy_awards DESC) AS rank
    FROM movies
)
-- Returning the top three ranked movies
SELECT
    title,
    year_of_release,
    director,
    printf('%20s', box_office_gross) AS box_office_gross,
    academy_awards,
    percentage_of_total_gross
FROM ranked_movies
WHERE rank <= 3  -- Include all movies ranked in the top 3 (handling ties)
ORDER BY rank, box_office_gross DESC;

 * sqlite:///movies_data.db
Done.


title,year_of_release,director,box_office_gross,academy_awards,percentage_of_total_gross
The Lord of the Rings: The Return of the King,2003,Peter Jackson,555.8,11,13.62%
Citizen Kane,1941,Orson Welles,5.9,9,0.14%
The Dark Knight,2008,Christopher Nolan,534.8,8,13.1%


## Question 5: **Answer**

In [10]:
%%sql
WITH min_max_awards AS (
    SELECT
        MIN(academy_awards) AS min_awards,
        MAX(academy_awards) AS max_awards
    FROM movies
),
-- Average box office gross for movies with min Academy Awards
mini_gross_awards AS (
    SELECT
        ROUND(AVG(box_office_gross),2) AS avg_gross_min_awards,
        min_awards AS min_academy_awards_count
    FROM movies, min_max_awards
    WHERE academy_awards = min_awards
),
-- Average box office gross for movies with max Academy Awards
max_gross_awards AS (
    SELECT
        ROUND(AVG(box_office_gross), 2) AS avg_gross_max_awards,
        max_awards AS max_academy_awards_count
    FROM movies, min_max_awards
    WHERE academy_awards = max_awards
)
-- Combine the results from both CTEs
SELECT *
FROM mini_gross_awards
CROSS JOIN max_gross_awards;

 * sqlite:///movies_data.db
Done.


avg_gross_min_awards,min_academy_awards_count,avg_gross_max_awards,max_academy_awards_count
4.65,0,555.8,11


#### Another method

In [11]:
%%sql
SELECT
    -- Subquery to calculate avg gross for zero awards movies
    (SELECT
         ROUND(AVG(box_office_gross), 2)
     FROM movies
     WHERE academy_awards = (SELECT MIN(academy_awards) FROM movies))
                            AS avg_gross_min_awards,

    -- Subquery to retain minimum academy awards
    (SELECT
         academy_awards
     FROM movies
     WHERE academy_awards = (SELECT MIN(academy_awards) FROM movies))
                            AS min_academy_awards_count,

     -- Subquery to calculate avg gross for movies with max awards
    (SELECT
         ROUND(AVG(box_office_gross),2)
     FROM  movies
     WHERE academy_awards = (SELECT MAX(academy_awards) FROM movies))
     AS avg_gross_max_awards,

     -- Subquery to return max awards
    (SELECT
         MAX(academy_awards)
    FROM movies) AS max_academy_awards;

 * sqlite:///movies_data.db
Done.


avg_gross_min_awards,min_academy_awards_count,avg_gross_max_awards,max_academy_awards
4.65,0,555.8,11


## Question 6: **Answer**

In [12]:
%%sql
SELECT
-- CTE for calculating avg for movies in 50s
    (SELECT
         AVG(imdb_rating) AS avg_imdb_50s
    FROM movies
    WHERE year LIKE '%195%'
    GROUP BY title) AS avg_imdb_movies_of_the_50s,

-- CTE for calculating avg for movies in 60s
    (SELECT
         AVG(imdb_rating) AS avg_imdb_60s
    FROM movies
WHERE year LIKE '%196%'
GROUP BY title) AS avg_imdb_movies_of_the_60s;

 * sqlite:///movies_data.db
Done.


avg_imdb_movies_of_the_50s,avg_imdb_movies_of_the_60s
8.9,8.3


In [13]:
%%sql
SELECT
-- CTE for calculating avg for movies in 50s
    (SELECT
         AVG(imdb_rating) AS avg_imdb_50s
    FROM movies
    WHERE year BETWEEN 1950 AND 1959
    GROUP BY title) AS avg_imdb_movies_of_the_50s,

-- CTE for calculating avg for movies in 60s
    (SELECT
         AVG(imdb_rating) AS avg_imdb_60s
    FROM movies
    WHERE year BETWEEN 1960 AND 1969
GROUP BY title) AS avg_imdb_movies_of_the_60s;

 * sqlite:///movies_data.db
Done.


avg_imdb_movies_of_the_50s,avg_imdb_movies_of_the_60s
8.9,8.3
