In [2]:
import psycopg

In [3]:
# Load (or reload, if already loaded) the `sql` IPython extension; the
# %sql / %%sql magics used in the rest of the notebook come from it.
%reload_ext sql

In [4]:
# Connect to the `yelp` database on the local PostgreSQL server
# (localhost:5432) as user `michellelin`; the URL carries no password.
# NOTE(review): the user name and host are hard-coded for one machine —
# consider building this URL from an environment variable before sharing.
%sql postgresql://michellelin@localhost:5432/yelp

In [5]:
# Set SqlMagic's `displaylimit` option to 30 (presumably the maximum number
# of result rows rendered per query — confirm against the installed magic).
%config SqlMagic.displaylimit = 30

# Views
### What are the top 5 cities with the most 5-star businesses?

### Without any views

In [10]:
%%sql
-- Baseline: query the business table directly, with no view in between.
-- Keep only 5-star rows, count them per (city, state), and return the five
-- largest groups. Ties on five_star_count get no further ordering, so tied
-- rows can come back in any order.
SELECT
    city,
    state,
    COUNT(*) AS five_star_count
FROM
    business
WHERE
    stars = 5
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

city,state,five_star_count
Santa Barbara,CA,9
Reno,NV,7
Tucson,AZ,6
Tampa,FL,5
Philadelphia,PA,5


In [11]:
%%sql
-- Plan and measured run time for the baseline query against the business
-- table (no view involved); used as the reference point for the variants below.
EXPLAIN ANALYZE
SELECT
    city,
    state,
    COUNT(*) AS five_star_count
FROM
    business
WHERE
    stars = 5
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

QUERY PLAN
Limit (cost=102.70..102.71 rows=5 width=21) (actual time=8.379..8.381 rows=5 loops=1)
-> Sort (cost=102.70..102.92 rows=90 width=21) (actual time=8.378..8.379 rows=5 loops=1)
Sort Key: (count(*)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> HashAggregate (cost=100.30..101.20 rows=90 width=21) (actual time=8.323..8.337 rows=56 loops=1)
"Group Key: city, state"
Batches: 1 Memory Usage: 24kB
-> Seq Scan on business (cost=0.00..99.50 rows=107 width=13) (actual time=0.058..8.162 rows=107 loops=1)
Filter: (stars = '5'::numeric)
Rows Removed by Filter: 893


### Optimization with Materialized View

In [19]:
%%sql
-- Store every 5-star business in a materialized view.
-- Dropping it first keeps the cell re-runnable and rebuilds the stored rows
-- from the current contents of the business table.
DROP MATERIALIZED VIEW IF EXISTS five_stars;

CREATE MATERIALIZED VIEW five_stars AS
    SELECT *
    FROM business
    WHERE stars = 5;

-- Five largest (city, state) groups by number of 5-star businesses, read
-- from the materialized view. state is part of the grouping key but is not
-- projected, so each row shows only the city name.
SELECT
    city,
    COUNT(*) AS five_star_count
FROM
    five_stars
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

city,five_star_count
Santa Barbara,9
Reno,7
Tucson,6
Tampa,5
Philadelphia,5


In [18]:
%%sql
-- Rebuild the materialized view of 5-star businesses so this cell can be
-- run on its own.
DROP MATERIALIZED VIEW IF EXISTS five_stars;

CREATE MATERIALIZED VIEW five_stars AS
    SELECT *
    FROM business
    WHERE stars = 5;

-- Plan and measured run time for the top-5 query when it reads from the
-- materialized view. Only this SELECT is profiled; the time spent building
-- the view above is not part of the reported figures.
EXPLAIN ANALYZE
SELECT
    city,
    COUNT(*) AS five_star_count
FROM
    five_stars
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

QUERY PLAN
Limit (cost=17.94..17.95 rows=5 width=52) (actual time=0.183..0.185 rows=5 loops=1)
-> Sort (cost=17.94..18.39 rows=180 width=52) (actual time=0.182..0.183 rows=5 loops=1)
Sort Key: (count(*)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> HashAggregate (cost=13.15..14.95 rows=180 width=52) (actual time=0.137..0.151 rows=56 loops=1)
"Group Key: city, state"
Batches: 1 Memory Usage: 40kB
-> Seq Scan on five_stars (cost=0.00..11.80 rows=180 width=44) (actual time=0.007..0.038 rows=107 loops=1)
Planning Time: 0.280 ms
Execution Time: 0.225 ms


### Optimization with Virtual View

In [23]:
%%sql
-- Define a plain (non-materialized) view that names the 5-star filter over
-- business. Dropping it first keeps the cell re-runnable.
DROP VIEW IF EXISTS five_stars_view;

CREATE VIEW five_stars_view AS
    SELECT *
    FROM business
    WHERE stars = 5;

-- Five largest (city, state) groups by number of 5-star businesses, read
-- through the view. state is part of the grouping key but is not projected,
-- so each row shows only the city name.
SELECT
    city,
    COUNT(*) AS five_star_count
FROM
    five_stars_view
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

city,five_star_count
Santa Barbara,9
Reno,7
Tucson,6
Tampa,5
Philadelphia,5


In [24]:
%%sql
-- Recreate the plain (non-materialized) view of 5-star businesses so this
-- cell can be run on its own. The view only names the query over business.
-- The identifier is spelled in lower case to match the CREATE VIEW below.
DROP VIEW IF EXISTS five_stars_view;
CREATE VIEW five_stars_view AS
SELECT *
FROM business
WHERE stars = 5;

-- Plan and measured run time for the top-5 query when it reads through the
-- view. Grouping is on (city, state) although only city is projected.
EXPLAIN ANALYZE
SELECT city, COUNT(*) AS five_star_count
FROM five_stars_view
GROUP BY city, state
ORDER BY five_star_count DESC
LIMIT 5;

QUERY PLAN
Limit (cost=102.70..102.71 rows=5 width=21) (actual time=0.718..0.719 rows=5 loops=1)
-> Sort (cost=102.70..102.92 rows=90 width=21) (actual time=0.716..0.717 rows=5 loops=1)
Sort Key: (count(*)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> HashAggregate (cost=100.30..101.20 rows=90 width=21) (actual time=0.679..0.690 rows=56 loops=1)
"Group Key: business.city, business.state"
Batches: 1 Memory Usage: 24kB
-> Seq Scan on business (cost=0.00..99.50 rows=107 width=13) (actual time=0.016..0.622 rows=107 loops=1)
Filter: (stars = '5'::numeric)
Rows Removed by Filter: 893


### Optimization with CTE

In [25]:
%%sql
-- Same question, with the 5-star filter factored into a common table
-- expression instead of a view.
WITH five_stars_cte AS (
    SELECT *
    FROM business
    WHERE stars = 5
)
-- Five largest (city, state) groups; state is grouped on but not projected.
SELECT
    city,
    COUNT(*) AS five_star_count
FROM
    five_stars_cte
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

city,five_star_count
Santa Barbara,9
Reno,7
Tucson,6
Tampa,5
Philadelphia,5


In [27]:
%%sql
-- Plan and measured run time for the CTE formulation of the top-5 query.
EXPLAIN ANALYZE
WITH five_stars_cte AS (
    SELECT *
    FROM business
    WHERE stars = 5
)
SELECT
    city,
    COUNT(*) AS five_star_count
FROM
    five_stars_cte
GROUP BY
    city,
    state
ORDER BY
    five_star_count DESC
LIMIT 5;

QUERY PLAN
Limit (cost=102.70..102.71 rows=5 width=21) (actual time=8.916..8.918 rows=5 loops=1)
-> Sort (cost=102.70..102.92 rows=90 width=21) (actual time=8.914..8.916 rows=5 loops=1)
Sort Key: (count(*)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> HashAggregate (cost=100.30..101.20 rows=90 width=21) (actual time=8.865..8.878 rows=56 loops=1)
"Group Key: business.city, business.state"
Batches: 1 Memory Usage: 24kB
-> Seq Scan on business (cost=0.00..99.50 rows=107 width=13) (actual time=0.033..8.789 rows=107 loops=1)
Filter: (stars = '5'::numeric)
Rows Removed by Filter: 893
