In [27]:
import psycopg

In [28]:
%reload_ext sql

In [29]:
%sql postgresql://michellelin@localhost:5432/yelp

In [30]:
%config SqlMagic.displaylimit = 30

# Restaurant Recommendations!

### Top 10 businesses with the most 5-star reviews

In [31]:
%%sql
WITH five_star_reviews AS (
    SELECT 
        business_id, 
        COUNT(review_id) AS five_star_count 
    FROM review 
    WHERE stars = 5 
    GROUP BY business_id
)
SELECT name, five_star_count
FROM five_star_reviews f
JOIN business b
ON f.business_id = b.business_id
ORDER BY five_star_count DESC
LIMIT 10;


name,five_star_count
Sampan,943
Peacemaker Lobster and Crab,450
Pizza Delicious,387
Rocco's Little Chicago,335
Taco Bus,315
Joey K's Restaurant & Bar,315
Poco & Mom's,300
Beau & Mo's Italian Eating House,237
Bourbon Street Blues & Boogie Bar,232
East Bistro - The Mediterranean Eatery,217


###  Top 3 businesses in each state

In [32]:
%%sql
WITH ranked_businesses AS (
    SELECT 
        business_id, 
        name, 
        state, 
        review_count, 
        RANK() OVER (PARTITION BY state ORDER BY review_count DESC) AS rank
    FROM business
)
SELECT *
FROM ranked_businesses
WHERE rank <= 3;


business_id,name,state,review_count,rank
8wjRidyw7pqepAf5MncA-g,Tokiwa Ramen,AB,109,1
WhG8ilHPeqPq9k6vTiiHbg,Five Guys,AB,57,2
qKt9X3OIG4vBnjQ9YokQhg,Red Robin Gourmet Burgers and Brews,AB,56,3
9zlIJ7Q5W4AENjpGgaNSsQ,Rocco's Little Chicago,AZ,613,1
xNd_x23g7wiGAjmm5HXfPA,Poco & Mom's,AZ,500,2
iBnj2wBO5-3moZ7corhp-w,Hotel Congress,AZ,378,3
q-1ICFSqs2MwDGfe5tX-SQ,Yellow Belly,CA,337,1
MAIfD4kJDCydgoD1l2b_vw,Corks n' Crowns,CA,276,2
_W9lITxMHNInaS2Lv2XoUw,Casa Del Mar Inn,CA,194,3
hFMud7XDefdWM1d-p1B2dQ,Bahama Breeze,DE,215,1


### How many users leave reviews for the same business multiple times?

In [33]:
%%sql
WITH repeat_customers AS (
    SELECT 
        user_id, 
        business_id, 
        COUNT(review_id) AS review_count 
    FROM review 
    GROUP BY user_id, business_id 
    HAVING COUNT(review_id) > 1
)
SELECT user_id, rc.review_count, name, state
FROM repeat_customers rc
JOIN business b 
ON rc.business_id = b.business_id
ORDER BY rc.review_count DESC 
LIMIT 10;


user_id,review_count,name,state
zGy0MLbvBurMXTYAlyia5w,6,Discount Tire,AZ
OeMDHEXQcsq0ZGhk9Ta7qg,6,Miller's Ale House,PA
gF9BKX9fuTPQL-EFvSok2Q,6,Bahama Breeze,DE
MjlAbTDND2FOlQI0ySfR2w,5,Jet's Pizza,TN
F-PV6KT9M0yV9as_qQLfIw,5,Smashburger,MO
9E097uOiJYGgbnHpp-eP3w,4,Reno Tahoe Window Cleaning,NV
0YI3p9o-ntRgRaPWpfa22Q,4,Lowe's Home Improvement,IN
CfX4sTIFFNaRchNswqhVfg,4,Pizza Delicious,LA
7Wyq4GK7z_GXqtIzNYwi9g,4,Rocco's Little Chicago,AZ
8PM1bB2ZpytEtpQGo2hGkA,4,McDonald's,MO


###  Top 10 best restaurants for weekends

In [34]:
%%sql
WITH weekend_best AS (
SELECT 
    business_id, 
    COUNT(review_id) AS weekend_reviews
FROM review
WHERE EXTRACT(DOW FROM date) IN (5, 6) 
GROUP BY business_id
)
SELECT wb.*, name, state, city
FROM weekend_best wb
JOIN business b
ON wb.business_id = b.business_id
ORDER BY weekend_reviews DESC 
LIMIT 10
;


business_id,weekend_reviews,name,state,city
kZ1q0K13tFYG_ZJrVvsJHA,577,Sampan,PA,Philadelphia
wVrJOXjRrPKIfZbNnrl3ow,242,Pizza Delicious,LA,New Orleans
BuQGsltd3B4L_HRCP2jpTQ,239,Joey K's Restaurant & Bar,LA,New Orleans
yb2vAoH3E-R11yWmnT570w,239,Taco Bus,FL,Tampa
L-VNs3YquPGKVsXl2Ze-Yg,238,Peacemaker Lobster and Crab,MO,Saint Louis
pG3BViWKznBrEPlbV9wxPA,210,Bar Louie - Nashville,TN,Nashville
9zlIJ7Q5W4AENjpGgaNSsQ,195,Rocco's Little Chicago,AZ,Tucson
xNd_x23g7wiGAjmm5HXfPA,153,Poco & Mom's,AZ,Tucson
wtVJXrChHTjcaisSpzGCVw,137,Beau & Mo's Italian Eating House,FL,Saint Petersburg
HhR2Lw4DxN_XKQZHG_TImQ,117,Gumbo POT,LA,New Orleans


# Yelp Reviews Analysis

### Positive vs. Negative Sentiments Over Time
Are users generally leaving more positive or negative reviews over time?

In [35]:
%%sql
SELECT 
    DATE_TRUNC('year', date) AS review_year,
    SUM(CASE WHEN stars >= 4 THEN 1 ELSE 0 END) AS positive_reviews,
    SUM(CASE WHEN stars <= 2 THEN 1 ELSE 0 END) AS negative_reviews
FROM review
GROUP BY review_year
ORDER BY review_year;

review_year,positive_reviews,negative_reviews
2005-01-01 00:00:00-08:00,8,1
2006-01-01 00:00:00-08:00,18,2
2007-01-01 00:00:00-08:00,83,18
2008-01-01 00:00:00-08:00,219,56
2009-01-01 00:00:00-08:00,310,86
2010-01-01 00:00:00-08:00,653,178
2011-01-01 00:00:00-08:00,1195,340
2012-01-01 00:00:00-08:00,1423,472
2013-01-01 00:00:00-08:00,1810,642
2014-01-01 00:00:00-08:00,2481,900


### Most loved and most critized business
Which businesses receive the most positive and negative reviews?

In [36]:
%%sql
WITH love_critized AS (
SELECT 
    business_id, 
    SUM(CASE WHEN stars >= 4 THEN 1 ELSE 0 END) AS positive_reviews,
    SUM(CASE WHEN stars <= 2 THEN 1 ELSE 0 END) AS negative_reviews
FROM review
GROUP BY business_id
)

SELECT lc.*, name
FROM love_critized lc
JOIN business b
ON lc.business_id = b.business_id
ORDER BY positive_reviews DESC 
LIMIT 10;

business_id,positive_reviews,negative_reviews,name
kZ1q0K13tFYG_ZJrVvsJHA,1629,227,Sampan
L-VNs3YquPGKVsXl2Ze-Yg,652,91,Peacemaker Lobster and Crab
yb2vAoH3E-R11yWmnT570w,596,204,Taco Bus
wVrJOXjRrPKIfZbNnrl3ow,571,65,Pizza Delicious
BuQGsltd3B4L_HRCP2jpTQ,548,102,Joey K's Restaurant & Bar
9zlIJ7Q5W4AENjpGgaNSsQ,506,92,Rocco's Little Chicago
xNd_x23g7wiGAjmm5HXfPA,428,54,Poco & Mom's
O_l8AIlR2RJvNPq4CDMbtQ,348,50,Bourbon Street Blues & Boogie Bar
nF5QByCz21MY0KLtSoB5KA,294,62,Penrose Diner
q-1ICFSqs2MwDGfe5tX-SQ,288,37,Yellow Belly


### Impact of review length on ratings
Do longer reviews tend to be more positive or negative?

In [37]:
%%sql
SELECT 
    stars,
    AVG(LENGTH(text)) AS avg_review_length
FROM review
GROUP BY stars
ORDER BY stars DESC;


stars,avg_review_length
5,474.21232876712327
4,585.3131313131313
3,679.1481733605627
2,718.6142738128298
1,716.2167942962767
