In [1]:
import psycopg

In [2]:
%reload_ext sql

In [3]:
%sql postgresql://michellelin@localhost:5432/yelp

In [4]:
%config SqlMagic.displaylimit = 30

# Restaurant Recommendations!

### Top 10 businesses with the most 5-star reviews

In [5]:
%%sql
WITH five_star_reviews AS (
    SELECT 
        business_id, 
        COUNT(review_id) AS five_star_count 
    FROM review 
    WHERE stars = 5 
    GROUP BY business_id
)
SELECT name, five_star_count
FROM five_star_reviews f
JOIN business b
ON f.business_id = b.business_id
ORDER BY five_star_count DESC
LIMIT 10;


name,five_star_count
Chef Ron's Gumbo Stop,944
The Black Sheep,693
Tokyo Sushi,468
Daisy Dukes - French Quarter,456
The Company Burger,339
Poke King,294
Spice 28,290
Pepperfire Hot Chicken,258
Pad Thai,246
Slice Pizzeria,239


###  Top 3 businesses in each state

In [17]:
%%sql
WITH ranked_businesses AS (
    SELECT 
        business_id, 
        name, 
        state, 
        review_count, 
        RANK() OVER (PARTITION BY state ORDER BY review_count DESC) AS rank
    FROM business
)
SELECT *
FROM ranked_businesses
WHERE rank <= 3;


business_id,name,state,review_count,rank
O4grjToHwGSIlbSHjQ1dQQ,Under the High Wheel,AB,122,1
lZJ7UL3SaqgPsItQnnykKA,Café Linnea,AB,103,2
SVFt22qfFNML2lOl_7g5WA,I Love Sushi,AB,70,3
pnZQOGEV-Ru6vHIYaUuQBw,Miss Saigon,AZ,237,1
lqSJkyNSE1yPeux4PoR-pg,Arizona Aquatic Gardens,AZ,206,2
juL8ovMlnjkXNACxZ8HLAQ,Sam Levitz Furniture,AZ,186,3
8Vo6LN9gqULhXzoxj5k6kQ,The Black Sheep,CA,983,1
yEmEDomLUofVtbZoUeFV5g,The Daily Grind Coffee & Tea Station,CA,312,2
6E9KjyDAdL_vDQCWwhj_KA,Crushcakes & Cafe,CA,264,3
bbm5_njEZyw68E7UUY0xgQ,Red Robin Gourmet Burgers and Brews,DE,86,1


### How many users leave reviews for the same business multiple times?

In [23]:
%%sql
WITH repeat_customers AS (
    SELECT 
        user_id, 
        business_id, 
        COUNT(review_id) AS review_count 
    FROM review 
    GROUP BY user_id, business_id 
    HAVING COUNT(review_id) > 1
)
SELECT user_id, rc.review_count, name, state
FROM repeat_customers rc
JOIN business b 
ON rc.business_id = b.business_id
ORDER BY rc.review_count DESC 
LIMIT 10;


user_id,review_count,name,state
o14c28vxH5AomMc8rPIgnA,16,The Cat Practice,LA
R7NM7vIyUfSTXvMsw7jNTA,7,Bach's Greenhouse Cactus Nursery,AZ
pO8_YbBpkZ3F1WnC-nsWWw,6,Napoleon Blonde,CA
Yj4SpqG0OJxbg8L1Qsrlsw,6,Yellowbelly,MO
B1OVDsstzC_RaESmtd1oWQ,6,Tokyo Sushi,NV
Yj4SpqG0OJxbg8L1Qsrlsw,6,Dressel's Public House,MO
UMh9KhqlScXlkuYq8HQT4Q,6,Chopstick House,TN
daBNJmXVo5LwmL5zTqf6Mw,5,Burger Monger,FL
GVtX02uGPXPV4H-X5gyFkg,5,Carmel Kitchen & Wine Bar,FL
dgGUKdOh2a01OwLw1qgfDw,5,Opry Mills,TN


###  Top 10 best restaurants for weekends

In [15]:
%%sql
WITH weekend_best AS (
SELECT 
    business_id, 
    COUNT(review_id) AS weekend_reviews
FROM review
WHERE EXTRACT(DOW FROM date) IN (5, 6) 
GROUP BY business_id
)
SELECT wb.*, name, state, city
FROM weekend_best wb
JOIN business b
ON wb.business_id = b.business_id
ORDER BY weekend_reviews DESC 
LIMIT 10
;


business_id,weekend_reviews,name,state,city
bp5Mk2d0qofUeF5uLauIbg,430,Chef Ron's Gumbo Stop,LA,Metairie
3WU1ZobAqXQ07xYoKE2Vyg,402,Daisy Dukes - French Quarter,LA,New Orleans
8Vo6LN9gqULhXzoxj5k6kQ,274,The Black Sheep,CA,Santa Barbara
3FPi2yKCIh0Hh3iBg8faYA,237,Tokyo Sushi,NV,Reno
-1B9pP_CrRBJYPICE5WbRA,234,Spice 28,PA,Philadelphia
q6hWVlVeiCgMgM22wt1wfw,208,The Company Burger,LA,New Orleans
XeogL9kqQxKEO42ZEqMbvg,193,Pepperfire Hot Chicken,TN,Nashville
0S3TvsmbCeOZ9UitnpRd1A,151,Dim Sum House,PA,Philadelphia
kpl3H_sgfTm-tH3Aa-ML7g,137,Benton Park Café & Coffee Bar,MO,Saint Louis
vxLV9tci88Hsr1G01uleag,133,Slice Pizzeria,LA,New Orleans


# Yelp Reviews Analysis

### Positive vs. Negative Sentiments Over Time
Are users generally leaving more positive or negative reviews over time?

In [16]:
%%sql
SELECT 
    DATE_TRUNC('year', date) AS review_year,
    SUM(CASE WHEN stars >= 4 THEN 1 ELSE 0 END) AS positive_reviews,
    SUM(CASE WHEN stars <= 2 THEN 1 ELSE 0 END) AS negative_reviews
FROM review
GROUP BY review_year
ORDER BY review_year;

review_year,positive_reviews,negative_reviews
2005-01-01 00:00:00-08:00,0,1
2006-01-01 00:00:00-08:00,18,1
2007-01-01 00:00:00-08:00,52,11
2008-01-01 00:00:00-08:00,181,51
2009-01-01 00:00:00-08:00,286,92
2010-01-01 00:00:00-08:00,533,132
2011-01-01 00:00:00-08:00,950,292
2012-01-01 00:00:00-08:00,1321,366
2013-01-01 00:00:00-08:00,1784,580
2014-01-01 00:00:00-08:00,2443,716


### Most loved and most critized business
Which businesses receive the most positive and negative reviews?

In [19]:
%%sql
WITH love_critized AS (
SELECT 
    business_id, 
    SUM(CASE WHEN stars >= 4 THEN 1 ELSE 0 END) AS positive_reviews,
    SUM(CASE WHEN stars <= 2 THEN 1 ELSE 0 END) AS negative_reviews
FROM review
GROUP BY business_id
)

SELECT lc.*, name
FROM love_critized lc
JOIN business b
ON lc.business_id = b.business_id
ORDER BY positive_reviews DESC 
LIMIT 10;

business_id,positive_reviews,negative_reviews,name
bp5Mk2d0qofUeF5uLauIbg,1191,101,Chef Ron's Gumbo Stop
3WU1ZobAqXQ07xYoKE2Vyg,894,314,Daisy Dukes - French Quarter
8Vo6LN9gqULhXzoxj5k6kQ,849,93,The Black Sheep
3FPi2yKCIh0Hh3iBg8faYA,634,174,Tokyo Sushi
-1B9pP_CrRBJYPICE5WbRA,593,128,Spice 28
q6hWVlVeiCgMgM22wt1wfw,576,85,The Company Burger
XeogL9kqQxKEO42ZEqMbvg,479,83,Pepperfire Hot Chicken
vxLV9tci88Hsr1G01uleag,404,87,Slice Pizzeria
kpl3H_sgfTm-tH3Aa-ML7g,392,89,Benton Park Café & Coffee Bar
0S3TvsmbCeOZ9UitnpRd1A,386,77,Dim Sum House


### Impact of review length on ratings
Do longer reviews tend to be more positive or negative?

In [23]:
%%sql
SELECT 
    stars,
    AVG(LENGTH(text)) AS avg_review_length
FROM review
GROUP BY stars
ORDER BY stars DESC;


stars,avg_review_length
5,453.25662895527665
4,570.378765720971
3,645.3111757374324
2,697.4688981868898
1,717.3744469768066
