In [1]:
import ast
import json
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the whole `business` table from the local SQLite file into a DataFrame.
# The close() sits in a `finally` so the connection is released even when the
# query raises (e.g. the table is missing). Note that sqlite3's own
# `with conn:` only manages transactions and does NOT close the connection.
conn = sqlite3.connect('yelp_dataset_business.db')
try:
    df_business = pd.read_sql("""SELECT * FROM business""", conn)
finally:
    conn.close()

In [3]:
def _parse_literal(raw):
    """Turn a stored Python-literal string into the object it describes.

    Falsy values (None, empty string) become None, matching the previous
    behaviour. Values that are not strings are returned unchanged so that
    re-running this cell on an already-parsed column is a no-op instead of
    an error.

    ast.literal_eval is used rather than eval(): it accepts only literals
    (dicts, lists, strings, numbers, booleans, None), so text coming from the
    database cannot execute arbitrary code.
    """
    if not raw:
        return None
    if isinstance(raw, str):
        return ast.literal_eval(raw)
    return raw


# Cast the flag / count columns to plain integers.
df_business["is_open"] = df_business["is_open"].astype(int)
df_business["review_count"] = df_business["review_count"].astype(int)

# `attributes` and `hours` appear to be stored as text holding dict literals;
# parse them into real Python objects.
for literal_col in ("attributes", "hours"):
    df_business[literal_col] = df_business[literal_col].apply(_parse_literal)

In [4]:
# Load reviews, keeping for every (user_id, business_id) pair only the row(s)
# whose `date` equals that pair's most recent date. The inner sub-query finds
# the latest date per pair; the join filters the full table down to it.
# If two reviews of the same pair share the exact same latest timestamp,
# both rows are returned.
# The close() sits in a `finally` so the connection is released even when the
# query raises; sqlite3's `with conn:` would not close it.
conn = sqlite3.connect('yelp_dataset_reviews.db')
try:
    df_reviews = pd.read_sql("""SELECT ubt.*
                        FROM reviews ubt
                        INNER JOIN (
                         SELECT user_id, business_id, MAX(date) as max_date
                        FROM reviews
                        GROUP BY 1,2
) grouped_ubt ON ubt.user_id = grouped_ubt.user_id
              AND ubt.business_id = grouped_ubt.business_id
              AND ubt.date = grouped_ubt.max_date;
""", conn)
finally:
    conn.close()

In [5]:
# Preview the first rows of df_reviews as a sanity check on the load above.
df_reviews.head()

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is...",2018-07-07 22:09:11
1,BiTunyQ73aT9WBnpR9DZGw,OyoGAe7OKpv6SyGZT5g77Q,7ATYjTIgM3jUlt4UM3IypQ,5.0,1,0,1,I've taken a lot of spin classes over the year...,2012-01-03 15:28:18
2,saUsX_uimxRlCVr67Z4Jig,8g_iMtfSiwikVnbP2etR0A,YjUWPpI6HXG530lwP-fb2A,3.0,0,0,0,Family diner. Had the buffet. Eclectic assortm...,2014-02-05 20:30:30
3,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favo...",2015-01-04 00:01:03
4,JrIxlS1TzJ-iCu79ul40cQ,eUta8W_HdHMXPzLBBZhL1A,04UD14gamNjLY0IDYVhHJg,1.0,1,2,1,I am a long term frequent customer of this est...,2015-09-23 23:10:31


In [None]:
# Keep only the identifier columns and the star rating from the reviews.
select_feature = df_reviews.loc[
    :, ['review_id', 'user_id', 'business_id', 'stars']
]

In [6]:
# Inner-join every review to its business record on `business_id`.
# Both frames carry a `stars` column, so pandas applies its default suffixes:
# `stars_x` is the review's rating, `stars_y` the business's rating.
df_merge = df_reviews.merge(df_business, how='inner', on='business_id')

In [7]:
# Display the merged reviews + business frame (pandas truncates the rendering).
df_merge

Unnamed: 0,review_id,user_id,business_id,stars_x,useful,funny,cool,text,date,name,...,state,postal_code,latitude,longitude,stars_y,review_count,is_open,attributes,categories,hours
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is...",2018-07-07 22:09:11,Turning Point of North Wales,...,PA,19454,40.210196,-75.223639,3.0,169,1,"{'NoiseLevel': 'u'average'', 'HasTV': 'False',...","Restaurants, Breakfast & Brunch, Food, Juice B...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-15:0'..."
1,VJxlBnJmCDIy8DFG0kjSow,Iaee7y6zdSB3B-kRCo4z1w,XQfwVwDr-v0ZS3_CbbE5Xw,2.0,0,0,0,This is the second time we tried turning point...,2017-05-13 17:06:55,Turning Point of North Wales,...,PA,19454,40.210196,-75.223639,3.0,169,1,"{'NoiseLevel': 'u'average'', 'HasTV': 'False',...","Restaurants, Breakfast & Brunch, Food, Juice B...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-15:0'..."
2,S6pQZQocMB1WHMjTRbt77A,ejFxLGqQcWNLdNByJlIhnQ,XQfwVwDr-v0ZS3_CbbE5Xw,4.0,2,0,1,The place is cute and the staff was very frien...,2017-08-08 00:58:18,Turning Point of North Wales,...,PA,19454,40.210196,-75.223639,3.0,169,1,"{'NoiseLevel': 'u'average'', 'HasTV': 'False',...","Restaurants, Breakfast & Brunch, Food, Juice B...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-15:0'..."
3,WqgTKVqWVHDHjnjEsBvUgg,f7xa0p_1V9lx53iIGN5Sug,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,We came on a Saturday morning after waiting a ...,2017-11-19 02:20:23,Turning Point of North Wales,...,PA,19454,40.210196,-75.223639,3.0,169,1,"{'NoiseLevel': 'u'average'', 'HasTV': 'False',...","Restaurants, Breakfast & Brunch, Food, Juice B...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-15:0'..."
4,M0wzFFb7pefOPcxeRVbLag,dCooFVCk8M1nVaQqcfTL3Q,XQfwVwDr-v0ZS3_CbbE5Xw,2.0,0,0,0,"Mediocre at best. The decor is very nice, and ...",2017-09-09 17:49:47,Turning Point of North Wales,...,PA,19454,40.210196,-75.223639,3.0,169,1,"{'NoiseLevel': 'u'average'', 'HasTV': 'False',...","Restaurants, Breakfast & Brunch, Food, Juice B...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-15:0'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6745778,0gai4MzBzFCa7JsS31RRjg,xHu1jmrnv4DdJMuC8IxeRg,vI4vyi1dfG93oAiSRFDymA,1.0,2,0,0,Ridiculous wait times. I showed up right on ti...,2018-06-05 14:25:32,Aesthetic Dermatology Associates,...,PA,19301,40.045448,-75.501140,2.5,5,1,"{'BikeParking': 'True', 'WheelchairAccessible'...","Medical Spas, Doctors, Laser Hair Removal, Der...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-17:30..."
6745779,7MZu6bFdFoqr7MyPKqvoew,aYveEctPYcZiubXyEgLhTA,vI4vyi1dfG93oAiSRFDymA,5.0,4,1,2,Love this office. All the staff us super kind ...,2020-02-17 01:53:23,Aesthetic Dermatology Associates,...,PA,19301,40.045448,-75.501140,2.5,5,1,"{'BikeParking': 'True', 'WheelchairAccessible'...","Medical Spas, Doctors, Laser Hair Removal, Der...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-17:30..."
6745780,3emHuLQvuAw0l66TzSWmKA,oz-So7Kwo5tW51HrT-BgIg,vI4vyi1dfG93oAiSRFDymA,1.0,0,0,0,It was a HUGE mistake to give this place a try...,2021-09-22 16:51:46,Aesthetic Dermatology Associates,...,PA,19301,40.045448,-75.501140,2.5,5,1,"{'BikeParking': 'True', 'WheelchairAccessible'...","Medical Spas, Doctors, Laser Hair Removal, Der...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-17:30..."
6745781,T-adPwD-eP2LvuDINKOA9g,09zj3b4tM-xJjozvtk34wQ,vI4vyi1dfG93oAiSRFDymA,1.0,0,0,0,Friendly staff but the praise ends there. Gett...,2021-03-09 19:21:44,Aesthetic Dermatology Associates,...,PA,19301,40.045448,-75.501140,2.5,5,1,"{'BikeParking': 'True', 'WheelchairAccessible'...","Medical Spas, Doctors, Laser Hair Removal, Der...","{'Monday': '7:30-15:0', 'Tuesday': '7:30-17:30..."
