In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import string
import nltk
nltk.download ('stopwords')
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/MariloyHJimenez/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import warnings
warnings.filterwarnings('ignore')

# Importing Clean Data

In [3]:
file_path = "Resources/Final_philadelphia_reviews.csv"

phillies_df = pd.read_csv(file_path)
phillies_df["business_id"].count()

10000

In [4]:
yelp_reviews_df = phillies_df[['review_id', 'user_id', 'business_id', 'text', 
                               'stars_business', 'review_count']]

In [5]:
#restaurants_df = pd.read_csv('Resources/Philadelphia_businesses.csv')
restaurants_df = pd.read_csv("Resources/Philly_NearRestaurants.csv")
#Check Null values in restaurantes
restaurants_df.rename (columns={'ID':'Business_Id'}, inplace=True)
restaurants_df['Business_Id'].count()

50

In [6]:
restaurants_df.head(10)

Unnamed: 0,name,latitude,longitude,stars_business,categories,review_count,Business_Id
0,Saad's Halal Restaurant,39.954963,-75.211851,4.5,"Halal, Middle Eastern, Restaurants, Food, Indi...",702,6_LnAQQ0-mml8YgpfRjGuA
1,Mood Cafe,39.948664,-75.215832,4.5,"Cafes, Pakistani, Juice Bars & Smoothies, Rest...",458,U30ggGzFpXvc2NZYwOW3qg
2,48th Street Grille,39.953137,-75.218661,4.5,"Internet Cafes, Restaurants, Food, Seafood, Am...",275,KYPhGJIibu_7ePIX4HNs6A
3,Cucina Zapata,39.954289,-75.185538,4.5,"Thai, Food Stands, Restaurants, Mexican",248,B-DiQpcSTJ7oMMnwzbAGTQ
4,Fu-Wah Mini Market,39.948292,-75.217014,4.5,"Grocery, Food, Convenience Stores, Restaurants...",247,Biidj3QszVuVO0Q85g5NPw
5,Dottie's Donuts,39.948345,-75.213656,4.5,"Restaurants, Food, Vegan, Cafes, Donuts",206,A3Qt87F7ZaAwCW4CyyB1Fw
6,Tacos Don Memo,39.951988,-75.199123,4.5,"Food Trucks, Specialty Food, Food, Restaurants...",200,eXKblEHP3YJYU1Awz08hVw
7,Lee's Deli,39.948441,-75.217116,4.5,"Sandwiches, Burgers, Delis, Restaurants, Local...",175,KHe6HAqmyioITwTvg9Bbcg
8,White Dog Cafe,39.953558,-75.192905,4.0,"Cocktail Bars, Gluten-Free, Bars, Diners, Vege...",1301,ZKPrXH_GNW_AtZ31tP3NmA
9,Sabrina's Café,39.959879,-75.190681,4.0,"Breakfast & Brunch, Restaurants, Vegetarian, A...",833,6ewV-e7-39oqYUq3yZuIyw


In [7]:
# Fill with empty string the NaN reviews
yelp_reviews_df[['text']] = yelp_reviews_df[['text']].fillna('')
yelp_reviews_df.head()

Unnamed: 0,review_id,user_id,business_id,text,stars_business,review_count
0,kKC5pBPkUCWo6mKYFUewRw,mGnZFbk2gqLLtGW-mYo__A,ytynqOUb3hjKeJfRj5Tshw,,4.5,5721
1,sFQrhTbTah0o2kU_Pi2D0Q,Tu4ATXLhy8kRTjpQCnl2pA,ytynqOUb3hjKeJfRj5Tshw,One;of;my;favorite;places;to;go;to;in;Philly;;...,4.5,5721
2,kqn1uP3LRVjVDUD44ZSu1A,vRNb2IaGlsZRA_wUf3Ov8w,ytynqOUb3hjKeJfRj5Tshw,This might be a bit unfair to have a single re...,4.5,5721
3,qMsTe9QznpNQk1AKbYLp-w,29K-usmZfVDeIaQ85EG54A,ytynqOUb3hjKeJfRj5Tshw,Alright; I remember the first time I went to t...,4.5,5721
4,nmMIRBNONIICe7CFHnfadQ,1jE--VcTddwXGampD23JCg,ytynqOUb3hjKeJfRj5Tshw,It's;an;experience;;to;say;the;least!;;Not;as;...,4.5,5721


In [8]:
yelp_reviews_df.rename (columns={'review_id': 'Review_ID', 'user_id' :'User_Id', 
                        'business_id':'Business_Id', 'text':'Reviews', 
                'stars_business': 'Rating', 'review_count' :'Review_count'}, inplace=True)


# Begin the reviews cleaning, selecting only stars and text

In [9]:
yelp_reviews_df["Reviews"]

0                                                        
1       One;of;my;favorite;places;to;go;to;in;Philly;;...
2       This might be a bit unfair to have a single re...
3       Alright; I remember the first time I went to t...
4       It's;an;experience;;to;say;the;least!;;Not;as;...
                              ...                        
9995    I;would;give;negative;stars;if;I;could.;Chewy;...
9996                                                     
9997    So if you want a cheesesteak at PHL airport; t...
9998                                                     
9999    Have;to;say;Geno's;is;a;great;place;to;get;a;c...
Name: Reviews, Length: 10000, dtype: object

In [10]:
#Select only stars and text
reviews_df = yelp_reviews_df[['Business_Id', 'User_Id', 'Rating', 'Reviews']]
reviews_df["Reviews"] = yelp_reviews_df["Reviews"].str.replace(";", " ")
reviews_df["Reviews"]

0                                                        
1       One of my favorite places to go to in Philly  ...
2       This might be a bit unfair to have a single re...
3       Alright  I remember the first time I went to t...
4       It's an experience  to say the least!  Not as ...
                              ...                        
9995    I would give negative stars if I could. Chewy ...
9996                                                     
9997    So if you want a cheesesteak at PHL airport  t...
9998                                                     
9999    Have to say Geno's is a great place to get a c...
Name: Reviews, Length: 10000, dtype: object

In [11]:
def text_process(mess):
    """
    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Returns a list of the cleaned text
    """
    print (mess)
    
   # Check characters to see if they are in punctuation          
    nopunc = [char for char in mess if char not in string.punctuation]

    # Join t('stop_word(he characters again to form the string.
    nopunc = ''.join(nopunc)
    
    # Now just remove any stopwords
    return " ".join([word for word in nopunc.split() if word.lower() not in stop])

In [12]:
from nltk.corpus import stopwords
stop = []

for word in stopwords.words('english'):
    s = [char for char in word if char not in string.punctuation]
    stop.append(''.join(s))

In [13]:
reviews_df['Reviews'] = reviews_df['Reviews'].apply(text_process)


One of my favorite places to go to in Philly  and I take every one of my visiting friends here! This place has a lot of yummy stores and awesome stores ( recommended: the grilled cheese place at Meltkraft Grilled Cheese  the donuts at Beilers  4 Seasons Juice Bar  Muellar's Chocolate Co.  icecream from Bassetts  and local honey from Bee Natural). I love coming here  and even though it can get really crowded during lunch hours on the weekends  it's still worth it.
This might be a bit unfair to have a single review  as each stall has very different fare with a variety of qualities. It can definitely be considered food heaven  but on the contrary on a Saturday morning can be Philadelphia hell. The produce at Iovines is very good and fresh  and very very reasonably priced. It can be hectic but the lines go extremely quickly. The rest of the market tends to have very long lines - but the lines can be an indication of the HAVE TO eat. The Original Turkeys turkey sandwhich with cranberry and

The last time I ate here  I was with my best friend. We wanted to sit outside  but it was too crowded  so they sat us inside. We were the ONLY table inside  and we didn't receive a menu or water or chips for almost 25 minutes. When we finally got these things  we ordered drinks that also took about 20 minutes to arrive. We were pretty annoyed  and then  as we were eating our chips  waiting to order food (still)  there was a large chunk of beef in our chips! As a vegetarian  this was a real bummer for me. I had to send the chips back and get new ones. We got our food  it was pretty okay  but the service was so bad  we barely left a tip. I hate doing that  considering I've worked in the industry before  and tips were how I made a living. But I worked really hard to earn it. This is not my first time receiving terrible service here.
I never remember anything I order from here (although I never have a bad experience) but OMG the salsa verde. My man and I have been slowly tweaking our homem

I haven't had a Max's cheesesteak in years so i was looking forward to it. The steak was mediocre  not much taste. I think the problem is the salt and pepper need to be added while it's cooking. And then there was a little too much grease (is there such a thing?). I might go back in the future...maybe.
m not totally sure why I hadn't been to Eulogy in so long. Sure  it's in a spot that annoying to drive to  and weekend night here are more crazy than that of the zoo. But my love of beer and food should bring me here more often.  Eulogy has a lot in common with Monk's: Excellent Belgian bars with great tap and bottle lists. They both have great food  including mussels  and also sport multiple bars with different taps. So far  everything is great. But I'm not just going to write a comparison between the two  because it might just be boring.  I've been on a quest for my favorite burger in Philly  but after eating too many fist-thick pub burgers  I decided to scale back a bit. Still  I've h

They've absolutely improved in the last year.  They have an official website now!  The fish was super fresh  the chirashi bowl was delicious and well worth it (12-13 pieces of sashimi)  tuna princess roll was delicious and well done!  I do agree with most people that their teas are sweeter than expected so I went with 50% sugar and it was perfect for me (side note: I don't like sweets too much  so it may depend on how much sugar you like).  Definitely our new place close by for sushi!
We were so excited when \A\" brought this little diamond in the [extreme] rough to our attention. Our mouths watered all day at pictures of Instagram-Worthy drinks and beautifully plated sushi. Alas  we couldn't control ourselves and set out to Bubblefish.   Unfortunately  it was not quite the experience that we anticipated. Bubble fish is either understaffed  under-experienced  or some combo of the two which left service to suffer.  Judging by other reviews  we can assume that the service is much better 

Ting Wong is my go to place if I want something filling yet affordable. When I go here  I would usually order the duck and wonton noodle soup but I've had other items on the menu like the pork blood congee and the seafood pan fried noodle. If its your first time here  I would recommend one of the noodle soup. The broth is very light with a little hint of flavor but its recommended that you eat the noodles with a little piece of meat and broth. The saltiness from the meat will balance out the broth.

Quick and simple place to grab a bite  the service there is straight to the point. The food is always good. I usually order the rice with meat options or one of their delicious soups.

Love this place! It is my go-to date night spot.  I always get the same thing - chips and salsa trio  cauliflower tacos and the house margarita.   The vibe is lively and fun  the food is great and I've never had a bad time!

Very poor female bar tender this is why places go out of business rude and obnoxious.

I'm a big fan of this new addition to Fairmount.  It's super spacious with plenty of seating and it's very well lit with bright walls and a clean  fresh ambiance.  When its nice out  they open the big garage-door style windows for an open layout--very nice touch now that Fairmount can be hard to snag in the warmer weather months.  Their coffee is priced comparable to the other coffee shops in the 'hood... which was a little disappointing.  I was hoping this place would have great prices that would hopefully drive down the cost of a cup-o-Joe in Fairmount.  That being said  they have a big menu with a BYO option for Mimosas and Bloody Mary's.  Me likey.  They have some classic coffee-shop food choices  with some new funky twists (tomato  basil and goat-cheese cornbread?).  Again  I was kind of hoping this place would serve up some more inexpensive options... but I'll take what I can get.
Coffee is good - apple cider is completely amazing. I'll give it four stars bc the beverages are goo

This place is great! We happened upon it accidentally on a Saturday morning but will definitely be back. Lots of outdoor seating  nice brunch menu  excellent Bloody Mary bar !! Friendly staff too. We had hummus and a bomb diggity kale salad. I'm a vegetarian so only a few options for brunch but they seem very accommodating.  We will be back!
We stopped here for post-dinner drinks after eating at Bridgid's. I can't speak to the food (since we didn't order any)  but the bar was pretty nice. We got there at around 10pm on a Saturday night and it wasn't that busy. We got a table near the bar and had plenty of room.  The beer selection was good (a requisite for me). I got a Brooklyn Pilsner and enjoyed it. The service was OK - a little slow  but the waitress was pleasant.   The only thing I wasn't crazy about was that there was a table with middle-aged people making out right next to our table - I mean really making out  not just like a kiss here and there. I found this to be somewhat egreg

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [14]:
#Split train test for testing the model later
vld_size=0.15
X_train, X_valid, y_train, y_valid = train_test_split(reviews_df['Reviews'], yelp_reviews_df['Business_Id'], test_size = vld_size) 


# Create two tables of user, text and bussiness

In [15]:
reviews_df['Business_Id'].count()

10000

In [16]:
newreviews_df = reviews_df.merge(restaurants_df, left_on="Business_Id", right_on="Business_Id", how="left")
newreviews_df.count()


Business_Id       10000
User_Id            9998
Rating            10000
Reviews           10000
name                500
latitude            500
longitude           500
stars_business      500
categories          500
review_count        500
dtype: int64

In [17]:
newreviews_df = reviews_df[reviews_df['Business_Id'].isin(restaurants_df["Business_Id"])]
newreviews_df['Business_Id'].count()

500

In [18]:
newreviews_df.head(25)

Unnamed: 0,Business_Id,User_Id,Rating,Reviews
390,ZKPrXH_GNW_AtZ31tP3NmA,cARxOd_5yKCgsCbUZ5ED4Q,4.0,
391,ZKPrXH_GNW_AtZ31tP3NmA,uIHI_BzJcNMxFAZA5bYeXg,4.0,Recently visited dinner daughter visiting Univ...
392,ZKPrXH_GNW_AtZ31tP3NmA,s9FrHlk9kPnfSAdAgKNtWA,4.0,love place 2 reasons 1 husband proposed 2 amaz...
393,ZKPrXH_GNW_AtZ31tP3NmA,eFXy9H9H2tBfnyNahNUUEQ,4.0,Gotta love White Dog food service reliably goo...
394,ZKPrXH_GNW_AtZ31tP3NmA,ETyduRMWNWg7cjoZaRxmaQ,4.0,got chicken sandwich broccoli rabe side salad ...
395,ZKPrXH_GNW_AtZ31tP3NmA,zCDU0cm2K_R7HozWqTgm0w,4.0,Mold hot dog ordered mold bun Im paying 750 ho...
396,ZKPrXH_GNW_AtZ31tP3NmA,UZ6YOaTiZ1lTZWKASIkVuA,4.0,
397,ZKPrXH_GNW_AtZ31tP3NmA,d2SmmiFbMXUxT5-cTzu0OQ,4.0,happened White Dog Cafe First time friend Dans...
398,ZKPrXH_GNW_AtZ31tP3NmA,nBdv5Br4G8pDWXXqSXhUug,4.0,Hello Last night wonderful meal tuna tartar du...
399,ZKPrXH_GNW_AtZ31tP3NmA,T9aisv9FFuEbO9hSgi-efw,4.0,wonderful little spot University City offers c...


In [19]:
userid_df = newreviews_df[['User_Id','Reviews']]
business_df = newreviews_df[['Business_Id', 'Reviews']]

In [20]:
userid_df.head()

Unnamed: 0,User_Id,Reviews
390,cARxOd_5yKCgsCbUZ5ED4Q,
391,uIHI_BzJcNMxFAZA5bYeXg,Recently visited dinner daughter visiting Univ...
392,s9FrHlk9kPnfSAdAgKNtWA,love place 2 reasons 1 husband proposed 2 amaz...
393,eFXy9H9H2tBfnyNahNUUEQ,Gotta love White Dog food service reliably goo...
394,ETyduRMWNWg7cjoZaRxmaQ,got chicken sandwich broccoli rabe side salad ...


In [21]:
business_df.head()

Unnamed: 0,Business_Id,Reviews
390,ZKPrXH_GNW_AtZ31tP3NmA,
391,ZKPrXH_GNW_AtZ31tP3NmA,Recently visited dinner daughter visiting Univ...
392,ZKPrXH_GNW_AtZ31tP3NmA,love place 2 reasons 1 husband proposed 2 amaz...
393,ZKPrXH_GNW_AtZ31tP3NmA,Gotta love White Dog food service reliably goo...
394,ZKPrXH_GNW_AtZ31tP3NmA,got chicken sandwich broccoli rabe side salad ...


In [22]:
userid_df = userid_df.groupby('User_Id').agg({'Reviews': ' '.join})
business_df = business_df.groupby('Business_Id').agg({'Reviews': ' '.join})

In [23]:
userid_df.head()

Unnamed: 0_level_0,Reviews
User_Id,Unnamed: 1_level_1
-5QrmUZTvniwryx3l3JM8g,favorite trendy ramen spot comfortable atmosph...
02EPgkmrh39Dr7uXc1bqTQ,Food excellent cheap quality quantity waitress...
03hJqyQVBUwTdDkIFB-bZg,
0DB3Irpf_ETVXu_Ou9vPow,used live right next Desi Chaat House pretty m...
0RiI15CtWkpgnEtBSaqKlA,


# User Tfidf Vectorizer with 5000 Features (represent 88% of all words)

In [24]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [25]:
#userid vectorizer
userid_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
userid_vectors = userid_vectorizer.fit_transform(userid_df['Reviews'])
userid_vectors.shape

(465, 4786)

In [26]:
userid_vectors

<465x4786 sparse matrix of type '<class 'numpy.float64'>'
	with 19738 stored elements in Compressed Sparse Row format>

In [27]:
#Business id vectorizer
businessid_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
businessid_vectors = businessid_vectorizer.fit_transform(business_df['Reviews'])
businessid_vectors.shape

(50, 4786)

# Matrix Factorization

In [28]:
userid_rating_matrix = pd.pivot_table(reviews_df, values='Rating', index=['User_Id'], columns=['Business_Id'])
userid_rating_matrix.shape

(7881, 1000)

In [29]:
userid_rating_matrix.head()

Business_Id,-0TffRSXXIlBYVbb5AwfTg,-1B9pP_CrRBJYPICE5WbRA,-5Rah4ZvWsDu4oilUZxhtw,0-7N3z_cb3Z1xMGDrbez6w,0-JRAY0LpBazDuA9761U5w,03jQGGJ2ch0uHTtW-UUUqg,04UD14gamNjLY0IDYVhHJg,05ev984NYfimRN0UiFrxaA,0IEn6AxLNFfdfuwd2bQlbQ,0K4RwxdAcViifyU3Htzxww,...,zRjUMHQJ5gAmFhcXZtLacA,zU5PPd9j4bYC_MepNSEDcA,zXgksmhdzgL4Xx5FylbP3w,zfn7V7FVH5_J5A9dInfbnA,ziyXteCmNmRprs10buP2iQ,zjTBfbvbN2Ps6_Ar0w-fuQ,zpKTPWoW56wF6d9qNnxM3Q,zrQAj03aHI7kpmAiyKcKhA,zujdPV3HT-Y-CKE1GgkMHQ,zwd4dyQ5ovnjVojWfAuhMw
User_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-0ouXH70JU4xkVw-A4Gs8w,,,,,,,,,,,...,,,,,,,,,,
-2Ro3V4zgRZa92tPI5ul7Q,,,,,,,,,,,...,,,,,,,,,,
-31pS3jJqUjUrcqG2s-aNg,,,,,,,,,,,...,,,,,,,,,,
-35jJ1OiIvEzZSYSJ4bHcw,,,,,,,,,,,...,,,,,,,,,,
-5QrmUZTvniwryx3l3JM8g,,,,,,,,,,,...,,,,,,,,,,


In [32]:
P = pd.DataFrame(userid_vectors.toarray(), index=userid_df.index, columns=userid_vectorizer.get_feature_names())
Q = pd.DataFrame(businessid_vectors.toarray(), index=business_df.index, columns=businessid_vectorizer.get_feature_names())


In [31]:
Q.head()

Unnamed: 0_level_0,0,030503062010,04,05,08,09,1,10,100,1012,...,étaient,、,。,不喜欢的人会很不喜欢,可以提前跟店家说一下不要加,唯一的缺憾是冰粉上面会加橙皮以及其他一些类似五仁的材料,红油,食物非常正宗,麻酱和红糖用得都很正,，
Business_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5Rah4ZvWsDu4oilUZxhtw,0.0,0.0,0.0,0.037209,0.0,0.0,0.0,0.02148,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14ZGwnDyydXdSBsLXpSUrA,0.0,0.0,0.058513,0.0,0.058513,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3gVSrS4kffGGZT8oXHsIcw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41LEg4ZgFDVnZfToxGXiGA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5ICrDkwtX4ykKOLVJ1tFJA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
Q.head()

Unnamed: 0_level_0,0,030503062010,04,05,08,09,1,10,100,1012,...,étaient,、,。,不喜欢的人会很不喜欢,可以提前跟店家说一下不要加,唯一的缺憾是冰粉上面会加橙皮以及其他一些类似五仁的材料,红油,食物非常正宗,麻酱和红糖用得都很正,，
Business_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5Rah4ZvWsDu4oilUZxhtw,0.0,0.0,0.0,0.037209,0.0,0.0,0.0,0.02148,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14ZGwnDyydXdSBsLXpSUrA,0.0,0.0,0.058513,0.0,0.058513,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3gVSrS4kffGGZT8oXHsIcw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41LEg4ZgFDVnZfToxGXiGA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5ICrDkwtX4ykKOLVJ1tFJA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
def matrix_factorization(R, P, Q, steps=25, gamma=0.001,lamda=0.02):
    for step in range(steps):
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    eij=R.loc[i,j]-np.dot(P.loc[i],Q.loc[j])
                    P.loc[i]=P.loc[i]+gamma*(eij*Q.loc[j]-lamda*P.loc[i])
                    Q.loc[j]=Q.loc[j]+gamma*(eij*P.loc[i]-lamda*Q.loc[j])
        e=0
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    e= e + pow(R.loc[i,j]-np.dot(P.loc[i],Q.loc[j]),2)+lamda*(pow(np.linalg.norm(P.loc[i]),2)+pow(np.linalg.norm(Q.loc[j]),2))
        if e<0.001:
            break
        
    return P,Q

In [36]:
%%time
P, Q = matrix_factorization(userid_rating_matrix, P, Q, steps=25, gamma=0.001,lamda=0.02)

KeyError: '-0ouXH70JU4xkVw-A4Gs8w'

In [37]:
Q.head()

Unnamed: 0_level_0,0,030503062010,04,05,08,09,1,10,100,1012,...,étaient,、,。,不喜欢的人会很不喜欢,可以提前跟店家说一下不要加,唯一的缺憾是冰粉上面会加橙皮以及其他一些类似五仁的材料,红油,食物非常正宗,麻酱和红糖用得都很正,，
Business_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5Rah4ZvWsDu4oilUZxhtw,0.0,0.0,0.0,0.037209,0.0,0.0,0.0,0.02148,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14ZGwnDyydXdSBsLXpSUrA,0.0,0.0,0.058513,0.0,0.058513,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3gVSrS4kffGGZT8oXHsIcw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41LEg4ZgFDVnZfToxGXiGA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5ICrDkwtX4ykKOLVJ1tFJA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
# Store P, Q and vectorizer in pickle file
import pickle
output = open('yelp_recommendation_model_8.pkl', 'wb')
pickle.dump(P,output)
pickle.dump(Q,output)
pickle.dump(userid_vectorizer,output)
output.close()

In [39]:
#Prediction for input text

In [40]:
words = "burger fries"
test_df= pd.DataFrame([words], columns=['Reviews'])
test_df['Reviews'] = test_df['Reviews'].apply(text_process)
test_vectors = userid_vectorizer.transform(test_df['Reviews'])
test_v_df = pd.DataFrame(test_vectors.toarray(), index=test_df.index, columns=userid_vectorizer.get_feature_names())

predictItemRating=pd.DataFrame(np.dot(test_v_df.loc[0],Q.T),index=Q.index,columns=['Rating'])
topRecommendations=pd.DataFrame.sort_values(predictItemRating,['Rating'],ascending=[0])[:7]

for i in topRecommendations.index:
    print(restaurants_df[restaurants_df['Business_Id']==i]['name'].iloc[0])
    print(restaurants_df[restaurants_df['Business_Id']==i]['categories'].iloc[0])
    print(str(restaurants_df[restaurants_df['Business_Id']==i]['stars_business'].iloc[0])+ ' ' +str(restaurants_df[restaurants_df['Business_Id']==i]['review_count'].iloc[0]))
    print('')



burger fries
Bobby's Burger Palace
Sandwiches, American (Traditional), Burgers, Restaurants, Salad
3.5 788

HipCityVeg - University City
Sandwiches, Food, Burgers, Restaurants, Vegan, Desserts
4.0 314

Lee's Deli
Sandwiches, Burgers, Delis, Restaurants, Local Flavor
4.5 175

New Deck Tavern
Irish, Gastropubs, Pubs, American (Traditional), Nightlife, Bars, Restaurants
3.5 419

Sabrina's Café
Breakfast & Brunch, Restaurants, Vegetarian, American (New), American (Traditional), Comfort Food, Cafes
4.0 833

Hummus Grill
Specialty Food, Food, Sandwiches, Middle Eastern, Vegetarian, Ethnic Food, Restaurants, Mediterranean, Greek, Salad
4.0 372

Renata's Kitchen
Breakfast & Brunch, Caterers, Tapas/Small Plates, Event Planning & Services, Middle Eastern, Mediterranean, Restaurants, Greek
4.0 203

