In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import string
import nltk
nltk.download ('stopwords')
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

from config import MAPBOX_API_KEY
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/MariloyHJimenez/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
import warnings
warnings.filterwarnings('ignore')

# Importing Clean Data

In [8]:
file_path = "Resources/Final_philadelphia_reviews.csv"

phillies_df = pd.read_csv(file_path)
phillies_df["business_id"].count()

10000

In [9]:
yelp_reviews_df = phillies_df[['review_id', 'user_id', 'business_id', 'text', 
                               'stars_business', 'review_count']]

In [10]:
restaurants_df = pd.read_csv("Resources/Philly_NearRestaurants.csv")
#Check Null values in restaurantes
restaurants_df.rename (columns={'ID':'Business_Id'}, inplace=True)
restaurants_df['Business_Id'].count()

50

In [11]:
restaurants_df.head(10)

Unnamed: 0,name,latitude,longitude,stars_business,categories,review_count,Business_Id
0,Saad's Halal Restaurant,39.954963,-75.211851,4.5,"Halal, Middle Eastern, Restaurants, Food, Indi...",702,6_LnAQQ0-mml8YgpfRjGuA
1,Mood Cafe,39.948664,-75.215832,4.5,"Cafes, Pakistani, Juice Bars & Smoothies, Rest...",458,U30ggGzFpXvc2NZYwOW3qg
2,48th Street Grille,39.953137,-75.218661,4.5,"Internet Cafes, Restaurants, Food, Seafood, Am...",275,KYPhGJIibu_7ePIX4HNs6A
3,Cucina Zapata,39.954289,-75.185538,4.5,"Thai, Food Stands, Restaurants, Mexican",248,B-DiQpcSTJ7oMMnwzbAGTQ
4,Fu-Wah Mini Market,39.948292,-75.217014,4.5,"Grocery, Food, Convenience Stores, Restaurants...",247,Biidj3QszVuVO0Q85g5NPw
5,Dottie's Donuts,39.948345,-75.213656,4.5,"Restaurants, Food, Vegan, Cafes, Donuts",206,A3Qt87F7ZaAwCW4CyyB1Fw
6,Tacos Don Memo,39.951988,-75.199123,4.5,"Food Trucks, Specialty Food, Food, Restaurants...",200,eXKblEHP3YJYU1Awz08hVw
7,Lee's Deli,39.948441,-75.217116,4.5,"Sandwiches, Burgers, Delis, Restaurants, Local...",175,KHe6HAqmyioITwTvg9Bbcg
8,White Dog Cafe,39.953558,-75.192905,4.0,"Cocktail Bars, Gluten-Free, Bars, Diners, Vege...",1301,ZKPrXH_GNW_AtZ31tP3NmA
9,Sabrina's Café,39.959879,-75.190681,4.0,"Breakfast & Brunch, Restaurants, Vegetarian, A...",833,6ewV-e7-39oqYUq3yZuIyw


In [12]:
# Fill with empty string the NaN reviews
yelp_reviews_df.dropna(inplace=True)
yelp_reviews_df[['text']] = yelp_reviews_df[['text']].fillna('')
yelp_reviews_df.head()

Unnamed: 0,review_id,user_id,business_id,text,stars_business,review_count
1,sFQrhTbTah0o2kU_Pi2D0Q,Tu4ATXLhy8kRTjpQCnl2pA,ytynqOUb3hjKeJfRj5Tshw,One;of;my;favorite;places;to;go;to;in;Philly;;...,4.5,5721
2,kqn1uP3LRVjVDUD44ZSu1A,vRNb2IaGlsZRA_wUf3Ov8w,ytynqOUb3hjKeJfRj5Tshw,This might be a bit unfair to have a single re...,4.5,5721
3,qMsTe9QznpNQk1AKbYLp-w,29K-usmZfVDeIaQ85EG54A,ytynqOUb3hjKeJfRj5Tshw,Alright; I remember the first time I went to t...,4.5,5721
4,nmMIRBNONIICe7CFHnfadQ,1jE--VcTddwXGampD23JCg,ytynqOUb3hjKeJfRj5Tshw,It's;an;experience;;to;say;the;least!;;Not;as;...,4.5,5721
6,dXVhYlcX9X9kqVI16AoBHg,lavyYLh68LxIBhJdIE5f_g,ytynqOUb3hjKeJfRj5Tshw,fresh;seafood;and;produce.;fresh;breads;;chees...,4.5,5721


In [13]:
yelp_reviews_df.count()

review_id         8303
user_id           8303
business_id       8303
text              8303
stars_business    8303
review_count      8303
dtype: int64

In [14]:
yelp_reviews_df.rename (columns={'review_id': 'Review_ID', 'user_id' :'User_Id', 
                        'business_id':'Business_Id', 'text':'Reviews', 
                'stars_business': 'Rating', 'review_count' :'Review_count'}, inplace=True)


# Begin the reviews cleaning, selecting only stars and text

In [15]:
yelp_reviews_df["Reviews"]

1       One;of;my;favorite;places;to;go;to;in;Philly;;...
2       This might be a bit unfair to have a single re...
3       Alright; I remember the first time I went to t...
4       It's;an;experience;;to;say;the;least!;;Not;as;...
6       fresh;seafood;and;produce.;fresh;breads;;chees...
                              ...                        
9993    Let down. I was in a hurry to catch my next fl...
9994    I;dont;give;one;star;reviews;but;they;earned;i...
9995    I;would;give;negative;stars;if;I;could.;Chewy;...
9997    So if you want a cheesesteak at PHL airport; t...
9999    Have;to;say;Geno's;is;a;great;place;to;get;a;c...
Name: Reviews, Length: 8303, dtype: object

In [16]:
#Select only stars and text
reviews_df = yelp_reviews_df[['Business_Id', 'User_Id', 'Rating', 'Reviews']]
reviews_df["Reviews"] = yelp_reviews_df["Reviews"].str.replace(";", " ")
reviews_df["Reviews"]

1       One of my favorite places to go to in Philly  ...
2       This might be a bit unfair to have a single re...
3       Alright  I remember the first time I went to t...
4       It's an experience  to say the least!  Not as ...
6       fresh seafood and produce. fresh breads  chees...
                              ...                        
9993    Let down. I was in a hurry to catch my next fl...
9994    I dont give one star reviews but they earned i...
9995    I would give negative stars if I could. Chewy ...
9997    So if you want a cheesesteak at PHL airport  t...
9999    Have to say Geno's is a great place to get a c...
Name: Reviews, Length: 8303, dtype: object

In [17]:
def text_process(mess):
    """
    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Returns a list of the cleaned text
    """
    print (mess)
    
   # Check characters to see if they are in punctuation          
    nopunc = [char for char in mess if char not in string.punctuation]

    # Join t('stop_word(he characters again to form the string.
    nopunc = ''.join(nopunc)
    
    # Now just remove any stopwords
    return " ".join([word for word in nopunc.split() if word.lower() not in stop])

In [18]:
from nltk.corpus import stopwords
stop = []

for word in stopwords.words('english'):
    s = [char for char in word if char not in string.punctuation]
    stop.append(''.join(s))

In [19]:
reviews_df['Reviews'] = reviews_df['Reviews'].apply(text_process)

One of my favorite places to go to in Philly  and I take every one of my visiting friends here! This place has a lot of yummy stores and awesome stores ( recommended: the grilled cheese place at Meltkraft Grilled Cheese  the donuts at Beilers  4 Seasons Juice Bar  Muellar's Chocolate Co.  icecream from Bassetts  and local honey from Bee Natural). I love coming here  and even though it can get really crowded during lunch hours on the weekends  it's still worth it.
This might be a bit unfair to have a single review  as each stall has very different fare with a variety of qualities. It can definitely be considered food heaven  but on the contrary on a Saturday morning can be Philadelphia hell. The produce at Iovines is very good and fresh  and very very reasonably priced. It can be hectic but the lines go extremely quickly. The rest of the market tends to have very long lines - but the lines can be an indication of the HAVE TO eat. The Original Turkeys turkey sandwhich with cranberry and 

Ordered a house salad and mussels: Salad consisted of iceberg lettuce  4 cherry tomatoes and French fries on top.  Very bland and u satisfying at   The mussels were ok -- served in thick  Parmesan cheese covered marinara sauce so no broth for dipping. Cold  stale bread was served on the side but was essentially inedible.  Ordered the dish because it was highlighted I the menu \award winning\"....however it was mediocre at best.  Truthfully  the highlight of the meal was the wine selection and the fries...  Service was fast and cordial...  Would only return for a drink ... Maybe try a burger?"
Really  they deserve a 1/2 star.  Ordered the roast pork sandwich and crab fries.  The roast pork sandwich is supposed to be hot  but it was cold.  Further  the bun was not cut all the way through so I essentially had to rip it (plastic cutlery is not particularly effective.)  I told my server the sandwich was not hot.  She returns it with attitude and clearly the entire basket was thrown in the m

AMAZING!  My boyfriend made surprise reservations here for our anniversary.  Apparently reservations were a little tricky a week ahead of time and after experiencing dinner tonight I completely understand why!  We started with Frog Legs and a Salad - both fantastic (my boyfriend refuses to eat salad but actually liked this salad!).  For our main course I had the Faux Filet Sauce au Poive (black pepper-crusted New York strip with pomme puree and bacon-wrapped haricot verts) which was not only the best steak either one of us has ever tasted  but honestly the best meal I've ever had!  My boyfriend had the duck  which was new tonight so I cant give a better description because it's not on their menu yet  but it was as delicious as the steak.  We kept trading bites because neither of use was willing to give up a bite without an equal trade - haha :P  We're grad students so we can't afford fancy meals that often but this was definitely worth it!  We'll certainly be back the next time we have

This place is great! Came here first time with an Amazon local deal  and loved it. Thanks to other Yelp reviews I found out it is a BYOB but you want sake they do have a lot of options for that too. I'm sure they have other drinks  but I'm not asking questions why they allow patrons to bring their own alcohol!  Had the sashimi platter the first time and the tuna pizza. (good!) Bought two more deals to come back in the future. There always seems to be something on Groupon/Living Social for Zento deals- look for them!  Also thanks to the website I found out the restaurant validates parking (completely free!) for the weekends (Starting Thursday) in the garage on 2nd in the block north of Chestnut St.
In a sentence  don't waste your time.  We were advised of Steve's by the bellman at our hotel and were happy to walk just a few blocks to get there. That happiness was short lived.   The attitude of the gal taking our order was neither inviting or even cordiale. Bordering on rude. It summed u

Food: 3.5 Decor: 4.5 Ambiance: 4.5 Service: 5 Value: 4  This place is special. While you are in an historic Italian restaurant in South Philly  it isn't necessarily the food that you are here for. It is the experience of the place. That being said  the food is certainly serviceable. This place would be even higher in terms of experience if the 2 person booths weren't so unbelievably small and uncomfortable.
We walked in and the smile became wider and wider and folks I must tell you it made me have nostalgia as if I had seen this place before like my last life more than 90 years ago.    I don't have to talk about food which is a 5 5. The Burratta is as fresh as one could get direct from the farm. The pesto with bread is breathtaking it was simply delicious. We closed our eyes and savoured every lil bit of morsel.  We had Tortellini the Ravioli and Prince Egor with caviar.  All is all a 6 5 with the service the opera performance.    WE LOVE THIS PLACE
A hidden gem in South Philly! Stoppe

I chose Termini Bros Bakery for my first dessert experience at Reading Terminal Market  and I'm so glad that I did. Surprisingly  it was one of the few bakeries still open during the time I went (around 5:30 PM).  This bakery has a wide spread of cakes  mini cakes  cheesecakes  and more. All the cakes were beautiful  and my friend ordered a Tiramisu cake and had some birthday customization done  the final product looked great. We sampled some pound cake  which was absolutely delicious and moist and soft. After sampling  I knew I had to pick up something from the bakery.  I wanted something small but something sweet  so I decided to choose between the cheesecake slices. At $4 a slice  they had the following flavors: pumpkin  marble and plain (New York). I ended up going for the marble (I'm a chocolate lover at heart  even though I did momentarily pause to consider the seasonal pumpkin flavor).   The cheesecake was so  so good. Perfect texture and consistency  just cheese-like enough to 

This location does not live up to its standards set by Maryland locations servings are nowhere near ass big and the food is not as fresh. It also cost a tad bit more if you are from D.C.  Maryland Va area and know about Sardis don't expect it to be exactly the same. I wouldn't eat here if I hadn't been to the other locations which are all great. Sorry Philadelphia you don't get the real Sardis experience!
Having a great experience for dinner involves many things: Food  Service  Company  Ambiance etc...  Looking at the Branzino Yelp's ratings it is all over the place from five to one stars. After my dinner last week  I understand why. The main gripe that people have is about SERVICE. However service comes from the top (management) and I believe this is where the problem comes from for Branzino. Every time I went I was never greeted in or out by a member of the management.  Last week  our food was good to very good: The Branzino and the pork were 5 stars  the appetizers 3 and the dessert

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [20]:
#Split train test for testing the model later
vld_size=0.15
X_train, X_valid, y_train, y_valid = train_test_split(reviews_df['Reviews'], yelp_reviews_df['Business_Id'], test_size = vld_size) 


# Create two tables of user, text and bussiness

In [21]:
reviews_df['Business_Id'].count()

8303

In [22]:
newreviews_df = reviews_df[reviews_df['Business_Id'].isin(restaurants_df["Business_Id"])]
newreviews_df['Business_Id'].count()

415

In [23]:
newreviews_df.head(25)

Unnamed: 0,Business_Id,User_Id,Rating,Reviews
391,ZKPrXH_GNW_AtZ31tP3NmA,uIHI_BzJcNMxFAZA5bYeXg,4.0,Recently visited dinner daughter visiting Univ...
392,ZKPrXH_GNW_AtZ31tP3NmA,s9FrHlk9kPnfSAdAgKNtWA,4.0,love place 2 reasons 1 husband proposed 2 amaz...
393,ZKPrXH_GNW_AtZ31tP3NmA,eFXy9H9H2tBfnyNahNUUEQ,4.0,Gotta love White Dog food service reliably goo...
394,ZKPrXH_GNW_AtZ31tP3NmA,ETyduRMWNWg7cjoZaRxmaQ,4.0,got chicken sandwich broccoli rabe side salad ...
395,ZKPrXH_GNW_AtZ31tP3NmA,zCDU0cm2K_R7HozWqTgm0w,4.0,Mold hot dog ordered mold bun Im paying 750 ho...
397,ZKPrXH_GNW_AtZ31tP3NmA,d2SmmiFbMXUxT5-cTzu0OQ,4.0,happened White Dog Cafe First time friend Dans...
398,ZKPrXH_GNW_AtZ31tP3NmA,nBdv5Br4G8pDWXXqSXhUug,4.0,Hello Last night wonderful meal tuna tartar du...
399,ZKPrXH_GNW_AtZ31tP3NmA,T9aisv9FFuEbO9hSgi-efw,4.0,wonderful little spot University City offers c...
611,kVTHnhgYUw-Pmr7wgDB4_g,uIMcOM39MNJfkEFVDWu2-A,3.5,Seating horrible came without reservation half...
612,kVTHnhgYUw-Pmr7wgDB4_g,YnbiDyzD3NpBTxeMQdtzDA,3.5,heard great things Distrito dying go Overall g...


In [24]:
userid_df = newreviews_df[['User_Id','Reviews']]
business_df = newreviews_df[['Business_Id', 'Reviews']]

In [25]:
userid_df.head()

Unnamed: 0,User_Id,Reviews
391,uIHI_BzJcNMxFAZA5bYeXg,Recently visited dinner daughter visiting Univ...
392,s9FrHlk9kPnfSAdAgKNtWA,love place 2 reasons 1 husband proposed 2 amaz...
393,eFXy9H9H2tBfnyNahNUUEQ,Gotta love White Dog food service reliably goo...
394,ETyduRMWNWg7cjoZaRxmaQ,got chicken sandwich broccoli rabe side salad ...
395,zCDU0cm2K_R7HozWqTgm0w,Mold hot dog ordered mold bun Im paying 750 ho...


In [26]:
business_df.head()

Unnamed: 0,Business_Id,Reviews
391,ZKPrXH_GNW_AtZ31tP3NmA,Recently visited dinner daughter visiting Univ...
392,ZKPrXH_GNW_AtZ31tP3NmA,love place 2 reasons 1 husband proposed 2 amaz...
393,ZKPrXH_GNW_AtZ31tP3NmA,Gotta love White Dog food service reliably goo...
394,ZKPrXH_GNW_AtZ31tP3NmA,got chicken sandwich broccoli rabe side salad ...
395,ZKPrXH_GNW_AtZ31tP3NmA,Mold hot dog ordered mold bun Im paying 750 ho...


In [27]:
userid_df = userid_df.groupby('User_Id').agg({'Reviews': ' '.join})
business_df = business_df.groupby('Business_Id').agg({'Reviews': ' '.join})

In [28]:
userid_df.head()

Unnamed: 0_level_0,Reviews
User_Id,Unnamed: 1_level_1
-5QrmUZTvniwryx3l3JM8g,favorite trendy ramen spot comfortable atmosph...
02EPgkmrh39Dr7uXc1bqTQ,Food excellent cheap quality quantity waitress...
0DB3Irpf_ETVXu_Ou9vPow,used live right next Desi Chaat House pretty m...
0YmEeQEzqwMwuWKvf3PTNA,got Philly STARVING Yelp place eat Tacos Memo ...
0dcknuP3VLaO_Q6ugNmLQA,eat coin aint cheap business better believe fa...


# User Tfidf Vectorizer with 5000 Features (represent 88% of all words)

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [30]:
#userid vectorizer
userid_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
userid_vectors = userid_vectorizer.fit_transform(userid_df['Reviews'])
userid_vectors.shape

(387, 4786)

In [31]:
userid_vectors

<387x4786 sparse matrix of type '<class 'numpy.float64'>'
	with 19738 stored elements in Compressed Sparse Row format>

In [32]:
#Business id vectorizer
businessid_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
businessid_vectors = businessid_vectorizer.fit_transform(business_df['Reviews'])
businessid_vectors.shape

(50, 4786)

# Matrix Factorization

In [33]:
userid_rating_matrix = pd.pivot_table(newreviews_df, values='Rating', index=['User_Id'], columns=['Business_Id'])
userid_rating_matrix.shape

(387, 50)

In [34]:
userid_rating_matrix.head()

Business_Id,-5Rah4ZvWsDu4oilUZxhtw,14ZGwnDyydXdSBsLXpSUrA,3gVSrS4kffGGZT8oXHsIcw,41LEg4ZgFDVnZfToxGXiGA,5ICrDkwtX4ykKOLVJ1tFJA,6GXMHrB8u-3nq87zE1Av0w,6_LnAQQ0-mml8YgpfRjGuA,6ewV-e7-39oqYUq3yZuIyw,8yGZhBwFFLtOVLY9Do8ohQ,99e7bysta1myyrQogFEWUQ,...,lS42krxXMm-HIk7dntRsKQ,pgri9CUZXGy9hi6UMMQR6A,qtyNbCXut-RQnnEQNJ9UzA,s2JiYzE7i68cXIV6YEcVTw,uB12vX5p9T8z0g2Eq3bBBQ,uE40984_YDgVvPeRpFcCaQ,uvj_tGQrGDSVXjNU4pHjCA,yG5tkMER9Ftkv4RDwMaQuA,zU5PPd9j4bYC_MepNSEDcA,ziyXteCmNmRprs10buP2iQ
User_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5QrmUZTvniwryx3l3JM8g,,,,,,,,,,,...,,,,,,,,,,
02EPgkmrh39Dr7uXc1bqTQ,,,,,,,,,,,...,,,,,,,,,,4.0
0DB3Irpf_ETVXu_Ou9vPow,,,,,,,,,,,...,,,,,,,,,,
0YmEeQEzqwMwuWKvf3PTNA,,,,,,,,,,,...,,,,,,,,,,
0dcknuP3VLaO_Q6ugNmLQA,,,,,,,,,,,...,,,,,,,,,,


In [35]:
P = pd.DataFrame(userid_vectors.toarray(), index=userid_df.index, columns=userid_vectorizer.get_feature_names())
Q = pd.DataFrame(businessid_vectors.toarray(), index=business_df.index, columns=businessid_vectorizer.get_feature_names())


In [36]:
Q.head()

Unnamed: 0_level_0,0,030503062010,04,05,08,09,1,10,100,1012,...,étaient,、,。,不喜欢的人会很不喜欢,可以提前跟店家说一下不要加,唯一的缺憾是冰粉上面会加橙皮以及其他一些类似五仁的材料,红油,食物非常正宗,麻酱和红糖用得都很正,，
Business_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5Rah4ZvWsDu4oilUZxhtw,0.0,0.0,0.0,0.037209,0.0,0.0,0.0,0.02148,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14ZGwnDyydXdSBsLXpSUrA,0.0,0.0,0.058513,0.0,0.058513,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3gVSrS4kffGGZT8oXHsIcw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41LEg4ZgFDVnZfToxGXiGA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5ICrDkwtX4ykKOLVJ1tFJA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Gradient Decent Optimization

In [37]:
def matrix_factorization(R, P, Q, steps=25, gamma=0.001,lamda=0.02):
    for step in range(steps):
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    eij=R.loc[i,j]-np.dot(P.loc[i],Q.loc[j])
                    P.loc[i]=P.loc[i]+gamma*(eij*Q.loc[j]-lamda*P.loc[i])
                    Q.loc[j]=Q.loc[j]+gamma*(eij*P.loc[i]-lamda*Q.loc[j])
        e=0
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    e= e + pow(R.loc[i,j]-np.dot(P.loc[i],Q.loc[j]),2)+lamda*(pow(np.linalg.norm(P.loc[i]),2)+pow(np.linalg.norm(Q.loc[j]),2))
        if e<0.001:
            break
        
    return P,Q

In [38]:
%%time
P, Q = matrix_factorization(userid_rating_matrix, P, Q, steps=25, gamma=0.001,lamda=0.02)

CPU times: user 1min 14s, sys: 8.11 s, total: 1min 23s
Wall time: 31.4 s


In [39]:
Q.head()

Unnamed: 0_level_0,0,030503062010,04,05,08,09,1,10,100,1012,...,étaient,、,。,不喜欢的人会很不喜欢,可以提前跟店家说一下不要加,唯一的缺憾是冰粉上面会加橙皮以及其他一些类似五仁的材料,红油,食物非常正宗,麻酱和红糖用得都很正,，
Business_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-5Rah4ZvWsDu4oilUZxhtw,0.0,0.0,0.0,0.042769,0.0,0.0,0.0,0.03229334,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14ZGwnDyydXdSBsLXpSUrA,9.174749e-12,5.411205e-08,0.09292148,0.0,0.09292148,0.0,8.811944e-12,0.006898484,1.308731e-15,1.359473e-11,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3gVSrS4kffGGZT8oXHsIcw,6.377822e-08,4.802993e-08,2.348853e-11,0.0,2.348853e-11,0.0,6.203674e-08,9.758189e-08,1.688802e-11,9.468977e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41LEg4ZgFDVnZfToxGXiGA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5ICrDkwtX4ykKOLVJ1tFJA,0.0,0.0,0.0,0.0,0.0,0.0,0.0003342382,0.005173488,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
# Here we can view the principal words that are mentioned about the restaurant.
Q.iloc[7].sort_values(ascending=False).head(10)

brunch       0.241662
toast        0.211703
eggs         0.183513
french       0.176270
fries        0.174674
challah      0.174463
app          0.173930
olives       0.170844
pineapple    0.170172
came         0.161119
Name: 6ewV-e7-39oqYUq3yZuIyw, dtype: float64

In [41]:
# Store P, Q and vectorizer in pickle file
import pickle
output = open('yelp_recommendation_model_8.pkl', 'wb')
pickle.dump(P,output)
pickle.dump(Q,output)
pickle.dump(userid_vectorizer,output)
output.close()

# Run prediction according User's preference

In [42]:
words = "burger fries"
recommendedlist = []

test_df= pd.DataFrame([words], columns=['Reviews'])
test_df['Reviews'] = test_df['Reviews'].apply(text_process)
test_vectors = userid_vectorizer.transform(test_df['Reviews'])
test_v_df = pd.DataFrame(test_vectors.toarray(), index=test_df.index, columns=userid_vectorizer.get_feature_names())

predictItemRating=pd.DataFrame(np.dot(test_v_df.loc[0],Q.T),index=Q.index,columns=['Rating'])
foundRestaurants=pd.DataFrame.sort_values(predictItemRating,['Rating'],ascending=[0])[:7]

for i in foundRestaurants.index:
    name = restaurants_df[restaurants_df['Business_Id']==i]['name'].iloc[0]
    categories =restaurants_df[restaurants_df['Business_Id']==i]['categories'].iloc[0]
    latitude = restaurants_df[restaurants_df['Business_Id']==i]['latitude'].iloc[0]
    longitude = restaurants_df[restaurants_df['Business_Id']==i]['longitude'].iloc[0]
    rating = str(restaurants_df[restaurants_df['Business_Id']==i]['stars_business'].iloc[0])
    
    case = {'Name': name, 'Categories': categories, 'Latitude': latitude, 'Longitude': longitude, 'Rating' : rating}
    recommendedlist.append(case)
    print(restaurants_df[restaurants_df['Business_Id']==i]['name'].iloc[0])
    print(restaurants_df[restaurants_df['Business_Id']==i]['categories'].iloc[0])
    print(restaurants_df[restaurants_df['Business_Id']==i]['latitude'].iloc[0])
    print(restaurants_df[restaurants_df['Business_Id']==i]['longitude'].iloc[0])
    print(str(restaurants_df[restaurants_df['Business_Id']==i]['stars_business'].iloc[0])+ ' ' +str(restaurants_df[restaurants_df['Business_Id']==i]['review_count'].iloc[0]))   
    print('')

topRecommend_df = pd.DataFrame (recommendedlist)
topRecommend_df.head(7)

burger fries
Bobby's Burger Palace
Sandwiches, American (Traditional), Burgers, Restaurants, Salad
39.954086811
-75.2009592271
3.5 788

Lee's Deli
Sandwiches, Burgers, Delis, Restaurants, Local Flavor
39.9484414
-75.2171163
4.5 175

HipCityVeg - University City
Sandwiches, Food, Burgers, Restaurants, Vegan, Desserts
39.953549
-75.202991
4.0 314

New Deck Tavern
Irish, Gastropubs, Pubs, American (Traditional), Nightlife, Bars, Restaurants
39.9535171
-75.1926364399
3.5 419

Sabrina's Café
Breakfast & Brunch, Restaurants, Vegetarian, American (New), American (Traditional), Comfort Food, Cafes
39.9598792
-75.1906815
4.0 833

Hummus Grill
Specialty Food, Food, Sandwiches, Middle Eastern, Vegetarian, Ethnic Food, Restaurants, Mediterranean, Greek, Salad
39.954349
-75.2021586
4.0 372

Renata's Kitchen
Breakfast & Brunch, Caterers, Tapas/Small Plates, Event Planning & Services, Middle Eastern, Mediterranean, Restaurants, Greek
39.9496602
-75.2026672
4.0 203



Unnamed: 0,Name,Categories,Latitude,Longitude,Rating
0,Bobby's Burger Palace,"Sandwiches, American (Traditional), Burgers, R...",39.954087,-75.200959,3.5
1,Lee's Deli,"Sandwiches, Burgers, Delis, Restaurants, Local...",39.948441,-75.217116,4.5
2,HipCityVeg - University City,"Sandwiches, Food, Burgers, Restaurants, Vegan,...",39.953549,-75.202991,4.0
3,New Deck Tavern,"Irish, Gastropubs, Pubs, American (Traditional...",39.953517,-75.192636,3.5
4,Sabrina's Café,"Breakfast & Brunch, Restaurants, Vegetarian, A...",39.959879,-75.190681,4.0
5,Hummus Grill,"Specialty Food, Food, Sandwiches, Middle Easte...",39.954349,-75.202159,4.0
6,Renata's Kitchen,"Breakfast & Brunch, Caterers, Tapas/Small Plat...",39.94966,-75.202667,4.0


In [43]:
import json
import geojson
from geojson import Feature, FeatureCollection, Point


def Data2geojson(df):
    features = []
    insert_features = lambda X: features.append(
                    geojson.Feature(geometry=geojson.Point((X["Longitude"],
                                                    X["Latitude"])),
                    properties=dict(name = X["Name"],
                                    description = X["Categories"],
                                    rating = X['Rating']))
                    )
    df.apply(insert_features, axis=1)
    with open('top7recomendations.geojson', 'w', encoding='utf8') as fp:
        geojson.dump(geojson.FeatureCollection(features), fp, sort_keys=True, ensure_ascii=False,indent=4)

Data2geojson(topRecommend_df)   
   

In [47]:
test_coordinates = {
    'User':[0],
    'latitude': [39.95807],
    'longitude' : [-75.21626]
}
user = pd.DataFrame(test_coordinates)

In [48]:
px.set_mapbox_access_token(MAPBOX_API_KEY)
fig= px.scatter_mapbox(topRecommend_df, lat="Latitude", 
                  lon="Longitude", color="Rating", hover_name="Name", size_max=30, zoom= 13, width=1200, height=800)
fig.add_scattermapbox(lat=user["latitude"], lon=user["longitude"]).update_traces(dict(mode='markers', marker=dict(size=15)))


In [None]:
px.set_mapbox_access_token(MAPBOX_API_KEY)
px.scatter_mapbox(topRecommend_df, lat="Latitude", 
                  lon="Longitude", color="Rating",hover_data=["Name","Latitude","Longitude"],  zoom= 10, width=1200, height=800)