In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# This is from Google Review
df = pd.read_csv('GoogleReview_data_cleaned.csv')

# This is from Tripasor Review
# df = pd.read_csv('TripAdvisor_data_cleaned.csv')
df = df[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]


df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [5]:
#Check Null values in Dataframe
df.isnull().sum()

Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64

In [6]:
df.shape

(222020, 5)

In [7]:
# Copy/Prepare data
df_data = df[['Author', 'Review', 'Rating', 'Restaurant', 'Location']]
df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [8]:
# Consider only those author who have rated more than 10 restaurants and those restaurant which are having at least 20 ratings
x = df_data.groupby('Author').count()['Rating'] > 10
quality_author  = x[x].index

df_data = df_data[df_data['Author'].isin(quality_author)]

y = df_data.groupby('Restaurant')['Rating'].count() >= 20
famous_restaurants = y[y].index

final = df_data[df_data['Restaurant'].isin(famous_restaurants)]

final.head(20)

Unnamed: 0,Author,Review,Rating,Restaurant,Location
69,Secret Moments,Nice foods with comfort environment in Tandoor...,5.0,Tandoor Grill,Ipoh
85,Adelena Dass,Best north Indian dishes in town!! Best place ...,5.0,Tandoor Grill,Ipoh
93,Andrew Lee,Fantastic Indian establishment! I highly recom...,5.0,Tandoor Grill,Ipoh
98,Inês Pereira,"Delicious food! Required more staff, when crow...",4.0,Tandoor Grill,Ipoh
108,Benjamin Bromberg,Fantastic food and great service. Will definit...,5.0,Tandoor Grill,Ipoh
109,Kames Logan,This is review 1.1 an update from my previous ...,5.0,Tandoor Grill,Ipoh
143,Victor Lim,Nice ambience for a big or small groups as it ...,3.0,Tandoor Grill,Ipoh
148,ck lee,Nice food with good dining ambiance,4.0,Tandoor Grill,Ipoh
159,Lisa Khor,Food was good. But the services no1. Table of ...,1.0,Tandoor Grill,Ipoh
192,Adr ian,A good place for northern indian food. Classy ...,4.0,Tandoor Grill,Ipoh


In [9]:
# Create a pivot table: Restaurant as index, Author as column, Rating as value
# Calculate similarity score between restaurants using cosine_similarity function

pt = final.pivot_table(index = 'Restaurant', columns = 'Author', values = 'Rating').fillna(0)
pt.head(5)

Author,5525 Gunner,6od5p33d,A 10,A K,A L,A P,A Y,A.,A.L Lim,AL Lim,...,κεηηγsκ,さなえ,パイパイ,レミィRemmy,兴哥Heng Gor,几米林Jimmy,小虫WeiXiang,暝纥Enoch,洪佳武,纯粹享
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
362 Heong Peah 362炭烧香饼,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 Spice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Using cosine similarity metrics
from sklearn.metrics.pairwise import cosine_similarity
similarity_scores = cosine_similarity(pt)

def recommend(restaurantName):
    index = np.where(pt.index == restaurantName)[0][0]
    similar_restaurants = sorted(enumerate(similarity_scores[index]),key= lambda x: x[1], reverse =True)[1:6]
    
    for i in similar_restaurants:
        print(pt.index[i[0]])

NameError: name 'similar_restaurants' is not defined

In [11]:
similarity_scores

array([[1.        , 0.02563365, 0.        , ..., 0.        , 0.        ,
        0.03137508],
       [0.02563365, 1.        , 0.03147948, ..., 0.04542827, 0.04084478,
        0.        ],
       [0.        , 0.03147948, 1.        , ..., 0.04118568, 0.08678963,
        0.        ],
       ...,
       [0.        , 0.04542827, 0.04118568, ..., 1.        , 0.        ,
        0.04337058],
       [0.        , 0.04084478, 0.08678963, ..., 0.        , 1.        ,
        0.        ],
       [0.03137508, 0.        , 0.        , ..., 0.04337058, 0.        ,
        1.        ]])

In [12]:
similarity_scores.shape

(420, 420)

In [13]:
recommend("Dancing Fish")

The Ming Room 名城酒家
Din Tai Fung 鼎泰豐 at The Gardens Mall
Kayra Authentic Kerala Cuisine @Bangsar Village
Cor Blimey British Fish and Chips (Damansara Uptown)
La Boca Latino Bar


In [14]:
recommend("After Black")

Restaurant Ban Lee Siang
Antipodean @ Atria
Nancy's Kitchen
March Azalea Kitchen
Antipodean Cafe


In [15]:
recommend("Din Tai Fung 鼎泰豐 at 1 Utama Shopping Centre")

Din Tai Fung 鼎泰豐 at The Gardens Mall
Thai Hou Sek @ 1 Utama
Table & Apron
DC Restaurant
UROKO Japanese Cuisine


In [22]:
# Using cosine similarity metrics
from sklearn.metrics.pairwise import cosine_similarity
similarity_scores = cosine_similarity(pt)

def recommend(restaurantName):
    index = np.where(pt.index == restaurantName)[0][0]
    similar_restaurants = sorted(enumerate(similarity_scores[index]),key= lambda x: x[1], reverse =True)
    
    for i in similar_restaurants:
        print(pt.index[i[0]])

In [28]:
similarity_scores = cosine_similarity(pt)

index = np.where(pt.index == "After Black")[0][0]
similar_restaurants = sorted(enumerate(similarity_scores[index]),key= lambda x: x[1], reverse =True)

similar_restaurants

[(6, 1.0),
 (270, 0.17093429698742052),
 (15, 0.14947833402795319),
 (217, 0.14599447664678164),
 (192, 0.13677530110804828),
 (17, 0.12778076106105987),
 (22, 0.12370857840031563),
 (326, 0.11997563517766854),
 (214, 0.11973018608653986),
 (181, 0.11614766333760625),
 (269, 0.10888623497874521),
 (278, 0.10887786996108006),
 (377, 0.10332848037148004),
 (28, 0.09892427672768718),
 (97, 0.09659092088013585),
 (235, 0.09646651336220545),
 (241, 0.09326958300862695),
 (200, 0.09190182776172597),
 (170, 0.09143579700806714),
 (71, 0.09075168012986737),
 (311, 0.08971471580693186),
 (111, 0.08962214298964416),
 (264, 0.0873704056661038),
 (106, 0.08703249857174102),
 (221, 0.08690837911446843),
 (139, 0.08476366113149054),
 (387, 0.08446717229993575),
 (63, 0.08295018954139965),
 (376, 0.08112327500571223),
 (180, 0.07997867519620919),
 (317, 0.0799684453457937),
 (157, 0.07950376313029447),
 (386, 0.0782799542363719),
 (411, 0.07802186755377293),
 (81, 0.07784110748380277),
 (148, 0.07714

In [None]:
similar_restaurants[similar_restaurants["Location"] == "Ipoh"].head(5)

for i in similar_restaurants:
        print(pt.index[i[0]])

In [62]:
from sklearn.metrics.pairwise import linear_kernel
similarity_scores_lk = linear_kernel(pt)
def recommendByLin(restaurantName):
    index = np.where(pt.index== restaurantName)[0][0]
    similar_restaurants = sorted(enumerate(similarity_scores_lk[index]), key = lambda x: x[1], reverse = True)[1:]
    newdata = restaurants_data
#     print(similar_restaurants)
    indices = list()
    for i in similar_restaurants:
        newdata["Score"] = pt.index[i[1]]
#         print(pt.index[i[0]])
        indices.append(pt.index[i[0]])

    newdata = restaurants_data[restaurants_data['Location'] == 'KL']
    newdata = newdata.sort_values('Score', ascending = False).head(10)
    print(newdata)
    return indices

In [63]:
# Get the restaurants in this dataset without duplicates
restaurants_data = df.drop_duplicates(subset=['Restaurant'])
# no. of restaurants = 420
restaurants_data = restaurants_data.reset_index()

In [64]:
print("Recommend Using Linear Kernel")
indices = recommendByLin('Dancing Fish')
similar_restaurants = pd.DataFrame(columns=['Author', 'Rating', 'Review', 'Restaurant', 'Location'])
for i in range(len(indices)):
    similar_restaurants = similar_restaurants.append(restaurants_data[restaurants_data['Restaurant'] == indices[i]], ignore_index = True)
# similar_restaurants
#     print(restaurants_data.iloc[indices[i]])


Recommend Using Linear Kernel
[(356, 212.0), (80, 198.0), (12, 143.0), (110, 136.0), (146, 130.0), (162, 123.0), (239, 121.0), (38, 120.0), (15, 112.0), (65, 111.0), (30, 108.0), (347, 106.0), (384, 96.0), (139, 94.0), (154, 93.0), (28, 92.0), (163, 90.0), (19, 88.0), (24, 86.0), (214, 85.0), (184, 82.0), (195, 82.0), (16, 81.0), (287, 81.0), (317, 80.0), (291, 79.0), (204, 78.0), (17, 76.0), (319, 76.0), (366, 76.0), (40, 75.0), (253, 74.0), (244, 73.0), (344, 73.0), (172, 71.0), (297, 70.0), (412, 69.0), (288, 68.0), (57, 66.0), (332, 66.0), (241, 65.0), (254, 65.0), (260, 65.0), (262, 65.0), (376, 65.0), (122, 64.0), (221, 64.0), (237, 62.0), (133, 61.0), (354, 61.0), (67, 60.0), (71, 60.0), (93, 60.0), (144, 60.0), (301, 60.0), (275, 59.0), (61, 57.0), (85, 57.0), (165, 57.0), (285, 57.0), (79, 56.0), (92, 56.0), (329, 56.0), (248, 54.0), (266, 54.0), (159, 52.0), (167, 52.0), (229, 52.0), (310, 52.0), (405, 52.0), (87, 51.0), (185, 51.0), (56, 50.0), (148, 50.0), (149, 50.0), (352

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [43]:
newdata = similar_restaurants[similar_restaurants['Location'] == "Petaling Jaya"]
newdata

Unnamed: 0,Author,Rating,Review,Restaurant,Location,index
1,Jean Loh,1.0,Use to be my favourite restaurant for dim sum ...,The Ming Room 名城酒家,Petaling Jaya,221136.0
4,Sam Chor,5.0,Food was delicious! Due to the popularity of c...,Grub by Ahong & Friends,Petaling Jaya,190350.0
5,renuka d,4.0,Had the banana flower dessert which was very s...,Kayra Authentic Kerala Cuisine @Bangsar Village,Petaling Jaya,192837.0
7,Jia Pin Lee,4.0,Came here on Sunday afternoon and it was very ...,One Half,Petaling Jaya,219986.0
8,Whisky Dev,5.0,I had heard that there was some good roast por...,Boon Signature Roast Pork,Petaling Jaya,203637.0
...,...,...,...,...,...,...
371,Sweety Bhosure,3.0,Definitely a good romantic place to plan any s...,Secret Of Louisiana at the lake,Petaling Jaya,219653.0
384,Mei Yi Yeap,5.0,My favorite sushi place in KL. Their sashimi b...,Sushiya One Utama (Ichiro Sushi Bar),Petaling Jaya,188437.0
399,Yosuke,5.0,"Nice place to chill. Opened space, friendly st...","Tom, Dick & Harry's, Oasis Square",Petaling Jaya,217415.0
400,Madeline Tan,2.0,I went there on Friday for dinner. The steak c...,Tony Roma's,Petaling Jaya,199958.0
