In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# This is from Google Review
df = pd.read_csv('GoogleReview_data_cleaned.csv')

# This is from Tripasor Review
# df = pd.read_csv('TripAdvisor_data_cleaned.csv')
df = df[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]


df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [18]:
#Check Null values in Dataframe
df.isnull().sum()

Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64

In [19]:
df.shape

(222020, 5)

In [21]:
# Copy/Prepare data
df_data = df[['Author', 'Review', 'Rating', 'Restaurant', 'Location']]
df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [6]:
# Consider only those author who have rated more than 10 restaurants and those restaurant which are having at least 20 ratings
x = df_data.groupby('Author').count()['Rating'] > 10
quality_author  = x[x].index

df_data = df_data[df_data['Author'].isin(quality_author)]

y = df_data.groupby('Restaurant')['Rating'].count() >= 20
famous_restaurants = y[y].index

final = df_data[df_data['Restaurant'].isin(famous_restaurants)]

final.head(20)

Unnamed: 0,Author,Review,Rating,Restaurant,Location
69,Secret Moments,Nice foods with comfort environment in Tandoor...,5.0,Tandoor Grill,Ipoh
85,Adelena Dass,Best north Indian dishes in town!! Best place ...,5.0,Tandoor Grill,Ipoh
93,Andrew Lee,Fantastic Indian establishment! I highly recom...,5.0,Tandoor Grill,Ipoh
98,Inês Pereira,"Delicious food! Required more staff, when crow...",4.0,Tandoor Grill,Ipoh
108,Benjamin Bromberg,Fantastic food and great service. Will definit...,5.0,Tandoor Grill,Ipoh
109,Kames Logan,This is review 1.1 an update from my previous ...,5.0,Tandoor Grill,Ipoh
143,Victor Lim,Nice ambience for a big or small groups as it ...,3.0,Tandoor Grill,Ipoh
148,ck lee,Nice food with good dining ambiance,4.0,Tandoor Grill,Ipoh
159,Lisa Khor,Food was good. But the services no1. Table of ...,1.0,Tandoor Grill,Ipoh
192,Adr ian,A good place for northern indian food. Classy ...,4.0,Tandoor Grill,Ipoh


In [7]:
# Create a pivot table: Restaurant as index, Author as column, Rating as value
# Calculate similarity score between restaurants using cosine_similarity function

pt = final.pivot_table(index = 'Restaurant', columns = 'Author', values = 'Rating').fillna(0)
pt.head(5)

Author,5525 Gunner,6od5p33d,A 10,A K,A L,A P,A Y,A.,A.L Lim,AL Lim,...,κεηηγsκ,さなえ,パイパイ,レミィRemmy,兴哥Heng Gor,几米林Jimmy,小虫WeiXiang,暝纥Enoch,洪佳武,纯粹享
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
362 Heong Peah 362炭烧香饼,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 Spice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Using cosine similarity metrics
from sklearn.metrics.pairwise import cosine_similarity
similarity_scores = cosine_similarity(pt)

def recommend(restaurantName):
    index = np.where(pt.index == restaurantName)[0][0]
    similar_restaurants = sorted(enumerate(similarity_scores[index]),key= lambda x: x[1], reverse =True)[1:6]
    
    for i in similar_restaurants:
        print(pt.index[i[0]])

In [9]:
similarity_scores

array([[1.        , 0.02563365, 0.        , ..., 0.        , 0.        ,
        0.03137508],
       [0.02563365, 1.        , 0.03147948, ..., 0.04542827, 0.04084478,
        0.        ],
       [0.        , 0.03147948, 1.        , ..., 0.04118568, 0.08678963,
        0.        ],
       ...,
       [0.        , 0.04542827, 0.04118568, ..., 1.        , 0.        ,
        0.04337058],
       [0.        , 0.04084478, 0.08678963, ..., 0.        , 1.        ,
        0.        ],
       [0.03137508, 0.        , 0.        , ..., 0.04337058, 0.        ,
        1.        ]])

In [10]:
similarity_scores.shape

(420, 420)

In [16]:
recommend("Dancing Fish")

The Ming Room 名城酒家
Din Tai Fung 鼎泰豐 at The Gardens Mall
Kayra Authentic Kerala Cuisine @Bangsar Village
Cor Blimey British Fish and Chips (Damansara Uptown)
La Boca Latino Bar


In [15]:
recommend("After Black")

Restaurant Ban Lee Siang
Antipodean @ Atria
Nancy's Kitchen
March Azalea Kitchen
Antipodean Cafe


In [17]:
recommend("Din Tai Fung 鼎泰豐 at 1 Utama Shopping Centre")

Din Tai Fung 鼎泰豐 at The Gardens Mall
Thai Hou Sek @ 1 Utama
Table & Apron
DC Restaurant
UROKO Japanese Cuisine
