In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# This is from Google Review
df = pd.read_csv('GoogleReview_data_cleaned.csv')

# This is from Tripasor Review
# df = pd.read_csv('TripAdvisor_data_cleaned.csv')
df = df[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]


df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [4]:
#Check Null values in Dataframe
df.isnull().sum()

Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64

In [5]:
df.shape

(222020, 5)

In [6]:
# Copy/Prepare data
df_data = df[['Author', 'Review', 'Rating', 'Restaurant', 'Location']]
df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [7]:
# Building User-Item Interactions Matrix
author_restaurant_matrix = df_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
# cikgusally,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Ativ Mindworks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#GJBlane RICE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#JL_King_Of_Music,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#MyNameIsMuna #MUNALICIOUS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
#Building Item-based Collaborative Filtering 
author_ratings = author_restaurant_matrix['越南小廚 V NAM KITCHEN']
author_ratings.head(10)

Author
# cikgusally                  0.0
#Ativ Mindworks               0.0
#GJBlane RICE                 0.0
#JL_King_Of_Music             0.0
#MyNameIsMuna #MUNALICIOUS    0.0
#Ramen #Mee                   0.0
#SamuelLabo                   0.0
#SuPeR# (SuPerPauL)           0.0
#ryuki2517                    0.0
#바나나스프                        0.0
Name: 越南小廚 V NAM KITCHEN, dtype: float64

In [9]:
author_ratings = author_restaurant_matrix['我家餐館Our Kitchen Nyonya Restaurant']
author_ratings.head(10)

Author
# cikgusally                  0.0
#Ativ Mindworks               0.0
#GJBlane RICE                 0.0
#JL_King_Of_Music             0.0
#MyNameIsMuna #MUNALICIOUS    0.0
#Ramen #Mee                   0.0
#SamuelLabo                   0.0
#SuPeR# (SuPerPauL)           0.0
#ryuki2517                    0.0
#바나나스프                        0.0
Name: 我家餐館Our Kitchen Nyonya Restaurant, dtype: float64

In [10]:
# Finding Correlations Between A Selected Restaurant And All Other Restaurant
similar_restaurants = author_restaurant_matrix.corrwith(author_ratings)
similar_restaurants

Restaurant
'D' Selera Kelate                                                -0.000284
16th St. Cafe                                                    -0.001025
1919 Restaurant Ipoh                                              0.003344
20 Chulia Lane Cafe                                              -0.000703
21 Bistro                                                        -0.000090
                                                                    ...   
田園粥火锅 Farmland Porridge Steamboat                                -0.000466
相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）   -0.000504
越南小廚 V NAM KITCHEN                                               -0.001184
青山角 / Green Hill Corner                                          -0.000767
食得福美食中心Cedar Point Food Centre                                    0.006794
Length: 1322, dtype: float64

In [11]:
# Create a dataframe with similar restaurants as the index column and name another column as correlation
similar_restaurants = pd.DataFrame(similar_restaurants, columns = ['correlation'])
similar_restaurants.head(10)

Unnamed: 0_level_0,correlation
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,-0.000284
16th St. Cafe,-0.001025
1919 Restaurant Ipoh,0.003344
20 Chulia Lane Cafe,-0.000703
21 Bistro,-9e-05
218 Hainan Lor Mee,-0.000764
27@cove,-0.00065
28 Food Centre,-0.001017
3 :15 Auntie Hong's Cooking,-0.000351
33 Blue Room,-0.001527


In [12]:
# Recommending the Most Similar Restaurants
similar_restaurants.sort_values('correlation', ascending = False).head(10)

Unnamed: 0_level_0,correlation
Restaurant,Unnamed: 1_level_1
我家餐館Our Kitchen Nyonya Restaurant,1.0
Breeks Cafe,0.037679
Thai Hou Sek @ 1 Utama,0.027584
Choco Choco,0.026757
Wood's,0.022542
Seng Huat,0.021644
Moh Teng Pheow Nyonya Koay,0.020016
Restoran Wong Koh Kee,0.018593
"Favola , Le Méridien Kuala Lumpur",0.016747
Restoran Fishing Village Seafood,0.016336


In [13]:
# Recommending the Most Similar Popular Restaurants
df_rating = pd.DataFrame(df_data.groupby('Restaurant')['Rating'].count())  
df_rating.head(10)

Unnamed: 0_level_0,Rating
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,11
16th St. Cafe,134
1919 Restaurant Ipoh,142
20 Chulia Lane Cafe,62
21 Bistro,1
218 Hainan Lor Mee,75
27@cove,53
28 Food Centre,136
3 :15 Auntie Hong's Cooking,18
33 Blue Room,300


In [14]:
similar_restaurants = similar_restaurants.join(df_rating['Rating']).sort_values('correlation', ascending = False)
similar_restaurants

Unnamed: 0_level_0,correlation,Rating
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1
我家餐館Our Kitchen Nyonya Restaurant,1.000000,133
Breeks Cafe,0.037679,9
Thai Hou Sek @ 1 Utama,0.027584,263
Choco Choco,0.026757,13
Wood's,0.022542,74
...,...,...
Khan’s Indian Cuisine,-0.002729,1887
Topspot Food Court,-0.002744,964
Miker Food,-0.002751,999
Antipodean @ Atria,-0.002753,971


In [15]:
# Recommending 20 Most Similar Popular Movies 
# Similar restaurants like 我家餐館Our Kitchen Nyonya Restaurant 
similar_movies_2 = similar_restaurants[similar_restaurants['Rating'] > 100].sort_values('correlation', ascending = False)
similar_movies_2.head(20)

Unnamed: 0_level_0,correlation,Rating
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1
我家餐館Our Kitchen Nyonya Restaurant,1.0,133
Thai Hou Sek @ 1 Utama,0.027584,263
Moh Teng Pheow Nyonya Koay,0.020016,300
"Favola , Le Méridien Kuala Lumpur",0.016747,197
Restoran Fishing Village Seafood,0.016336,149
北栈食馆 Bei Zhan Restaurant,0.014567,300
Monterios Portuguese Seafood Stall,0.014421,108
SamFu Restaurant,0.014213,282
Bloom by Mokmok,0.01416,209
Low Yong Moh Restaurant,0.013638,300
