In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
df = pd.read_csv('GoogleReview_data_cleaned.csv')
df = df[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]

df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [5]:
#Check Null values in Dataframe
df.isnull().sum()

Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64

In [6]:
df.shape

(222020, 5)

In [7]:
# Copy/Prepare data
df_data = df[['Author', 'Review', 'Rating', 'Restaurant', 'Location']]
df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [9]:
# Building User-based Collaborative Filtering
author_restaurant_matrix = df_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
# cikgusally,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Ativ Mindworks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#GJBlane RICE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#JL_King_Of_Music,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#MyNameIsMuna #MUNALICIOUS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
restaurant_author_matrix = author_restaurant_matrix.transpose()
restaurant_author_matrix.head(10)

Author,# cikgusally,#Ativ Mindworks,#GJBlane RICE,#JL_King_Of_Music,#MyNameIsMuna #MUNALICIOUS,#Ramen #Mee,#SamuelLabo,#SuPeR# (SuPerPauL),#ryuki2517,#바나나스프,...,홍성훈,홍우진,화성인,황명설,황무환,효니의 관심사Honey’s interest,흰호랑이,ﹰﹰChoon Woo,Ｎａｋａ Ｍｕｒａ,ｊ ｍ
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'D' Selera Kelate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1919 Restaurant Ipoh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Chulia Lane Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21 Bistro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218 Hainan Lor Mee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27@cove,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 :15 Auntie Hong's Cooking,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Identify Ratings For Restaurants Per Author
restaurant_ratings = restaurant_author_matrix['5525 Gunner']
restaurant_ratings.head(10)

In [None]:
restaurant_ratings = restaurant_author_matrix['几米林Jimmy']
restaurant_ratings.head(10)

In [None]:
# Find correlation between authors based on the ratings of restaurants
similar_authors = restaurant_author_matrix.corrwith(restaurant_ratings)

# Create a dataframe with similar movies as the index column and correlation as another column
similar_authors = pd.DataFrame(similar_authors, columns = ['correlation'])
similar_authors.head(10)

In [None]:
# Recommending Restaurants Based on The Most Similar Authors
most_similar_authors = similar_authors.sort_values('correlation', ascending = False).iloc[1:11]
most_similar_authors

In [None]:
# Extract Author of the most similar users
authors = most_similar_authors.index.values.tolist()
authors[0]

In [None]:
recommendation = df_data[df_data['Author'] == authors[0]]
recommendation.head(10)

In [None]:
# Data Frame Slicing by Condition
recommendation = df_data.loc[(df_data['Author'] == authors[0]) & (df_data['Rating'] > 0), ['Restaurant', 'Rating']]
recommendation.head(10)