In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
df = pd.read_csv('GoogleReview_data_cleaned.csv')
df = df[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]

df_business = pd.read_csv('Restaurants_KL.csv')
df.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [4]:
#Check Null values in Dataframe
df.isnull().sum()

Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64

In [5]:
df.shape


(222020, 5)

In [7]:
#Select only star and text, so to remove unnecessary words/punctiations/stopwords such as I, You, We, They
#library from nltk.corpus will be used for the stopwords

df_data = df[['Author', 'Review', 'Rating', 'Restaurant', 'Location']]

In [8]:
import string
from nltk.corpus import stopwords
stop = []
for word in stopwords.words('english'):
    s = [char for char in word if char not in string.punctuation]
    stop.append(''.join(s))

In [363]:
def text_process(mess):
    """
    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Returns a list of the cleaned text
    """
    # Check characters to see if they are in punctuation
    nopunc = [char for char in mess if char not in string.punctuation]

    # Join the characters again to form the string.
    nopunc = ''.join(nopunc)
    
    # Now just remove any stopwords
    return " ".join([word for word in nopunc.split() if word.lower() not in stop])

In [9]:
#Table for author
author_df = df_data[['Author','Review']]

#Table for restaurant
restaurant_df = df_data[['Restaurant', 'Review']]

In [10]:
author_df.head()

Unnamed: 0,Author,Review
0,Jia Pin Lee,Came here for the High Tea. Great service espe...
1,Chui Yi Lum,"5 stars for the service, even though some of t..."
2,liezel wong,"Hi, thank you for your service. But! i feel so..."
3,Nazri Nor,I have the worse buffer dinner ever so far. Th...
4,Fakru Imran's Channel,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K..."


In [11]:
restaurant_df.head()

Unnamed: 0,Restaurant,Review
0,Cuisines Restaurant,Came here for the High Tea. Great service espe...
1,Cuisines Restaurant,"5 stars for the service, even though some of t..."
2,Cuisines Restaurant,"Hi, thank you for your service. But! i feel so..."
3,Cuisines Restaurant,I have the worse buffer dinner ever so far. Th...
4,Cuisines Restaurant,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K..."


In [12]:
author_df[author_df['Author']=='Suhana S']['Review']

108899                                       My usual place
111312    A great breakfast plce for everyone. Chold fri...
123071    By reccomendation we went. Despite all the tro...
137395    We are meat lover. LOVED our Black Angus Tomah...
156859    Fine dining restaurant. My 1st time here. Got ...
159636    Food is decent italian. Waiting time is quite ...
175350    We are meat lover. LOVED our Black Angus Tomah...
188649    Its fresh from the water, freshly cooked to pe...
203416     Absolute is better.. price is on the higher side
210183    A bowl of happiness. Yes it is. It makes me ha...
216540    Its been years since i've not been here. Used ...
Name: Review, dtype: object

In [13]:
#Holding
#Join the Review with for each author and restauranat
# author_df = author_df.groupby('Author').agg({'Review': ' '.join(str(v) for v in author_df)})
# author_df.groupby(['Author']).agg({join(author_df['Review'])})

# restaurant_df = restaurant_df.groupby('Restaurant').agg({'Review': ' '.join})

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
#author vectorizer
author_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
author_vectors = author_vectorizer.fit_transform(author_df['Review'])
author_vectors.shape

(222020, 5000)

In [15]:
author_vectors

<222020x5000 sparse matrix of type '<class 'numpy.float64'>'
	with 3740425 stored elements in Compressed Sparse Row format>

In [16]:
#Restaurant vectorizer
restaurant_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=5000)
restaurant_vectors = restaurant_vectorizer.fit_transform(restaurant_df['Review'])
restaurant_vectors.shape

(222020, 5000)

In [18]:
#Matrix Factorization
author_rating_matrix = pd.pivot_table(df_data, values='Rating', index=['Author'], columns=['Restaurant'])

In [390]:
author_rating_matrix.shape

(1321, 1226)

In [19]:
author_rating_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
# cikgusally,,,,,,,,,,,...,,,,,,,,,,
#Ativ Mindworks,,,,,,,,,,,...,,,,,,,,,,
#GJBlane RICE,,,,,,,,,,,...,,,,,,,,,,
#JL_King_Of_Music,,,,,,,,,,,...,,,,,,,,,,
#MyNameIsMuna #MUNALICIOUS,,,,,,,,,,,...,,,,,,,,,,


In [26]:
# P = pd.DataFrame(author_vectors.toarray(), index=author_df.index, columns=author_vectorizer.get_feature_names())
# Q = pd.DataFrame(restaurant_vectors.toarray(), index=restaurant_df.index, columns=restaurant_vectorizer.get_feature_names())

In [None]:
# Q.head()

In [22]:
# Gradient Decent Optimization
def matrix_factorization(R, P, Q, steps=25, gamma=0.001,lamda=0.02):
    for step in range(steps):
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    eij=R.loc[i,j]-np.dot(P.loc[i],Q.loc[j])
                    P.loc[i]=P.loc[i]+gamma*(eij*Q.loc[j]-lamda*P.loc[i])
                    Q.loc[j]=Q.loc[j]+gamma*(eij*P.loc[i]-lamda*Q.loc[j])
        e=0
        for i in R.index:
            for j in R.columns:
                if R.loc[i,j]>0:
                    e= e + pow(R.loc[i,j]-np.dot(P.loc[i],Q.loc[j]),2)+lamda*(pow(np.linalg.norm(P.loc[i]),2)+pow(np.linalg.norm(Q.loc[j]),2))
        if e<0.001:
            break
        
    return P,Q

In [27]:
# %%time
# P, Q = matrix_factorization(author_rating_matrix, P, Q, steps=25, gamma=0.001,lamda=0.02)

In [310]:
# Popularity-based filtering
restaurant_num_ratings = df_data.groupby('Restaurant')['Rating'].count().reset_index().rename(columns = {'Rating': 'Num-Ratings' })
restaurant_avg_ratings = df_data.groupby('Restaurant')['Rating'].mean().reset_index().rename(columns = {'Rating': 'Avg-Ratings' })
final_rating = restaurant_num_ratings.merge(restaurant_avg_ratings , on = 'Restaurant')

In [311]:
final_rating.head(10)

Unnamed: 0,Restaurant,Num-Ratings,Avg-Ratings
0,'D' Selera Kelate,11,3.636364
1,16th St. Cafe,134,4.38806
2,1919 Restaurant Ipoh,142,4.28169
3,20 Chulia Lane Cafe,62,4.725806
4,21 Bistro,1,5.0
5,218 Hainan Lor Mee,75,4.36
6,27@cove,53,4.735849
7,28 Food Centre,136,4.102941
8,3 :15 Auntie Hong's Cooking,18,4.055556
9,33 Blue Room,300,4.376667


In [312]:
# Select Restaurant with more than 50 ratings sorted in descending order by avg-ratings
popular_restaurant = final_rating[final_rating['Num-Ratings'] > 50].sort_values(by = 'Avg-Ratings'  , ascending= False).reset_index(drop = True).head(50)

popular_restaurant.head(20)

Unnamed: 0,Restaurant,Num-Ratings,Avg-Ratings
0,CoffeeNuts Cafe,73,4.986301
1,Puree Juice,69,4.942029
2,Korean Seoul Restaurant,116,4.913793
3,"Nasi Lemak Ultra, Imut's Hall & The Black Kitchen",65,4.892308
4,AIN- ARABIA Restaurant & Cafe Langkawi,258,4.821705
5,Rubin Mardini Cafe & Restaurant,200,4.82
6,The Grand Getaway,88,4.818182
7,Sushi Hibiki,88,4.806818
8,The Argan Trees Restaurant-Moroccan and Medite...,296,4.793919
9,Feringgi Grill at Shangri-La's Rasa Sayang Res...,65,4.769231


In [313]:
# Collaborative filtering
# Consider only those author who have rated more than 10 restaurants and those restaurant which are having at least 10 ratings
x = df_data.groupby('Author').count()['Rating'] > 10
quality_author  = x[x].index

df_data = df_data[df_data['Author'].isin(quality_author)]

y = df_data.groupby('Restaurant')['Rating'].count() >= 20
famous_restaurants = y[y].index

final = df_data[df_data['Restaurant'].isin(famous_restaurants)]

final.head(20)

Unnamed: 0,Author,Review,Rating,Restaurant,Location
69,Secret Moments,Nice foods with comfort environment in Tandoor...,5.0,Tandoor Grill,Ipoh
85,Adelena Dass,Best north Indian dishes in town!! Best place ...,5.0,Tandoor Grill,Ipoh
93,Andrew Lee,Fantastic Indian establishment! I highly recom...,5.0,Tandoor Grill,Ipoh
98,Inês Pereira,"Delicious food! Required more staff, when crow...",4.0,Tandoor Grill,Ipoh
108,Benjamin Bromberg,Fantastic food and great service. Will definit...,5.0,Tandoor Grill,Ipoh
109,Kames Logan,This is review 1.1 an update from my previous ...,5.0,Tandoor Grill,Ipoh
143,Victor Lim,Nice ambience for a big or small groups as it ...,3.0,Tandoor Grill,Ipoh
148,ck lee,Nice food with good dining ambiance,4.0,Tandoor Grill,Ipoh
159,Lisa Khor,Food was good. But the services no1. Table of ...,1.0,Tandoor Grill,Ipoh
192,Adr ian,A good place for northern indian food. Classy ...,4.0,Tandoor Grill,Ipoh


In [314]:
# Create a pivot table: Restaurant as index, Author as column, Rating as value
# Calculate similarity score between restaurants using cosine_similarity function

pt = final.pivot_table(index = 'Restaurant', columns = 'Author', values = 'Rating').fillna(0)
pt.head(5)

Author,5525 Gunner,6od5p33d,A 10,A K,A L,A P,A Y,A.,A.L Lim,AL Lim,...,κεηηγsκ,さなえ,パイパイ,レミィRemmy,兴哥Heng Gor,几米林Jimmy,小虫WeiXiang,暝纥Enoch,洪佳武,纯粹享
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
362 Heong Peah 362炭烧香饼,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 Spice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [315]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_scores = cosine_similarity(pt)

def recommend(restaurantName):
    index = np.where(pt.index == restaurantName)[0][0]
    similar_restaurants = sorted(enumerate(similarity_scores[index]),key= lambda x: x[1], reverse =True)[1:6]
    
    for i in similar_restaurants:
        print(pt.index[i[0]])

In [316]:
similarity_scores

array([[1.        , 0.02563365, 0.        , ..., 0.        , 0.        ,
        0.03137508],
       [0.02563365, 1.        , 0.03147948, ..., 0.04542827, 0.04084478,
        0.        ],
       [0.        , 0.03147948, 1.        , ..., 0.04118568, 0.08678963,
        0.        ],
       ...,
       [0.        , 0.04542827, 0.04118568, ..., 1.        , 0.        ,
        0.04337058],
       [0.        , 0.04084478, 0.08678963, ..., 0.        , 1.        ,
        0.        ],
       [0.03137508, 0.        , 0.        , ..., 0.04337058, 0.        ,
        1.        ]])

In [317]:
similarity_scores.shape

(420, 420)

In [318]:
recommend("Din Tai Fung 鼎泰豐 at The Gardens Mall")

The Han Room
Tiffin's By Chef Korn Restaurant
Din Tai Fung 鼎泰豐 at 1 Utama Shopping Centre
Dancing Fish
Din Tai Fung 鼎泰豐 at Pavilion KL


In [319]:
recommend("After Black")

Restaurant Ban Lee Siang
Antipodean @ Atria
Nancy's Kitchen
March Azalea Kitchen
Antipodean Cafe


In [320]:
# Testing
# Building User-Item Interactions Matrix
author_restaurant_matrix = df_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

MemoryError: Unable to allocate 12.4 MiB for an array with shape (1321, 1226) and data type float64

In [321]:
#Building Item-based Collaborative Filtering 
author_ratings = author_restaurant_matrix['越南小廚 V NAM KITCHEN']
author_ratings.head(10)

Author
5525 Gunner    0.0
6od5p33d       0.0
A 10           0.0
A K            0.0
A L            0.0
A P            0.0
A Y            0.0
A.             0.0
A.L Lim        0.0
AL Lim         0.0
Name: 越南小廚 V NAM KITCHEN, dtype: float64

In [322]:
author_ratings = author_restaurant_matrix['我家餐館Our Kitchen Nyonya Restaurant']
author_ratings.head(10)

Author
5525 Gunner    0.0
6od5p33d       0.0
A 10           0.0
A K            0.0
A L            0.0
A P            0.0
A Y            0.0
A.             0.0
A.L Lim        0.0
AL Lim         0.0
Name: 我家餐館Our Kitchen Nyonya Restaurant, dtype: float64

In [323]:
# Finding Correlations Between A Selected Restaurant And All Other Restaurant
similar_restaurants = author_restaurant_matrix.corrwith(author_ratings)
similar_restaurants

Restaurant
'D' Selera Kelate                                                -0.002265
16th St. Cafe                                                    -0.011501
1919 Restaurant Ipoh                                             -0.008465
20 Chulia Lane Cafe                                              -0.006661
21 Bistro                                                        -0.002265
                                                                    ...   
田園粥火锅 Farmland Porridge Steamboat                                -0.003205
相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）   -0.003994
越南小廚 V NAM KITCHEN                                               -0.010257
青山角 / Green Hill Corner                                          -0.009433
食得福美食中心Cedar Point Food Centre                                    0.039886
Length: 1226, dtype: float64

In [324]:
# Create a dataframe with similar restaurants as the index column and name another column as correlation
similar_restaurants = pd.DataFrame(similar_restaurants, columns = ['correlation'])
similar_restaurants.head(10)

Unnamed: 0_level_0,correlation
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,-0.002265
16th St. Cafe,-0.011501
1919 Restaurant Ipoh,-0.008465
20 Chulia Lane Cafe,-0.006661
21 Bistro,-0.002265
218 Hainan Lor Mee,-0.006625
27@cove,-0.003907
28 Food Centre,-0.011424
3 :15 Auntie Hong's Cooking,-0.002265
33 Blue Room,-0.015783


In [325]:
# Recommending the Most Similar Restaurants
similar_restaurants.sort_values('correlation', ascending = False).head(10)

Unnamed: 0_level_0,correlation
Restaurant,Unnamed: 1_level_1
我家餐館Our Kitchen Nyonya Restaurant,1.0
Nasi Atan,0.317769
Breeks Cafe,0.236325
Wood's,0.224082
Temptations Kitchen + Bar,0.202274
New Water Garden Hawker Centre LGK,0.186918
Terragrill Satay Cafe,0.180415
Eastern Dragon Restaurant,0.175666
"Favola , Le Méridien Kuala Lumpur",0.155517
Restoran Krishna Bhawan,0.14047


In [328]:
# Recommending the Most Similar Popular Restaurants
df_rating = pd.DataFrame(data.groupby('Restaurant')['Rating'].count())  
df_rating.head(10)

Unnamed: 0_level_0,Rating
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,11
16th St. Cafe,134
1919 Restaurant Ipoh,142
20 Chulia Lane Cafe,62
21 Bistro,1
218 Hainan Lor Mee,75
27@cove,53
28 Food Centre,136
3 :15 Auntie Hong's Cooking,18
33 Blue Room,300


In [329]:
similar_restaurants = similar_restaurants.join(df_rating['Rating']).sort_values('correlation', ascending = False)
similar_restaurants

ValueError: columns overlap but no suffix specified: Index(['Rating'], dtype='object')

In [286]:
# Recommending 20 Most Similar Popular Movies 
# Similar restaurants like 我家餐館Our Kitchen Nyonya Restaurant 
similar_movies_2 = similar_restaurants[similar_restaurants['Rating'] > 100].sort_values('correlation', ascending = False)
similar_movies_2.head(20)

Unnamed: 0_level_0,correlation,Rating
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1
我家餐館Our Kitchen Nyonya Restaurant,1.0,133
"Favola , Le Méridien Kuala Lumpur",0.155517,197
Presgrave Street Hawker Centre,0.139298,256
Moh Teng Pheow Nyonya Koay,0.134354,300
Mighty Monster IPC,0.133745,112
Restaurant Caterbest Sdn. Bhd.,0.123925,114
Restoran Shabu Shabu Ipoh,0.123675,254
My Own Cafe,0.12278,222
MAYHIANG SEAFOOD RESTAURANT,0.121773,128
Swood Smokehouse Texas BBQ,0.11606,155


In [330]:
# Building User-based Collaborative Filtering
author_restaurant_matrix = df_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5525 Gunner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6od5p33d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A 10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [331]:
restaurant_author_matrix = author_restaurant_matrix.transpose()
restaurant_author_matrix.head(10)

Author,5525 Gunner,6od5p33d,A 10,A K,A L,A P,A Y,A.,A.L Lim,AL Lim,...,κεηηγsκ,さなえ,パイパイ,レミィRemmy,兴哥Heng Gor,几米林Jimmy,小虫WeiXiang,暝纥Enoch,洪佳武,纯粹享
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'D' Selera Kelate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1919 Restaurant Ipoh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Chulia Lane Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21 Bistro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218 Hainan Lor Mee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
27@cove,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 :15 Auntie Hong's Cooking,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [332]:
# Identify Ratings For Restaurants Per Author
restaurant_ratings = restaurant_author_matrix['5525 Gunner']
restaurant_ratings.head(10)

Restaurant
'D' Selera Kelate              0.0
16th St. Cafe                  0.0
1919 Restaurant Ipoh           0.0
20 Chulia Lane Cafe            0.0
21 Bistro                      0.0
218 Hainan Lor Mee             0.0
27@cove                        0.0
28 Food Centre                 0.0
3 :15 Auntie Hong's Cooking    0.0
33 Blue Room                   0.0
Name: 5525 Gunner, dtype: float64

In [333]:
restaurant_ratings = restaurant_author_matrix['几米林Jimmy']
restaurant_ratings.head(10)

Restaurant
'D' Selera Kelate              0.0
16th St. Cafe                  0.0
1919 Restaurant Ipoh           0.0
20 Chulia Lane Cafe            0.0
21 Bistro                      0.0
218 Hainan Lor Mee             5.0
27@cove                        0.0
28 Food Centre                 0.0
3 :15 Auntie Hong's Cooking    0.0
33 Blue Room                   0.0
Name: 几米林Jimmy, dtype: float64

In [334]:
# Find correlation between authors based on the ratings of restaurants
similar_authors = restaurant_author_matrix.corrwith(restaurant_ratings)

# Create a dataframe with similar movies as the index column and correlation as another column
similar_authors = pd.DataFrame(similar_authors, columns = ['correlation'])
similar_authors.head(10)

Unnamed: 0_level_0,correlation
Author,Unnamed: 1_level_1
5525 Gunner,0.01503
6od5p33d,-0.016364
A 10,-0.014831
A K,-0.016404
A L,-0.015046
A P,-0.01921
A Y,-0.013407
A.,-0.015955
A.L Lim,0.062688
AL Lim,-0.018188


In [335]:
# Recommending Restaurants Based on The Most Similar Authors
most_similar_authors = similar_authors.sort_values('correlation', ascending = False).iloc[1:11]
most_similar_authors

Unnamed: 0_level_0,correlation
Author,Unnamed: 1_level_1
Ai Li Yeap,0.329091
Jamie X.M Lee,0.324361
es L,0.321067
Goon Hoong Tatt,0.311494
Kah Min,0.265913
洪佳武,0.261874
Andus Yeap,0.254917
H007 Tan,0.248574
hongkee fong,0.245699
Sirhc,0.240313


In [336]:
# Extract Author of the most similar users
authors = most_similar_authors.index.values.tolist()
authors[0]

'Ai Li Yeap'

In [337]:
recommendation = df_data[df_data['Author'] == authors[0]]
recommendation.head(10)

Unnamed: 0,Author,Review,Rating,Restaurant,Location
106528,Ai Li Yeap,Nice ambience with the red bricks industrial f...,5.0,Cheang Kee Restaurant,Penang
106671,Ai Li Yeap,A pleasant discovery. Serves nice steamboat wi...,5.0,Cheang Kee Restaurant,Penang
110948,Ai Li Yeap,"If you are a duck fan, this is the place to go...",5.0,What The Duck Restaurant,Penang
115391,Ai Li Yeap,Pleasant discovery with nice Instagramable sur...,4.0,Bean Sprout Café,Penang
116497,Ai Li Yeap,If you're looking for a place that serves the ...,5.0,Moh Teng Pheow Nyonya Koay,Penang
116720,Ai Li Yeap,Some say old is gold. I love this place that s...,5.0,Yeng Keng Café,Penang
118566,Ai Li Yeap,All time favourite place for their sour & hot ...,4.0,Din Tai Fung 鼎泰豐 At Penang Gurney Plaza,Penang
120426,Ai Li Yeap,My to go place for authentic Nyonya food. Alwa...,5.0,Winn's Cafe,Penang
124552,Ai Li Yeap,One of my favourite Japanese restaurant. They ...,5.0,Miraku G Kelawai,Penang
126050,Ai Li Yeap,Service was excellent. However in terms of foo...,3.0,CREAM by ChinChin,Penang


In [338]:
# Data Frame Slicing by Condition
recommendation = df_data.loc[(df_data['Author'] == authors[0]) & (df_data['Rating'] > 0), ['Restaurant', 'Rating']]
recommendation.head(10)

Unnamed: 0,Restaurant,Rating
106528,Cheang Kee Restaurant,5.0
106671,Cheang Kee Restaurant,5.0
110948,What The Duck Restaurant,5.0
115391,Bean Sprout Café,4.0
116497,Moh Teng Pheow Nyonya Koay,5.0
116720,Yeng Keng Café,5.0
118566,Din Tai Fung 鼎泰豐 At Penang Gurney Plaza,4.0
120426,Winn's Cafe,5.0
124552,Miraku G Kelawai,5.0
126050,CREAM by ChinChin,3.0
